In [5]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# ================================================
# KAGGLE LLM CLASSIFICATION COMPETITION
# FIRST SUBMISSION - SIMPLE BASELINE
# ================================================
# ================================================
# KAGGLE LLM CLASSIFICATION COMPETITION
# FIRST SUBMISSION - CORRECTED VERSION
# ================================================

# Title banner for the notebook run.
print(
    "\n" + "=" * 50,
    "LLM CLASSIFICATION COMPETITION - FIRST SUBMISSION",
    "=" * 50,
    sep="\n",
)

# Locations of the competition CSVs inside the Kaggle input directory.
train_path = '/kaggle/input/llm-classification-finetuning/train.csv'
test_path = '/kaggle/input/llm-classification-finetuning/test.csv'
print("\nüìÇ Loading data...")

# Only the training file is probed; everything in the else-branch assumes
# both CSVs can be read.
if not os.path.exists(train_path):
    print("‚ùå ERROR: train.csv not found!")
else:
    # Read both tables into DataFrames.
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    # Report how many rows each table has.
    print("‚úÖ Data loaded successfully!")
    print(f"   Training samples: {train_df.shape[0]:,}")
    print(f"   Test samples: {test_df.shape[0]:,}")

    # Column names of each table.
    print("\nüìä Training data columns:", f"   {train_df.columns.tolist()}", sep="\n")
    print("\nüìä Test data columns:", f"   {test_df.columns.tolist()}", sep="\n")

    # First rows of each table.
    print("\nüìã Sample of training data:", train_df.head(5), sep="\n")
    print("\nüìã Sample of test data:", test_df.head(5), sep="\n")
    
    # ================================================
    # UNDERSTAND THE TARGET
    # ================================================
    # Static explanation of the three label columns.
    print(
        "\nüéØ Understanding target columns...",
        "We have THREE target columns:",
        "1. winner_model_a - Probability model A wins",
        "2. winner_model_b - Probability model B wins",
        "3. winner_tie - Probability of tie",
        sep="\n",
    )

    # Mean of each label column over the training set, printed to 3 decimals.
    mean_a = train_df['winner_model_a'].mean()
    mean_b = train_df['winner_model_b'].mean()
    mean_tie = train_df['winner_tie'].mean()
    print("\nüìà Training target summary:")
    print(f"   winner_model_a mean: {mean_a:.3f}")
    print(f"   winner_model_b mean: {mean_b:.3f}")
    print(f"   winner_tie mean: {mean_tie:.3f}")

    # First five rows of the label columns as they appear in the data.
    print("\nüìä Sample targets from training data:")
    sample_targets = train_df.loc[:, ['winner_model_a', 'winner_model_b', 'winner_tie']].head(5)
    print(sample_targets)
    
    # ================================================
    # SIMPLE BASELINE MODEL
    # ================================================
    print("\n" + "=" * 50, "CREATING SIMPLE BASELINE MODEL", "=" * 50, sep="\n")

    # Strategy 1: predict the same three numbers for every test row, namely
    # the mean of each label column in the training data.
    print("\nü§ñ Strategy 1: Using training averages...")

    avg_model_a = train_df['winner_model_a'].mean()
    avg_model_b = train_df['winner_model_b'].mean()
    avg_tie = train_df['winner_tie'].mean()

    print(
        "   Average probabilities from training:",
        f"   Model A wins: {avg_model_a:.3f}",
        f"   Model B wins: {avg_model_b:.3f}",
        f"   Tie: {avg_tie:.3f}",
        sep="\n",
    )

    # One constant-valued column per label, each as long as the test set.
    n_test_rows = len(test_df)
    constant_columns = {
        'winner_model_a': avg_model_a,
        'winner_model_b': avg_model_b,
        'winner_tie': avg_tie,
    }
    submission_avg = pd.DataFrame({
        'id': test_df['id'],
        **{name: [value] * n_test_rows for name, value in constant_columns.items()},
    })

    # Strategy 2: Length-based heuristic
    print("\nüìè Strategy 2: Length-based heuristic...")
    
    def get_length_based_probabilities(response_a, response_b):
        """Turn the relative lengths of two responses into (A, B, tie) probabilities.

        The longer response receives the larger win probability. Lengths are
        measured in characters of ``str(value)``; a missing value (``None`` or
        NaN, as pandas produces for empty CSV cells) counts as length 0
        instead of being measured as the text "None"/"nan".

        Args:
            response_a: Response text of model A (any value; non-strings are
                stringified, missing scalars count as empty).
            response_b: Response text of model B, handled the same way.

        Returns:
            A ``(prob_a, prob_b, prob_tie)`` tuple. When both lengths are 0
            the fixed fallback ``(0.33, 0.33, 0.34)`` is returned; otherwise
            the three values are rescaled so that they sum to 1.
        """
        def _text_length(value):
            # A missing cell must not be credited with the 3-4 characters of
            # its string representation ("nan" / "None").
            if pd.api.types.is_scalar(value) and pd.isna(value):
                return 0
            return len(str(value))

        len_a = _text_length(response_a)
        len_b = _text_length(response_b)

        # Nothing to compare: fall back to a near-uniform split.
        total_len = len_a + len_b
        if total_len == 0:
            return 0.33, 0.33, 0.34

        # Map each length share from [0, 1] into [0.1, 0.9].
        prob_a = len_a / total_len * 0.8 + 0.1
        prob_b = len_b / total_len * 0.8 + 0.1

        # prob_a + prob_b equals 1.0 up to rounding, so the residual is ~0 and
        # the clamp below always lifts the tie weight to its 0.1 floor.
        prob_tie = 1.0 - prob_a - prob_b
        prob_tie = max(0.1, min(0.8, prob_tie))

        # Renormalise so the three values sum to 1.
        scale = 1.0 / (prob_a + prob_b + prob_tie)
        return prob_a * scale, prob_b * scale, prob_tie * scale
    
    # Apply the heuristic to every test row.
    # The two response columns are iterated directly rather than through
    # DataFrame.apply(axis=1): on an empty frame apply() hands back the empty
    # DataFrame itself, so iterating the result would yield column labels
    # instead of probability tuples. Walking the columns is also cheaper than
    # materialising a Series per row.
    probability_rows = [
        get_length_based_probabilities(resp_a, resp_b)
        for resp_a, resp_b in zip(test_df['response_a'], test_df['response_b'])
    ]

    # Keep length_predictions as a Series of (a, b, tie) tuples aligned with
    # test_df, the same shape of object the apply() call produced.
    length_predictions = pd.Series(probability_rows, index=test_df.index, dtype=object)

    # One column per outcome, keyed by the test row id.
    submission_length = pd.DataFrame({
        'id': test_df['id'],
        'winner_model_a': [p[0] for p in probability_rows],
        'winner_model_b': [p[1] for p in probability_rows],
        'winner_tie': [p[2] for p in probability_rows]
    })
    
    # ================================================
    # CHOOSE FINAL SUBMISSION
    # ================================================
    print("\n" + "="*50)
    print("FINAL SUBMISSION CREATION")
    print("="*50)

    # Let's use length-based (more interesting)
    final_submission = submission_length.copy()

    print("\nüìã Final submission preview:")
    print(final_submission.head())

    # Verify probabilities sum to 1.
    # The row sums are kept in their own Series rather than as a temporary
    # column, so the shape reported below is the shape of what gets written.
    sum_check = (
        final_submission['winner_model_a']
        + final_submission['winner_model_b']
        + final_submission['winner_tie']
    )

    print("\n‚úÖ Submission verification:")
    print(f"   Shape: {final_submission.shape}")
    # Use an explicit absolute tolerance: with only rtol given, np.allclose
    # still applies its default atol=1e-8, which would dominate the check.
    print(f"   All probabilities sum to 1? {np.allclose(sum_check, 1.0, rtol=0.0, atol=1e-10)}")
    print(f"   Min sum: {sum_check.min():.10f}")
    print(f"   Max sum: {sum_check.max():.10f}")

    # Show distribution of predictions
    print("\nüìä Prediction summary:")
    print(f"   Model A win probability: mean={final_submission['winner_model_a'].mean():.3f}, std={final_submission['winner_model_a'].std():.3f}")
    print(f"   Model B win probability: mean={final_submission['winner_model_b'].mean():.3f}, std={final_submission['winner_model_b'].std():.3f}")
    print(f"   Tie probability: mean={final_submission['winner_tie'].mean():.3f}, std={final_submission['winner_tie'].std():.3f}")

    # ================================================
    # SAVE SUBMISSION FILE
    # ================================================
    # Save to CSV (id plus the three probability columns, no index column)
    output_path = '/kaggle/working/submission.csv'
    final_submission.to_csv(output_path, index=False)

    print(f"\nüíæ Submission saved to: {output_path}")
    print(f"   File size: {os.path.getsize(output_path):,} bytes")

    # Show first few lines of file; the encoding is explicit so the read-back
    # does not depend on the platform default.
    print("\nüìÑ First 3 lines of submission.csv:")
    with open(output_path, 'r', encoding='utf-8') as f:
        # zip with range(3) stops after at most three lines.
        for _, line in zip(range(3), f):
            print(f"   {line.strip()}")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


LLM CLASSIFICATION COMPETITION - FIRST SUBMISSION

üìÇ Loading data...
‚úÖ Data loaded successfully!
   Training samples: 57,477
   Test samples: 3

üìä Training data columns:
   ['id', 'model_a', 'model_b', 'prompt', 'response_a', 'response_b', 'winner_model_a', 'winner_model_b', 'winner_tie']

üìä Test data columns:
   ['id', 'prompt', 'response_a', 'response_b']

üìã Sample of training data:
       id             model_a              model_b  \
0   30192  gpt-4-1106-preview           gpt-4-0613   
1   53567           koala-13b           gpt-4-0613   
2   65089  gpt-3.5-turbo-0613       mistral-medium   
3   96401    llama-2-13b-chat  mistral-7b-instruct   
4  198779           koala-13b   gpt-3.5-turbo-0314   

                                              prompt  \
0  ["Is it morally right to try to have a certain...   
1  ["What is the difference between marriage lice...   
2  ["explain function calling. how would you call...   
3  ["How can I create a test set for a very rare