# Team7 Assiginment2

## Step1. Setting and Dataset Load

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [3]:
import joblib, os, numpy as np, pandas as pd
import torch

LOCAL, KAGGLE = "..", "/kaggle/input/llm-classification-finetuning"
DATA = LOCAL if os.path.exists("../datasets/train.csv") else KAGGLE
train = pd.read_csv(f"{DATA}/datasets/train.csv")
test  = pd.read_csv(f"{DATA}/datasets/test.csv")
sample = pd.read_csv(f"{DATA}/datasets/sample_submission.csv")

need = {"prompt","response_a","response_b","winner_model_a","winner_model_b","winner_tie"}
assert need.issubset(set(train.columns)), f"column: {need - set(train.columns)} is missing in train.csv"
print("DATA:", DATA, train.shape, test.shape)

# target (y)
# 0: model_a win, 1: model_b win, 2: tie
y = train[["winner_model_a", "winner_model_b", "winner_tie"]].values.argmax(1)

device = "cuda" if torch.cuda.is_available() else "cpu"
random_state = 20010815
val_size = 0.2

DATA: .. (57477, 9) (3, 4)


In [4]:

### Global Functions ###
import time
import random

# Set random seeds for reproducibility
np.random.seed(random_state)
random.seed(random_state)
torch.manual_seed(random_state)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(random_state)

from sentence_transformers import SentenceTransformer
def load_model(candidates, idx=0, device="cpu"):
    # Load
    last_err = None
    path = candidates[idx]
    try:
        print("try:", path)
        model = SentenceTransformer(path, device=device)
        print("loaded model from:", path)
        return model, path
    except Exception as e:
        last_err = e
    raise RuntimeError("Failed to load model. In Kaggle, you need to upload the model folder to Datasets and then link it via 'Add data'. Last error: " + str(last_err))


def build_feat(P, A, B):
    AB_diff = A - B
    AB_adiff = np.abs(AB_diff)
    AB_mul = A * B
    PA_mul = P * A
    PB_mul = P * B
    return np.hstack([P, A, B, AB_diff, AB_adiff, AB_mul, PA_mul, PB_mul])


def l2norm(a, eps=1e-12):
    n = np.linalg.norm(a, axis=1, keepdims=True)
    n = np.clip(n, eps, None)
    return a / n

def encode_texts(model, texts, batch_size=256):
    vecs = []
    total_texts = len(texts)
    total_batches = (total_texts + batch_size - 1) // batch_size

    for i in range(0, len(texts), batch_size):
        start_time = time.time()
        batch = texts[i:i+batch_size].tolist() if isinstance(texts, pd.Series) else texts[i:i+batch_size]
        v = model.encode(batch, batch_size=len(batch), convert_to_numpy=True, normalize_embeddings=False, show_progress_bar=False)
        vecs.append(v)

        batch_num = (i // batch_size) + 1
        print(f"{batch_num}/{total_batches} | time: {time.time() - start_time:.2f}s", end='\r', flush=True)
    V = np.vstack(vecs)
    return l2norm(V)


def create_and_save_submission(predictions, filename, test_df, sample_df):
    """
    Creates a Kaggle submission file from model predictions.
    Then, it normalizes the probabilities, performs validation checks, and saves the file.
    Args:
        predictions (np.array): return value of predict_proba() (N, 3)
        filename (str): csv filename to save the submission.
        test_df (pd.DataFrame): dataframe containing 'id' column.
        sample_df (pd.DataFrame): dataframe to align columns with sample submission.
    """
    print(f"Creating submission file: {filename}...")
    
    # 1. Save Submission File
    sub_df = pd.DataFrame({
        "id": test_df["id"],
        "winner_model_a": predictions[:, 0],
        "winner_model_b": predictions[:, 1],
        "winner_tie":     predictions[:, 2],
    })

    # 2. Normalization check (safety)
    probs = sub_df[["winner_model_a", "winner_model_b", "winner_tie"]].values
    row_sums = probs.sum(axis=1, keepdims=True)
    probs = probs / np.clip(row_sums, 1e-15, None)
    sub_df[["winner_model_a", "winner_model_b", "winner_tie"]] = probs

    # 3. Align columns with sample submission
    try:
        sub_df = sub_df[sample_df.columns]
    except KeyError as e:
        print(f"Warning: Columns in sample_df not found. Saving with default columns. Error: {e}")

    # 4. Save
    sub_df.to_csv(filename, index=False)

    # 5. Assertions to check file integrity
    try:
        chk = pd.read_csv(filename)
        
        assert list(chk.columns) == list(sample_df.columns), \
            f"Column mismatch. Expected: {list(sample_df.columns)}, Got: {list(chk.columns)}"
        
        assert not chk.isna().any().any(), "NaN values found in submission file."
        
        prob_cols = ["winner_model_a", "winner_model_b", "winner_tie"]
        assert np.allclose(chk[prob_cols].sum(1).values, 1.0), \
            "Probabilities do not sum to 1.0 for all rows."
            
        print(f"Successfully saved and verified: {filename} (Shape: {sub_df.shape})")
        
    except FileNotFoundError:
        print(f"Error: File not found after saving: {filename}")
    except AssertionError as e:
        print(f"Error: Submission file verification failed! {e}")
    
    return sub_df

def build_strong_lexical_features(df):
    """Builds the full set of lexical and bias features."""
    rows = []
    cols = ["prompt", "response_a", "response_b"]
    
    for p, a, b in zip(df[cols[0]], df[cols[1]], df[cols[2]]):
        ps, as_, bs = stats_strong(p), stats_strong(a), stats_strong(b)
        rows.append({
            "p_len_char": ps["len_char"], "p_len_tok": ps["len_tok"], "p_num_sent": ps["num_sent"],
            "a_len_char": as_["len_char"], "a_len_tok": as_["len_tok"], "a_num_sent": as_["num_sent"],
            "a_num_code": as_["num_code"], "a_num_list": as_["num_list"], "a_num_upper": as_["num_upper"],
            "a_num_punct": as_["num_punct"], "a_avg_tok_len": as_["avg_tok_len"],
            "b_len_char": bs["len_char"], "b_len_tok": bs["len_tok"], "b_num_sent": bs["num_sent"],
            "b_num_code": bs["num_code"], "b_num_list": bs["num_list"], "b_num_upper": bs["num_upper"],
            "b_num_punct": bs["num_punct"], "b_avg_tok_len": bs["avg_tok_len"],
            # A-B Differences
            "d_len_char": as_["len_char"] - bs["len_char"],
            "d_len_tok": as_["len_tok"] - bs["len_tok"],
            "d_num_sent": as_["num_sent"] - bs["num_sent"],
            "d_num_code": as_["num_code"] - bs["num_code"],
            "d_num_list": as_["num_list"] - bs["num_list"],
            "d_num_upper": as_["num_upper"] - bs["num_upper"],
            "d_num_punct": as_["num_punct"] - bs["num_punct"],
            "d_avg_tok_len": as_["avg_tok_len"] - bs["avg_tok_len"],
            # Ratios
            "r_len_char": (as_["len_char"] + 1) / (bs["len_char"] + 1),
            "r_len_tok": (as_["len_tok"] + 1) / (bs["len_tok"] + 1),
            "r_num_sent": (as_["num_sent"] + 1) / (bs["num_sent"] + 1),
        })
    return pd.DataFrame(rows)

# Define the "Strong" lexical feature builder
def stats_strong(s):
    """Calculates a comprehensive set of lexical statistics."""
    if not isinstance(s, str): s = ""
    toks = s.split()
    return {
        "len_char": len(s),
        "len_tok": len(toks),
        "num_sent": sum(s.count(x) for x in [".", "!", "?"]),
        "num_code": s.count("`"),
        "num_list": s.count("- ") + s.count("* "),
        "num_upper": sum(ch.isupper() for ch in s),
        "num_punct": sum(ch in ",;:()" for ch in s),
        "avg_tok_len": (sum(len(t) for t in toks) / len(toks)) if toks else 0.0,
    }

def preprocess_function(examples):
    # This formats the input as: [CLS] prompt [SEP] A: response_a [SEP] B: response_b [SEP]
    # This is a robust way to present the three pieces of text to the model
    
    # Combine response_a and response_b into a single string
    response_pair = [f"A: {a} {tokenizer.sep_token} B: {b}" for a, b in zip(examples['response_a'], examples['response_b'])]
    
    # Tokenize, using prompt as the first sequence and the combined response as the second
    tokenized_inputs = tokenizer(
        examples['prompt'],
        response_pair, # This will be the second sequence
        max_length=max_length,
        truncation=True, # Need to consider whitch option is better
        padding=False # DataCollator will handle dynamic padding
    )
    
    # Add labels
    tokenized_inputs["labels"] = examples["labels"]
    return tokenized_inputs


print("All functions loaded.")


All functions loaded.


### Step 2. Model Download

In [4]:
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os

MODELS = {
    "deberta-v3-base": "microsoft/deberta-v3-base",
    "e5-base-v2":      "intfloat/e5-base-v2",
}

BASE_DIR = "../models"
os.makedirs(BASE_DIR, exist_ok=True)

for name, hub_path in MODELS.items():
    save_path = os.path.join(BASE_DIR, name)
    if os.path.exists(save_path) and os.listdir(save_path):
        print(f"[skip] {name} already exists → {save_path}")
        continue

    print(f"[download] {name} from {hub_path}")
    try:
        # 1) SentenceTransformers 
        print(" Trying SentenceTransformer...")
        st_model = SentenceTransformer(hub_path)
        st_model.save(save_path)
        print(f" -> saved (sentence-transformers) to {save_path}")
        continue
    except Exception as e1:
        # 2) Hugging Face transformers
        print(" Trying HuggingFace transformers...")
        try:
            tokenizer = AutoTokenizer.from_pretrained(hub_path)
            model = AutoModelForSequenceClassification.from_pretrained(hub_path)
            tokenizer.save_pretrained(save_path)
            model.save_pretrained(save_path)
            print(f" -> saved (transformers) to {save_path}")
        except Exception as e2:
            print(f"[fail] {name}: {e2}")

print("=== Model Download Complete (existing ones skipped) ===")

[skip] deberta-v3-base already exists → ../models/deberta-v3-base
[skip] e5-base-v2 already exists → ../models/e5-base-v2
=== Model Download Complete (existing ones skipped) ===


### Candidate 1: DeBERTa + LoRA ###

In [5]:
### Candidate 1: DeBERTa + LoRA ###
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
)
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType
)
from datasets import Dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from scipy.special import softmax

# MODEL_NAME = f"microsoft/deberta-v3-base"
MODEL_NAME = f"deberta-v3-base"
max_length = 512 # Max length(tokens) for DeBERTa

LORA_ADAPTER_DIR = f"../models/lora_adapter_{MODEL_NAME.split('/')[-1]}"

print(f"Using model: {MODEL_NAME}")
print(f"LoRA adapter will be saved to: {LORA_ADAPTER_DIR}")

train_df_lora = train.copy()
train_df_lora['labels'] = y

train_df, val_df = train_test_split(
    train_df_lora,
    test_size=val_size,

    stratify=train_df_lora['labels']
)
print(f"Training samples: {len(train_df)}, Validation samples: {len(val_df)}")

# Convert to Hugging Face 'Dataset' object
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

# Load the tokenizer for our model
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model_path = f"../models/{MODEL_NAME}"
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
# model = AutoModelForSequenceClassification.from_pretrained(model_path, local_files_only=True)

def preprocess_function(examples):
    # This formats the input as: [CLS] prompt [SEP] A: response_a [SEP] B: response_b [SEP]
    # This is a robust way to present the three pieces of text to the model
    
    # Combine response_a and response_b into a single string
    response_pair = [f"A: {a} {tokenizer.sep_token} B: {b}" for a, b in zip(examples['response_a'], examples['response_b'])]
    
    # Tokenize, using prompt as the first sequence and the combined response as the second
    tokenized_inputs = tokenizer(
        examples['prompt'],
        response_pair, # This will be the second sequence
        max_length=max_length,
        truncation=True, # Need to consider whitch option is better
        padding=False # DataCollator will handle dynamic padding
    )
    
    # Add labels
    tokenized_inputs["labels"] = examples["labels"]
    return tokenized_inputs

print("Tokenizing datasets...")
tokenized_train_dataset = train_dataset.map(preprocess_function, batched=True, remove_columns=train_df.columns.tolist())
tokenized_val_dataset = val_dataset.map(preprocess_function, batched=True, remove_columns=val_df.columns.tolist())

# Data collator will dynamically pad batches to the max length *in that batch*
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

print("Data preparation complete.")


Using model: deberta-v3-base
LoRA adapter will be saved to: ../models/lora_adapter_deberta-v3-base
Training samples: 45981, Validation samples: 11496


Tokenizing datasets...



Map:   0%|                                                                                                                                                                         | 0/45981 [00:00<?, ? examples/s]


Map:   2%|███▍                                                                                                                                                        | 1000/45981 [00:00<00:24, 1870.88 examples/s]


Map:   4%|██████▊                                                                                                                                                     | 2000/45981 [00:01<00:22, 1916.83 examples/s]


Map:   7%|██████████▏                                                                                                                                                 | 3000/45981 [00:01<00:23, 1856.18 examples/s]


Map:   9%|█████████████▌                                                                                                                                              | 4000/45981 [00:02<00:21, 1935.92 examples/s]


Map:  11%|████████████████▉                                                                                                                                           | 5000/45981 [00:02<00:19, 2084.20 examples/s]


Map:  13%|████████████████████▎                                                                                                                                       | 6000/45981 [00:03<00:23, 1732.82 examples/s]


Map:  15%|███████████████████████▋                                                                                                                                    | 7000/45981 [00:03<00:22, 1761.88 examples/s]


Map:  17%|███████████████████████████▏                                                                                                                                | 8000/45981 [00:04<00:24, 1573.91 examples/s]


Map:  20%|██████████████████████████████▌                                                                                                                             | 9000/45981 [00:05<00:21, 1734.93 examples/s]


Map:  22%|█████████████████████████████████▋                                                                                                                         | 10000/45981 [00:05<00:19, 1811.32 examples/s]


Map:  24%|█████████████████████████████████████                                                                                                                      | 11000/45981 [00:06<00:18, 1882.89 examples/s]


Map:  26%|████████████████████████████████████████▍                                                                                                                  | 12000/45981 [00:06<00:17, 1917.79 examples/s]


Map:  28%|███████████████████████████████████████████▊                                                                                                               | 13000/45981 [00:07<00:17, 1928.75 examples/s]


Map:  30%|███████████████████████████████████████████████▏                                                                                                           | 14000/45981 [00:07<00:15, 1999.56 examples/s]


Map:  33%|██████████████████████████████████████████████████▌                                                                                                        | 15000/45981 [00:07<00:14, 2142.02 examples/s]


Map:  35%|█████████████████████████████████████████████████████▉                                                                                                     | 16000/45981 [00:08<00:13, 2146.06 examples/s]


Map:  37%|█████████████████████████████████████████████████████████▎                                                                                                 | 17000/45981 [00:08<00:13, 2163.17 examples/s]


Map:  39%|████████████████████████████████████████████████████████████▋                                                                                              | 18000/45981 [00:09<00:12, 2205.78 examples/s]


Map:  41%|████████████████████████████████████████████████████████████████                                                                                           | 19000/45981 [00:09<00:11, 2306.02 examples/s]


Map:  43%|███████████████████████████████████████████████████████████████████▍                                                                                       | 20000/45981 [00:10<00:12, 2094.93 examples/s]


Map:  46%|██████████████████████████████████████████████████████████████████████▊                                                                                    | 21000/45981 [00:10<00:11, 2133.45 examples/s]


Map:  48%|██████████████████████████████████████████████████████████████████████████▏                                                                                | 22000/45981 [00:11<00:12, 1966.17 examples/s]


Map:  50%|█████████████████████████████████████████████████████████████████████████████▌                                                                             | 23000/45981 [00:11<00:11, 2066.30 examples/s]


Map:  52%|████████████████████████████████████████████████████████████████████████████████▉                                                                          | 24000/45981 [00:12<00:11, 1869.49 examples/s]


Map:  54%|████████████████████████████████████████████████████████████████████████████████████▎                                                                      | 25000/45981 [00:12<00:11, 1858.76 examples/s]


Map:  57%|███████████████████████████████████████████████████████████████████████████████████████▋                                                                   | 26000/45981 [00:13<00:12, 1661.39 examples/s]


Map:  59%|███████████████████████████████████████████████████████████████████████████████████████████                                                                | 27000/45981 [00:14<00:11, 1639.54 examples/s]


Map:  61%|██████████████████████████████████████████████████████████████████████████████████████████████▍                                                            | 28000/45981 [00:14<00:10, 1686.18 examples/s]


Map:  63%|█████████████████████████████████████████████████████████████████████████████████████████████████▊                                                         | 29000/45981 [00:15<00:10, 1677.27 examples/s]


Map:  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                     | 30000/45981 [00:15<00:09, 1733.38 examples/s]


Map:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                  | 31000/45981 [00:16<00:08, 1740.72 examples/s]


Map:  70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                               | 32000/45981 [00:16<00:07, 1825.78 examples/s]


Map:  72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 33000/45981 [00:17<00:06, 1883.75 examples/s]


Map:  74%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                        | 34000/45981 [00:18<00:07, 1702.93 examples/s]


Map:  76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                     | 35000/45981 [00:18<00:06, 1801.65 examples/s]


Map:  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                 | 36000/45981 [00:19<00:06, 1508.98 examples/s]


Map:  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                              | 37000/45981 [00:20<00:05, 1703.97 examples/s]


Map:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                           | 38000/45981 [00:20<00:04, 1878.39 examples/s]


Map:  85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                       | 39000/45981 [00:20<00:03, 1989.49 examples/s]


Map:  87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                    | 40000/45981 [00:21<00:03, 1984.60 examples/s]


Map:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                | 41000/45981 [00:21<00:02, 1861.51 examples/s]


Map:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌             | 42000/45981 [00:22<00:02, 1667.23 examples/s]


Map:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉          | 43000/45981 [00:23<00:01, 1799.45 examples/s]


Map:  96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎      | 44000/45981 [00:23<00:01, 1747.95 examples/s]


Map:  98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋   | 45000/45981 [00:24<00:00, 1824.66 examples/s]


Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 45981/45981 [00:24<00:00, 1817.02 examples/s]


Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 45981/45981 [00:24<00:00, 1851.93 examples/s]





Map:   0%|                                                                                                                                                                         | 0/11496 [00:00<?, ? examples/s]


Map:   9%|█████████████▌                                                                                                                                              | 1000/11496 [00:00<00:04, 2429.58 examples/s]


Map:  17%|███████████████████████████▏                                                                                                                                | 2000/11496 [00:00<00:04, 1968.91 examples/s]


Map:  26%|████████████████████████████████████████▋                                                                                                                   | 3000/11496 [00:01<00:04, 2107.97 examples/s]


Map:  35%|██████████████████████████████████████████████████████▎                                                                                                     | 4000/11496 [00:01<00:03, 2254.07 examples/s]


Map:  43%|███████████████████████████████████████████████████████████████████▊                                                                                        | 5000/11496 [00:02<00:02, 2250.54 examples/s]


Map:  52%|█████████████████████████████████████████████████████████████████████████████████▍                                                                          | 6000/11496 [00:02<00:02, 2297.26 examples/s]


Map:  61%|██████████████████████████████████████████████████████████████████████████████████████████████▉                                                             | 7000/11496 [00:03<00:02, 1868.39 examples/s]


Map:  70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                               | 8000/11496 [00:03<00:01, 1892.56 examples/s]


Map:  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                 | 9000/11496 [00:04<00:01, 1799.71 examples/s]


Map:  87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                    | 10000/11496 [00:05<00:00, 1795.76 examples/s]


Map:  96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎      | 11000/11496 [00:05<00:00, 1924.02 examples/s]


Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11496/11496 [00:05<00:00, 1895.08 examples/s]


Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11496/11496 [00:05<00:00, 1971.31 examples/s]

Data preparation complete.





In [6]:
# Fine-tuning with LoRA (NO Quantization)
# FINAL SOLUTION: Skip quantization entirely - it has compatibility issues with DeBERTa-v3
# Use gradient checkpointing for memory efficiency

# Check if LoRA adapter already exists
if os.path.exists(LORA_ADAPTER_DIR) and os.path.exists(os.path.join(LORA_ADAPTER_DIR, "adapter_config.json")):
    print(f"LoRA adapter already exists at: {LORA_ADAPTER_DIR}")
    print("Skipping training and loading existing adapter...")
    
    # Load base model
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path,
        num_labels=3,
        local_files_only=True
    )
    
    # Load the saved LoRA adapter
    from peft import PeftModel
    peft_model = PeftModel.from_pretrained(model, LORA_ADAPTER_DIR)
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(LORA_ADAPTER_DIR)
    
    # Create a minimal trainer for prediction only
    training_args = TrainingArguments(
        output_dir=f"../models/{MODEL_NAME.split('/')[-1]}-checkpoints",
        per_device_eval_batch_size=8,
        fp16=False,
        report_to="none",
    )
    
    trainer = Trainer(
        model=peft_model,
        args=training_args,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )
    
    print("Successfully loaded existing LoRA adapter.")
else:
    print("No existing LoRA adapter found. Starting training from scratch...")
    
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path,
        num_labels=3,
        local_files_only=True
    )

    # Enable gradient checkpointing to save memory
    model.gradient_checkpointing_enable()

    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=[
            "query_proj", 
            "key_proj", 
            "value_proj",
            "dense"
        ],
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.SEQ_CLS,
        inference_mode=False
    )

    # Apply LoRA to the model
    peft_model = get_peft_model(model, lora_config)
    peft_model.print_trainable_parameters()

    # --- Define Custom Compute Metrics ---
    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        probs = softmax(logits, axis=1)
        
        eps = 1e-15
        probs = np.clip(probs, eps, 1 - eps)
        
        loss = log_loss(labels, probs)
        return {"log_loss": loss}

    # --- Define Training Arguments ---
    training_args = TrainingArguments(
        output_dir=f"../models/{MODEL_NAME.split('/')[-1]}-checkpoints",
        num_train_epochs=2,
        per_device_train_batch_size=4,  # Reduced for memory
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=2,  # Effective batch size = 4*2 = 8
        learning_rate=2e-5,
        weight_decay=0.01,

        eval_strategy="steps",
        eval_steps=200,
        save_strategy="steps",
        save_steps=200,

        load_best_model_at_end=True,
        metric_for_best_model="log_loss",
        greater_is_better=False,

        logging_steps=100,
        fp16=False,  # Disable fp16 due to gradient checkpointing conflict
        report_to="none",
        gradient_checkpointing=True,  # Enable gradient checkpointing
    )

    # --- Initialize Trainer ---
    trainer = Trainer(
        model=peft_model,
        args=training_args,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_val_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    # --- Start Training ---
    print("Starting fine-tuning...")
    trainer.train()

    # --- Save the final LoRA adapter ---
    # This saves only the small, trainable adapter weights
    os.makedirs(LORA_ADAPTER_DIR, exist_ok=True)
    trainer.model.save_pretrained(LORA_ADAPTER_DIR)

    # Also save the tokenizer
    tokenizer.save_pretrained(LORA_ADAPTER_DIR)

    print(f"Training complete. LoRA adapter saved to: {LORA_ADAPTER_DIR}")

# --- Predict on Test Data and Create Kaggle Submission ---
print("\n=== Generating Kaggle Submission ===")

# Make predictions on the tokenized test dataset
def preprocess_test_function(examples):
    # This formats the input as: [CLS] prompt [SEP] A: response_a [SEP] B: response_b [SEP]
    
    # Combine response_a and response_b into a single string
    response_pair = [f"A: {a} {tokenizer.sep_token} B: {b}" for a, b in zip(examples['response_a'], examples['response_b'])]
    
    # Tokenize, using prompt as the first sequence and the combined response as the second
    tokenized_inputs = tokenizer(
        examples['prompt'],
        response_pair, # This will be the second sequence
        max_length=max_length,
        truncation=True, # Need to consider whitch option is better
        padding=False # DataCollator will handle dynamic padding
    )
    return tokenized_inputs
print("Tokenizing test dataset...")
test_dataset = Dataset.from_pandas(test)
tokenized_test_dataset = test_dataset.map(preprocess_test_function, batched=True, remove_columns=test.columns.tolist())

print("Test data tokenization complete.")


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at ../models/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


No existing LoRA adapter found. Starting training from scratch...


trainable params: 2,681,091 || all params: 187,105,542 || trainable%: 1.4329


  trainer = Trainer(


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Starting fine-tuning...


Step,Training Loss,Validation Loss,Log Loss,Runtime,Samples Per Second,Steps Per Second
200,1.1008,1.097194,1.097194,312.5493,36.781,4.598
400,1.1022,1.096977,1.096977,195.4145,58.829,7.354
600,1.0994,1.096022,1.096022,195.2757,58.871,7.359
800,1.1007,1.093219,1.093219,195.3978,58.834,7.354
1000,1.0902,1.091726,1.091726,195.198,58.894,7.362
1200,1.0898,1.08828,1.08828,195.1037,58.923,7.365
1400,1.0903,1.089821,1.089821,194.9681,58.963,7.37
1600,1.0996,1.089369,1.089369,194.8131,59.01,7.376
1800,1.089,1.086575,1.086575,194.9558,58.967,7.371
2000,1.0964,1.085338,1.085338,194.9643,58.965,7.371


Training complete. LoRA adapter saved to: ../models/lora_adapter_deberta-v3-base

=== Generating Kaggle Submission ===
Tokenizing test dataset...



Map:   0%|                                                                                                                                                  | 0/3 [00:00<?, ? examples/s]


Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 180.77 examples/s]

Test data tokenization complete.





In [7]:
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import log_loss
import numpy as np

print("\n=== Candidate 1 (DeBERTa + LoRA): Calibration ===")

# Get validation predictions from the existing trainer
val_predictions = trainer.predict(tokenized_val_dataset)
val_logits = val_predictions.predictions
val_probs_before = softmax(val_logits, axis=1)

# Get true labels from val_df
y_val_c1 = val_df['labels'].values

# Calculate log loss BEFORE calibration
logloss_before = log_loss(y_val_c1, val_probs_before)
print(f"Validation LogLoss BEFORE Calibration: {logloss_before:.6f}")

# Apply Isotonic Calibration per class
# We'll calibrate each class probability separately
print("Applying Isotonic calibration...")

calibrators = []
val_probs_calibrated = np.zeros_like(val_probs_before)

for class_idx in range(3):
    # Get probabilities for this class
    class_probs = val_probs_before[:, class_idx]
    
    # Create binary labels (1 if true class, 0 otherwise)
    y_binary = (y_val_c1 == class_idx).astype(int)
    
    # Fit isotonic regression
    iso = IsotonicRegression(out_of_bounds='clip')
    iso.fit(class_probs, y_binary)
    
    # Calibrate
    val_probs_calibrated[:, class_idx] = iso.predict(class_probs)
    
    calibrators.append(iso)

# Normalize probabilities to sum to 1
row_sums = val_probs_calibrated.sum(axis=1, keepdims=True)
val_probs_calibrated = val_probs_calibrated / np.clip(row_sums, 1e-15, None)

# Calculate log loss AFTER calibration
logloss_after = log_loss(y_val_c1, val_probs_calibrated)
print(f"Validation LogLoss AFTER Calibration: {logloss_after:.6f}")
print(f"Improvement: {logloss_before - logloss_after:.6f}")

# Now predict on test set with calibration
print("\nGenerating calibrated predictions for test set...")

# Get test predictions
test_predictions = trainer.predict(tokenized_test_dataset)
test_logits = test_predictions.predictions
test_probs_uncalibrated = softmax(test_logits, axis=1)

# Apply calibration
test_probs_calibrated = np.zeros_like(test_probs_uncalibrated)
for class_idx in range(3):
    class_probs = test_probs_uncalibrated[:, class_idx]
    test_probs_calibrated[:, class_idx] = calibrators[class_idx].predict(class_probs)

joblib.dump(calibrators, '../models/candidate_1_calibrators.pkl')
print("Candidate 1 Calibrators SAVED to ../models/candidate_1_calibrators.pkl")


=== Candidate 1 (DeBERTa + LoRA): Calibration ===


Validation LogLoss BEFORE Calibration: 1.081630
Applying Isotonic calibration...
Validation LogLoss AFTER Calibration: 1.077994
Improvement: 0.003636

Generating calibrated predictions for test set...


Candidate 1 Calibrators SAVED to ../models/candidate_1_calibrators.pkl


### Candidate 2: PLM + LightGBM(XGBoost)

In [8]:
### Candidate 2: PLM + LightGBM(XGBoost) ###
import lightgbm as lgb
import xgboost as xgb
import joblib # For saving models
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
import time
import os
import torch

# Choose the model to run: "LGBM" or "XGBOOST"

MODEL_NAME = "e5-base-v2" 
BEST_EMBEDDING_MODEL_PATH = f"../models/{MODEL_NAME}"
print(f"Using embedding model: {BEST_EMBEDDING_MODEL_PATH}")

# Load the chosen embedding model
try:
    # We pass a list containing only our chosen model path
    sbert_model, model_src = load_model([BEST_EMBEDDING_MODEL_PATH], idx=0, device=device)
    # print(f"Successfully loaded model from: {model_src}")
except Exception as e:
    print(f"Failed to load model from {BEST_EMBEDDING_MODEL_PATH}. Error: {e}")

print("Encoding texts")
start_time = time.time()

# Encode training data
prompt_emb = encode_texts(sbert_model, train["prompt"])
print("Prompt encoding complete.")
a_emb = encode_texts(sbert_model, train["response_a"])
print("Response A encoding complete.")
b_emb = encode_texts(sbert_model, train["response_b"])
print("Response B encoding complete.")

# Build training features
X_c2 = build_feat(prompt_emb, a_emb, b_emb) # X for candidate 2

# Encode test data
print("Encoding test data...")
prompt_emb_te = encode_texts(sbert_model, test["prompt"])
a_emb_te = encode_texts(sbert_model, test["response_a"])
b_emb_te = encode_texts(sbert_model, test["response_b"])
print("Test encoding complete.")

# Build test features
X_test_c2 = build_feat(prompt_emb_te, a_emb_te, b_emb_te)

# Clean up model from memory
del sbert_model, prompt_emb, a_emb, b_emb, prompt_emb_te, a_emb_te, b_emb_te

print(f"Feature extraction complete. Time taken: {time.time() - start_time:.2f}s")
print(f"Train features shape (X_c2): {X_c2.shape}")
print(f"Test features shape (X_test_c2): {X_test_c2.shape}")

# Create Train/Validation Split
X_tr, X_va, y_tr, y_va = train_test_split(X_c2, y, test_size=val_size, stratify=y, random_state=random_state)
print(f"Data split into: Train {X_tr.shape}, Validation {X_va.shape}")


Using embedding model: ../models/e5-base-v2
try: ../models/e5-base-v2


loaded model from: ../models/e5-base-v2
Encoding texts


1/225 | time: 2.13s

2/225 | time: 2.09s

3/225 | time: 2.10s

4/225 | time: 2.09s

5/225 | time: 2.09s

6/225 | time: 2.09s

7/225 | time: 2.11s

8/225 | time: 2.11s

9/225 | time: 2.12s

10/225 | time: 2.09s

11/225 | time: 2.12s

12/225 | time: 2.11s

13/225 | time: 2.13s

14/225 | time: 2.12s

15/225 | time: 2.12s

16/225 | time: 2.10s

17/225 | time: 2.09s

18/225 | time: 2.10s

19/225 | time: 2.12s

20/225 | time: 2.09s

21/225 | time: 2.12s

22/225 | time: 2.09s

23/225 | time: 2.08s

24/225 | time: 2.12s

25/225 | time: 2.12s

26/225 | time: 2.08s

27/225 | time: 2.09s

28/225 | time: 2.12s

29/225 | time: 2.08s

30/225 | time: 2.09s

31/225 | time: 2.09s

32/225 | time: 2.12s

33/225 | time: 2.12s

34/225 | time: 2.09s

35/225 | time: 2.10s

36/225 | time: 2.09s

37/225 | time: 2.09s

38/225 | time: 2.09s

39/225 | time: 2.12s

40/225 | time: 2.09s

41/225 | time: 2.12s

42/225 | time: 2.12s

43/225 | time: 2.09s

44/225 | time: 2.11s

45/225 | time: 2.11s

46/225 | time: 2.14s

47/225 | time: 2.12s

48/225 | time: 2.08s

49/225 | time: 2.09s

50/225 | time: 2.09s

51/225 | time: 2.08s

52/225 | time: 2.08s

53/225 | time: 2.09s

54/225 | time: 2.09s

55/225 | time: 2.11s

56/225 | time: 2.12s

57/225 | time: 2.12s

58/225 | time: 2.08s

59/225 | time: 2.11s

60/225 | time: 2.13s

61/225 | time: 2.11s

62/225 | time: 2.09s

63/225 | time: 2.12s

64/225 | time: 2.09s

65/225 | time: 2.09s

66/225 | time: 2.09s

67/225 | time: 2.11s

68/225 | time: 2.10s

69/225 | time: 2.08s

70/225 | time: 2.11s

71/225 | time: 2.10s

72/225 | time: 2.09s

73/225 | time: 2.11s

74/225 | time: 2.12s

75/225 | time: 2.12s

76/225 | time: 2.09s

77/225 | time: 2.09s

78/225 | time: 2.09s

79/225 | time: 2.08s

80/225 | time: 2.09s

81/225 | time: 2.12s

82/225 | time: 2.08s

83/225 | time: 2.09s

84/225 | time: 2.11s

85/225 | time: 2.10s

86/225 | time: 2.09s

87/225 | time: 2.08s

88/225 | time: 2.09s

89/225 | time: 2.09s

90/225 | time: 2.09s

91/225 | time: 2.09s

92/225 | time: 2.10s

93/225 | time: 2.09s

94/225 | time: 2.09s

95/225 | time: 2.09s

96/225 | time: 2.09s

97/225 | time: 2.11s

98/225 | time: 2.09s

99/225 | time: 2.11s

100/225 | time: 2.12s

101/225 | time: 2.09s

102/225 | time: 2.11s

103/225 | time: 2.09s

104/225 | time: 2.11s

105/225 | time: 2.09s

106/225 | time: 2.09s

107/225 | time: 2.11s

108/225 | time: 2.10s

109/225 | time: 2.12s

110/225 | time: 2.09s

111/225 | time: 2.09s

112/225 | time: 2.09s

113/225 | time: 2.11s

114/225 | time: 2.08s

115/225 | time: 2.11s

116/225 | time: 2.11s

117/225 | time: 2.08s

118/225 | time: 2.08s

119/225 | time: 2.09s

120/225 | time: 2.08s

121/225 | time: 2.09s

122/225 | time: 2.12s

123/225 | time: 2.09s

124/225 | time: 2.11s

125/225 | time: 2.12s

126/225 | time: 2.13s

127/225 | time: 2.09s

128/225 | time: 2.08s

129/225 | time: 2.08s

130/225 | time: 2.09s

131/225 | time: 2.10s

132/225 | time: 2.08s

133/225 | time: 2.10s

134/225 | time: 2.09s

135/225 | time: 2.11s

136/225 | time: 2.10s

137/225 | time: 2.12s

138/225 | time: 2.09s

139/225 | time: 2.09s

140/225 | time: 2.09s

141/225 | time: 2.09s

142/225 | time: 2.09s

143/225 | time: 2.09s

144/225 | time: 2.09s

145/225 | time: 2.09s

146/225 | time: 2.09s

147/225 | time: 2.12s

148/225 | time: 2.12s

149/225 | time: 2.12s

150/225 | time: 2.09s

151/225 | time: 2.11s

152/225 | time: 2.10s

153/225 | time: 2.12s

154/225 | time: 2.13s

155/225 | time: 2.12s

156/225 | time: 2.08s

157/225 | time: 2.09s

158/225 | time: 2.09s

159/225 | time: 2.09s

160/225 | time: 2.12s

161/225 | time: 2.09s

162/225 | time: 2.09s

163/225 | time: 2.09s

164/225 | time: 2.10s

165/225 | time: 2.09s

166/225 | time: 2.12s

167/225 | time: 2.10s

168/225 | time: 2.09s

169/225 | time: 2.09s

170/225 | time: 2.12s

171/225 | time: 2.09s

172/225 | time: 2.09s

173/225 | time: 2.09s

174/225 | time: 2.09s

175/225 | time: 2.11s

176/225 | time: 2.11s

177/225 | time: 2.12s

178/225 | time: 2.09s

179/225 | time: 2.11s

180/225 | time: 2.09s

181/225 | time: 2.09s

182/225 | time: 2.12s

183/225 | time: 2.09s

184/225 | time: 2.09s

185/225 | time: 2.09s

186/225 | time: 2.11s

187/225 | time: 2.09s

188/225 | time: 2.09s

189/225 | time: 2.12s

190/225 | time: 2.11s

191/225 | time: 2.11s

192/225 | time: 2.08s

193/225 | time: 2.09s

194/225 | time: 2.11s

195/225 | time: 2.08s

196/225 | time: 2.10s

197/225 | time: 2.09s

198/225 | time: 2.11s

199/225 | time: 2.09s

200/225 | time: 2.09s

201/225 | time: 2.10s

202/225 | time: 2.08s

203/225 | time: 2.09s

204/225 | time: 2.11s

205/225 | time: 2.12s

206/225 | time: 2.09s

207/225 | time: 2.11s

208/225 | time: 2.12s

209/225 | time: 2.11s

210/225 | time: 2.09s

211/225 | time: 2.09s

212/225 | time: 2.09s

213/225 | time: 2.12s

214/225 | time: 2.10s

215/225 | time: 2.09s

216/225 | time: 2.12s

217/225 | time: 2.11s

218/225 | time: 2.10s

219/225 | time: 2.08s

220/225 | time: 2.12s

221/225 | time: 2.08s

222/225 | time: 2.12s

223/225 | time: 2.12s

224/225 | time: 2.08s

225/225 | time: 1.12s

Prompt encoding complete.


1/225 | time: 2.13s

2/225 | time: 2.16s

3/225 | time: 2.12s

4/225 | time: 2.15s

5/225 | time: 2.15s

6/225 | time: 2.15s

7/225 | time: 2.12s

8/225 | time: 2.12s

9/225 | time: 2.14s

10/225 | time: 2.15s

11/225 | time: 2.15s

12/225 | time: 2.12s

13/225 | time: 2.12s

14/225 | time: 2.12s

15/225 | time: 2.15s

16/225 | time: 2.12s

17/225 | time: 2.12s

18/225 | time: 2.13s

19/225 | time: 2.15s

20/225 | time: 2.12s

21/225 | time: 2.12s

22/225 | time: 2.12s

23/225 | time: 2.12s

24/225 | time: 2.12s

25/225 | time: 2.15s

26/225 | time: 2.14s

27/225 | time: 2.13s

28/225 | time: 2.12s

29/225 | time: 2.14s

30/225 | time: 2.15s

31/225 | time: 2.14s

32/225 | time: 2.16s

33/225 | time: 2.12s

34/225 | time: 2.12s

35/225 | time: 2.12s

36/225 | time: 2.12s

37/225 | time: 2.12s

38/225 | time: 2.12s

39/225 | time: 2.15s

40/225 | time: 2.12s

41/225 | time: 2.15s

42/225 | time: 2.15s

43/225 | time: 2.15s

44/225 | time: 2.15s

45/225 | time: 2.15s

46/225 | time: 2.12s

47/225 | time: 2.12s

48/225 | time: 2.12s

49/225 | time: 2.12s

50/225 | time: 2.11s

51/225 | time: 2.12s

52/225 | time: 2.15s

53/225 | time: 2.13s

54/225 | time: 2.14s

55/225 | time: 2.12s

56/225 | time: 2.12s

57/225 | time: 2.13s

58/225 | time: 2.12s

59/225 | time: 2.15s

60/225 | time: 2.12s

61/225 | time: 2.12s

62/225 | time: 2.12s

63/225 | time: 2.12s

64/225 | time: 2.15s

65/225 | time: 2.12s

66/225 | time: 2.12s

67/225 | time: 2.15s

68/225 | time: 2.13s

69/225 | time: 2.14s

70/225 | time: 2.12s

71/225 | time: 2.15s

72/225 | time: 2.12s

73/225 | time: 2.15s

74/225 | time: 2.15s

75/225 | time: 2.12s

76/225 | time: 2.14s

77/225 | time: 2.15s

78/225 | time: 2.13s

79/225 | time: 2.12s

80/225 | time: 2.12s

81/225 | time: 2.12s

82/225 | time: 2.12s

83/225 | time: 2.13s

84/225 | time: 2.15s

85/225 | time: 2.15s

86/225 | time: 2.12s

87/225 | time: 2.15s

88/225 | time: 2.15s

89/225 | time: 2.13s

90/225 | time: 2.12s

91/225 | time: 2.13s

92/225 | time: 2.13s

93/225 | time: 2.15s

94/225 | time: 2.12s

95/225 | time: 2.15s

96/225 | time: 2.15s

97/225 | time: 2.12s

98/225 | time: 2.12s

99/225 | time: 2.15s

100/225 | time: 2.12s

101/225 | time: 2.12s

102/225 | time: 2.15s

103/225 | time: 2.12s

104/225 | time: 2.15s

105/225 | time: 2.12s

106/225 | time: 2.12s

107/225 | time: 2.15s

108/225 | time: 2.12s

109/225 | time: 2.12s

110/225 | time: 2.12s

111/225 | time: 2.15s

112/225 | time: 2.15s

113/225 | time: 2.13s

114/225 | time: 2.15s

115/225 | time: 2.11s

116/225 | time: 2.15s

117/225 | time: 2.15s

118/225 | time: 2.14s

119/225 | time: 2.13s

120/225 | time: 2.12s

121/225 | time: 2.12s

122/225 | time: 2.12s

123/225 | time: 2.12s

124/225 | time: 2.16s

125/225 | time: 2.12s

126/225 | time: 2.13s

127/225 | time: 2.12s

128/225 | time: 2.12s

129/225 | time: 2.12s

130/225 | time: 2.12s

131/225 | time: 2.15s

132/225 | time: 2.12s

133/225 | time: 2.12s

134/225 | time: 2.12s

135/225 | time: 2.15s

136/225 | time: 2.12s

137/225 | time: 2.12s

138/225 | time: 2.12s

139/225 | time: 2.15s

140/225 | time: 2.13s

141/225 | time: 2.15s

142/225 | time: 2.16s

143/225 | time: 2.15s

144/225 | time: 2.12s

145/225 | time: 2.12s

146/225 | time: 2.15s

147/225 | time: 2.15s

148/225 | time: 2.11s

149/225 | time: 2.14s

150/225 | time: 2.13s

151/225 | time: 2.13s

152/225 | time: 2.16s

153/225 | time: 2.14s

154/225 | time: 2.16s

155/225 | time: 2.13s

156/225 | time: 2.14s

157/225 | time: 2.12s

158/225 | time: 2.13s

159/225 | time: 2.13s

160/225 | time: 2.12s

161/225 | time: 2.15s

162/225 | time: 2.13s

163/225 | time: 2.12s

164/225 | time: 2.12s

165/225 | time: 2.11s

166/225 | time: 2.12s

167/225 | time: 2.12s

168/225 | time: 2.12s

169/225 | time: 2.12s

170/225 | time: 2.15s

171/225 | time: 2.13s

172/225 | time: 2.13s

173/225 | time: 2.12s

174/225 | time: 2.12s

175/225 | time: 2.12s

176/225 | time: 2.12s

177/225 | time: 2.15s

178/225 | time: 2.14s

179/225 | time: 2.16s

180/225 | time: 2.13s

181/225 | time: 2.12s

182/225 | time: 2.13s

183/225 | time: 2.15s

184/225 | time: 2.14s

185/225 | time: 2.12s

186/225 | time: 2.12s

187/225 | time: 2.12s

188/225 | time: 2.13s

189/225 | time: 2.13s

190/225 | time: 2.12s

191/225 | time: 2.12s

192/225 | time: 2.12s

193/225 | time: 2.15s

194/225 | time: 2.13s

195/225 | time: 2.12s

196/225 | time: 2.12s

197/225 | time: 2.12s

198/225 | time: 2.12s

199/225 | time: 2.15s

200/225 | time: 2.13s

201/225 | time: 2.15s

202/225 | time: 2.15s

203/225 | time: 2.12s

204/225 | time: 2.46s

205/225 | time: 2.13s

206/225 | time: 2.13s

207/225 | time: 2.15s

208/225 | time: 2.13s

209/225 | time: 2.12s

210/225 | time: 2.13s

211/225 | time: 2.12s

212/225 | time: 2.12s

213/225 | time: 2.12s

214/225 | time: 2.12s

215/225 | time: 2.12s

216/225 | time: 2.12s

217/225 | time: 2.15s

218/225 | time: 2.12s

219/225 | time: 2.12s

220/225 | time: 2.12s

221/225 | time: 2.15s

222/225 | time: 2.12s

223/225 | time: 2.15s

224/225 | time: 2.13s

225/225 | time: 1.13s

Response A encoding complete.


1/225 | time: 2.12s

2/225 | time: 2.16s

3/225 | time: 2.12s

4/225 | time: 2.15s

5/225 | time: 2.15s

6/225 | time: 2.12s

7/225 | time: 2.12s

8/225 | time: 2.12s

9/225 | time: 2.12s

10/225 | time: 2.15s

11/225 | time: 2.15s

12/225 | time: 2.15s

13/225 | time: 2.12s

14/225 | time: 2.12s

15/225 | time: 2.13s

16/225 | time: 2.12s

17/225 | time: 2.15s

18/225 | time: 2.13s

19/225 | time: 2.11s

20/225 | time: 2.13s

21/225 | time: 2.12s

22/225 | time: 2.12s

23/225 | time: 2.14s

24/225 | time: 2.12s

25/225 | time: 2.12s

26/225 | time: 2.12s

27/225 | time: 2.15s

28/225 | time: 2.15s

29/225 | time: 2.16s

30/225 | time: 2.12s

31/225 | time: 2.12s

32/225 | time: 2.12s

33/225 | time: 2.12s

34/225 | time: 2.12s

35/225 | time: 2.12s

36/225 | time: 2.14s

37/225 | time: 2.15s

38/225 | time: 2.12s

39/225 | time: 2.12s

40/225 | time: 2.12s

41/225 | time: 2.11s

42/225 | time: 2.12s

43/225 | time: 2.15s

44/225 | time: 2.12s

45/225 | time: 2.12s

46/225 | time: 2.14s

47/225 | time: 2.12s

48/225 | time: 2.12s

49/225 | time: 2.15s

50/225 | time: 2.12s

51/225 | time: 2.15s

52/225 | time: 2.12s

53/225 | time: 2.13s

54/225 | time: 2.12s

55/225 | time: 2.12s

56/225 | time: 2.12s

57/225 | time: 2.14s

58/225 | time: 2.13s

59/225 | time: 2.12s

60/225 | time: 2.13s

61/225 | time: 2.12s

62/225 | time: 2.15s

63/225 | time: 2.16s

64/225 | time: 2.15s

65/225 | time: 2.12s

66/225 | time: 2.13s

67/225 | time: 2.13s

68/225 | time: 2.14s

69/225 | time: 2.12s

70/225 | time: 2.15s

71/225 | time: 2.16s

72/225 | time: 2.15s

73/225 | time: 2.12s

74/225 | time: 2.15s

75/225 | time: 2.15s

76/225 | time: 2.13s

77/225 | time: 2.13s

78/225 | time: 2.15s

79/225 | time: 2.14s

80/225 | time: 2.13s

81/225 | time: 2.12s

82/225 | time: 2.12s

83/225 | time: 2.13s

84/225 | time: 2.13s

85/225 | time: 2.16s

86/225 | time: 2.12s

87/225 | time: 2.12s

88/225 | time: 2.16s

89/225 | time: 2.12s

90/225 | time: 2.15s

91/225 | time: 2.12s

92/225 | time: 2.12s

93/225 | time: 2.15s

94/225 | time: 2.12s

95/225 | time: 2.13s

96/225 | time: 2.12s

97/225 | time: 2.12s

98/225 | time: 2.12s

99/225 | time: 2.13s

100/225 | time: 2.12s

101/225 | time: 2.15s

102/225 | time: 2.14s

103/225 | time: 2.12s

104/225 | time: 2.12s

105/225 | time: 2.12s

106/225 | time: 2.15s

107/225 | time: 2.13s

108/225 | time: 2.15s

109/225 | time: 2.12s

110/225 | time: 2.14s

111/225 | time: 2.14s

112/225 | time: 2.12s

113/225 | time: 2.12s

114/225 | time: 2.12s

115/225 | time: 2.12s

116/225 | time: 2.12s

117/225 | time: 2.15s

118/225 | time: 2.12s

119/225 | time: 2.13s

120/225 | time: 2.12s

121/225 | time: 2.13s

122/225 | time: 2.11s

123/225 | time: 2.16s

124/225 | time: 2.16s

125/225 | time: 2.12s

126/225 | time: 2.13s

127/225 | time: 2.15s

128/225 | time: 2.15s

129/225 | time: 2.15s

130/225 | time: 2.13s

131/225 | time: 2.13s

132/225 | time: 2.13s

133/225 | time: 2.12s

134/225 | time: 2.13s

135/225 | time: 2.12s

136/225 | time: 2.12s

137/225 | time: 2.17s

138/225 | time: 2.13s

139/225 | time: 2.13s

140/225 | time: 2.13s

141/225 | time: 2.12s

142/225 | time: 2.15s

143/225 | time: 2.12s

144/225 | time: 2.13s

145/225 | time: 2.12s

146/225 | time: 2.12s

147/225 | time: 2.13s

148/225 | time: 2.15s

149/225 | time: 2.12s

150/225 | time: 2.16s

151/225 | time: 2.15s

152/225 | time: 2.15s

153/225 | time: 2.12s

154/225 | time: 2.12s

155/225 | time: 2.15s

156/225 | time: 2.16s

157/225 | time: 2.12s

158/225 | time: 2.15s

159/225 | time: 2.14s

160/225 | time: 2.12s

161/225 | time: 2.12s

162/225 | time: 2.13s

163/225 | time: 2.15s

164/225 | time: 2.12s

165/225 | time: 2.12s

166/225 | time: 2.14s

167/225 | time: 2.15s

168/225 | time: 2.13s

169/225 | time: 2.12s

170/225 | time: 2.12s

171/225 | time: 2.13s

172/225 | time: 2.12s

173/225 | time: 2.12s

174/225 | time: 2.16s

175/225 | time: 2.15s

176/225 | time: 2.16s

177/225 | time: 2.15s

178/225 | time: 2.15s

179/225 | time: 2.12s

180/225 | time: 2.13s

181/225 | time: 2.14s

182/225 | time: 2.13s

183/225 | time: 2.13s

184/225 | time: 2.13s

185/225 | time: 2.12s

186/225 | time: 2.13s

187/225 | time: 2.12s

188/225 | time: 2.15s

189/225 | time: 2.14s

190/225 | time: 2.12s

191/225 | time: 2.15s

192/225 | time: 2.15s

193/225 | time: 2.12s

194/225 | time: 2.16s

195/225 | time: 2.12s

196/225 | time: 2.12s

197/225 | time: 2.13s

198/225 | time: 2.11s

199/225 | time: 2.14s

200/225 | time: 2.12s

201/225 | time: 2.12s

202/225 | time: 2.12s

203/225 | time: 2.18s

204/225 | time: 2.13s

205/225 | time: 2.12s

206/225 | time: 2.12s

207/225 | time: 2.14s

208/225 | time: 2.15s

209/225 | time: 2.12s

210/225 | time: 2.13s

211/225 | time: 2.15s

212/225 | time: 2.12s

213/225 | time: 2.12s

214/225 | time: 2.12s

215/225 | time: 2.13s

216/225 | time: 2.15s

217/225 | time: 2.15s

218/225 | time: 2.16s

219/225 | time: 2.12s

220/225 | time: 2.12s

221/225 | time: 2.13s

222/225 | time: 2.13s

223/225 | time: 2.12s

224/225 | time: 2.13s

225/225 | time: 1.13s

Response B encoding complete.


Encoding test data...
1/1 | time: 0.01s

1/1 | time: 0.03s

1/1 | time: 0.03s

Test encoding complete.
Feature extraction complete. Time taken: 1431.44s
Train features shape (X_c2): (57477, 6144)
Test features shape (X_test_c2): (3, 6144)


Data split into: Train (45981, 6144), Validation (11496, 6144)


In [9]:
# Path to save the trained GBM model
GBM_CHOICE = "XGBOOST" 
CANDIDATE_2_MODEL_SAVE_PATH = f"../models/candidate_2_{GBM_CHOICE.lower()}_{BEST_EMBEDDING_MODEL_PATH.split('/')[-1]}.pkl"
print(f"--- Candidate 2: {GBM_CHOICE} ---")
print(f"Model will be saved to: {CANDIDATE_2_MODEL_SAVE_PATH}")

start_time = time.time()

# This dictionary will hold the best validation logloss
val_logloss = {}
clf_c2 = None

if GBM_CHOICE == "LGBM":
    # --- 2.1. LightGBM ---
    clf_c2 = lgb.LGBMClassifier(
        objective='multiclass',
        metric='multi_logloss',
        num_class=3,
        n_estimators=1000,
        learning_rate=0.05,
        n_jobs=-1,
        random_state=random_state,
        # device='gpu' if device == 'cuda' else 'cpu'
        device=device
    )
    
    clf_c2.fit(
        X_tr, y_tr,
        eval_set=[(X_va, y_va)],
        eval_metric='multi_logloss',
        callbacks=[lgb.early_stopping(100, verbose=True)]
    )
    
    va_pred = clf_c2.predict_proba(X_va)
    val_logloss['LGBM'] = log_loss(y_va, va_pred)
    print(f"--- LGBM Validation LogLoss: {val_logloss['LGBM']:.6f} ---")

elif GBM_CHOICE == "XGBOOST":
    # --- 2.2. XGBoost ---
    clf_c2 = xgb.XGBClassifier(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        n_estimators=1000,
        learning_rate=0.05,
        n_jobs=-1,
        random_state=random_state,
        device=device,
        early_stopping_rounds=100
    )
    
    clf_c2.fit(
        X_tr, y_tr,
        eval_set=[(X_va, y_va)],
        verbose=True
    )
    
    va_pred = clf_c2.predict_proba(X_va)
    val_logloss['XGBOOST'] = log_loss(y_va, va_pred)
    print(f"--- XGBOOST Validation LogLoss: {val_logloss['XGBOOST']:.6f} ---")

else:
    print(f"Error: Unknown GBM_CHOICE '{GBM_CHOICE}'. Please set to 'LGBM' or 'XGBOOST'.")

print(f"Training complete. Time taken: {time.time() - start_time:.2f}s")

if clf_c2 is not None:
    # Use joblib for cross-compatibility between LGBM/XGB
    joblib.dump(clf_c2, CANDIDATE_2_MODEL_SAVE_PATH)
    print(f"Model saved to: {CANDIDATE_2_MODEL_SAVE_PATH}")

--- Candidate 2: XGBOOST ---
Model will be saved to: ../models/candidate_2_xgboost_e5-base-v2.pkl


[0]	validation_0-mlogloss:1.09635


[1]	validation_0-mlogloss:1.09427


[2]	validation_0-mlogloss:1.09219


[3]	validation_0-mlogloss:1.09043


[4]	validation_0-mlogloss:1.08872


[5]	validation_0-mlogloss:1.08706


[6]	validation_0-mlogloss:1.08541


[7]	validation_0-mlogloss:1.08398


[8]	validation_0-mlogloss:1.08247


[9]	validation_0-mlogloss:1.08107


[10]	validation_0-mlogloss:1.07967


[11]	validation_0-mlogloss:1.07837


[12]	validation_0-mlogloss:1.07707


[13]	validation_0-mlogloss:1.07584


[14]	validation_0-mlogloss:1.07469


[15]	validation_0-mlogloss:1.07350


[16]	validation_0-mlogloss:1.07261


[17]	validation_0-mlogloss:1.07167


[18]	validation_0-mlogloss:1.07075


[19]	validation_0-mlogloss:1.06978


[20]	validation_0-mlogloss:1.06879


[21]	validation_0-mlogloss:1.06788


[22]	validation_0-mlogloss:1.06699


[23]	validation_0-mlogloss:1.06616


[24]	validation_0-mlogloss:1.06532


[25]	validation_0-mlogloss:1.06454


[26]	validation_0-mlogloss:1.06380


[27]	validation_0-mlogloss:1.06300


[28]	validation_0-mlogloss:1.06233


[29]	validation_0-mlogloss:1.06138


[30]	validation_0-mlogloss:1.06071


[31]	validation_0-mlogloss:1.06004


[32]	validation_0-mlogloss:1.05930


[33]	validation_0-mlogloss:1.05868


[34]	validation_0-mlogloss:1.05811


[35]	validation_0-mlogloss:1.05760


[36]	validation_0-mlogloss:1.05713


[37]	validation_0-mlogloss:1.05655


[38]	validation_0-mlogloss:1.05600


[39]	validation_0-mlogloss:1.05553


[40]	validation_0-mlogloss:1.05501


[41]	validation_0-mlogloss:1.05454


[42]	validation_0-mlogloss:1.05419


[43]	validation_0-mlogloss:1.05374


[44]	validation_0-mlogloss:1.05318


[45]	validation_0-mlogloss:1.05289


[46]	validation_0-mlogloss:1.05246


[47]	validation_0-mlogloss:1.05213


[48]	validation_0-mlogloss:1.05187


[49]	validation_0-mlogloss:1.05153


[50]	validation_0-mlogloss:1.05114


[51]	validation_0-mlogloss:1.05072


[52]	validation_0-mlogloss:1.05041


[53]	validation_0-mlogloss:1.05010


[54]	validation_0-mlogloss:1.04964


[55]	validation_0-mlogloss:1.04945


[56]	validation_0-mlogloss:1.04920


[57]	validation_0-mlogloss:1.04896


[58]	validation_0-mlogloss:1.04863


[59]	validation_0-mlogloss:1.04832


[60]	validation_0-mlogloss:1.04802


[61]	validation_0-mlogloss:1.04785


[62]	validation_0-mlogloss:1.04766


[63]	validation_0-mlogloss:1.04737


[64]	validation_0-mlogloss:1.04706


[65]	validation_0-mlogloss:1.04679


[66]	validation_0-mlogloss:1.04653


[67]	validation_0-mlogloss:1.04624


[68]	validation_0-mlogloss:1.04594


[69]	validation_0-mlogloss:1.04572


[70]	validation_0-mlogloss:1.04556


[71]	validation_0-mlogloss:1.04541


[72]	validation_0-mlogloss:1.04509


[73]	validation_0-mlogloss:1.04493


[74]	validation_0-mlogloss:1.04481


[75]	validation_0-mlogloss:1.04462


[76]	validation_0-mlogloss:1.04437


[77]	validation_0-mlogloss:1.04423


[78]	validation_0-mlogloss:1.04405


[79]	validation_0-mlogloss:1.04383


[80]	validation_0-mlogloss:1.04363


[81]	validation_0-mlogloss:1.04362


[82]	validation_0-mlogloss:1.04355


[83]	validation_0-mlogloss:1.04334


[84]	validation_0-mlogloss:1.04322


[85]	validation_0-mlogloss:1.04301


[86]	validation_0-mlogloss:1.04292


[87]	validation_0-mlogloss:1.04276


[88]	validation_0-mlogloss:1.04256


[89]	validation_0-mlogloss:1.04242


[90]	validation_0-mlogloss:1.04235


[91]	validation_0-mlogloss:1.04221


[92]	validation_0-mlogloss:1.04202


[93]	validation_0-mlogloss:1.04182


[94]	validation_0-mlogloss:1.04175


[95]	validation_0-mlogloss:1.04168


[96]	validation_0-mlogloss:1.04160


[97]	validation_0-mlogloss:1.04146


[98]	validation_0-mlogloss:1.04132


[99]	validation_0-mlogloss:1.04122


[100]	validation_0-mlogloss:1.04104


[101]	validation_0-mlogloss:1.04092


[102]	validation_0-mlogloss:1.04085


[103]	validation_0-mlogloss:1.04067


[104]	validation_0-mlogloss:1.04070


[105]	validation_0-mlogloss:1.04050


[106]	validation_0-mlogloss:1.04044


[107]	validation_0-mlogloss:1.04034


[108]	validation_0-mlogloss:1.04011


[109]	validation_0-mlogloss:1.04007


[110]	validation_0-mlogloss:1.04004


[111]	validation_0-mlogloss:1.03998


[112]	validation_0-mlogloss:1.03987


[113]	validation_0-mlogloss:1.03973


[114]	validation_0-mlogloss:1.03969


[115]	validation_0-mlogloss:1.03953


[116]	validation_0-mlogloss:1.03936


[117]	validation_0-mlogloss:1.03924


[118]	validation_0-mlogloss:1.03925


[119]	validation_0-mlogloss:1.03915


[120]	validation_0-mlogloss:1.03904


[121]	validation_0-mlogloss:1.03908


[122]	validation_0-mlogloss:1.03896


[123]	validation_0-mlogloss:1.03889


[124]	validation_0-mlogloss:1.03877


[125]	validation_0-mlogloss:1.03867


[126]	validation_0-mlogloss:1.03857


[127]	validation_0-mlogloss:1.03858


[128]	validation_0-mlogloss:1.03844


[129]	validation_0-mlogloss:1.03844


[130]	validation_0-mlogloss:1.03842


[131]	validation_0-mlogloss:1.03838


[132]	validation_0-mlogloss:1.03822


[133]	validation_0-mlogloss:1.03815


[134]	validation_0-mlogloss:1.03810


[135]	validation_0-mlogloss:1.03804


[136]	validation_0-mlogloss:1.03793


[137]	validation_0-mlogloss:1.03797


[138]	validation_0-mlogloss:1.03795


[139]	validation_0-mlogloss:1.03787


[140]	validation_0-mlogloss:1.03786


[141]	validation_0-mlogloss:1.03778


[142]	validation_0-mlogloss:1.03762


[143]	validation_0-mlogloss:1.03766


[144]	validation_0-mlogloss:1.03761


[145]	validation_0-mlogloss:1.03751


[146]	validation_0-mlogloss:1.03753


[147]	validation_0-mlogloss:1.03747


[148]	validation_0-mlogloss:1.03747


[149]	validation_0-mlogloss:1.03755


[150]	validation_0-mlogloss:1.03757


[151]	validation_0-mlogloss:1.03751


[152]	validation_0-mlogloss:1.03753


[153]	validation_0-mlogloss:1.03751


[154]	validation_0-mlogloss:1.03751


[155]	validation_0-mlogloss:1.03751


[156]	validation_0-mlogloss:1.03749


[157]	validation_0-mlogloss:1.03745


[158]	validation_0-mlogloss:1.03729


[159]	validation_0-mlogloss:1.03722


[160]	validation_0-mlogloss:1.03711


[161]	validation_0-mlogloss:1.03700


[162]	validation_0-mlogloss:1.03695


[163]	validation_0-mlogloss:1.03699


[164]	validation_0-mlogloss:1.03692


[165]	validation_0-mlogloss:1.03691


[166]	validation_0-mlogloss:1.03683


[167]	validation_0-mlogloss:1.03674


[168]	validation_0-mlogloss:1.03681


[169]	validation_0-mlogloss:1.03680


[170]	validation_0-mlogloss:1.03682


[171]	validation_0-mlogloss:1.03681


[172]	validation_0-mlogloss:1.03683


[173]	validation_0-mlogloss:1.03681


[174]	validation_0-mlogloss:1.03673


[175]	validation_0-mlogloss:1.03673


[176]	validation_0-mlogloss:1.03681


[177]	validation_0-mlogloss:1.03680


[178]	validation_0-mlogloss:1.03683


[179]	validation_0-mlogloss:1.03682


[180]	validation_0-mlogloss:1.03679


[181]	validation_0-mlogloss:1.03680


[182]	validation_0-mlogloss:1.03681


[183]	validation_0-mlogloss:1.03675


[184]	validation_0-mlogloss:1.03679


[185]	validation_0-mlogloss:1.03671


[186]	validation_0-mlogloss:1.03671


[187]	validation_0-mlogloss:1.03672


[188]	validation_0-mlogloss:1.03675


[189]	validation_0-mlogloss:1.03672


[190]	validation_0-mlogloss:1.03660


[191]	validation_0-mlogloss:1.03658


[192]	validation_0-mlogloss:1.03659


[193]	validation_0-mlogloss:1.03659


[194]	validation_0-mlogloss:1.03658


[195]	validation_0-mlogloss:1.03656


[196]	validation_0-mlogloss:1.03652


[197]	validation_0-mlogloss:1.03657


[198]	validation_0-mlogloss:1.03649


[199]	validation_0-mlogloss:1.03640


[200]	validation_0-mlogloss:1.03642


[201]	validation_0-mlogloss:1.03648


[202]	validation_0-mlogloss:1.03641


[203]	validation_0-mlogloss:1.03643


[204]	validation_0-mlogloss:1.03643


[205]	validation_0-mlogloss:1.03637


[206]	validation_0-mlogloss:1.03632


[207]	validation_0-mlogloss:1.03633


[208]	validation_0-mlogloss:1.03639


[209]	validation_0-mlogloss:1.03638


[210]	validation_0-mlogloss:1.03636


[211]	validation_0-mlogloss:1.03637


[212]	validation_0-mlogloss:1.03638


[213]	validation_0-mlogloss:1.03647


[214]	validation_0-mlogloss:1.03648


[215]	validation_0-mlogloss:1.03643


[216]	validation_0-mlogloss:1.03639


[217]	validation_0-mlogloss:1.03637


[218]	validation_0-mlogloss:1.03640


[219]	validation_0-mlogloss:1.03641


[220]	validation_0-mlogloss:1.03644


[221]	validation_0-mlogloss:1.03648


[222]	validation_0-mlogloss:1.03640


[223]	validation_0-mlogloss:1.03638


[224]	validation_0-mlogloss:1.03639


[225]	validation_0-mlogloss:1.03643


[226]	validation_0-mlogloss:1.03639


[227]	validation_0-mlogloss:1.03640


[228]	validation_0-mlogloss:1.03633


[229]	validation_0-mlogloss:1.03631


[230]	validation_0-mlogloss:1.03627


[231]	validation_0-mlogloss:1.03629


[232]	validation_0-mlogloss:1.03626


[233]	validation_0-mlogloss:1.03631


[234]	validation_0-mlogloss:1.03627


[235]	validation_0-mlogloss:1.03621


[236]	validation_0-mlogloss:1.03628


[237]	validation_0-mlogloss:1.03628


[238]	validation_0-mlogloss:1.03616


[239]	validation_0-mlogloss:1.03616


[240]	validation_0-mlogloss:1.03613


[241]	validation_0-mlogloss:1.03616


[242]	validation_0-mlogloss:1.03617


[243]	validation_0-mlogloss:1.03621


[244]	validation_0-mlogloss:1.03618


[245]	validation_0-mlogloss:1.03617


[246]	validation_0-mlogloss:1.03615


[247]	validation_0-mlogloss:1.03620


[248]	validation_0-mlogloss:1.03624


[249]	validation_0-mlogloss:1.03621


[250]	validation_0-mlogloss:1.03619


[251]	validation_0-mlogloss:1.03622


[252]	validation_0-mlogloss:1.03628


[253]	validation_0-mlogloss:1.03623


[254]	validation_0-mlogloss:1.03623


[255]	validation_0-mlogloss:1.03622


[256]	validation_0-mlogloss:1.03622


[257]	validation_0-mlogloss:1.03615


[258]	validation_0-mlogloss:1.03608


[259]	validation_0-mlogloss:1.03606


[260]	validation_0-mlogloss:1.03610


[261]	validation_0-mlogloss:1.03614


[262]	validation_0-mlogloss:1.03611


[263]	validation_0-mlogloss:1.03609


[264]	validation_0-mlogloss:1.03607


[265]	validation_0-mlogloss:1.03609


[266]	validation_0-mlogloss:1.03607


[267]	validation_0-mlogloss:1.03609


[268]	validation_0-mlogloss:1.03603


[269]	validation_0-mlogloss:1.03611


[270]	validation_0-mlogloss:1.03609


[271]	validation_0-mlogloss:1.03608


[272]	validation_0-mlogloss:1.03607


[273]	validation_0-mlogloss:1.03613


[274]	validation_0-mlogloss:1.03615


[275]	validation_0-mlogloss:1.03616


[276]	validation_0-mlogloss:1.03619


[277]	validation_0-mlogloss:1.03613


[278]	validation_0-mlogloss:1.03611


[279]	validation_0-mlogloss:1.03604


[280]	validation_0-mlogloss:1.03602


[281]	validation_0-mlogloss:1.03595


[282]	validation_0-mlogloss:1.03594


[283]	validation_0-mlogloss:1.03597


[284]	validation_0-mlogloss:1.03603


[285]	validation_0-mlogloss:1.03602


[286]	validation_0-mlogloss:1.03595


[287]	validation_0-mlogloss:1.03593


[288]	validation_0-mlogloss:1.03592


[289]	validation_0-mlogloss:1.03589


[290]	validation_0-mlogloss:1.03588


[291]	validation_0-mlogloss:1.03590


[292]	validation_0-mlogloss:1.03595


[293]	validation_0-mlogloss:1.03591


[294]	validation_0-mlogloss:1.03584


[295]	validation_0-mlogloss:1.03591


[296]	validation_0-mlogloss:1.03586


[297]	validation_0-mlogloss:1.03579


[298]	validation_0-mlogloss:1.03576


[299]	validation_0-mlogloss:1.03581


[300]	validation_0-mlogloss:1.03576


[301]	validation_0-mlogloss:1.03577


[302]	validation_0-mlogloss:1.03585


[303]	validation_0-mlogloss:1.03588


[304]	validation_0-mlogloss:1.03586


[305]	validation_0-mlogloss:1.03587


[306]	validation_0-mlogloss:1.03588


[307]	validation_0-mlogloss:1.03590


[308]	validation_0-mlogloss:1.03587


[309]	validation_0-mlogloss:1.03584


[310]	validation_0-mlogloss:1.03594


[311]	validation_0-mlogloss:1.03587


[312]	validation_0-mlogloss:1.03585


[313]	validation_0-mlogloss:1.03580


[314]	validation_0-mlogloss:1.03580


[315]	validation_0-mlogloss:1.03578


[316]	validation_0-mlogloss:1.03579


[317]	validation_0-mlogloss:1.03587


[318]	validation_0-mlogloss:1.03583


[319]	validation_0-mlogloss:1.03585


[320]	validation_0-mlogloss:1.03585


[321]	validation_0-mlogloss:1.03589


[322]	validation_0-mlogloss:1.03590


[323]	validation_0-mlogloss:1.03585


[324]	validation_0-mlogloss:1.03582


[325]	validation_0-mlogloss:1.03590


[326]	validation_0-mlogloss:1.03587


[327]	validation_0-mlogloss:1.03588


[328]	validation_0-mlogloss:1.03593


[329]	validation_0-mlogloss:1.03600


[330]	validation_0-mlogloss:1.03606


[331]	validation_0-mlogloss:1.03609


[332]	validation_0-mlogloss:1.03602


[333]	validation_0-mlogloss:1.03603


[334]	validation_0-mlogloss:1.03607


[335]	validation_0-mlogloss:1.03605


[336]	validation_0-mlogloss:1.03606


[337]	validation_0-mlogloss:1.03601


[338]	validation_0-mlogloss:1.03600


[339]	validation_0-mlogloss:1.03606


[340]	validation_0-mlogloss:1.03606


[341]	validation_0-mlogloss:1.03605


[342]	validation_0-mlogloss:1.03607


[343]	validation_0-mlogloss:1.03604


[344]	validation_0-mlogloss:1.03611


[345]	validation_0-mlogloss:1.03609


[346]	validation_0-mlogloss:1.03609


[347]	validation_0-mlogloss:1.03607


[348]	validation_0-mlogloss:1.03611


[349]	validation_0-mlogloss:1.03608


[350]	validation_0-mlogloss:1.03608


[351]	validation_0-mlogloss:1.03605


[352]	validation_0-mlogloss:1.03606


[353]	validation_0-mlogloss:1.03609


[354]	validation_0-mlogloss:1.03600


[355]	validation_0-mlogloss:1.03604


[356]	validation_0-mlogloss:1.03603


[357]	validation_0-mlogloss:1.03601


[358]	validation_0-mlogloss:1.03602


[359]	validation_0-mlogloss:1.03600


[360]	validation_0-mlogloss:1.03598


[361]	validation_0-mlogloss:1.03592


[362]	validation_0-mlogloss:1.03597


[363]	validation_0-mlogloss:1.03602


[364]	validation_0-mlogloss:1.03605


[365]	validation_0-mlogloss:1.03608


[366]	validation_0-mlogloss:1.03607


[367]	validation_0-mlogloss:1.03602


[368]	validation_0-mlogloss:1.03610


[369]	validation_0-mlogloss:1.03607


[370]	validation_0-mlogloss:1.03607


[371]	validation_0-mlogloss:1.03604


[372]	validation_0-mlogloss:1.03601


[373]	validation_0-mlogloss:1.03605


[374]	validation_0-mlogloss:1.03610


[375]	validation_0-mlogloss:1.03603


[376]	validation_0-mlogloss:1.03607


[377]	validation_0-mlogloss:1.03611


[378]	validation_0-mlogloss:1.03602


[379]	validation_0-mlogloss:1.03602


[380]	validation_0-mlogloss:1.03609


[381]	validation_0-mlogloss:1.03608


[382]	validation_0-mlogloss:1.03611


[383]	validation_0-mlogloss:1.03615


[384]	validation_0-mlogloss:1.03611


[385]	validation_0-mlogloss:1.03621


[386]	validation_0-mlogloss:1.03624


[387]	validation_0-mlogloss:1.03630


[388]	validation_0-mlogloss:1.03627


[389]	validation_0-mlogloss:1.03626


[390]	validation_0-mlogloss:1.03627


[391]	validation_0-mlogloss:1.03633


[392]	validation_0-mlogloss:1.03638


[393]	validation_0-mlogloss:1.03638


[394]	validation_0-mlogloss:1.03634


[395]	validation_0-mlogloss:1.03631


[396]	validation_0-mlogloss:1.03632


[397]	validation_0-mlogloss:1.03629


[398]	validation_0-mlogloss:1.03625


[399]	validation_0-mlogloss:1.03619


[400]	validation_0-mlogloss:1.03618


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)


--- XGBOOST Validation LogLoss: 1.035757 ---
Training complete. Time taken: 120.23s
Model saved to: ../models/candidate_2_xgboost_e5-base-v2.pkl


In [10]:
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import log_loss
import numpy as np

print(f"\n=== Candidate 2 ({GBM_CHOICE}): Calibration ===")

# Get validation predictions BEFORE calibration
# We need to recreate the train/val split from earlier
X_tr_c2, X_va_c2, y_tr_c2, y_va_c2 = train_test_split(
    X_c2, y, test_size=val_size, stratify=y, random_state=random_state
)

# Get predictions from the validation model (clf_c2 trained on X_tr, X_va split)
if 'clf_c2' in locals() and clf_c2 is not None:
    va_pred_before = clf_c2.predict_proba(X_va_c2)
    logloss_before = log_loss(y_va_c2, va_pred_before)
    print(f"Validation LogLoss BEFORE Calibration: {logloss_before:.6f}")
    
    # Apply calibration (using isotonic regression)
    print("Applying Isotonic calibration...")
    calibrated_model_c2 = CalibratedClassifierCV(
        clf_c2,
        method='isotonic',  # isotonic or sigmoid
        cv='prefit',  # Model is already fitted
        ensemble=False
    )
    
    # Fit calibration on validation set
    calibrated_model_c2.fit(X_va_c2, y_va_c2)
    
    # Get calibrated predictions on validation set
    va_pred_after = calibrated_model_c2.predict_proba(X_va_c2)
    logloss_after = log_loss(y_va_c2, va_pred_after)
    print(f"Validation LogLoss AFTER Calibration: {logloss_after:.6f}")
    print(f"Improvement: {logloss_before - logloss_after:.6f}")
    
    # Now retrain on full data and apply calibration
    print("\nRetraining on full data for final submission...")
    
    # Get best iteration
    try:
        best_iter = clf_c2.best_iteration_ or 1000
    except AttributeError:
        best_iter = 1000
    
    X_train_full, X_cal, y_train_full, y_cal = train_test_split(
        X_c2, y, test_size=val_size, stratify=y, random_state=random_state
    )
    
    if GBM_CHOICE == "LGBM":
        clf_c2_for_calib = lgb.LGBMClassifier(
            objective='multiclass', metric='multi_logloss', num_class=3,
            n_estimators=best_iter, learning_rate=0.05,
            n_jobs=-1, random_state=random_state, device=device
        )
    elif GBM_CHOICE == "XGBOOST":
        clf_c2_for_calib = xgb.XGBClassifier(
            objective='multi:softprob', eval_metric='mlogloss', num_class=3,
            n_estimators=best_iter, learning_rate=0.05,
            n_jobs=-1, random_state=random_state, device=device
        )
    
    clf_c2_for_calib.fit(X_train_full, y_train_full)
    
    # Apply calibration on the hold-out 20%
    calibrated_final_c2 = CalibratedClassifierCV(
        clf_c2_for_calib,
        method='isotonic',
        cv='prefit',
        ensemble=False
    )
    calibrated_final_c2.fit(X_cal, y_cal)
    
    C2_CALIBRATED_MODEL_PATH = f"../models/candidate_2_{GBM_CHOICE}_{MODEL_NAME}_CALIBRATED.pkl"
    joblib.dump(calibrated_final_c2, C2_CALIBRATED_MODEL_PATH)
    print(f"Candidate 2 Calibrated Model SAVED to {C2_CALIBRATED_MODEL_PATH}")
    
    print(f"\n=== Candidate 2 Summary ===")
    print(f"Before Calibration - Val LogLoss: {logloss_before:.6f}")
    print(f"After Calibration  - Val LogLoss: {logloss_after:.6f}")
    print(f"Final submission saved with calibration.")
else:
    print("Error: clf_c2 model not found. Please run the training cell first.")


=== Candidate 2 (XGBOOST): Calibration ===


Validation LogLoss BEFORE Calibration: 1.035757
Applying Isotonic calibration...




Validation LogLoss AFTER Calibration: 1.030071
Improvement: 0.005686

Retraining on full data for final submission...




Candidate 2 Calibrated Model SAVED to ../models/candidate_2_XGBOOST_e5-base-v2_CALIBRATED.pkl

=== Candidate 2 Summary ===
Before Calibration - Val LogLoss: 1.035757
After Calibration  - Val LogLoss: 1.030071
Final submission saved with calibration.


### Candidate 3: All Features + MLP

In [11]:
### Candidate 3: All Features + MLP ###
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
import joblib
import os

# Define the "Strong" lexical feature builder
def stats_strong(s):
    """Calculates a comprehensive set of lexical statistics."""
    if not isinstance(s, str): s = ""
    toks = s.split()
    return {
        "len_char": len(s),
        "len_tok": len(toks),
        "num_sent": sum(s.count(x) for x in [".", "!", "?"]),
        "num_code": s.count("`"),
        "num_list": s.count("- ") + s.count("* "),
        "num_upper": sum(ch.isupper() for ch in s),
        "num_punct": sum(ch in ",;:()" for ch in s),
        "avg_tok_len": (sum(len(t) for t in toks) / len(toks)) if toks else 0.0,
    }

def build_strong_lexical_features(df):
    """Builds the full set of lexical and bias features."""
    rows = []
    cols = ["prompt", "response_a", "response_b"]
    
    for p, a, b in zip(df[cols[0]], df[cols[1]], df[cols[2]]):
        ps, as_, bs = stats_strong(p), stats_strong(a), stats_strong(b)
        rows.append({
            "p_len_char": ps["len_char"], "p_len_tok": ps["len_tok"], "p_num_sent": ps["num_sent"],
            "a_len_char": as_["len_char"], "a_len_tok": as_["len_tok"], "a_num_sent": as_["num_sent"],
            "a_num_code": as_["num_code"], "a_num_list": as_["num_list"], "a_num_upper": as_["num_upper"],
            "a_num_punct": as_["num_punct"], "a_avg_tok_len": as_["avg_tok_len"],
            "b_len_char": bs["len_char"], "b_len_tok": bs["len_tok"], "b_num_sent": bs["num_sent"],
            "b_num_code": bs["num_code"], "b_num_list": bs["num_list"], "b_num_upper": bs["num_upper"],
            "b_num_punct": bs["num_punct"], "b_avg_tok_len": bs["avg_tok_len"],
            # A-B Differences
            "d_len_char": as_["len_char"] - bs["len_char"],
            "d_len_tok": as_["len_tok"] - bs["len_tok"],
            "d_num_sent": as_["num_sent"] - bs["num_sent"],
            "d_num_code": as_["num_code"] - bs["num_code"],
            "d_num_list": as_["num_list"] - bs["num_list"],
            "d_num_upper": as_["num_upper"] - bs["num_upper"],
            "d_num_punct": as_["num_punct"] - bs["num_punct"],
            "d_avg_tok_len": as_["avg_tok_len"] - bs["avg_tok_len"],
            # Ratios
            "r_len_char": (as_["len_char"] + 1) / (bs["len_char"] + 1),
            "r_len_tok": (as_["len_tok"] + 1) / (bs["len_tok"] + 1),
            "r_num_sent": (as_["num_sent"] + 1) / (bs["num_sent"] + 1),
        })
    return pd.DataFrame(rows)

print("--- Candidate 3: All Features + MLP ---")
print("Building strong lexical features...")

# Generate Lexical Features
X_lex_strong = build_strong_lexical_features(train)
X_test_lex_strong = build_strong_lexical_features(test)

print(f"Strong lexical features shape: {X_lex_strong.shape}")

--- Candidate 3: All Features + MLP ---
Building strong lexical features...


Strong lexical features shape: (57477, 30)


In [12]:
import joblib # For saving models
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
import time
import os
import torch

MODEL_NAME = "e5-base-v2" 
BEST_EMBEDDING_MODEL_PATH = f"../models/{MODEL_NAME}"
print(f"Using embedding model: {BEST_EMBEDDING_MODEL_PATH}")

# Load the chosen embedding model
try:
    # We pass a list containing only our chosen model path
    sbert_model, model_src = load_model([BEST_EMBEDDING_MODEL_PATH], idx=0, device=device)
    # print(f"Successfully loaded model from: {model_src}")
except Exception as e:
    print(f"Failed to load model from {BEST_EMBEDDING_MODEL_PATH}. Error: {e}")

print("Encoding texts")
start_time = time.time()

# Encode training data
prompt_emb = encode_texts(sbert_model, train["prompt"])
print("Prompt encoding complete.")
a_emb = encode_texts(sbert_model, train["response_a"])
print("Response A encoding complete.")
b_emb = encode_texts(sbert_model, train["response_b"])
print("Response B encoding complete.")

# Build training features
X_c3 = build_feat(prompt_emb, a_emb, b_emb) # X for candidate 3

# Encode test data
print("Encoding test data...")
prompt_emb_te = encode_texts(sbert_model, test["prompt"])
a_emb_te = encode_texts(sbert_model, test["response_a"])
b_emb_te = encode_texts(sbert_model, test["response_b"])
print("Encoding complete for test data.")

# Build test features
X_test_c3 = build_feat(prompt_emb_te, a_emb_te, b_emb_te)

# Clean up model from memory
del sbert_model, prompt_emb, a_emb, b_emb, prompt_emb_te, a_emb_te, b_emb_te

print(f"Feature extraction complete. Time taken: {time.time() - start_time:.2f}s")
print(f"Train features shape (X_c3): {X_c3.shape}")
print(f"Test features shape (X_test_c3): {X_test_c3.shape}")

# Create Train/Validation Split
X_tr, X_va, y_tr, y_va = train_test_split(X_c3, y, test_size=val_size, stratify=y)
print(f"Data split into: Train {X_tr.shape}, Validation {X_va.shape}")

Using embedding model: ../models/e5-base-v2
try: ../models/e5-base-v2
loaded model from: ../models/e5-base-v2
Encoding texts


1/225 | time: 2.09s

2/225 | time: 2.08s

3/225 | time: 2.10s

4/225 | time: 2.12s

5/225 | time: 2.12s

6/225 | time: 2.10s

7/225 | time: 2.09s

8/225 | time: 2.11s

9/225 | time: 2.09s

10/225 | time: 2.12s

11/225 | time: 2.14s

12/225 | time: 2.13s

13/225 | time: 2.09s

14/225 | time: 2.12s

15/225 | time: 2.10s

16/225 | time: 2.10s

17/225 | time: 2.13s

18/225 | time: 2.14s

19/225 | time: 2.11s

20/225 | time: 2.13s

21/225 | time: 2.10s

22/225 | time: 2.10s

23/225 | time: 2.10s

24/225 | time: 2.10s

25/225 | time: 2.10s

26/225 | time: 2.12s

27/225 | time: 2.12s

28/225 | time: 2.10s

29/225 | time: 2.09s

30/225 | time: 2.12s

31/225 | time: 2.13s

32/225 | time: 2.09s

33/225 | time: 2.10s

34/225 | time: 2.10s

35/225 | time: 2.14s

36/225 | time: 2.10s

37/225 | time: 2.12s

38/225 | time: 2.10s

39/225 | time: 2.10s

40/225 | time: 2.10s

41/225 | time: 2.10s

42/225 | time: 2.13s

43/225 | time: 2.10s

44/225 | time: 2.10s

45/225 | time: 2.10s

46/225 | time: 2.12s

47/225 | time: 2.10s

48/225 | time: 2.09s

49/225 | time: 2.12s

50/225 | time: 2.12s

51/225 | time: 2.13s

52/225 | time: 2.10s

53/225 | time: 2.13s

54/225 | time: 2.10s

55/225 | time: 2.10s

56/225 | time: 2.10s

57/225 | time: 2.12s

58/225 | time: 2.12s

59/225 | time: 2.12s

60/225 | time: 2.11s

61/225 | time: 2.10s

62/225 | time: 2.11s

63/225 | time: 2.10s

64/225 | time: 2.13s

65/225 | time: 2.10s

66/225 | time: 2.12s

67/225 | time: 2.10s

68/225 | time: 2.10s

69/225 | time: 2.13s

70/225 | time: 2.10s

71/225 | time: 2.10s

72/225 | time: 2.10s

73/225 | time: 2.12s

74/225 | time: 2.10s

75/225 | time: 2.10s

76/225 | time: 2.10s

77/225 | time: 2.11s

78/225 | time: 2.10s

79/225 | time: 2.13s

80/225 | time: 2.10s

81/225 | time: 2.10s

82/225 | time: 2.13s

83/225 | time: 2.10s

84/225 | time: 2.10s

85/225 | time: 2.10s

86/225 | time: 2.09s

87/225 | time: 2.10s

88/225 | time: 2.10s

89/225 | time: 2.10s

90/225 | time: 2.10s

91/225 | time: 2.13s

92/225 | time: 2.13s

93/225 | time: 2.13s

94/225 | time: 2.10s

95/225 | time: 2.13s

96/225 | time: 2.10s

97/225 | time: 2.10s

98/225 | time: 2.13s

99/225 | time: 2.10s

100/225 | time: 2.10s

101/225 | time: 2.11s

102/225 | time: 2.09s

103/225 | time: 2.10s

104/225 | time: 2.10s

105/225 | time: 2.12s

106/225 | time: 2.12s

107/225 | time: 2.09s

108/225 | time: 2.11s

109/225 | time: 2.10s

110/225 | time: 2.10s

111/225 | time: 2.10s

112/225 | time: 2.13s

113/225 | time: 2.10s

114/225 | time: 2.10s

115/225 | time: 2.10s

116/225 | time: 2.10s

117/225 | time: 2.12s

118/225 | time: 2.10s

119/225 | time: 2.13s

120/225 | time: 2.12s

121/225 | time: 2.10s

122/225 | time: 2.13s

123/225 | time: 2.10s

124/225 | time: 2.12s

125/225 | time: 2.13s

126/225 | time: 2.10s

127/225 | time: 2.10s

128/225 | time: 2.09s

129/225 | time: 2.12s

130/225 | time: 2.10s

131/225 | time: 2.12s

132/225 | time: 2.10s

133/225 | time: 2.10s

134/225 | time: 2.13s

135/225 | time: 2.14s

136/225 | time: 2.10s

137/225 | time: 2.10s

138/225 | time: 2.10s

139/225 | time: 2.10s

140/225 | time: 2.10s

141/225 | time: 2.10s

142/225 | time: 2.12s

143/225 | time: 2.12s

144/225 | time: 2.10s

145/225 | time: 2.10s

146/225 | time: 2.12s

147/225 | time: 2.10s

148/225 | time: 2.10s

149/225 | time: 2.10s

150/225 | time: 2.13s

151/225 | time: 2.10s

152/225 | time: 2.10s

153/225 | time: 2.10s

154/225 | time: 2.13s

155/225 | time: 2.10s

156/225 | time: 2.09s

157/225 | time: 2.10s

158/225 | time: 2.09s

159/225 | time: 2.11s

160/225 | time: 2.11s

161/225 | time: 2.11s

162/225 | time: 2.12s

163/225 | time: 2.10s

164/225 | time: 2.11s

165/225 | time: 2.10s

166/225 | time: 2.10s

167/225 | time: 2.10s

168/225 | time: 2.10s

169/225 | time: 2.10s

170/225 | time: 2.10s

171/225 | time: 2.11s

172/225 | time: 2.10s

173/225 | time: 2.12s

174/225 | time: 2.12s

175/225 | time: 2.10s

176/225 | time: 2.10s

177/225 | time: 2.11s

178/225 | time: 2.12s

179/225 | time: 2.11s

180/225 | time: 2.13s

181/225 | time: 2.13s

182/225 | time: 2.10s

183/225 | time: 2.10s

184/225 | time: 2.10s

185/225 | time: 2.10s

186/225 | time: 2.13s

187/225 | time: 2.13s

188/225 | time: 2.18s

189/225 | time: 2.14s

190/225 | time: 2.10s

191/225 | time: 2.10s

192/225 | time: 2.10s

193/225 | time: 2.10s

194/225 | time: 2.10s

195/225 | time: 2.10s

196/225 | time: 2.12s

197/225 | time: 2.10s

198/225 | time: 2.10s

199/225 | time: 2.13s

200/225 | time: 2.10s

201/225 | time: 2.11s

202/225 | time: 2.10s

203/225 | time: 2.13s

204/225 | time: 2.13s

205/225 | time: 2.14s

206/225 | time: 2.10s

207/225 | time: 2.10s

208/225 | time: 2.10s

209/225 | time: 2.10s

210/225 | time: 2.12s

211/225 | time: 2.10s

212/225 | time: 2.10s

213/225 | time: 2.13s

214/225 | time: 2.10s

215/225 | time: 2.10s

216/225 | time: 2.10s

217/225 | time: 2.10s

218/225 | time: 2.11s

219/225 | time: 2.10s

220/225 | time: 2.10s

221/225 | time: 2.10s

222/225 | time: 2.11s

223/225 | time: 2.11s

224/225 | time: 2.10s

225/225 | time: 1.12s

Prompt encoding complete.


1/225 | time: 2.13s

2/225 | time: 2.13s

3/225 | time: 2.12s

4/225 | time: 2.12s

5/225 | time: 2.14s

6/225 | time: 2.15s

7/225 | time: 2.12s

8/225 | time: 2.12s

9/225 | time: 2.15s

10/225 | time: 2.12s

11/225 | time: 2.13s

12/225 | time: 2.13s

13/225 | time: 2.14s

14/225 | time: 2.12s

15/225 | time: 2.12s

16/225 | time: 2.12s

17/225 | time: 2.12s

18/225 | time: 2.16s

19/225 | time: 2.15s

20/225 | time: 2.15s

21/225 | time: 2.15s

22/225 | time: 2.12s

23/225 | time: 2.12s

24/225 | time: 2.12s

25/225 | time: 2.15s

26/225 | time: 2.12s

27/225 | time: 2.12s

28/225 | time: 2.14s

29/225 | time: 2.15s

30/225 | time: 2.12s

31/225 | time: 2.12s

32/225 | time: 2.12s

33/225 | time: 2.12s

34/225 | time: 2.12s

35/225 | time: 2.12s

36/225 | time: 2.12s

37/225 | time: 2.16s

38/225 | time: 2.12s

39/225 | time: 2.12s

40/225 | time: 2.13s

41/225 | time: 2.14s

42/225 | time: 2.15s

43/225 | time: 2.13s

44/225 | time: 2.15s

45/225 | time: 2.16s

46/225 | time: 2.13s

47/225 | time: 2.15s

48/225 | time: 2.12s

49/225 | time: 2.13s

50/225 | time: 2.12s

51/225 | time: 2.12s

52/225 | time: 2.12s

53/225 | time: 2.13s

54/225 | time: 2.15s

55/225 | time: 2.12s

56/225 | time: 2.12s

57/225 | time: 2.13s

58/225 | time: 2.12s

59/225 | time: 2.15s

60/225 | time: 2.14s

61/225 | time: 2.13s

62/225 | time: 2.15s

63/225 | time: 2.12s

64/225 | time: 2.11s

65/225 | time: 2.12s

66/225 | time: 2.12s

67/225 | time: 2.12s

68/225 | time: 2.14s

69/225 | time: 2.12s

70/225 | time: 2.13s

71/225 | time: 2.13s

72/225 | time: 2.13s

73/225 | time: 2.15s

74/225 | time: 2.15s

75/225 | time: 2.12s

76/225 | time: 2.15s

77/225 | time: 2.13s

78/225 | time: 2.15s

79/225 | time: 2.15s

80/225 | time: 2.13s

81/225 | time: 2.12s

82/225 | time: 2.15s

83/225 | time: 2.13s

84/225 | time: 2.15s

85/225 | time: 2.13s

86/225 | time: 2.12s

87/225 | time: 2.12s

88/225 | time: 2.15s

89/225 | time: 2.16s

90/225 | time: 2.14s

91/225 | time: 2.12s

92/225 | time: 2.13s

93/225 | time: 2.12s

94/225 | time: 2.14s

95/225 | time: 2.12s

96/225 | time: 2.12s

97/225 | time: 2.12s

98/225 | time: 2.13s

99/225 | time: 2.14s

100/225 | time: 2.12s

101/225 | time: 2.12s

102/225 | time: 2.12s

103/225 | time: 2.12s

104/225 | time: 2.13s

105/225 | time: 2.14s

106/225 | time: 2.15s

107/225 | time: 2.12s

108/225 | time: 2.12s

109/225 | time: 2.12s

110/225 | time: 2.12s

111/225 | time: 2.13s

112/225 | time: 2.13s

113/225 | time: 2.13s

114/225 | time: 2.15s

115/225 | time: 2.12s

116/225 | time: 2.12s

117/225 | time: 2.12s

118/225 | time: 2.12s

119/225 | time: 2.14s

120/225 | time: 2.12s

121/225 | time: 2.12s

122/225 | time: 2.15s

123/225 | time: 2.13s

124/225 | time: 2.13s

125/225 | time: 2.12s

126/225 | time: 2.13s

127/225 | time: 2.15s

128/225 | time: 2.16s

129/225 | time: 2.15s

130/225 | time: 2.13s

131/225 | time: 2.12s

132/225 | time: 2.15s

133/225 | time: 2.13s

134/225 | time: 2.13s

135/225 | time: 2.13s

136/225 | time: 2.12s

137/225 | time: 2.13s

138/225 | time: 2.15s

139/225 | time: 2.13s

140/225 | time: 2.13s

141/225 | time: 2.12s

142/225 | time: 2.15s

143/225 | time: 2.12s

144/225 | time: 2.15s

145/225 | time: 2.13s

146/225 | time: 2.15s

147/225 | time: 2.12s

148/225 | time: 2.12s

149/225 | time: 2.12s

150/225 | time: 2.13s

151/225 | time: 2.12s

152/225 | time: 2.12s

153/225 | time: 2.15s

154/225 | time: 2.12s

155/225 | time: 2.12s

156/225 | time: 2.12s

157/225 | time: 2.16s

158/225 | time: 2.18s

159/225 | time: 2.13s

160/225 | time: 2.12s

161/225 | time: 2.13s

162/225 | time: 2.15s

163/225 | time: 2.12s

164/225 | time: 2.12s

165/225 | time: 2.13s

166/225 | time: 2.15s

167/225 | time: 2.12s

168/225 | time: 2.13s

169/225 | time: 2.12s

170/225 | time: 2.16s

171/225 | time: 2.12s

172/225 | time: 2.12s

173/225 | time: 2.15s

174/225 | time: 2.15s

175/225 | time: 2.16s

176/225 | time: 2.12s

177/225 | time: 2.12s

178/225 | time: 2.15s

179/225 | time: 2.13s

180/225 | time: 2.15s

181/225 | time: 2.15s

182/225 | time: 2.12s

183/225 | time: 2.12s

184/225 | time: 2.14s

185/225 | time: 2.15s

186/225 | time: 2.14s

187/225 | time: 2.12s

188/225 | time: 2.13s

189/225 | time: 2.13s

190/225 | time: 2.14s

191/225 | time: 2.12s

192/225 | time: 2.14s

193/225 | time: 2.15s

194/225 | time: 2.15s

195/225 | time: 2.13s

196/225 | time: 2.12s

197/225 | time: 2.13s

198/225 | time: 2.15s

199/225 | time: 2.12s

200/225 | time: 2.12s

201/225 | time: 2.15s

202/225 | time: 2.12s

203/225 | time: 2.12s

204/225 | time: 2.13s

205/225 | time: 2.13s

206/225 | time: 2.12s

207/225 | time: 2.15s

208/225 | time: 2.15s

209/225 | time: 2.15s

210/225 | time: 2.12s

211/225 | time: 2.12s

212/225 | time: 2.15s

213/225 | time: 2.12s

214/225 | time: 2.12s

215/225 | time: 2.12s

216/225 | time: 2.12s

217/225 | time: 2.12s

218/225 | time: 2.15s

219/225 | time: 2.13s

220/225 | time: 2.12s

221/225 | time: 2.13s

222/225 | time: 2.12s

223/225 | time: 2.15s

224/225 | time: 2.16s

225/225 | time: 1.12s

Response A encoding complete.


1/225 | time: 2.12s

2/225 | time: 2.13s

3/225 | time: 2.12s

4/225 | time: 2.12s

5/225 | time: 2.15s

6/225 | time: 2.12s

7/225 | time: 2.14s

8/225 | time: 2.19s

9/225 | time: 2.14s

10/225 | time: 2.14s

11/225 | time: 2.12s

12/225 | time: 2.12s

13/225 | time: 2.11s

14/225 | time: 2.12s

15/225 | time: 2.13s

16/225 | time: 2.12s

17/225 | time: 2.12s

18/225 | time: 2.12s

19/225 | time: 2.15s

20/225 | time: 2.15s

21/225 | time: 2.12s

22/225 | time: 2.13s

23/225 | time: 2.12s

24/225 | time: 2.12s

25/225 | time: 2.13s

26/225 | time: 2.12s

27/225 | time: 2.13s

28/225 | time: 2.13s

29/225 | time: 2.12s

30/225 | time: 2.15s

31/225 | time: 2.15s

32/225 | time: 2.13s

33/225 | time: 2.13s

34/225 | time: 2.12s

35/225 | time: 2.15s

36/225 | time: 2.14s

37/225 | time: 2.13s

38/225 | time: 2.12s

39/225 | time: 2.12s

40/225 | time: 2.13s

41/225 | time: 2.12s

42/225 | time: 2.15s

43/225 | time: 2.16s

44/225 | time: 2.12s

45/225 | time: 2.15s

46/225 | time: 2.12s

47/225 | time: 2.15s

48/225 | time: 2.13s

49/225 | time: 2.16s

50/225 | time: 2.13s

51/225 | time: 2.13s

52/225 | time: 2.15s

53/225 | time: 2.13s

54/225 | time: 2.13s

55/225 | time: 2.12s

56/225 | time: 2.12s

57/225 | time: 2.12s

58/225 | time: 2.12s

59/225 | time: 2.15s

60/225 | time: 2.13s

61/225 | time: 2.12s

62/225 | time: 2.13s

63/225 | time: 2.37s

64/225 | time: 2.15s

65/225 | time: 2.12s

66/225 | time: 2.15s

67/225 | time: 2.12s

68/225 | time: 2.16s

69/225 | time: 2.16s

70/225 | time: 2.15s

71/225 | time: 2.12s

72/225 | time: 2.12s

73/225 | time: 2.12s

74/225 | time: 2.12s

75/225 | time: 2.12s

76/225 | time: 2.12s

77/225 | time: 2.12s

78/225 | time: 2.13s

79/225 | time: 2.12s

80/225 | time: 2.12s

81/225 | time: 2.12s

82/225 | time: 2.12s

83/225 | time: 2.12s

84/225 | time: 2.13s

85/225 | time: 2.16s

86/225 | time: 2.13s

87/225 | time: 2.15s

88/225 | time: 2.15s

89/225 | time: 2.12s

90/225 | time: 2.15s

91/225 | time: 2.13s

92/225 | time: 2.15s

93/225 | time: 2.11s

94/225 | time: 2.12s

95/225 | time: 2.12s

96/225 | time: 2.13s

97/225 | time: 2.12s

98/225 | time: 2.15s

99/225 | time: 2.13s

100/225 | time: 2.12s

101/225 | time: 2.13s

102/225 | time: 2.13s

103/225 | time: 2.13s

104/225 | time: 2.15s

105/225 | time: 2.15s

106/225 | time: 2.13s

107/225 | time: 2.13s

108/225 | time: 2.14s

109/225 | time: 2.13s

110/225 | time: 2.11s

111/225 | time: 2.19s

112/225 | time: 2.12s

113/225 | time: 2.12s

114/225 | time: 2.12s

115/225 | time: 2.14s

116/225 | time: 2.13s

117/225 | time: 2.12s

118/225 | time: 2.12s

119/225 | time: 2.12s

120/225 | time: 2.12s

121/225 | time: 2.15s

122/225 | time: 2.15s

123/225 | time: 2.16s

124/225 | time: 2.14s

125/225 | time: 2.12s

126/225 | time: 2.15s

127/225 | time: 2.15s

128/225 | time: 2.14s

129/225 | time: 2.15s

130/225 | time: 2.12s

131/225 | time: 2.15s

132/225 | time: 2.12s

133/225 | time: 2.12s

134/225 | time: 2.16s

135/225 | time: 2.12s

136/225 | time: 2.12s

137/225 | time: 2.12s

138/225 | time: 2.15s

139/225 | time: 2.13s

140/225 | time: 2.16s

141/225 | time: 2.12s

142/225 | time: 2.15s

143/225 | time: 2.12s

144/225 | time: 2.12s

145/225 | time: 2.12s

146/225 | time: 2.12s

147/225 | time: 2.13s

148/225 | time: 2.13s

149/225 | time: 2.12s

150/225 | time: 2.16s

151/225 | time: 2.13s

152/225 | time: 2.13s

153/225 | time: 2.15s

154/225 | time: 2.15s

155/225 | time: 2.13s

156/225 | time: 2.15s

157/225 | time: 2.12s

158/225 | time: 2.12s

159/225 | time: 2.12s

160/225 | time: 2.15s

161/225 | time: 2.15s

162/225 | time: 2.15s

163/225 | time: 2.12s

164/225 | time: 2.12s

165/225 | time: 2.15s

166/225 | time: 2.15s

167/225 | time: 2.15s

168/225 | time: 2.13s

169/225 | time: 2.12s

170/225 | time: 2.13s

171/225 | time: 2.12s

172/225 | time: 2.12s

173/225 | time: 2.12s

174/225 | time: 2.12s

175/225 | time: 2.15s

176/225 | time: 2.12s

177/225 | time: 2.15s

178/225 | time: 2.12s

179/225 | time: 2.13s

180/225 | time: 2.13s

181/225 | time: 2.17s

182/225 | time: 2.15s

183/225 | time: 2.13s

184/225 | time: 2.17s

185/225 | time: 2.12s

186/225 | time: 2.15s

187/225 | time: 2.12s

188/225 | time: 2.12s

189/225 | time: 2.12s

190/225 | time: 2.12s

191/225 | time: 2.15s

192/225 | time: 2.16s

193/225 | time: 2.12s

194/225 | time: 2.13s

195/225 | time: 2.15s

196/225 | time: 2.13s

197/225 | time: 2.13s

198/225 | time: 2.12s

199/225 | time: 2.13s

200/225 | time: 2.13s

201/225 | time: 2.14s

202/225 | time: 2.12s

203/225 | time: 2.17s

204/225 | time: 2.15s

205/225 | time: 2.16s

206/225 | time: 2.12s

207/225 | time: 2.14s

208/225 | time: 2.13s

209/225 | time: 2.15s

210/225 | time: 2.15s

211/225 | time: 2.12s

212/225 | time: 2.12s

213/225 | time: 2.12s

214/225 | time: 2.12s

215/225 | time: 2.12s

216/225 | time: 2.12s

217/225 | time: 2.12s

218/225 | time: 2.13s

219/225 | time: 2.13s

220/225 | time: 2.12s

221/225 | time: 2.13s

222/225 | time: 2.14s

223/225 | time: 2.12s

224/225 | time: 2.12s

225/225 | time: 1.12s

Response B encoding complete.


Encoding test data...
1/1 | time: 0.01s

1/1 | time: 0.03s

1/1 | time: 0.03s

Encoding complete for test data.
Feature extraction complete. Time taken: 1433.56s
Train features shape (X_c3): (57477, 6144)
Test features shape (X_test_c3): (3, 6144)


Data split into: Train (45981, 6144), Validation (11496, 6144)


In [13]:

# Handle NaNs/Infs for safety
X_lex_strong = X_lex_strong.fillna(0).replace([np.inf, -np.inf], 0)
X_test_lex_strong = X_test_lex_strong.fillna(0).replace([np.inf, -np.inf], 0)

# Convert embedding features (numpy) to DataFrame for safe processing
X_c3_safe = pd.DataFrame(X_c3).fillna(0).replace([np.inf, -np.inf], 0).values
X_test_c3_safe = pd.DataFrame(X_test_c3).fillna(0).replace([np.inf, -np.inf], 0).values

print("Combining lexical and embedding features...")
X_c3 = np.hstack([X_lex_strong, X_c3_safe])
X_test_c3 = np.hstack([X_test_lex_strong, X_test_c3_safe])
print(f"Combined train features shape (X_c3): {X_c3.shape}")
print(f"Combined test features shape (X_test_c3): {X_test_c3.shape}")

X_tr, X_va, y_tr, y_va = train_test_split(X_c3, y, test_size=val_size, stratify=y, random_state=random_state)

print("Scaling features...")
scaler_c3 = StandardScaler()
X_tr_sc = scaler_c3.fit_transform(X_tr)
X_va_sc = scaler_c3.transform(X_va)

# Train MLP Classifier
print("Training MLPClassifier...")
start_time = time.time()

clf_c3 = MLPClassifier(
    hidden_layer_sizes=(512, 256),
    activation="relu",
    solver="adam",
    alpha=1e-4,          # L2 regularization
    batch_size=512,
    learning_rate_init=1e-3,
    max_iter=100,        
    early_stopping=True,
    n_iter_no_change=5,
    random_state=random_state,
    verbose=True
)

clf_c3.fit(X_tr_sc, y_tr)

va_pred = clf_c3.predict_proba(X_va_sc)
val_logloss = log_loss(y_va, va_pred)

print(f"Training complete. Time taken: {time.time() - start_time:.2f}s")
print(f"--- MLP (All Features) Validation LogLoss: {val_logloss:.6f} ---")

CANDIDATE_3_MODEL_SAVE_PATH = "../models/candidate_3_mlp.pkl"
CANDIDATE_3_SCALER_SAVE_PATH = "../models/candidate_3_scaler.pkl"

joblib.dump(clf_c3, CANDIDATE_3_MODEL_SAVE_PATH)
joblib.dump(scaler_c3, CANDIDATE_3_SCALER_SAVE_PATH)

print(f"MLP model saved to: {CANDIDATE_3_MODEL_SAVE_PATH}")
print(f"Scaler saved to: {CANDIDATE_3_SCALER_SAVE_PATH}")


Combining lexical and embedding features...


Combined train features shape (X_c3): (57477, 6174)
Combined test features shape (X_test_c3): (3, 6174)


Scaling features...


Training MLPClassifier...


Iteration 1, loss = 1.12600698
Validation score: 0.463796


Iteration 2, loss = 0.82086238
Validation score: 0.447271


Iteration 3, loss = 0.53996426
Validation score: 0.430093


Iteration 4, loss = 0.26805010
Validation score: 0.429441


Iteration 5, loss = 0.12704382
Validation score: 0.448141


Iteration 6, loss = 0.06509657
Validation score: 0.437486


Iteration 7, loss = 0.04185474
Validation score: 0.436834
Validation score did not improve more than tol=0.000100 for 5 consecutive epochs. Stopping.
Training complete. Time taken: 47.07s
--- MLP (All Features) Validation LogLoss: 1.048909 ---


MLP model saved to: ../models/candidate_3_mlp.pkl
Scaler saved to: ../models/candidate_3_scaler.pkl


In [5]:
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import log_loss
import numpy as np

print("\n=== Candidate 3 (MLP All Features): Calibration ===")

# Get validation predictions BEFORE calibration
# We need to recreate the train/val split from earlier
X_tr_c3, X_va_c3, y_tr_c3, y_va_c3 = train_test_split(
    X_c3, y, test_size=val_size, stratify=y, random_state=random_state
)

# Scale validation data
scaler_c3_val = StandardScaler()
X_tr_sc_c3 = scaler_c3_val.fit_transform(X_tr_c3)
X_va_sc_c3 = scaler_c3_val.transform(X_va_c3)

# Get predictions from the validation model (clf_c3 trained on X_tr, X_va split)
if 'clf_c3' in locals() and clf_c3 is not None:
    va_pred_before = clf_c3.predict_proba(X_va_sc_c3)
    logloss_before = log_loss(y_va_c3, va_pred_before)
    print(f"Validation LogLoss BEFORE Calibration: {logloss_before:.6f}")
    
    # Apply calibration (using isotonic regression)
    print("Applying Isotonic calibration...")
    calibrated_model_c3 = CalibratedClassifierCV(
        clf_c3,
        method='isotonic',  # isotonic or sigmoid
        cv='prefit',  # Model is already fitted
        ensemble=False
    )
    
    # Fit calibration on validation set
    calibrated_model_c3.fit(X_va_sc_c3, y_va_c3)
    
    # Get calibrated predictions on validation set
    va_pred_after = calibrated_model_c3.predict_proba(X_va_sc_c3)
    logloss_after = log_loss(y_va_c3, va_pred_after)
    print(f"Validation LogLoss AFTER Calibration: {logloss_after:.6f}")
    print(f"Improvement: {logloss_before - logloss_after:.6f}")
    
    # Now retrain on full data and apply calibration
    print("\nRetraining on full data for final submission...")
    0
    X_train_full, X_cal, y_train_full, y_cal = train_test_split(
        X_c3, y, test_size=val_size, stratify=y, random_state=random_state
    )
    
    # Scale
    scaler_final_c3 = StandardScaler()
    X_train_full_sc = scaler_final_c3.fit_transform(X_train_full)
    X_cal_sc = scaler_final_c3.transform(X_cal)
    X_test_final_sc = scaler_final_c3.transform(X_test_c3)
    
    # Train MLP
    clf_c3_for_calib = MLPClassifier(
        hidden_layer_sizes=(512, 256), activation="relu", solver="adam",
        alpha=1e-4, batch_size=512, learning_rate_init=1e-3,
        max_iter=100, early_stopping=True, n_iter_no_change=5,
        random_state=random_state, verbose=False
    )
    clf_c3_for_calib.fit(X_train_full_sc, y_train_full)
    
    # Apply calibration on the hold-out 20%
    calibrated_final_c3 = CalibratedClassifierCV(
        clf_c3_for_calib,
        method='isotonic',
        cv='prefit',
        ensemble=False
    )
    calibrated_final_c3.fit(X_cal_sc, y_cal)
    
    C3_CALIBRATED_MODEL_PATH = "../models/candidate_3_MLP_CALIBRATED.pkl"
    C3_FINAL_SCALER_PATH = "../models/candidate_3_scaler_final.pkl"
    joblib.dump(calibrated_final_c3, C3_CALIBRATED_MODEL_PATH)
    joblib.dump(scaler_final_c3, C3_FINAL_SCALER_PATH)
    print(f"Candidate 3 Calibrated Model SAVED to {C3_CALIBRATED_MODEL_PATH}")
    print(f"Candidate 3 Final Scaler SAVED to {C3_FINAL_SCALER_PATH}")
    
    print(f"\n=== Candidate 3 Summary ===")
    print(f"Before Calibration - Val LogLoss: {logloss_before:.6f}")
    print(f"After Calibration  - Val LogLoss: {logloss_after:.6f}")
else:
    print("Error: clf_c3 model not found. Please run the training cell first.")


=== Candidate 3 (MLP All Features): Calibration ===


NameError: name 'train_test_split' is not defined

In [6]:
import joblib
import numpy as np
import pandas as pd
import torch
from scipy.special import softmax
from datasets import Dataset
from transformers import (
    AutoModelForSequenceClassification, 
    AutoTokenizer, 
    DataCollatorWithPadding, 
    Trainer, 
    TrainingArguments
)
from peft import PeftModel


KAGGLE_MODEL_DIR = "../models"
KAGGLE_DATA_DIR = "../datasets"
BASE_DIR = KAGGLE_MODEL_DIR

try:
    # === 1. Candidate 1 (DeBERTa) ===
    print("\n[C1] Generating full training predictions for DeBERTa...")
    MODEL_NAME_C1 = "deberta-v3-base"
    LORA_ADAPTER_DIR = f"{BASE_DIR}/lora_adapter_{MODEL_NAME_C1}"
    C1_CALIBRATOR_PATH = f"{BASE_DIR}/candidate_1_calibrators.pkl"
    max_length = 512

    # # 1.1. 훈련된 모델과 토크나이저 로드 (01_local_training_output.ipynb의 결과물)
    # print("   [C1] Loading base model, LoRA adapter, and tokenizer...")
    # base_model_path = f"{BASE_DIR}/{MODEL_NAME_C1}"
    # base_model = AutoModelForSequenceClassification.from_pretrained(base_model_path, num_labels=3, local_files_only=True)
    # peft_model = PeftModel.from_pretrained(base_model, LORA_ADAPTER_DIR)
    # peft_model.to(device)
    # peft_model.eval()
    # tokenizer_c1 = AutoTokenizer.from_pretrained(LORA_ADAPTER_DIR, local_files_only=True)

    # # 1.2. C1용 전처리 함수 (02_kaggle_inference.ipynb의 C1 로드 로직과 동일)
    # def preprocess_test_function_for_train(examples):
    #     response_pair = [f"A: {a} {tokenizer_c1.sep_token} B: {b}" for a, b in zip(examples['response_a'], examples['response_b'])]
    #     return tokenizer_c1(examples['prompt'], response_pair, max_length=max_length, truncation=True, padding='max_length')

    # # 1.3. *전체 train* 데이터를 토큰화
    # print("   [C1] Tokenizing full train dataset...")
    # full_train_dataset_c1 = Dataset.from_pandas(train)
    # tokenized_full_train_dataset_c1 = full_train_dataset_c1.map(
    #     preprocess_test_function_for_train, 
    #     batched=True, 
    #     remove_columns=train.columns.tolist()
    # )
    # tokenized_full_train_dataset_c1.set_format("torch")

    # # 1.4. 예측 수행 (Trainer 사용)
    # print("   [C1] Predicting on full train dataset...")
    # data_collator = DataCollatorWithPadding(tokenizer=tokenizer_c1)
    # trainer_args = TrainingArguments(output_dir="./temp_trainer", per_device_eval_batch_size=8, report_to="none")
    # trainer = Trainer(model=peft_model, args=trainer_args, data_collator=data_collator)
    
    # from torch.utils.data import DataLoader
    # dl = DataLoader(tokenized_full_train_dataset_c1, batch_size=8, collate_fn=data_collator)
    # all_logits_c1 = []
    # with torch.no_grad():
    #     for batch in dl:
    #         batch = {k: v.to(device) for k, v in batch.items()}
    #         outputs = peft_model(**batch)
    #         all_logits_c1.append(outputs.logits.cpu().numpy())
            
    # full_logits_c1 = np.vstack(all_logits_c1)
    # full_probs_c1_uncalibrated = softmax(full_logits_c1, axis=1)

    # # 1.5. 보정기(Calibrator) 로드 및 적용 (01_local_training_output.ipynb의 결과물)
    # print("   [C1] Calibrating predictions...")
    # calibrators_c1 = joblib.load(C1_CALIBRATOR_PATH)
    # full_train_pred_c1 = np.zeros_like(full_probs_c1_uncalibrated)
    # for class_idx in range(3):
    #     full_train_pred_c1[:, class_idx] = calibrators_c1[class_idx].predict(full_probs_c1_uncalibrated[:, class_idx])
    
    # row_sums_c1 = full_train_pred_c1.sum(axis=1, keepdims=True)
    # full_train_pred_c1 = full_train_pred_c1 / np.clip(row_sums_c1, 1e-15, None)
    
    # # 1.6. 저장
    # joblib.dump(full_train_pred_c1, f'{BASE_DIR}/full_train_pred_c1.pkl')
    # print(f"   ✅ Saved C1 predictions: {BASE_DIR}/full_train_pred_c1.pkl")
    # del peft_model, base_model, trainer, full_train_pred_c1 # 메모리 확보

    # === 2. C2/C3용 임베딩 (공통) ===
    print("\n[C2/C3] Generating full training embeddings (e5-base-v2)...")
    MODEL_NAME_C2_C3 = "e5-base-v2"
    C2_C3_MODEL_PATH = f"{BASE_DIR}/{MODEL_NAME_C2_C3}"
    
    sbert_model, _ = load_model([C2_C3_MODEL_PATH], idx=0, device=device)
    prompt_emb_full = encode_texts(sbert_model, train["prompt"])
    a_emb_full = encode_texts(sbert_model, train["response_a"])
    b_emb_full = encode_texts(sbert_model, train["response_b"])
    
    # 공통 임베딩 피처 생성
    X_emb_full = build_feat(prompt_emb_full, a_emb_full, b_emb_full)
    print("   [C2/C3] Embedding features generated.")
    del sbert_model, prompt_emb_full, a_emb_full, b_emb_full # 메모리 확보
    if device == 'cuda': torch.cuda.empty_cache()

    # === 3. Candidate 2 (XGBoost) ===
    print("\n[C2] Generating full training predictions for XGBoost...")
    GBM_CHOICE = "XGBOOST" # 01_local_training_output.ipynb에서 XGBoost를 사용함
    C2_CALIBRATED_MODEL_PATH = f"{BASE_DIR}/candidate_2_{GBM_CHOICE}_{MODEL_NAME_C2_C3}_CALIBRATED.pkl"

    # 3.1. 훈련된 보정 모델 로드 (01_local_training_output.ipynb의 결과물)
    calibrated_final_c2 = joblib.load(C2_CALIBRATED_MODEL_PATH)
    
    # 3.2. 예측 및 저장 (X_emb_full은 C2 피처와 동일)
    full_train_pred_c2 = calibrated_final_c2.predict_proba(X_emb_full)
    joblib.dump(full_train_pred_c2, f'{BASE_DIR}/full_train_pred_c2.pkl')
    print(f"   ✅ Saved C2 predictions: {BASE_DIR}/full_train_pred_c2.pkl")
    del calibrated_final_c2, full_train_pred_c2 # 메모리 확보

    # === 4. Candidate 3 (MLP) ===
    print("\n[C3] Generating full training predictions for MLP...")
    C3_CALIBRATED_MODEL_PATH = f"{BASE_DIR}/candidate_3_MLP_CALIBRATED.pkl"
    C3_FINAL_SCALER_PATH = f"{BASE_DIR}/candidate_3_scaler_final.pkl"

    # 4.1. 훈련된 보정 모델과 스케일러 로드 (01_local_training_output.ipynb의 결과물)
    calibrated_final_c3 = joblib.load(C3_CALIBRATED_MODEL_PATH)
    scaler_final_c3 = joblib.load(C3_FINAL_SCALER_PATH)

    # 4.2. 어휘 피처 생성
    print("   [C3] Building strong lexical features...")
    X_lex_full = build_strong_lexical_features(train)

    # 4.3. 피처 결합 및 스케일링
    X_lex_full = X_lex_full.fillna(0).replace([np.inf, -np.inf], 0)
    X_emb_full_safe = pd.DataFrame(X_emb_full).fillna(0).replace([np.inf, -np.inf], 0).values
    X_c3_full = np.hstack([X_lex_full, X_emb_full_safe])
    
    print("   [C3] Scaling full features...")
    X_c3_full_scaled = scaler_final_c3.transform(X_c3_full)

    # 4.4. 예측 및 저장
    print("   [C3] Predicting with calibrated MLP...")
    full_train_pred_c3 = calibrated_final_c3.predict_proba(X_c3_full_scaled)
    joblib.dump(full_train_pred_c3, f'{BASE_DIR}/full_train_pred_c3.pkl')
    print(f"   ✅ Saved C3 predictions: {BASE_DIR}/full_train_pred_c3.pkl")
    del calibrated_final_c3, scaler_final_c3, X_c3_full, X_c3_full_scaled, full_train_pred_c3 # 메모리 확보

    print("\n--- All full training predictions saved successfully! ---")

except FileNotFoundError as e:
    print(f"\n❌ ERROR: A required model file was not found: {e}")
    print("   Please ensure all models (.pkl, adapters) from '01_local_training_output' exist in the '{BASE_DIR}' directory.")
except NameError as e:
    print(f"\n❌ ERROR: A required function/variable was not found: {e}")
    print("   Please ensure all previous cells in '02_kaggle_inference.ipynb' were run.")
except Exception as e:
    print(f"\n❌ An unexpected error occurred: {e}")


[C1] Generating full training predictions for DeBERTa...

[C2/C3] Generating full training embeddings (e5-base-v2)...
try: ../models/e5-base-v2
loaded model from: ../models/e5-base-v2
   [C2/C3] Embedding features generated.

[C2] Generating full training predictions for XGBoost...


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)


   ✅ Saved C2 predictions: ../models/full_train_pred_c2.pkl

[C3] Generating full training predictions for MLP...
   [C3] Building strong lexical features...
   [C3] Scaling full features...
   [C3] Predicting with calibrated MLP...
   ✅ Saved C3 predictions: ../models/full_train_pred_c3.pkl

--- All full training predictions saved successfully! ---


In [7]:
from sklearn.linear_model import LogisticRegression
import joblib
import numpy as np

ENSEMBLE_MODEL_PATH = f"{BASE_DIR}/meta_learner_stacked.pkl"

try:
    # Load FULL TRAIN SET predictions for C1, C2, C3
    full_train_pred_c1 = joblib.load(f'{BASE_DIR}/full_train_pred_c1.pkl')
    full_train_pred_c2 = joblib.load(f'{BASE_DIR}/full_train_pred_c2.pkl')
    full_train_pred_c3 = joblib.load(f'{BASE_DIR}/full_train_pred_c3.pkl')
    
    # 'y' should be loaded from the 'data_load' cell at the top of the notebook
    if 'y' not in locals():
        print("ERROR: 'y' (full training labels) not found. Cannot train meta-learner.")
        raise NameError("'y' is not defined.")
        
    print("Loaded C1, C2, C3 full training predictions.")
    
    # Build Meta-Features using FULL TRAIN SET predictions
    meta_features = np.hstack([full_train_pred_c1, full_train_pred_c2, full_train_pred_c3])
    y_full = y # Full labels
    
    print(f"Meta-features shape: {meta_features.shape}, Labels shape: {y_full.shape}")

    # Define the meta-learner (as used in 01_local_training)
    # Using multi_class='multinomial' is crucial for log_loss
    meta_learner = LogisticRegression(max_iter=1000, multi_class='multinomial', random_state=random_state)
    
    print("Training meta-learner on full training data...")
    meta_learner.fit(meta_features, y_full)
    
    # Save the trained meta-learner
    joblib.dump(meta_learner, ENSEMBLE_MODEL_PATH)
    print(f"Meta-learner successfully trained and SAVED to: {ENSEMBLE_MODEL_PATH}")

except FileNotFoundError as e:
    print(f"ERROR: Could not load base model predictions: {e}")
    print("Cannot train or save meta-learner. Make sure .pkl files exist.")
except Exception as e:
    print(f"An unexpected error occurred during meta-learner training: {e}")

Loaded C1, C2, C3 full training predictions.
Meta-features shape: (57477, 9), Labels shape: (57477,)
Training meta-learner on full training data...
Meta-learner successfully trained and SAVED to: ../models/meta_learner_stacked.pkl


