In [1]:
!pip install --no-deps /kaggle/input/map-utilities/transformers-4.55.3-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl
!pip install --no-deps /kaggle/input/map-utilities/peft-0.17.1-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/datasets-4.0.0-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/huggingface_hub-0.34.4-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/accelerate-1.10.0-py3-none-any.whl

Processing /kaggle/input/map-utilities/transformers-4.55.3-py3-none-any.whl
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.52.4
    Uninstalling transformers-4.52.4:
      Successfully uninstalled transformers-4.52.4
Successfully installed transformers-4.55.3
Processing /kaggle/input/map-utilities/bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
Processing /kaggle/input/map-utilities/peft-0.17.1-py3-none-any.whl
Installing collected packages: peft
  Attempting uninstall: peft
    Found existing installation: peft 0.15.2
    Uninstalling peft-0.15.2:
      Successfully uninstalled peft-0.15.2
Successfully installed peft-0.17.1
Processing /kaggle/input/map-utilities/datasets-4.0.0-py3-none-any.whl
Installing collected packages: datasets
  Attempting uninstall: datasets
    Found existing installa

In [2]:
import os
import sys
import gc
import time

os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

ROOT_PATH = os.getcwd()
if "/kaggle" in ROOT_PATH:
    ROOT_PATH = "/kaggle/input"
    sys.path.append(os.path.join(ROOT_PATH, "map-utilities"))

In [3]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt

import torch
from datasets import Dataset
from peft import PeftModel
from transformers import BitsAndBytesConfig

from utils import (
    stringify_input,
    get_model_name,
    get_sequence_classifier,
    get_tokenizer,
    get_training_arguments,
    get_trainer,
)

2025-09-18 03:18:45.565206: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758165525.761607      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758165525.814822      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
le = joblib.load(os.path.join(ROOT_PATH, "map-utilities", "label_encoder.joblib"))
n_classes = len(le.classes_)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
USE_LORA = True
USE_QLORA = False
MAX_LEN = 256
TOP_K = 10

MODEL_VARIATIONS = [
    {
        "model_name": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "qwen-3/transformers/14b/1"
        ),
        "adapter_path": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "qwen3-14b-qlora-4bit/transformers/default/1"
        ),
        "submission_file": "submission_qwen3_14b.csv",
        "use_lora": True,
        "use_qlora": False,
    },
    # {
    #     "model_name": get_model_name(
    #         "/kaggle" in ROOT_PATH, ROOT_PATH, "deepseek-r1-0528/transformers/deepseek-r1-0528-qwen3-8b/1"
    #     ),
    #     "adapter_path": get_model_name(
    #         "/kaggle" in ROOT_PATH, ROOT_PATH, "deepseek-r1-0528-qwen3-8b-qlora-4bit/transformers/default/1"
    #     ),
    #     "submission_file": "submission_deepseek_r1_0528_qwen3_8b.csv",
    #     "use_lora": True,
    #     "use_qlora": False,
    # },
    # {
    #     "model_name": get_model_name(
    #         "/kaggle" in ROOT_PATH, ROOT_PATH, "deepseek-math/pytorch/deepseek-math-7b-instruct/1"
    #     ),
    #     "adapter_path": get_model_name(
    #         "/kaggle" in ROOT_PATH, ROOT_PATH, "deepseek-math-7b-instruct-qlora-4bit/transformers/default/2"
    #     ),
    #     "submission_file": "submission_deepseek_math_7b.csv",
    #     "use_lora": True,
    #     "use_qlora": False,
    # },
    {
        "model_name": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "gemma-2/transformers/gemma-2-9b-it/2"
        ),
        "adapter_path": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "gemma2-9b-it-qlora-4bit/transformers/default/2"
        ),
        "submission_file": "submission_gemma2_9b.csv",
        "use_lora": True,
        "use_qlora": False,
    },
    {
        "model_name": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "qwen-3-embedding/transformers/4b/1"
        ),
        "adapter_path": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "qwen3-embedding-4b-qlora-4bit/transformers/default/1"
        ),
        "submission_file": "submission_qwen3_embedding_4b.csv",
        "use_lora": True,
        "use_qlora": False,
    },
    # {
    #     "model_name": get_model_name(
    #         "/kaggle" in ROOT_PATH, ROOT_PATH, "llama-3.1/transformers/8b-instruct/2"
    #     ),
    #     "adapter_path": get_model_name(
    #         "/kaggle" in ROOT_PATH, ROOT_PATH, "llama-3.1-8b-instruct-qlora-4bit/transformers/default/1"
    #     ),
    #     "submission_file": "submission_llama31_8b.csv",
    #     "use_lora": True,
    #     "use_qlora": False,
    # },
    # {
    #     "model_name": get_model_name(
    #         "/kaggle" in ROOT_PATH, ROOT_PATH, "nvidia-acereason-nemotron-1.1-7b/transformers/default/1"
    #     ),
    #     "adapter_path": get_model_name(
    #         "/kaggle" in ROOT_PATH, ROOT_PATH, "acereason-nemotron-1.1-7b-qlora-4bit/transformers/default/2"
    #     ),
    #     "submission_file": "submission_acereason_7b.csv",
    #     "use_lora": True,
    #     "use_qlora": False,
    # },
    {
        "model_name": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "ettin-encoder-1b/transformers/default/6"
        ),
        "adapter_path": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "ettin-encoder-1b/transformers/default/6"
        ),
        "submission_file": "submission_ettin_1b.csv",
        "use_lora": False,
        "use_qlora": False,
    },
]

In [6]:
TRAIN_PATH = os.path.join(ROOT_PATH, "map-charting-student-math-misunderstandings", "train.csv")
TEST_PATH = os.path.join(ROOT_PATH, "map-charting-student-math-misunderstandings", "test.csv")

In [7]:
train_df = pd.read_csv(TRAIN_PATH)
test_df = pd.read_csv(TEST_PATH)

In [8]:
print("Training Shape:", train_df.shape)
print("Testing Shape:", test_df.shape)

Training Shape: (36696, 7)
Testing Shape: (3, 5)


In [9]:
idx = train_df.Category.str.contains("True", case=False)
tmp = train_df.loc[idx].copy()
tmp["c"] = tmp.groupby(["QuestionId", "MC_Answer"]).MC_Answer.transform("count")
tmp = tmp.sort_values("c", ascending=False)
tmp = tmp.drop_duplicates(["QuestionId"])
tmp = tmp[["QuestionId", "MC_Answer"]]
tmp["is_mc_answer_correct"] = True

train_df = train_df.merge(tmp, on=["QuestionId", "MC_Answer"], how="left")
train_df.is_mc_answer_correct = train_df.is_mc_answer_correct.fillna(False)

test_df = test_df.merge(tmp, on=["QuestionId", "MC_Answer"], how="left")
test_df.is_mc_answer_correct = test_df.is_mc_answer_correct.fillna(False)

  train_df.is_mc_answer_correct = train_df.is_mc_answer_correct.fillna(False)
  test_df.is_mc_answer_correct = test_df.is_mc_answer_correct.fillna(False)


In [10]:
def clear_memory():
    for obj in list(globals().keys()):
        if isinstance(globals()[obj], torch.nn.Module) or isinstance(globals()[obj], torch.Tensor):
            del globals()[obj]

    torch.cuda.empty_cache()
    gc.collect()
    torch.cuda.ipc_collect()

    time.sleep(1)

In [11]:
def predict_test_data(model_name, adapter_path, submission_file, use_lora, use_qlora):
    qlora_config = {
        "torch_dtype": torch.float16,
    }
    if "ettin" not in model_name.lower():
        qlora_config["device_map"] = "auto"

    if use_qlora:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_storage=torch.float16,
        )
        qlora_config["quantization_config"] = bnb_config

    seq_model = get_sequence_classifier(model_name, n_classes, qlora_config)
    tokenizer = get_tokenizer(model_name)

    def tokenize_function(examples):
        return tokenizer(examples["stringified_input"])

    if (
        "gemma" in model_name.lower()
        or "qwen" in model_name.lower()
        or "deepseek-math" in model_name.lower()
        or "llama-3.1" in model_name.lower()
    ):
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            tokenizer.pad_token_id = tokenizer.eos_token_id
        seq_model.config.pad_token_id = tokenizer.pad_token_id

    if use_lora:
        seq_model = PeftModel.from_pretrained(seq_model, adapter_path)

    test_df["stringified_input"] = test_df.apply(
        lambda row: stringify_input(row, model_name), axis=1
    )

    test_ds = Dataset.from_pandas(test_df[["stringified_input"]])
    test_ds = test_ds.map(tokenize_function, batched=True)

    training_args = get_training_arguments(bf16_support="/kaggle" not in ROOT_PATH)
    trainer = get_trainer(
        seq_model,
        tokenizer,
        training_args,
        test_ds,
        test_ds,
    )

    predictions = trainer.predict(test_ds)
    probs = torch.nn.functional.softmax(
        torch.tensor(predictions.predictions), dim=1
    ).numpy()

    topk = np.argsort(-probs, axis=1)[:, :TOP_K]

    flat_topk = topk.flatten()
    decoded_labels = le.inverse_transform(flat_topk)
    topk_labels = decoded_labels.reshape(topk.shape)

    joined_preds = ["|".join(row) for row in topk_labels]

    sub = pd.DataFrame(
        {"row_id": test_df.row_id.values, "Category:Misconception": joined_preds}
    )
    sub.to_csv(submission_file, index=False)

    prob_data = []
    for i in range(len(predictions)):
        prob_dict = {f"prob_{j}": probs[i, topk[i, j]] for j in range(TOP_K)}
        prob_dict['row_id'] = test_df.row_id.values[i]
        prob_dict['top_classes'] = " ".join(topk_labels[i, :TOP_K])
        prob_data.append(prob_dict)

    sub_prob = pd.DataFrame(prob_data)
    sub_prob.to_csv(submission_file.replace("submission_", "submission_probs_"), index=False)

    del seq_model, tokenizer, training_args, trainer
    del test_ds, predictions, probs, topk, flat_topk, decoded_labels, topk_labels, joined_preds, sub

    clear_memory()
    clear_memory()
    clear_memory()
    clear_memory()

In [12]:
for model_variation in MODEL_VARIATIONS:
    predict_test_data(
        model_name=model_variation["model_name"],
        adapter_path=model_variation["adapter_path"],
        submission_file=model_variation["submission_file"],
        use_lora=model_variation["use_lora"],
        use_qlora=model_variation["use_qlora"],
    )

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/qwen-3/transformers/14b/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of Gemma2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/gemma-2/transformers/gemma-2-9b-it/2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/qwen-3-embedding/transformers/4b/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


### Ensemble Predictions

In [13]:
from collections import defaultdict


# https://www.kaggle.com/code/bibanh/lb-0-944-the-art-of-ensemble#4.-ENSEMBLE-EVERYTHING
def get_top_k_ensemble(list_of_predictions, k=3):
    predictions = []
    weights = []
    for i, lp in enumerate(list_of_predictions):
        predictions.append(lp.split("|"))
        w = 4
        weights.append(w)
    score = defaultdict(int)

    for i, lst in enumerate(predictions):
        weight = weights[i]
        for rank, item in enumerate(lst):
            score[item] += (len(lst) - rank) * weight

    sorted_items = sorted(score.items(), key=lambda x: -x[1])
    return ' '.join([item for item, _ in sorted_items[:k]])

In [14]:
dfs = {}
for model_variation in MODEL_VARIATIONS:
    model_name = model_variation["submission_file"].replace("submission_", "").replace(".csv", "")
    df = pd.read_csv(model_variation["submission_file"])
    dfs[model_name] = df

ensemble_df = dfs[model_name][['row_id']].copy()
for model_name in dfs:
    ensemble_df[f"predictions_{model_name}"] = dfs[model_name]["Category:Misconception"]

print("Ensemble df shape:", ensemble_df.shape)

Ensemble df shape: (3, 5)


In [15]:
ensemble_df["Category:Misconception"] = ensemble_df.apply(
    lambda row: get_top_k_ensemble(
        [
            row[
                f"predictions_{model_variations['submission_file'].replace('submission_', '').replace('.csv', '')}"
            ]
            for model_variations in MODEL_VARIATIONS
        ],
    ),
    axis=1,
)
ensemble_df[["row_id", "Category:Misconception"]].to_csv("submission.csv", index=False)
pd.read_csv("submission.csv")

Unnamed: 0,row_id,Category:Misconception
0,36696,True_Correct:NA True_Neither:NA True_Misconcep...
1,36697,False_Misconception:WNB False_Neither:NA False...
2,36698,True_Correct:NA True_Neither:NA True_Misconcep...


In [16]:
def extract_class_probabilities(row, model_suffix, top_k):
    """Extract class names and probabilities from a row"""
    # Get top classes
    classes_col = f"top_classes{model_suffix}"
    if classes_col in row:
        classes = row[classes_col].split("|")[:top_k]
    else:
        return {}
    # Get probabilities
    class_probs = {}
    for i in range(min(top_k, len(classes))):
        prob_col = f"prob_{i}{model_suffix}"
        if prob_col in row:
            class_probs[classes[i]] = row[prob_col]
    return class_probs


def ensemble_with_disagreement_handling(prob_files, model_weights, top_k=3):
    n_models = len(prob_files)
    prob_dfs = []
    final_predictions = []

    for file_path in prob_files:
        df = pd.read_csv(file_path)
        prob_dfs.append(df)

    # Merge on row_id
    merged_df = prob_dfs[0]
    for i, df in enumerate(prob_dfs[1:], 1):
        merged_df = pd.merge(
            merged_df, df, on="row_id", suffixes=("", f"_model{i + 1}")
        )

    for idx, row in merged_df.iterrows():
        # Extract probabilities from each model
        all_class_probs = []
        for i in range(n_models):
            suffix = f"_model{i + 1}" if i > 0 else ""
            class_probs = extract_class_probabilities(row, suffix, TOP_K)
            all_class_probs.append(class_probs)

        # Get all unique classes
        all_classes = set()
        for class_probs in all_class_probs:
            all_classes.update(class_probs.keys())

        # Calculate agreement and disagreement
        class_votes = defaultdict(int)
        class_total_prob = defaultdict(float)
        class_max_prob = defaultdict(float)

        for i, class_probs in enumerate(all_class_probs):
            weight = model_weights[i]

            for class_name, prob in class_probs.items():
                class_votes[class_name] += 1
                class_total_prob[class_name] += prob * weight
                class_max_prob[class_name] = max(
                    class_max_prob[class_name], prob * weight
                )

        final_scores = {}
        for class_name in all_classes:
            # Base score: weighted average probability
            base_score = class_total_prob[class_name]

            # Agreement : classes predicted by more models get boost
            agreement_bonus = class_votes[class_name] / n_models

            # Confidence bonus: classes with high max probability get boost
            confidence_bonus = class_max_prob[class_name]

            # Combined score
            final_scores[class_name] = (
                base_score * 0.6  # 60% base probs
                + agreement_bonus * 0.3  # 30% agreement
                + confidence_bonus * 0.1  # 10% confidence
            )

        # Sort and get top-k
        sorted_classes = sorted(final_scores.items(), key=lambda x: -x[1])
        top_classes = [class_name for class_name, _ in sorted_classes[:top_k]]

        final_predictions.append(" ".join(top_classes))

    return final_predictions

In [17]:
prob_files = []
weights = []
for i, model in enumerate(MODEL_VARIATIONS):
    prob_files.append(model["submission_file"].replace("submission_", "submission_probs_"))
    weights.append(4)

predictions = ensemble_with_disagreement_handling(
    prob_files,
    weights,
    top_k=3,
)

In [18]:
submission = pd.DataFrame({
    'row_id': test_df.row_id.values,
    'Category:Misconception': predictions
})
submission.to_csv('submission1.csv', index=False)
submission

Unnamed: 0,row_id,Category:Misconception
0,36696,True_Correct:NA True_Neither:NA True_Misconcep...
1,36697,False_Misconception:WNB False_Neither:NA False...
2,36698,True_Correct:NA True_Neither:NA True_Misconcep...
