In [1]:
!pip install --no-deps /kaggle/input/map-utilities/bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl

Processing /kaggle/input/map-utilities/bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0


In [2]:
import os
import sys
import gc

os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

ROOT_PATH = os.getcwd()
if "/kaggle" in ROOT_PATH:
    ROOT_PATH = "/kaggle/input"
    sys.path.append(os.path.join(ROOT_PATH, "map-utilities"))

In [3]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt

import torch
from datasets import Dataset
from peft import PeftModel
from transformers import BitsAndBytesConfig

from utils import (
    stringify_input,
    get_model_name,
    get_sequence_classifier,
    get_tokenizer,
    get_training_arguments,
    get_trainer,
)

2025-08-19 12:36:27.784007: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755606987.995563      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755606988.055996      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
le = joblib.load(os.path.join(ROOT_PATH, "map-utilities", "label_encoder.joblib"))
n_classes = len(le.classes_)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
USE_LORA = True
USE_QLORA = False
MAX_LEN = 256

MODEL_VARIATIONS = [
    {
        "model_name": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "deepseek-r1-0528/transformers/deepseek-r1-0528-qwen3-8b/1"
        ),
        "adapter_path": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "deepseek-r1-0528-qwen3-8b-qlora-4bit/transformers/default/1"
        ),
        "submission_file": "submission_deepseek_r1_0528_qwen3_8b.csv"
    },
    {
        "model_name": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "deepseek-math/pytorch/deepseek-math-7b-instruct/1"
        ),
        "adapter_path": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "deepseek-math-7b-instruct-qlora-4bit/transformers/default/1"
        ),
        "submission_file": "submission_deepseek_math_7b.csv"
    },
    {
        "model_name": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "qwen-3/transformers/14b/1"
        ),
        "adapter_path": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "qwen3-14b-qlora-4bit/transformers/default/1"
        ),
        "submission_file": "submission_qwen3_14b.csv"
    },
    {
        "model_name": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "gemma-2/transformers/gemma-2-9b-it/2"
        ),
        "adapter_path": get_model_name(
            "/kaggle" in ROOT_PATH, ROOT_PATH, "gemma2-9b-it-qlora-4bit/transformers/default/1"
        ),
        "submission_file": "submission_gemma2_9b.csv"
    },
]

In [6]:
TRAIN_PATH = os.path.join(ROOT_PATH, "map-charting-student-math-misunderstandings", "train.csv")
TEST_PATH = os.path.join(ROOT_PATH, "map-charting-student-math-misunderstandings", "test.csv")

In [7]:
train_df = pd.read_csv(TRAIN_PATH)
test_df = pd.read_csv(TEST_PATH)

In [8]:
print("Training Shape:", train_df.shape)
print("Testing Shape:", test_df.shape)

Training Shape: (36696, 7)
Testing Shape: (3, 5)


In [9]:
idx = train_df.Category.str.contains("True", case=False)
tmp = train_df.loc[idx].copy()
tmp["c"] = tmp.groupby(["QuestionId", "MC_Answer"]).MC_Answer.transform("count")
tmp = tmp.sort_values("c", ascending=False)
tmp = tmp.drop_duplicates(["QuestionId"])
tmp = tmp[["QuestionId", "MC_Answer"]]
tmp["is_mc_answer_correct"] = True

train_df = train_df.merge(tmp, on=["QuestionId", "MC_Answer"], how="left")
train_df.is_mc_answer_correct = train_df.is_mc_answer_correct.fillna(False)

test_df = test_df.merge(tmp, on=["QuestionId", "MC_Answer"], how="left")
test_df.is_mc_answer_correct = test_df.is_mc_answer_correct.fillna(False)

  train_df.is_mc_answer_correct = train_df.is_mc_answer_correct.fillna(False)
  test_df.is_mc_answer_correct = test_df.is_mc_answer_correct.fillna(False)


In [10]:
qlora_config = {
    "torch_dtype": torch.float16,
    "device_map": "auto",
}
if USE_QLORA:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_storage=torch.float16,
    )
    qlora_config["quantization_config"] = bnb_config

In [11]:
def predict_test_data(model_name, adapter_path, submission_file):
    seq_model = get_sequence_classifier(model_name, n_classes, qlora_config)
    tokenizer = get_tokenizer(model_name)

    def tokenize_function(examples):
        return tokenizer(examples["stringified_input"], truncation=True)

    if (
        "gemma" in model_name.lower()
        or "qwen" in model_name.lower()
        or "deepseek-math" in model_name.lower()
    ):
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            tokenizer.pad_token_id = tokenizer.eos_token_id
        seq_model.config.pad_token_id = tokenizer.pad_token_id

    if USE_LORA:
        seq_model = PeftModel.from_pretrained(seq_model, adapter_path)

    test_df["stringified_input"] = test_df.apply(
        lambda row: stringify_input(row, model_name), axis=1
    )

    test_ds = Dataset.from_pandas(test_df[["stringified_input"]])
    test_ds = test_ds.map(tokenize_function, batched=True)

    training_args = get_training_arguments(bf16_support="/kaggle" not in ROOT_PATH)
    trainer = get_trainer(
        seq_model,
        tokenizer,
        training_args,
        test_ds,
        test_ds,
    )

    predictions = trainer.predict(test_ds)
    probs = torch.nn.functional.softmax(
        torch.tensor(predictions.predictions), dim=1
    ).numpy()

    top3 = np.argsort(-probs, axis=1)[:, :3]

    flat_top3 = top3.flatten()
    decoded_labels = le.inverse_transform(flat_top3)
    top3_labels = decoded_labels.reshape(top3.shape)

    joined_preds = ["|".join(row) for row in top3_labels]

    sub = pd.DataFrame(
        {"row_id": test_df.row_id.values, "Category:Misconception": joined_preds}
    )
    sub.to_csv(submission_file, index=False)

    del seq_model
    del tokenizer
    del training_args
    del trainer
    del test_ds
    del predictions
    del probs
    del top3
    del flat_top3
    del decoded_labels
    del top3_labels
    del joined_preds

    for obj in list(globals().keys()):
        if isinstance(globals()[obj], torch.nn.Module) or isinstance(globals()[obj], torch.Tensor):
            del globals()[obj]

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
    gc.collect()

In [12]:
for model_variation in MODEL_VARIATIONS:
    predict_test_data(
        model_name=model_variation["model_name"],
        adapter_path=model_variation["adapter_path"],
        submission_file=model_variation["submission_file"]
    )

Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-r1-0528/transformers/deepseek-r1-0528-qwen3-8b/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/qwen-3/transformers/14b/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of Gemma2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/gemma-2/transformers/gemma-2-9b-it/2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  return Trainer(


### Ensemble Predictions

In [13]:
from collections import defaultdict


# https://www.kaggle.com/code/bibanh/lb-0-944-the-art-of-ensemble#4.-ENSEMBLE-EVERYTHING
def get_top_k_ensemble(list_of_predictions, k=3):
    predictions = []
    weights = []
    for lp in list_of_predictions:
        predictions.append(lp.split("|"))
        weights.append(4)
    score = defaultdict(int)

    for i, lst in enumerate(predictions):
        weight = weights[i]
        for rank, item in enumerate(lst):
            score[item] += (len(lst) - rank) * weight

    sorted_items = sorted(score.items(), key=lambda x: -x[1])
    return ' '.join([item for item, _ in sorted_items[:k]])

In [14]:
dfs = {}
for model_variation in MODEL_VARIATIONS:
    model_name = model_variation["submission_file"].replace("submission_", "").replace(".csv", "")
    df = pd.read_csv(model_variation["submission_file"])
    dfs[model_name] = df

ensemble_df = dfs[model_name][['row_id']].copy()
for model_name in dfs:
    ensemble_df[f"predictions_{model_name}"] = dfs[model_name]["Category:Misconception"]

print("Ensemble df shape:", ensemble_df.shape)

Ensemble df shape: (3, 5)


In [15]:
ensemble_df["Category:Misconception"] = ensemble_df.apply(
    lambda row: get_top_k_ensemble(
        [row[f"predictions_{model_variations['submission_file'].replace('submission_', '').replace('.csv', '')}"] for model_variations in MODEL_VARIATIONS],
    ),
    axis=1,
)
ensemble_df[['row_id', 'Category:Misconception']].to_csv('submission.csv', index = False)
pd.read_csv('submission.csv')

Unnamed: 0,row_id,Category:Misconception
0,36696,True_Correct:NA True_Neither:NA True_Misconcep...
1,36697,False_Misconception:WNB False_Misconception:In...
2,36698,True_Neither:NA True_Correct:NA True_Misconcep...
