In [1]:
!pip install --no-deps /kaggle/input/map-utilities/transformers-4.55.3-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl
!pip install --no-deps /kaggle/input/map-utilities/peft-0.17.1-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/datasets-4.0.0-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/huggingface_hub-0.34.4-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/accelerate-1.10.0-py3-none-any.whl

Processing /kaggle/input/map-utilities/transformers-4.55.3-py3-none-any.whl
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.52.4
    Uninstalling transformers-4.52.4:
      Successfully uninstalled transformers-4.52.4
Successfully installed transformers-4.55.3
Processing /kaggle/input/map-utilities/bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
Processing /kaggle/input/map-utilities/peft-0.17.1-py3-none-any.whl
Installing collected packages: peft
  Attempting uninstall: peft
    Found existing installation: peft 0.15.2
    Uninstalling peft-0.15.2:
      Successfully uninstalled peft-0.15.2
Successfully installed peft-0.17.1
Processing /kaggle/input/map-utilities/datasets-4.0.0-py3-none-any.whl
Installing collected packages: datasets
  Attempting uninstall: datasets
    Found existing installa

In [2]:
import os
import sys
import gc
import time

# Make GPUs 0 and 1 visible to this process. This runs before the cell that
# imports torch.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

# Default to the current working directory. When that path contains "/kaggle",
# switch to the Kaggle input mount and make the helper package importable.
ROOT_PATH = os.getcwd()
if "/kaggle" in ROOT_PATH:
    ROOT_PATH = "/kaggle/input"
    utilities_dir = os.path.join(ROOT_PATH, "map-utilities")
    # Guard the append so re-running this cell does not add duplicate entries.
    if utilities_dir not in sys.path:
        sys.path.append(utilities_dir)

In [3]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt

import torch
from datasets import Dataset
from peft import PeftModel
from transformers import BitsAndBytesConfig

from utils import (
    stringify_input,
    get_model_name,
    get_sequence_classifier,
    get_tokenizer,
    get_training_arguments,
    get_trainer,
)

2025-09-29 01:12:25.697552: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759108346.070557      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759108346.176016      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
# Load the persisted label encoder shipped with the utilities folder.
# NOTE(review): joblib files are pickle-based, so only load artifacts from a
# source you trust.
label_encoder_file = os.path.join(ROOT_PATH, "map-utilities", "label_encoder.joblib")
le = joblib.load(label_encoder_file)

# Number of target classes known to the encoder; passed to the classifier loader.
n_classes = len(le.classes_)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
# MAX_LEN is not referenced by the other cells visible in this notebook.
MAX_LEN = 256
# Number of ranked labels written per row in each per-model submission.
TOP_K = 10

# Quantization width and derived flags (not read by the visible inference code).
BITS = 4
USE_4BIT = BITS == 4
USE_8BIT = BITS == 8

# Evaluated once; forwarded as the first argument of every get_model_name call.
_on_kaggle = "/kaggle" in ROOT_PATH

# One entry per model whose fold checkpoints are run and then ensembled.
#   model_name      - base model location (or the fold-checkpoint root when use_lora is False)
#   adapter_path    - root folder holding fold_<i> adapters (used when use_lora is True)
#   submission_file - CSV written for this model
#   use_lora        - load a per-fold PEFT adapter on top of the base model
#   use_qlora       - not read by the visible inference code
#   n_fold          - number of fold_<i> checkpoints to run
MODEL_VARIATIONS = [
    dict(
        model_name=get_model_name(
            _on_kaggle, ROOT_PATH, "deepseek-math/pytorch/deepseek-math-7b-instruct/1"
        ),
        adapter_path=get_model_name(
            _on_kaggle, ROOT_PATH, "deepseek-math-7b-instruct-qlora-4bit-5fold/transformers/default/1"
        ),
        submission_file="submission_deepseek_math_7b.csv",
        use_lora=True,
        use_qlora=False,
        n_fold=5,
    ),
    # Disabled variation: gemma-2-9b-it
    # dict(
    #     model_name=get_model_name(
    #         _on_kaggle, ROOT_PATH, "gemma-2/transformers/gemma-2-9b-it/2"
    #     ),
    #     adapter_path=get_model_name(
    #         _on_kaggle, ROOT_PATH, "gemma2-9b-it-qlora-4bit/transformers/default/2"
    #     ),
    #     submission_file="submission_gemma2_9b.csv",
    #     use_lora=True,
    #     use_qlora=False,
    #     n_fold=5,
    # ),
    # Disabled variation: qwen3-embedding-4b
    # dict(
    #     model_name=get_model_name(
    #         _on_kaggle, ROOT_PATH, "qwen-3-embedding/transformers/4b/1"
    #     ),
    #     adapter_path=get_model_name(
    #         _on_kaggle, ROOT_PATH, "qwen3-embedding-4b/transformers/default/1"
    #     ),
    #     submission_file="submission_qwen3_embedding_4b.csv",
    #     use_lora=True,
    #     use_qlora=False,
    #     n_fold=5,
    # ),
    dict(
        model_name=get_model_name(
            _on_kaggle, ROOT_PATH, "ettin-encoder-1b-5fold/transformers/default/1"
        ),
        adapter_path=get_model_name(
            _on_kaggle, ROOT_PATH, "ettin-encoder-1b-5fold/transformers/default/1"
        ),
        submission_file="submission_ettin_1b.csv",
        use_lora=False,
        use_qlora=False,
        n_fold=5,
    ),
]

In [6]:
# Both competition CSVs live in the same dataset folder under ROOT_PATH.
TRAIN_PATH, TEST_PATH = (
    os.path.join(ROOT_PATH, "map-charting-student-math-misunderstandings", csv_name)
    for csv_name in ("train.csv", "test.csv")
)

In [7]:
# Read the train and test tables with pandas' default CSV settings.
train_df, test_df = map(pd.read_csv, (TRAIN_PATH, TEST_PATH))

In [8]:
# Quick sanity check of the (rows, columns) of each table.
print(f"Training Shape: {train_df.shape}")
print(f"Testing Shape: {test_df.shape}")

Training Shape: (36696, 7)
Testing Shape: (3, 5)


In [9]:
# Derive, per question, the multiple-choice answer treated as correct: among
# training rows whose Category contains "True" (case-insensitive), keep the
# (QuestionId, MC_Answer) pair that occurs most often for each QuestionId.
is_true_row = train_df.Category.str.contains("True", case=False)
true_rows = train_df.loc[is_true_row]
correct_answers = (
    true_rows.assign(
        c=true_rows.groupby(["QuestionId", "MC_Answer"]).MC_Answer.transform("count")
    )
    .sort_values("c", ascending=False)
    .drop_duplicates(["QuestionId"])[["QuestionId", "MC_Answer"]]
    .assign(is_mc_answer_correct=True)
)


def _flag_correct_mc_answers(frame, answers):
    """Return `frame` with a boolean `is_mc_answer_correct` column.

    A row is flagged True when its (QuestionId, MC_Answer) pair appears in
    `answers`, and False otherwise.
    """
    merged = (
        # Drop a column left by an earlier run so re-executing the cell does
        # not produce `_x` / `_y` suffixed duplicates.
        frame.drop(columns="is_mc_answer_correct", errors="ignore")
        .merge(answers, on=["QuestionId", "MC_Answer"], how="left")
    )
    # After the left merge the column holds True or NaN. Comparing with True
    # yields a plain bool column without relying on fillna's deprecated
    # object-dtype downcasting.
    merged["is_mc_answer_correct"] = merged["is_mc_answer_correct"].eq(True)
    return merged


train_df = _flag_correct_mc_answers(train_df, correct_answers)
test_df = _flag_correct_mc_answers(test_df, correct_answers)

  train_df.is_mc_answer_correct = train_df.is_mc_answer_correct.fillna(False)
  test_df.is_mc_answer_correct = test_df.is_mc_answer_correct.fillna(False)


In [10]:
def clear_memory():
    """Drop global torch objects and release cached GPU memory.

    Deletes every module-level name bound to a ``torch.nn.Module`` or
    ``torch.Tensor``, runs the garbage collector, and then (only when CUDA is
    available) empties the CUDA caching allocator and collects CUDA IPC
    memory. Finishes with a one-second pause.
    """
    module_globals = globals()
    # Iterate over a snapshot of the names because entries are deleted in the loop.
    for name in list(module_globals):
        if isinstance(module_globals[name], (torch.nn.Module, torch.Tensor)):
            del module_globals[name]

    # Collect garbage first: memory still held by unreachable objects cannot be
    # released from the cache until those objects have been destroyed.
    gc.collect()

    # Skip the CUDA calls on machines without a usable CUDA device.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

    time.sleep(1)

In [11]:
def generate_test_predictions(model_name, adapter_path, use_lora, n_fold, submission_file):
    """Run every fold model on the test set, average the results, and save a CSV.

    Args:
        model_name: Base model location. When ``use_lora`` is False, each
            fold's model is loaded from ``<model_name>/fold_<i>`` instead.
        adapter_path: Folder containing ``fold_<i>`` adapters; only read when
            ``use_lora`` is True.
        use_lora: Whether to wrap the base model with a per-fold PEFT adapter.
        n_fold: Number of folds to run (fold indices ``0 .. n_fold - 1``).
        submission_file: Destination path of the per-model submission CSV.

    Returns:
        Tuple ``(ensemble_predictions, submission)``: the mean over folds of
        the softmax probabilities, and the DataFrame that was written to
        ``submission_file``.

    Side effects:
        Overwrites ``test_df["stringified_input"]`` on the global test frame
        and writes ``submission_file`` to disk.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print("GENERATING TEST PREDICTIONS")
    print(f"{banner}")

    # Build the text input for every test row (formatting depends on the model name).
    test_df["stringified_input"] = test_df.apply(
        lambda record: stringify_input(record, model_name), axis=1
    )

    lowered_name = model_name.lower()
    # Model families whose tokenizer/config get an explicit pad token below.
    needs_pad_token = any(
        family in lowered_name
        for family in ("gemma", "qwen", "deepseek-math", "llama-3.1", "acemath")
    )

    fold_probabilities = []

    for fold_idx in range(n_fold):
        print(f"Loading fold {fold_idx + 1} model...")

        fold_dir = f"fold_{fold_idx}"
        # LoRA runs load the shared base model; otherwise each fold has its own checkpoint.
        model_path = model_name if use_lora else os.path.join(model_name, fold_dir)

        tokenizer = get_tokenizer(model_path)

        def encode_batch(batch):
            return tokenizer(batch["stringified_input"])

        test_ds = Dataset.from_pandas(test_df[["stringified_input"]]).map(
            encode_batch, batched=True
        )

        # Keyword arguments handed to the classifier loader; a fresh dict per fold.
        model_kwargs = {"torch_dtype": torch.float16}
        if "ettin" not in lowered_name:
            model_kwargs["device_map"] = "auto"
        seq_model = get_sequence_classifier(model_path, n_classes, model_kwargs)

        if needs_pad_token:
            # Fall back to the EOS token when the tokenizer defines no pad token.
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
                tokenizer.pad_token_id = tokenizer.eos_token_id
            seq_model.config.pad_token_id = tokenizer.pad_token_id

        if use_lora:
            seq_model = PeftModel.from_pretrained(
                seq_model, os.path.join(adapter_path, fold_dir)
            )

        # The trainer is only used as an inference harness here.
        training_args = get_training_arguments(
            bf16_support="/kaggle" not in ROOT_PATH,
        )
        trainer = get_trainer(seq_model, tokenizer, training_args, test_ds, test_ds)

        predictions = trainer.predict(test_ds)
        probs = torch.tensor(predictions.predictions).softmax(dim=1).numpy()
        fold_probabilities.append(probs)

        # Drop the per-fold objects before loading the next fold.
        del seq_model, tokenizer, training_args, trainer
        del test_ds, predictions, probs
        for _ in range(4):
            clear_memory()

    # Simple average of the per-fold probabilities.
    ensemble_predictions = np.mean(fold_probabilities, axis=0)

    # Rank classes by descending probability and keep the first TOP_K per row.
    ranked_ids = np.argsort(-ensemble_predictions, axis=1)[:, :TOP_K]
    ranked_labels = le.inverse_transform(ranked_ids.flatten()).reshape(ranked_ids.shape)
    joined_preds = ["|".join(labels) for labels in ranked_labels]

    submission = pd.DataFrame(
        {"row_id": test_df.row_id.values, "Category:Misconception": joined_preds}
    )
    submission.to_csv(submission_file, index=False)

    print(f"Test predictions saved to '{submission_file}'")
    return ensemble_predictions, submission

In [12]:
# Run inference for each configured model; every call writes that model's
# submission CSV, which the ensemble cells read back later.
for model_variation in MODEL_VARIATIONS:
    test_predictions, submission = generate_test_predictions(
        model_name=model_variation["model_name"],
        adapter_path=model_variation["adapter_path"],
        use_lora=model_variation["use_lora"],
        n_fold=model_variation["n_fold"],
        submission_file=model_variation["submission_file"],
    )


GENERATING TEST PREDICTIONS
Loading fold 1 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Loading fold 2 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Loading fold 3 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Loading fold 4 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Loading fold 5 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Test predictions saved to 'submission_deepseek_math_7b.csv'

GENERATING TEST PREDICTIONS
Loading fold 1 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Loading fold 2 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Loading fold 3 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Loading fold 4 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Loading fold 5 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  return Trainer(


Test predictions saved to 'submission_ettin_1b.csv'


In [13]:
from collections import defaultdict


# https://www.kaggle.com/code/bibanh/lb-0-944-the-art-of-ensemble#4.-ENSEMBLE-EVERYTHING
def get_top_k_ensemble(list_of_predictions, k=3, weights=None):
    """Merge several ranked label lists into one top-k string by weighted rank voting.

    Each element of ``list_of_predictions`` is a ``"|"``-separated ranking
    (best first). A label at position ``rank`` in a ranking of length ``n``
    earns ``(n - rank) * weight`` points; points are summed across rankings
    and the ``k`` highest-scoring labels are returned. Ties keep the order in
    which labels were first seen.

    Args:
        list_of_predictions: Iterable of ``"|"``-joined label strings, one per model.
        k: Number of labels to keep.
        weights: Optional per-model weights, one per entry of
            ``list_of_predictions``. Defaults to 4.0 for every model, which
            matches the previous hard-coded behaviour.

    Returns:
        The selected labels joined by a single space.

    Raises:
        ValueError: If ``weights`` is given and its length differs from the
            number of prediction strings.
    """
    ranked_lists = [prediction.split("|") for prediction in list_of_predictions]

    if weights is None:
        weights = [4.0] * len(ranked_lists)
    else:
        weights = list(weights)
        if len(weights) != len(ranked_lists):
            raise ValueError(
                f"expected {len(ranked_lists)} weights, got {len(weights)}"
            )

    score = defaultdict(float)
    for weight, ranked in zip(weights, ranked_lists):
        for rank, item in enumerate(ranked):
            score[item] += (len(ranked) - rank) * weight

    # sorted() is stable, so equal scores keep first-seen order.
    sorted_items = sorted(score.items(), key=lambda pair: -pair[1])
    return " ".join(item for item, _ in sorted_items[:k])

In [14]:
# Load each per-model submission, keyed by the short tag derived from its
# filename ("submission_<tag>.csv" -> "<tag>").
dfs = {}
for model_variation in MODEL_VARIATIONS:
    submission_path = model_variation["submission_file"]
    model_tag = submission_path.replace("submission_", "").replace(".csv", "")
    dfs[model_tag] = pd.read_csv(submission_path)

if not dfs:
    raise ValueError("MODEL_VARIATIONS is empty; there are no submissions to ensemble")

# Start from the row ids of one submission, chosen explicitly rather than via
# a loop variable that happened to survive the loop above.
ensemble_df = next(iter(dfs.values()))[["row_id"]].copy()
for model_tag, model_df in dfs.items():
    # Columns are attached by index position, so the row ids must line up.
    if not model_df["row_id"].equals(ensemble_df["row_id"]):
        raise ValueError(f"row_id mismatch in submission for '{model_tag}'")
    ensemble_df[f"predictions_{model_tag}"] = model_df["Category:Misconception"]

print("Ensemble df shape:", ensemble_df.shape)

Ensemble df shape: (3, 3)


In [15]:
# Names of the per-model prediction columns, in MODEL_VARIATIONS order.
prediction_columns = [
    f"predictions_{variation['submission_file'].replace('submission_', '').replace('.csv', '')}"
    for variation in MODEL_VARIATIONS
]

# Combine the per-model rankings of each row into the final space-separated labels.
ensemble_df["Category:Misconception"] = ensemble_df.apply(
    lambda row: get_top_k_ensemble([row[column] for column in prediction_columns]),
    axis=1,
)

# Write the final submission and read it back for display.
ensemble_df[["row_id", "Category:Misconception"]].to_csv("submission.csv", index=False)
pd.read_csv("submission.csv")

Unnamed: 0,row_id,Category:Misconception
0,36696,True_Correct:NA True_Neither:NA True_Misconcep...
1,36697,False_Misconception:WNB False_Neither:NA False...
2,36698,True_Neither:NA True_Correct:NA True_Misconcep...
