In [1]:
!pip install --no-deps /kaggle/input/map-utilities/transformers-4.55.3-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl
!pip install --no-deps /kaggle/input/map-utilities/peft-0.17.1-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/datasets-4.0.0-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/huggingface_hub-0.34.4-py3-none-any.whl
!pip install --no-deps /kaggle/input/map-utilities/accelerate-1.10.0-py3-none-any.whl

Processing /kaggle/input/map-utilities/transformers-4.55.3-py3-none-any.whl
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.52.4
    Uninstalling transformers-4.52.4:
      Successfully uninstalled transformers-4.52.4
Successfully installed transformers-4.55.3
Processing /kaggle/input/map-utilities/bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
Processing /kaggle/input/map-utilities/peft-0.17.1-py3-none-any.whl
Installing collected packages: peft
  Attempting uninstall: peft
    Found existing installation: peft 0.15.2
    Uninstalling peft-0.15.2:
      Successfully uninstalled peft-0.15.2
Successfully installed peft-0.17.1
Processing /kaggle/input/map-utilities/datasets-4.0.0-py3-none-any.whl
Installing collected packages: datasets
  Attempting uninstall: datasets
    Found existing installa

In [2]:
import os
import sys
import time
import gc

# Limit CUDA to device indices 0 and 1; this is set before torch is imported
# in the next cell.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0,1"})

# On Kaggle every attached dataset is addressed relative to /kaggle/input and
# the helper package is imported from the "map-utilities" dataset. Anywhere
# else the current working directory is used as the root.
working_dir = os.getcwd()
on_kaggle = "/kaggle" in working_dir
ROOT_PATH = "/kaggle/input" if on_kaggle else working_dir
if on_kaggle:
    sys.path.append(os.path.join(ROOT_PATH, "map-utilities"))

In [3]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt

import torch
from datasets import Dataset
from peft import PeftModel, LoraConfig, TaskType
from transformers import BitsAndBytesConfig

from utils import (
    stringify_input,
    get_model_name,
    get_sequence_classifier,
    get_tokenizer,
    get_training_arguments,
    get_trainer,
)

2025-09-26 17:51:02.661123: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758909063.011549      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758909063.114286      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
# Load the fitted label encoder that maps class indices back to label strings.
# joblib.load unpickles the file, so only load artefacts from a trusted source.
label_encoder_path = os.path.join(ROOT_PATH, "map-utilities", "label_encoder.joblib")
le = joblib.load(label_encoder_path)

# Size of the classification head requested from the model loader.
n_classes = len(le.classes_)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
# --- Ensemble / submission settings ---------------------------------------
N_FOLDS = 5  # fold_0 .. fold_{N_FOLDS-1} are loaded in turn at inference
TOP_K = 10   # number of labels written per test row

# --- Model locations --------------------------------------------------------
running_on_kaggle = "/kaggle" in ROOT_PATH

BASE_MODEL = "deepseek-math/pytorch/deepseek-math-7b-instruct/1"
if not running_on_kaggle:
    # Outside Kaggle the base model path is anchored at ROOT_PATH.
    BASE_MODEL = os.path.join(ROOT_PATH, BASE_MODEL)

MODEL_NAME = get_model_name(running_on_kaggle, ROOT_PATH, BASE_MODEL)
ADAPTER_PATH = get_model_name(
    running_on_kaggle,
    ROOT_PATH,
    "deepseek-math-7b-instruct-qlora-4bit-5fold/transformers/default/1",
)

# --- Adapter / quantization switches ---------------------------------------
USE_LORA = True
# NOTE(review): the adapter directory name mentions "qlora-4bit" while
# quantized loading is switched off here - confirm this is intentional.
USE_QLORA = False
BITS = 4
USE_4BIT, USE_8BIT = BITS == 4, BITS == 8

In [6]:
# Both competition CSVs live in the same dataset folder under ROOT_PATH.
TRAIN_PATH, TEST_PATH = (
    os.path.join(ROOT_PATH, "map-charting-student-math-misunderstandings", csv_name)
    for csv_name in ("train.csv", "test.csv")
)

In [7]:
# Read the raw competition tables into memory.
train_df, test_df = pd.read_csv(TRAIN_PATH), pd.read_csv(TEST_PATH)

In [8]:
# Quick sanity check of the (rows, columns) of each split.
for split_label, split_frame in (("Training", train_df), ("Testing", test_df)):
    print(f"{split_label} Shape:", split_frame.shape)

Training Shape: (36696, 7)
Testing Shape: (3, 5)


In [9]:
# Derive a per-question "correct answer" lookup from the training labels:
# among rows whose Category contains "True", keep for every QuestionId the
# MC_Answer that occurs most often.
idx = train_df.Category.str.contains("True", case=False)
tmp = train_df.loc[idx].copy()
tmp["c"] = tmp.groupby(["QuestionId", "MC_Answer"]).MC_Answer.transform("count")
tmp = tmp.sort_values("c", ascending=False)
tmp = tmp.drop_duplicates(["QuestionId"])
tmp = tmp[["QuestionId", "MC_Answer"]].assign(is_mc_answer_correct=True)

# Attach the flag to both splits. Any flag column left over from a previous
# run of this cell is dropped first; otherwise the merge would create
# suffixed duplicate columns and the cell could not be re-run.
train_df = train_df.drop(columns="is_mc_answer_correct", errors="ignore").merge(
    tmp, on=["QuestionId", "MC_Answer"], how="left"
)
test_df = test_df.drop(columns="is_mc_answer_correct", errors="ignore").merge(
    tmp, on=["QuestionId", "MC_Answer"], how="left"
)

# After the left merge the flag is True for matched rows and missing for all
# others, so "is present" is exactly the boolean we want. Using .notna()
# instead of .fillna(False) yields a real bool column and avoids the pandas
# FutureWarning about downcasting object arrays in fillna.
train_df["is_mc_answer_correct"] = train_df["is_mc_answer_correct"].notna()
test_df["is_mc_answer_correct"] = test_df["is_mc_answer_correct"].notna()

  train_df.is_mc_answer_correct = train_df.is_mc_answer_correct.fillna(False)
  test_df.is_mc_answer_correct = test_df.is_mc_answer_correct.fillna(False)


In [10]:
def clear_memory():
    """Release model/tensor memory held at notebook (module) level.

    Every global name bound to a ``torch.nn.Module`` or ``torch.Tensor`` is
    deleted, Python garbage is collected, and - when CUDA is available - the
    CUDA caching allocator and IPC handles are released. Objects that are only
    referenced from a function's local variables are not touched; callers must
    ``del`` those themselves before calling this.

    Returns:
        None
    """
    namespace = globals()
    # Iterate over a snapshot of the names because entries are removed below.
    for name in list(namespace):
        if isinstance(namespace[name], (torch.nn.Module, torch.Tensor)):
            del namespace[name]

    # Collect first: tensors kept alive only by reference cycles must be
    # destroyed before their blocks can be handed back by empty_cache().
    gc.collect()

    # Guarded so the helper also works on machines without a usable GPU,
    # where ipc_collect() would otherwise fail while initialising CUDA.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

    # Short pause kept from the original implementation.
    time.sleep(1)

In [11]:
def setup_model_config():
    """Build the adapter and model-loading configuration used for a fold.

    Reads the module-level switches ``USE_LORA``, ``USE_QLORA``, ``USE_4BIT``,
    ``USE_8BIT`` and ``MODEL_NAME``.

    Returns:
        tuple: ``(lora_config, q_lora_config)`` where ``lora_config`` is a
        ``LoraConfig`` (or ``None`` when ``USE_LORA`` is off) and
        ``q_lora_config`` is a dict of keyword arguments for the model loader
        (dtype, optional device map, optional quantization config).
    """
    # --- LoRA adapter settings ---
    rank = 8
    lora_config = (
        LoraConfig(
            r=rank,
            lora_alpha=rank * 4,
            target_modules=[
                "q_proj",
                "k_proj",
                "v_proj",
                "o_proj",
                "down_proj",
                "up_proj",
                "gate_proj",
            ],
            lora_dropout=0.05,
            task_type=TaskType.SEQ_CLS,
            inference_mode=False,
        )
        if USE_LORA
        else None
    )

    # --- Model loading kwargs ---
    q_lora_config = {"torch_dtype": torch.bfloat16}
    # Models whose name contains "ettin" are loaded without a device map.
    if "ettin" not in MODEL_NAME.lower():
        q_lora_config["device_map"] = "auto"

    if not USE_QLORA:
        return lora_config, q_lora_config

    # --- bitsandbytes quantization (QLoRA only) ---
    bnb_kwargs = {}
    if USE_4BIT:
        bnb_kwargs = dict(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_storage=torch.bfloat16,
        )
    if USE_8BIT:
        bnb_kwargs = dict(load_in_8bit=True)

    q_lora_config["quantization_config"] = BitsAndBytesConfig(**bnb_kwargs)
    return lora_config, q_lora_config

In [12]:
def generate_test_predictions():
    """Predict the test set with every fold model and write the submission.

    For each of the ``N_FOLDS`` folds the model is loaded, the test inputs are
    tokenized, class probabilities are computed via softmax over the
    predicted logits, and the per-fold probabilities are averaged. The
    ``TOP_K`` highest-probability labels per row are decoded with the
    module-level label encoder ``le`` and joined with spaces.

    Side effects:
        * adds a ``stringified_input`` column to the module-level ``test_df``
        * writes ``submission.csv`` to the working directory

    Returns:
        tuple: ``(ensemble_predictions, submission)`` - the averaged
        probability array and the submission ``DataFrame``.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print("GENERATING TEST PREDICTIONS")
    print(f"{banner}")

    # Turn every test row into the text prompt expected by the model.
    test_df["stringified_input"] = test_df.apply(
        lambda row: stringify_input(row, MODEL_NAME), axis=1
    )

    # Model families for which the pad token is aligned with the tokenizer.
    pad_token_families = ("gemma", "qwen", "deepseek-math", "llama-3.1", "acemath")

    all_test_predictions = []
    for fold_idx in range(N_FOLDS):
        print(f"Loading fold {fold_idx + 1} model...")

        # With LoRA every fold shares the base checkpoint; otherwise each
        # fold has its own sub-directory under MODEL_NAME.
        if USE_LORA:
            model_path = MODEL_NAME
        else:
            model_path = os.path.join(MODEL_NAME, f"fold_{fold_idx}")

        tokenizer = get_tokenizer(model_path)

        def tokenize_function(examples):
            return tokenizer(examples["stringified_input"])

        # Tokenized copy of the test prompts for this fold's tokenizer.
        test_ds = Dataset.from_pandas(test_df[["stringified_input"]]).map(
            tokenize_function, batched=True
        )

        # Only the loading kwargs are needed here; the LoRA config itself is
        # not used at inference time.
        q_lora_config = setup_model_config()[1]
        seq_model = get_sequence_classifier(model_path, n_classes, q_lora_config)

        model_name_lower = MODEL_NAME.lower()
        if any(family in model_name_lower for family in pad_token_families):
            # Fall back to EOS as the pad token when the tokenizer has none,
            # then keep the model config in sync with the tokenizer.
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
                tokenizer.pad_token_id = tokenizer.eos_token_id
            seq_model.config.pad_token_id = tokenizer.pad_token_id

        if USE_LORA:
            # Attach this fold's trained adapter on top of the base model.
            fold_model_path = os.path.join(ADAPTER_PATH, f"fold_{fold_idx}")
            seq_model = PeftModel.from_pretrained(seq_model, fold_model_path)

        # The trainer is used purely as an inference harness.
        training_args = get_training_arguments(
            bf16_support="/kaggle" not in ROOT_PATH,
        )
        trainer = get_trainer(seq_model, tokenizer, training_args, test_ds, test_ds)

        predictions = trainer.predict(test_ds)
        probs = torch.tensor(predictions.predictions).softmax(dim=1).numpy()
        all_test_predictions.append(probs)

        # Drop every local reference to this fold's objects before asking
        # clear_memory() to release memory; it is invoked four times, as in
        # the original implementation.
        del seq_model, tokenizer, training_args, trainer
        del test_ds, predictions, probs
        for _ in range(4):
            clear_memory()

    # Simple average of the per-fold probability matrices.
    ensemble_predictions = np.mean(all_test_predictions, axis=0)

    # Indices of the TOP_K most probable classes per row, best first.
    topk = np.argsort(-ensemble_predictions, axis=1)[:, :TOP_K]
    topk_labels = le.inverse_transform(topk.flatten()).reshape(topk.shape)
    joined_preds = [" ".join(label_row) for label_row in topk_labels]

    submission = pd.DataFrame(
        {"row_id": test_df.row_id.values, "Category:Misconception": joined_preds}
    )
    submission.to_csv("submission.csv", index=False)

    print("Test predictions saved to 'submission.csv'")
    return ensemble_predictions, submission

In [13]:
# Run inference with every fold model; this also writes submission.csv.
test_predictions, submission = generate_test_predictions()


GENERATING TEST PREDICTIONS
Loading fold 1 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Loading fold 2 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Loading fold 3 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Loading fold 4 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Loading fold 5 model...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/deepseek-math/pytorch/deepseek-math-7b-instruct/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return Trainer(


Test predictions saved to 'submission.csv'


In [14]:
# Display the submission frame as a final visual check.
submission

Unnamed: 0,row_id,Category:Misconception
0,36696,True_Correct:NA True_Neither:NA True_Misconcep...
1,36697,False_Misconception:WNB False_Neither:NA False...
2,36698,True_Neither:NA True_Correct:NA True_Misconcep...
