In [1]:
#!/usr/bin/env python
import os, random
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    set_seed,
    EarlyStoppingCallback
)
from peft import LoraConfig, get_peft_model, TaskType

os.environ["WANDB_DISABLED"] = "true"

# 1. Reproducibility
SEED = 42
set_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# 2. Load & clean CSVs
train_df = pd.read_csv("/kaggle/input/5790finalproj-dataset/train.csv", encoding="latin1")
test_df  = pd.read_csv("/kaggle/input/5790finalproj-dataset/test.csv",  encoding="latin1")
for df in (train_df, test_df):
    for col in ["Question","Response","CorrectAnswer"]:
        df[col] = df[col].fillna("").astype(str)

# 3. Map labels
label_map = {-1:0, 0:1, 1:2}
train_df["label"] = train_df["label"].map(label_map)
test_df["label"]  = test_df["label"].map(label_map)

# 4. Stratified split 80/20 on train
train_pd, val_pd = train_test_split(
    train_df, test_size=0.2,
    stratify=train_df["label"],
    random_state=SEED
)

# 5. To HF Dataset
def df_to_ds(df):
    ds = Dataset.from_pandas(df.reset_index(drop=True))
    drop = [c for c in ["index","Experiment","Topic","ID"] if c in ds.column_names]
    return ds.remove_columns(drop)

train_ds = df_to_ds(train_pd)
val_ds   = df_to_ds(val_pd)
test_ds  = df_to_ds(test_df)

# 6. Tokenizer & preprocess
MODEL = "microsoft/deberta-v3-large"
tokenizer = AutoTokenizer.from_pretrained(MODEL)

def preprocess(batch):
    texts = [
        f"{q} [SEP] {r} [SEP] {c}"
        for q,r,c in zip(batch["Question"], batch["Response"], batch["CorrectAnswer"])
    ]
    enc = tokenizer(texts, truncation=True, max_length=256)
    enc["labels"] = batch["label"]
    return enc

train_ds = train_ds.map(preprocess, batched=True, remove_columns=train_ds.column_names)
val_ds   = val_ds.map(preprocess,   batched=True, remove_columns=val_ds.column_names)
test_ds  = test_ds.map(preprocess,  batched=True, remove_columns=test_ds.column_names)

# 7. Data collator
data_collator = DataCollatorWithPadding(tokenizer)

# 8. Model + LoRA
base = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=3)
lora_cfg = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8, lora_alpha=32, lora_dropout=0.1,
    target_modules=["query_proj","key_proj","value_proj","dense"]
)
model = get_peft_model(base, lora_cfg)
model.gradient_checkpointing_enable()

# 9. Class weights
counts = np.bincount(train_ds["labels"], minlength=3)
cw = torch.tensor(1.0/counts, dtype=torch.float)
cw /= cw.sum()

# 10. Custom Trainer
class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = torch.nn.CrossEntropyLoss(weight=cw.to(logits.device))
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

# 11. TrainingArguments
args = TrainingArguments(
    output_dir="out",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    learning_rate=3e-5,
    num_train_epochs=5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_macro_f1",
    fp16=True
)

# 12. Metrics
def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=-1)
    return {
        "accuracy":    accuracy_score(p.label_ids, preds),
        "macro_f1":    f1_score(p.label_ids, preds, average="macro"),
        "weighted_f1": f1_score(p.label_ids, preds, average="weighted")
    }

# 13. Trainer
trainer = WeightedTrainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# 14. Train & eval
if __name__ == "__main__":
    trainer.train()
    trainer.save_model("best_model")

    out = trainer.predict(test_ds)
    print("Test metrics:", compute_metrics(out))
    y_true = out.label_ids
    y_pred = np.argmax(out.predictions, axis=-1)
    print(classification_report(
        y_true, y_pred,
        target_names=["Incorrect(-1)","Partial(0)","Correct(1)"]
    ))


2025-05-12 20:01:18.330056: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747080078.607288      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747080078.679509      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/580 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Map:   0%|          | 0/450 [00:00<?, ? examples/s]

Map:   0%|          | 0/30466 [00:00<?, ? examples/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/874M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/874M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = WeightedTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Weighted F1
1,No log,1.114925,0.444444,0.205128,0.273504
2,No log,1.097582,0.444444,0.205128,0.273504
3,No log,1.087892,0.446667,0.221702,0.300207
4,1.091300,1.086728,0.446667,0.229802,0.313413




Test metrics: {'accuracy': 0.5475283923061773, 'macro_f1': 0.26500331844628927, 'weighted_f1': 0.4237862140980383}
               precision    recall  f1-score   support

Incorrect(-1)       0.55      0.96      0.70     16614
   Partial(0)       0.00      0.00      0.00       320
   Correct(1)       0.52      0.05      0.10     13532

     accuracy                           0.55     30466
    macro avg       0.36      0.34      0.27     30466
 weighted avg       0.53      0.55      0.42     30466

