In [None]:
# ----------------------------------------------------------------------
# BERT fine-tuning + plotting (training loss, val metrics, confusion-matrix)
# ----------------------------------------------------------------------
import os, random, numpy as np, pandas as pd, torch, pathlib, matplotlib.pyplot as plt, seaborn as sns
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    TrainingArguments, Trainer, DataCollatorWithPadding
)
from datasets import load_dataset, DatasetDict
from sklearn.metrics import (accuracy_score, precision_recall_fscore_support,
                             confusion_matrix)

# Record the installed library version in the run log for reproducibility.
import transformers

print("Transformers", transformers.__version__)

# --- configuration ------------------------------------------------------
MODEL_NAME  = "bert-base-uncased"  # checkpoint name given to the tokenizer/model loaders
DATA_ROOT   = "/kaggle/input/ai-2-dl-for-nlp-2025-homework-3"  # CSV folder
MAX_LENGTH  = 128    # truncation length (in tokens) used when tokenizing
BATCH_SIZE  = 16     # per-device batch size for both training and evaluation
NUM_EPOCHS  = 3
LEARNING_RATE = 2e-5
SEED          = 42

OUTPUT_DIR = "/kaggle/working/bert_model"   # checkpoints and the final saved model
FIG_DIR    = "/kaggle/working/figures"      # PNG plots
# parents=True so the script also works where /kaggle/working does not exist
# yet (e.g. a local run); exist_ok=True keeps re-runs idempotent.
pathlib.Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
pathlib.Path(FIG_DIR).mkdir(parents=True, exist_ok=True)

# Column names expected in the CSV files.
ID_COL, TEXT_COL, LABEL_COL = "ID", "Text", "Label"

def seed_everything(seed=SEED):
    """Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) RNGs.

    Args:
        seed: Integer seed applied to every generator; defaults to ``SEED``.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Seed once up front, before any data loading or model construction.
seed_everything()

# --- load CSV splits ----------------------------------------------------
# One CSV file per split, all located directly under DATA_ROOT.
_split_files = {
    "train":      f"{DATA_ROOT}/train_dataset.csv",
    "validation": f"{DATA_ROOT}/val_dataset.csv",
    "test":       f"{DATA_ROOT}/test_dataset.csv",
}
raw = load_dataset("csv", data_files=_split_files)

# Working copy holding exactly the three splits used below; `raw` is kept so
# the untouched test IDs can be read back when writing the submission.
ds = DatasetDict(
    {split: raw[split] for split in ("train", "validation", "test")}
)

# --- tokenisation -------------------------------------------------------
tok = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="right")


def tokenize(batch):
    """Tokenize a batch of rows and attach integer labels when present.

    Args:
        batch: Mapping from column name to a list of values, as supplied by
            ``map(..., batched=True)``.

    Returns:
        The tokenizer encoding of ``batch[TEXT_COL]``, truncated to
        ``MAX_LENGTH`` tokens. A ``"labels"`` list of ints is added when the
        batch contains at least one non-null label.

    Raises:
        TypeError: If a batch mixes null and non-null labels, since a null
            label cannot be converted to ``int``.
    """
    enc = tok(batch[TEXT_COL], truncation=True, max_length=MAX_LENGTH)
    labels = batch[LABEL_COL] if LABEL_COL in batch else []
    # Look at the whole batch rather than only row 0: an empty batch no
    # longer raises IndexError, and a labelled batch that merely starts with
    # a null is not silently treated as unlabelled.
    if any(label is not None for label in labels):          # train/val splits
        enc["labels"] = [int(label) for label in labels]
    return enc


# Drop the raw columns so only tokenizer outputs (and labels) remain.
ds_tok = ds.map(tokenize, batched=True, remove_columns=[ID_COL, TEXT_COL, LABEL_COL])
# No padding is requested in tokenize(); the collator pads each batch.
data_collator = DataCollatorWithPadding(tok)

# --- metrics for Trainer ------------------------------------------------
def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision/recall/F1.

    Args:
        pred: A ``(logits, labels)`` pair; the predicted class is the argmax
            of the logits over the last axis.

    Returns:
        Dict with the keys ``"accuracy"``, ``"precision"``, ``"recall"`` and
        ``"f1"``.
    """
    logits, labels = pred
    preds = logits.argmax(-1)
    # zero_division=0: a class that is never predicted scores 0 instead of
    # triggering a warning.
    precision, recall, f1, _support = precision_recall_fscore_support(
        labels, preds, average="macro", zero_division=0
    )
    return {
        "accuracy": accuracy_score(labels, preds),
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

# --- model + Trainer ----------------------------------------------------
# Sequence-classification model with two output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    # Evaluate and checkpoint on the same per-epoch schedule so the best
    # checkpoint (by the "f1" value from compute_metrics) can be reloaded
    # when training ends.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    seed=SEED,
    report_to="none",   # no external experiment trackers
)

# NOTE(review): newer transformers releases appear to deprecate `tokenizer=`
# in favour of `processing_class=` - confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds_tok["train"],
    eval_dataset=ds_tok["validation"],
    tokenizer=tok,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# --- training & evaluation ----------------------------------------------
trainer.train()

# Score the trainer's eval_dataset (the validation split) once more after
# training and print the resulting metrics dict.
val_metrics = trainer.evaluate()
print("ðŸ“Š  Validation metrics:", val_metrics)

# --- curves -------------------------------------------------------------
history = trainer.state.log_history

# Training-loss records. Keep each record's optimizer step so the x-axis is a
# real step count instead of the index of the log record.
loss_logs = [rec for rec in history if "loss" in rec and "epoch" in rec]
train_loss = [rec["loss"] for rec in loss_logs]
train_steps = [rec.get("step", idx) for idx, rec in enumerate(loss_logs)]

# Evaluation records. An extra trainer.evaluate() call made after training
# appends one more record at the same global step as the last epoch's
# evaluation; keep only the first record per step so each epoch is plotted
# exactly once.
eval_logs = []
seen_steps = set()
for rec in history:
    if "eval_f1" not in rec or "eval_accuracy" not in rec:
        continue
    step = rec.get("step")
    if step is not None and step in seen_steps:
        continue
    seen_steps.add(step)
    eval_logs.append(rec)
eval_f1 = [rec["eval_f1"] for rec in eval_logs]
eval_acc = [rec["eval_accuracy"] for rec in eval_logs]
eval_epochs = [rec.get("epoch", idx + 1) for idx, rec in enumerate(eval_logs)]

# NOTE(review): the title strings below contain mis-encoded characters
# (probably a dash) - confirm the file encoding before changing them.
plt.figure()
plt.plot(train_steps, train_loss)
plt.xlabel("Training step"); plt.ylabel("Loss")
plt.title("BERT â€“ training loss"); plt.tight_layout()
plt.savefig(f"{FIG_DIR}/bert_loss.png", dpi=300); plt.close()

plt.figure()
plt.plot(eval_epochs, eval_f1,  'o-', label="F1")
plt.plot(eval_epochs, eval_acc, 'o-', label="Accuracy")
plt.xlabel("Epoch"); plt.ylabel("Score"); plt.legend()
plt.title("BERT â€“ validation metrics"); plt.tight_layout()
plt.savefig(f"{FIG_DIR}/bert_val_metrics.png", dpi=300); plt.close()

# --- confusion-matrix on validation split -------------------------------
val_logits = trainer.predict(ds_tok["validation"]).predictions
val_preds  = val_logits.argmax(-1)
# Pin the class order: the matrix is then always 2x2 and lines up with the
# two tick labels below, even if one class never occurs in the labels or the
# predictions.
cm = confusion_matrix(ds["validation"][LABEL_COL], val_preds, labels=[0, 1])

# NOTE(review): the title string below contains mis-encoded characters
# (probably a dash) - confirm the file encoding before changing it.
plt.figure(figsize=(3.5,3))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["neg","pos"], yticklabels=["neg","pos"])
plt.xlabel("Predicted"); plt.ylabel("Actual")
plt.title("BERT â€“ validation CM"); plt.tight_layout()
plt.savefig(f"{FIG_DIR}/bert_confusion.png", dpi=300); plt.close()

# --- prediction & submission --------------------------------------------
# Predicted class = argmax over the logits returned for the test split.
test_output = trainer.predict(ds_tok["test"])
test_preds = test_output.predictions.argmax(-1)

# IDs come from the untokenized split because tokenization removed ID_COL.
test_ids   = raw["test"][ID_COL]

submission_columns = {
    ID_COL: test_ids,
    LABEL_COL: test_preds.astype(int),
}
submission = pd.DataFrame(submission_columns)
submission.to_csv("submission.csv", index=False)
print("âœ… submission.csv written with", len(submission), "rows")

# Store the model and its tokenizer side by side in one directory.
best_dir = f"{OUTPUT_DIR}/best"
trainer.save_model(best_dir)
tok.save_pretrained(best_dir)

# List the figure directory to confirm the plots were written.
figure_files = os.listdir(FIG_DIR)
print("Files in /kaggle/working/figures:", figure_files)


In [None]:
from IPython.display import FileLinks
# Left as a bare expression so that, as the last statement of a notebook
# cell, its value is rendered by the notebook.
# NOTE(review): 'figures' is a relative path - presumably it resolves to
# FIG_DIR because the working directory is /kaggle/working; confirm.
FileLinks('figures')          # clickable download links
