In [None]:
# ==========================
# MELD fine-tuning (BERT-family) — Setup (install deps + imports)
#   - Compatible with DistilBERT / BERT / RoBERTa (swap model checkpoint + tokenizer)
#   - Purpose: Setup (install deps + imports)
# ==========================

!pip -q install -U transformers datasets accelerate scikit-learn pandas

import numpy as np, pandas as pd, torch, os
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, set_seed
from sklearn.metrics import accuracy_score, f1_score

# ====== PATHS  ======
# Absolute locations of the three MELD split CSVs (train / dev / test).
# Each one is passed to load_df() further down; edit these when the files live
# somewhere other than /content.
TRAIN_CSV = "/content/train_sent_emo.csv"
VAL_CSV   = "/content/dev_sent_emo.csv"
TEST_CSV  = "/content/test_sent_emo.csv"


In [None]:

# =======================
# MELD (Ekman-7) DistilBERT / BERT / RoBERTa fine-tune (5 seeds)
# - best checkpoint selected by weighted_f1 on VAL
# - evaluate on TEST
# =======================


# ====== CONFIG ======
# Checkpoint to fine-tune, and the CSV columns holding the text and the gold label.
MODEL_BASE = "bert-base-uncased"
TEXT_COL = "Utterance"
LABEL_COL = "Emotion"

# Ekman-7 label set; a label's position in this list is its integer class id.
LABELS = ["anger","disgust","fear","joy","neutral","sadness","surprise"]
label2id = {name: idx for idx, name in enumerate(LABELS)}
id2label = dict(enumerate(LABELS))

# One full fine-tuning run is performed per seed; test metrics are later
# reported per seed and as mean / std across seeds.
SEEDS = [42,43,44,45,46]
# Optimisation hyperparameters forwarded to TrainingArguments
# (learning_rate, num_train_epochs, per-device train / eval batch sizes).
LR = 2e-5
EPOCHS = 4
BATCH_TRAIN = 16
BATCH_EVAL  = 32
# Tokenizer truncation limit (max_length) used when encoding utterances.
MAX_LEN = 256
# Prefix of the per-seed output directories ("<OUT_ROOT>_seed<seed>" and
# "<OUT_ROOT>_seed<seed>_BEST").
OUT_ROOT = "bert_meld"

# Device the freshly loaded model is moved to before training.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Single tokenizer instance shared by dataset encoding, the Trainer and the
# BEST-checkpoint export.
tok = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=True)


In [None]:
# ==========================
# Data loading + preprocessing helpers
# ==========================

# ====== DATA ======
def load_df(path):
    """Read one MELD split from CSV and return only the usable rows.

    Rows missing the text or label column are dropped, the text is cast to
    ``str``, and labels are stripped and lower-cased. Rows whose normalised
    label is not in ``LABELS`` are discarded.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A new DataFrame (original columns and index values kept) containing
        only rows with a recognised label.
    """
    cleaned = (
        pd.read_csv(path)
        .dropna(subset=[TEXT_COL, LABEL_COL])
        .assign(**{
            TEXT_COL: lambda frame: frame[TEXT_COL].astype(str),
            LABEL_COL: lambda frame: frame[LABEL_COL].astype(str).str.strip().str.lower(),
        })
    )
    has_known_label = cleaned[LABEL_COL].isin(LABELS)
    return cleaned.loc[has_known_label].copy()

def to_ds(df):
    """Convert a cleaned DataFrame into a tokenized ``Dataset`` with integer labels.

    Only the text and label columns are carried over. The text is tokenized
    with the shared tokenizer ``tok`` (truncated to ``MAX_LEN``, left unpadded
    here), and each label string is mapped to its id via ``label2id`` and
    stored under ``"labels"``. The two raw columns are removed from the result.

    Args:
        df: DataFrame containing at least ``TEXT_COL`` and ``LABEL_COL``.

    Returns:
        The mapped ``Dataset`` holding the tokenizer outputs plus ``"labels"``.
    """
    def _tokenize_and_label(batch):
        encoded = tok(batch[TEXT_COL], truncation=True, padding=False, max_length=MAX_LEN)
        encoded["labels"] = list(map(label2id.__getitem__, batch[LABEL_COL]))
        return encoded

    source_columns = [TEXT_COL, LABEL_COL]
    raw = Dataset.from_pandas(df[source_columns], preserve_index=False)
    return raw.map(_tokenize_and_label, batched=True, remove_columns=source_columns)

# Build the three tokenized splits in train -> val -> test order.
train_ds, val_ds, test_ds = (
    to_ds(load_df(csv_path)) for csv_path in (TRAIN_CSV, VAL_CSV, TEST_CSV)
)

# Sanity check: number of usable examples per split.
print("Counts:", *map(len, (train_ds, val_ds, test_ds)))

# ====== METRICS ======
def compute_metrics(eval_pred):
    """Compute accuracy, weighted F1 and macro F1 from Trainer predictions.

    Args:
        eval_pred: Pair ``(predictions, label_ids)`` as supplied by the
            Trainer. ``predictions`` is normally the logits array; if it
            arrives as a tuple of outputs, its first element is used as the
            logits.

    Returns:
        Dict with keys ``"acc"``, ``"weighted_f1"`` and ``"macro_f1"``.
    """
    logits, y_true = eval_pred
    # Guard for models that return extra outputs alongside the logits.
    if isinstance(logits, tuple):
        logits = logits[0]
    y_pred = np.argmax(logits, axis=-1)
    # zero_division=0 yields the same score as the default for classes that are
    # never predicted, but without emitting UndefinedMetricWarning every epoch.
    return {
        "acc": accuracy_score(y_true, y_pred),
        "weighted_f1": f1_score(y_true, y_pred, average="weighted", zero_division=0),
        "macro_f1": f1_score(y_true, y_pred, average="macro", zero_division=0),
    }

In [None]:
# ==========================
# Trainer callback (save a model snapshot at the end of every epoch)
# ==========================

import os
from transformers import TrainerCallback

class SaveByEpochCallback(TrainerCallback):
    """Trainer callback that writes a model snapshot at the end of every epoch.

    Snapshots are stored in ``<out_root>/epoch_XX`` where ``XX`` is the rounded
    epoch number. An existing folder with the same name is written into again,
    so use a distinct ``out_root`` per training run to keep runs apart.

    Args:
        out_root: Directory under which the ``epoch_XX`` folders are created.
            It is created on construction if it does not exist.
    """

    def __init__(self, out_root):
        self.out_root = out_root
        os.makedirs(out_root, exist_ok=True)

    def on_epoch_end(self, args, state, control, **kwargs):
        """Save the current model (and tokenizer, when provided) for this epoch.

        Returns:
            The unchanged ``control`` object.
        """
        model = kwargs["model"]
        # Recent transformers releases hand the tokenizer to callbacks as
        # `processing_class`; older releases used `tokenizer`. Check both so the
        # tokenizer is saved on either version.
        tokenizer = kwargs.get("processing_class")
        if tokenizer is None:
            tokenizer = kwargs.get("tokenizer")

        # epoch may be a float (e.g. 1.0, 2.0)
        ep = state.epoch
        ep_i = int(round(ep)) if ep is not None else 0

        save_dir = os.path.join(self.out_root, f"epoch_{ep_i:02d}")
        os.makedirs(save_dir, exist_ok=True)

        # save model (+ tokenizer if available)
        model.save_pretrained(save_dir)
        if tokenizer is not None:
            tokenizer.save_pretrained(save_dir)

        print(f"✅ Saved epoch checkpoint to: {save_dir}")
        return control


In [None]:
# ==========================
# Train + evaluate across seeds (select best by val weighted-F1)
# ==========================

# One result row per seed, plus the best checkpoint path chosen for that seed.
rows = []
best_ckpts = {}  # seed -> checkpoint path

for seed in SEEDS:
    print("\n" + "="*20, "SEED", seed, "="*20)
    set_seed(seed)

    # Start every seed from the same pretrained weights; only the randomly
    # initialised classification head and the data order differ between seeds.
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_BASE,
        num_labels=len(LABELS),
        label2id=label2id,
        id2label=id2label
    ).to(DEVICE)

    args = TrainingArguments(
        output_dir=f"{OUT_ROOT}_seed{seed}",
        # Evaluate and checkpoint once per epoch so the best epoch (by
        # validation weighted F1) can be reloaded when training ends.
        eval_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        load_best_model_at_end=True,
        metric_for_best_model="weighted_f1",
        greater_is_better=True,

        learning_rate=LR,
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_TRAIN,
        per_device_eval_batch_size=BATCH_EVAL,
        weight_decay=0.01,
        warmup_ratio=0.06,

        fp16=torch.cuda.is_available(),
        report_to="none",
        seed=seed,
        logging_steps=50,
    )

    # Give each seed its own snapshot folder; with one shared folder every
    # seed would overwrite the epoch_XX snapshots of the previous seed.
    epoch_saver = SaveByEpochCallback(
        os.path.join("/content/epoch_checkpoints", f"seed{seed}")
    )
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        # `processing_class` replaces the deprecated `tokenizer` argument.
        processing_class=tok,
        compute_metrics=compute_metrics,
        callbacks=[epoch_saver],
    )

    trainer.train()

    # After train() the best-on-validation weights are loaded
    # (load_best_model_at_end=True), so this test evaluation uses them.
    best_ckpts[seed] = trainer.state.best_model_checkpoint
    res = trainer.evaluate(test_ds)

    rows.append({
        "seed": seed,
        "test_acc": float(res["eval_acc"]),
        "test_weighted_f1": float(res["eval_weighted_f1"]),
        "test_macro_f1": float(res["eval_macro_f1"]),
        "best_ckpt": best_ckpts[seed],
    })

df = pd.DataFrame(rows)
print("\nPer-seed results:")
display(df)

# Aggregate over the numeric metric columns only.
metric_cols = df.drop(columns=["seed","best_ckpt"])

print("\nMEAN:")
display(metric_cols.mean().to_frame("mean"))

print("\nSTD:")
display(metric_cols.std().to_frame("std"))


Map:   0%|          | 0/9989 [00:00<?, ? examples/s]

Map:   0%|          | 0/1109 [00:00<?, ? examples/s]

Map:   0%|          | 0/2610 [00:00<?, ? examples/s]

Counts: 9989 1109 2610



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.0217,1.15409,0.620379,0.585296,0.385644
2,1.0075,1.105296,0.632101,0.607797,0.431821
3,0.7691,1.163171,0.62128,0.598487,0.450625
4,0.5678,1.223742,0.62128,0.606795,0.486387





Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.1579,1.142672,0.624887,0.597244,0.398696
2,1.0253,1.120816,0.608656,0.575429,0.396509
3,0.7426,1.188545,0.604148,0.579332,0.414567
4,0.6066,1.238227,0.603246,0.584756,0.426805





Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.1525,1.19314,0.595131,0.540839,0.345708
2,0.9377,1.16052,0.608656,0.571704,0.398159
3,0.7491,1.224798,0.609558,0.585263,0.44958
4,0.6255,1.244862,0.605951,0.587597,0.45899





Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.0678,1.19447,0.597836,0.555968,0.35891
2,0.9377,1.165025,0.611362,0.577882,0.395636
3,0.782,1.168559,0.61046,0.593091,0.448758
4,0.5134,1.255159,0.614067,0.597628,0.470195





Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.1197,1.179797,0.607755,0.565901,0.366155
2,1.0044,1.126707,0.62128,0.591017,0.396873
3,0.7111,1.2113,0.609558,0.583121,0.428422
4,0.5945,1.242534,0.599639,0.586718,0.453501



Per-seed results:


Unnamed: 0,seed,test_acc,test_weighted_f1,test_macro_f1,best_ckpt
0,42,0.649042,0.63085,0.4251,bert_meld_seed42/checkpoint-1250
1,43,0.637165,0.619514,0.393065,bert_meld_seed43/checkpoint-625
2,44,0.635249,0.622409,0.443135,bert_meld_seed44/checkpoint-2500
3,45,0.62682,0.615405,0.435178,bert_meld_seed45/checkpoint-2500
4,46,0.657088,0.631279,0.416587,bert_meld_seed46/checkpoint-1250



MEAN:


Unnamed: 0,mean
test_acc,0.641073
test_weighted_f1,0.623891
test_macro_f1,0.422613



STD:


Unnamed: 0,std
test_acc,0.011962
test_weighted_f1,0.007007
test_macro_f1,0.01933


In [None]:
# ==========================
#  Collect / copy best checkpoints to *_BEST folders
# ==========================

import os, shutil

# Export each seed's best checkpoint (plus the tokenizer) to "<OUT_ROOT>_seed<seed>_BEST".
for seed, ckpt_path in best_ckpts.items():
    # Validate the source before touching the destination: otherwise a missing
    # or None checkpoint would delete an existing BEST folder and then fail.
    if not ckpt_path or not os.path.isdir(ckpt_path):
        print(f"Skipping seed {seed}: best checkpoint not found ({ckpt_path!r})")
        continue

    best_dir = f"{OUT_ROOT}_seed{seed}_BEST"
    # copytree needs a non-existent target, so clear any previous export first.
    if os.path.exists(best_dir):
        shutil.rmtree(best_dir)
    shutil.copytree(ckpt_path, best_dir)
    tok.save_pretrained(best_dir)
    print(f"Saved BEST for seed {seed}: {best_dir}")


Saved BEST for seed 42: bert_meld_seed42_BEST
Saved BEST for seed 43: bert_meld_seed43_BEST
Saved BEST for seed 44: bert_meld_seed44_BEST
Saved BEST for seed 45: bert_meld_seed45_BEST
Saved BEST for seed 46: bert_meld_seed46_BEST
