In [None]:
# Colab-only: expose Google Drive inside the runtime so the CSV files stored
# under /content/drive can be read by the cells below.
from google.colab import drive
import pandas as pd  # NOTE(review): pandas is imported again with the other dependencies below
drive.mount('/content/drive')
# ================== IMPORT ==================
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW

from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    get_linear_schedule_with_warmup,
)

from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, f1_score, confusion_matrix

from tqdm import tqdm

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the train and test splits from the mounted Drive folder.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/NLP_Final_Project/train_clean.csv',
        '/content/drive/MyDrive/NLP_Final_Project/test_clean.csv',
    )
)

In [None]:
# Column names used to pull the model input text and the integer target
# out of the train/test frames.
TEXT_COL, LABEL_COL = "text_clean", "label"

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [None]:
# ========= 1. Prepare tokenizer & dataset =========
# Sequence length handed to TextDataset, which pads/truncates every text to it.
MAX_LEN = 128

# Checkpoint name shared by the tokenizer here and the classifier built in
# train_one_model.
MODEL_NAME = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

In [None]:
# ================== DATASET CLASS ==================
class TextDataset(Dataset):
    """Map-style dataset that tokenizes one DataFrame row per access.

    Texts and labels are copied out of the frame into plain Python lists at
    construction time; tokenization happens lazily in `__getitem__`.

    Args:
        df: DataFrame holding the examples.
        text_col: Name of the column containing the input text.
        label_col: Name of the column containing the class label; values
            are cast with `astype(int)`.
        tokenizer: Callable accepting `(text, padding=..., truncation=...,
            max_length=..., return_tensors="pt")` and returning a mapping of
            tensors with a leading batch dimension of size 1.
        max_len: Length every encoded sequence is padded/truncated to.
    """

    def __init__(self, df, text_col, label_col, tokenizer, max_len=128):
        # Missing text (NaN/None, e.g. an empty CSV cell read back by pandas)
        # would otherwise be stringified into the literal words "nan"/"None"
        # and tokenized as if it were real content. Use an empty string.
        self.texts = df[text_col].fillna("").astype(str).tolist()
        self.labels = df[label_col].astype(int).tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize example `idx` and return it as a dict of tensors.

        The dict contains every key produced by the tokenizer (with the
        leading batch dimension squeezed away) plus a `"labels"` entry holding
        the class index as a `torch.long` scalar.
        """
        text = self.texts[idx]
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )

        # return_tensors="pt" yields tensors shaped (1, ...); drop the batch
        # axis so the DataLoader can stack items itself.
        item = {k: v.squeeze(0) for k, v in encoding.items()}
        item["labels"] = torch.tensor(label, dtype=torch.long)
        return item


In [None]:
def make_loader(dataset, batch_size, shuffle=False):
    """Wrap `dataset` in a DataLoader.

    Args:
        dataset: Dataset to batch.
        batch_size: Number of examples per batch.
        shuffle: Whether the loader reshuffles the data; off by default.

    Returns:
        A `DataLoader` over `dataset` with the requested batching options.
    """
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return DataLoader(dataset, **loader_options)

In [None]:

# Hold out 20% of the labelled training rows for validation. The split is
# stratified on the label column and uses a fixed random_state.
train_sub, val_sub = train_test_split(
    train_df,
    stratify=train_df[LABEL_COL],
    test_size=0.2,
    random_state=42,
)

In [None]:

# Label inventory comes from the full training frame.
classes = np.sort(train_df[LABEL_COL].unique())
num_labels = len(classes)
print("Classes:", classes)
print("Number of labels:", num_labels)

# "balanced" weights are estimated on the training subset only, then
# converted to a float tensor.
balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=train_sub[LABEL_COL].values,
)
class_weights = torch.tensor(balanced_weights, dtype=torch.float)
print("Class weights:", class_weights)


Classes: [0 1 2]
Number of labels: 3
Class weights: tensor([0.9732, 0.8180, 1.3333])


In [None]:
# datasets
# One TextDataset per split, all sharing the same columns, tokenizer and
# maximum sequence length.
train_dataset, val_dataset, test_dataset = (
    TextDataset(split_df, TEXT_COL, LABEL_COL, tokenizer, MAX_LEN)
    for split_df in (train_sub, val_sub, test_df)
)

In [None]:
# ================== TRAIN 1 MODEL ==================
def _run_validation(model, val_loader, loss_fn):
    """Score `model` on `val_loader` without updating any weights.

    Args:
        model: Classifier called with `input_ids` and `attention_mask`
            keyword arguments; its output must expose `.logits`.
        val_loader: Sized iterable of dict batches holding "input_ids",
            "attention_mask" and "labels" tensors.
        loss_fn: Callable `(logits, labels) -> scalar tensor`.

    Returns:
        `(avg_loss, macro_f1)`: the mean of the per-batch losses and the
        macro-averaged F1 over all validation examples. The model is left in
        eval mode.
    """
    model.eval()
    val_preds = []
    val_labels = []
    val_loss = 0.0

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            )
            logits = outputs.logits
            val_loss += loss_fn(logits, labels).item()

            preds = torch.argmax(logits, dim=1)
            val_preds.extend(preds.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())

    avg_val_loss = val_loss / len(val_loader)
    val_macro_f1 = f1_score(val_labels, val_preds, average="macro")
    return avg_val_loss, val_macro_f1


def train_one_model(
    train_loader,
    val_loader,
    num_labels,
    lr=2e-5,
    epochs=3,
    class_weights=None,
    weight_decay=0.01,
    warmup_ratio=0.1,
    dropout=0.3,
):
    """Fine-tune a fresh `MODEL_NAME` classifier and score it on validation data.

    A new model is loaded from the pretrained checkpoint on every call, trained
    with AdamW and a linear warmup/decay schedule, and validated after each
    epoch (loss and macro F1 are printed).

    Args:
        train_loader: Sized iterable of dict batches holding "input_ids",
            "attention_mask" and "labels" tensors.
        val_loader: Same batch format; evaluated after every epoch.
        num_labels: Number of output classes for the classification head.
        lr: AdamW learning rate.
        epochs: Number of passes over `train_loader`.
        class_weights: Optional tensor passed as `weight` to
            `nn.CrossEntropyLoss`; `None` gives the unweighted loss.
        weight_decay: AdamW weight decay.
        warmup_ratio: Fraction of the total optimizer steps used for linear
            learning-rate warmup.
        dropout: Value used for both `hidden_dropout_prob` and
            `attention_probs_dropout_prob` of the model config.

    Returns:
        `(model, final_macro_f1)`: the trained model (on `device`, in eval
        mode) and its validation macro F1 after the last epoch.
    """
    model = BertForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=num_labels,
        hidden_dropout_prob=dropout,   # default = 0.1
        attention_probs_dropout_prob=dropout,
    )
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    # The scheduler is stepped once per batch, so it needs the total number
    # of optimizer steps across all epochs.
    total_steps = len(train_loader) * epochs
    num_warmup_steps = int(warmup_ratio * total_steps)

    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=total_steps,
    )

    if class_weights is not None:
        cw = class_weights.to(device)
        loss_fn = nn.CrossEntropyLoss(weight=cw)
    else:
        loss_fn = nn.CrossEntropyLoss()

    final_macro_f1 = None

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
        # Count steps explicitly: tqdm only refreshes `pbar.n` at its display
        # interval, so using it as the denominator can overstate the running
        # loss on fast loops.
        for step, batch in enumerate(pbar, start=1):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            optimizer.zero_grad()

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            )
            logits = outputs.logits

            # The loss is computed here (not inside the model) so the
            # optional class weights are applied.
            loss = loss_fn(logits, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

            total_loss += loss.item()
            pbar.set_postfix({"loss": total_loss / step})

        avg_train_loss = total_loss / len(train_loader)
        print(f"[Epoch {epoch+1}] Avg train loss: {avg_train_loss:.4f}")

        # ----- EVAL -----
        avg_val_loss, final_macro_f1 = _run_validation(model, val_loader, loss_fn)
        print(
            f"[Epoch {epoch+1}] Val loss: {avg_val_loss:.4f} | "
            f"Val Macro F1: {final_macro_f1:.4f}"
        )

    # The last epoch's validation pass already scored the final weights, so
    # it is reused instead of running a second identical pass. Only when no
    # epoch ran (epochs == 0) is the untrained model scored once here.
    if final_macro_f1 is None:
        _, final_macro_f1 = _run_validation(model, val_loader, loss_fn)

    return model, final_macro_f1

In [None]:
# ================== GRID SEARCH ==================
import itertools

# Candidate values per hyperparameter; the grid search tries every combination.
param_grid = dict(
    lr=[1.5e-5, 2e-5],
    batch_size=[16],
    epochs=[2, 3],
    weight_decay=[0.01, 0.02],
    warmup_ratio=[0.06],
)

In [None]:
# Trackers for the best grid-search trial. The score starts below any
# attainable F1 so the first trial always replaces it.
best_f1, best_params, best_model = -1.0, None, None

In [None]:
# Seed applied before every trial. Without it each configuration gets a
# different classifier-head initialisation, shuffle order and dropout stream,
# so run-to-run noise is mixed into the hyperparameter comparison.
# NOTE(review): GPU kernels may still introduce small nondeterminism.
GRID_SEED = 42

# Exhaustive search: train one model per combination in param_grid and keep
# the one with the highest validation macro F1.
for lr, batch_size, epochs, weight_decay, warmup_ratio in itertools.product(
    param_grid["lr"],
    param_grid["batch_size"],
    param_grid["epochs"],
    param_grid["weight_decay"],
    param_grid["warmup_ratio"],
):
    print("=" * 60)
    print(
        f"Trying params: lr={lr}, batch_size={batch_size}, "
        f"epochs={epochs}, weight_decay={weight_decay}, "
        f"warmup_ratio={warmup_ratio}"
    )

    # Reset torch's RNG so every trial starts from the same random state.
    torch.manual_seed(GRID_SEED)

    # Loaders are rebuilt per trial because batch_size is part of the grid.
    train_loader = make_loader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = make_loader(val_dataset, batch_size=batch_size, shuffle=False)

    model, val_f1 = train_one_model(
        train_loader=train_loader,
        val_loader=val_loader,
        num_labels=num_labels,
        lr=lr,
        epochs=epochs,
        class_weights=class_weights,
        weight_decay=weight_decay,
        warmup_ratio=warmup_ratio,
    )

    print(f"Params -> Val Macro F1 = {val_f1:.4f}")

    # Strict ">" keeps the earliest configuration when scores tie.
    if val_f1 > best_f1:
        best_f1 = val_f1
        best_params = {
            "lr": lr,
            "batch_size": batch_size,
            "epochs": epochs,
            "weight_decay": weight_decay,
            "warmup_ratio": warmup_ratio,
        }
        best_model = model

print("=" * 60)
print("Best params:", best_params)
print("Best Val Macro F1:", best_f1)

Trying params: lr=1.5e-05, batch_size=16, epochs=2, weight_decay=0.01, warmup_ratio=0.06


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2: 100%|██████████| 50/50 [00:18<00:00,  2.77it/s, loss=1.05]


[Epoch 1] Avg train loss: 1.0533
[Epoch 1] Val loss: 0.9453 | Val Macro F1: 0.4589


Epoch 2/2: 100%|██████████| 50/50 [00:17<00:00,  2.78it/s, loss=0.86]


[Epoch 2] Avg train loss: 0.8600
[Epoch 2] Val loss: 0.8304 | Val Macro F1: 0.4983
Params -> Val Macro F1 = 0.4983
Trying params: lr=1.5e-05, batch_size=16, epochs=2, weight_decay=0.02, warmup_ratio=0.06


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2: 100%|██████████| 50/50 [00:18<00:00,  2.75it/s, loss=1.08]


[Epoch 1] Avg train loss: 1.0814
[Epoch 1] Val loss: 0.9708 | Val Macro F1: 0.7022


Epoch 2/2: 100%|██████████| 50/50 [00:18<00:00,  2.68it/s, loss=0.903]


[Epoch 2] Avg train loss: 0.9026
[Epoch 2] Val loss: 0.8518 | Val Macro F1: 0.7230
Params -> Val Macro F1 = 0.7230
Trying params: lr=1.5e-05, batch_size=16, epochs=3, weight_decay=0.01, warmup_ratio=0.06


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 50/50 [00:18<00:00,  2.72it/s, loss=1]


[Epoch 1] Avg train loss: 1.0046
[Epoch 1] Val loss: 0.8470 | Val Macro F1: 0.5720


Epoch 2/3: 100%|██████████| 50/50 [00:18<00:00,  2.71it/s, loss=0.777]


[Epoch 2] Avg train loss: 0.7770
[Epoch 2] Val loss: 0.6985 | Val Macro F1: 0.6353


Epoch 3/3: 100%|██████████| 50/50 [00:18<00:00,  2.74it/s, loss=0.663]


[Epoch 3] Avg train loss: 0.6625
[Epoch 3] Val loss: 0.6552 | Val Macro F1: 0.6781
Params -> Val Macro F1 = 0.6781
Trying params: lr=1.5e-05, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.06


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 50/50 [00:18<00:00,  2.72it/s, loss=1.03]


[Epoch 1] Avg train loss: 1.0264
[Epoch 1] Val loss: 0.9030 | Val Macro F1: 0.5618


Epoch 2/3: 100%|██████████| 50/50 [00:18<00:00,  2.71it/s, loss=0.773]


[Epoch 2] Avg train loss: 0.7734
[Epoch 2] Val loss: 0.7453 | Val Macro F1: 0.6844


Epoch 3/3: 100%|██████████| 50/50 [00:18<00:00,  2.73it/s, loss=0.642]


[Epoch 3] Avg train loss: 0.6423
[Epoch 3] Val loss: 0.6953 | Val Macro F1: 0.7239
Params -> Val Macro F1 = 0.7239
Trying params: lr=2e-05, batch_size=16, epochs=2, weight_decay=0.01, warmup_ratio=0.06


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2: 100%|██████████| 50/50 [00:18<00:00,  2.71it/s, loss=0.984]


[Epoch 1] Avg train loss: 0.9840
[Epoch 1] Val loss: 0.8450 | Val Macro F1: 0.4635


Epoch 2/2: 100%|██████████| 50/50 [00:18<00:00,  2.73it/s, loss=0.697]


[Epoch 2] Avg train loss: 0.6972
[Epoch 2] Val loss: 0.6574 | Val Macro F1: 0.7170
Params -> Val Macro F1 = 0.7170
Trying params: lr=2e-05, batch_size=16, epochs=2, weight_decay=0.02, warmup_ratio=0.06


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2: 100%|██████████| 50/50 [00:18<00:00,  2.73it/s, loss=1.02]


[Epoch 1] Avg train loss: 1.0224
[Epoch 1] Val loss: 0.8419 | Val Macro F1: 0.4878


Epoch 2/2: 100%|██████████| 50/50 [00:18<00:00,  2.71it/s, loss=0.752]


[Epoch 2] Avg train loss: 0.7517
[Epoch 2] Val loss: 0.7217 | Val Macro F1: 0.6501
Params -> Val Macro F1 = 0.6501
Trying params: lr=2e-05, batch_size=16, epochs=3, weight_decay=0.01, warmup_ratio=0.06


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 50/50 [00:18<00:00,  2.72it/s, loss=1.06]


[Epoch 1] Avg train loss: 1.0579
[Epoch 1] Val loss: 0.8771 | Val Macro F1: 0.6998


Epoch 2/3: 100%|██████████| 50/50 [00:18<00:00,  2.70it/s, loss=0.693]


[Epoch 2] Avg train loss: 0.6927
[Epoch 2] Val loss: 0.5596 | Val Macro F1: 0.7700


Epoch 3/3: 100%|██████████| 50/50 [00:18<00:00,  2.73it/s, loss=0.541]


[Epoch 3] Avg train loss: 0.5407
[Epoch 3] Val loss: 0.5051 | Val Macro F1: 0.8077
Params -> Val Macro F1 = 0.8077
Trying params: lr=2e-05, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.06


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 50/50 [00:18<00:00,  2.72it/s, loss=1.03]


[Epoch 1] Avg train loss: 1.0342
[Epoch 1] Val loss: 0.8481 | Val Macro F1: 0.6943


Epoch 2/3: 100%|██████████| 50/50 [00:18<00:00,  2.72it/s, loss=0.717]


[Epoch 2] Avg train loss: 0.7168
[Epoch 2] Val loss: 0.6544 | Val Macro F1: 0.6624


Epoch 3/3: 100%|██████████| 50/50 [00:18<00:00,  2.72it/s, loss=0.576]


[Epoch 3] Avg train loss: 0.5762
[Epoch 3] Val loss: 0.6032 | Val Macro F1: 0.6756
Params -> Val Macro F1 = 0.6756
Best params: {'lr': 2e-05, 'batch_size': 16, 'epochs': 3, 'weight_decay': 0.01, 'warmup_ratio': 0.06}
Best Val Macro F1: 0.8077079663339205


In [None]:
def evaluate_on_loader(model, loader, name=""):
    """Predict over `loader`, print a classification summary and return results.

    Args:
        model: Classifier called with `input_ids` and `attention_mask`
            keyword arguments; its output must expose `.logits`.
        loader: Iterable of dict batches holding "input_ids",
            "attention_mask" and "labels" tensors.
        name: Label for the evaluated split, used only in the printed header.

    Returns:
        `(labels, preds, macro_f1)`: the collected true labels, the predicted
        class indices, and the macro-averaged F1 score.
    """
    all_preds, all_labels = [], []

    model.eval()
    with torch.no_grad():
        for batch in loader:
            inputs = {
                key: batch[key].to(device)
                for key in ("input_ids", "attention_mask")
            }
            targets = batch["labels"].to(device)

            # Predicted class = index of the largest logit per example.
            batch_preds = model(**inputs).logits.argmax(dim=1)

            all_preds.extend(batch_preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

    print(f"\n=== Performance on {name} ===")
    print(classification_report(all_labels, all_preds, digits=4))
    macro_f1 = f1_score(all_labels, all_preds, average="macro")
    print("Macro F1:", macro_f1)
    return all_labels, all_preds, macro_f1

In [None]:
# Evaluate with the batch size of the winning grid-search configuration.
batch_size = best_params["batch_size"]

# Unshuffled loaders over the training subset and the test set.
train_loader_dbg = make_loader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = make_loader(test_dataset, batch_size=batch_size, shuffle=False)

# Train_sub
train_labels, train_preds, train_f1 = evaluate_on_loader(
    best_model,
    train_loader_dbg,
    name="TRAIN_SUB",
)

# Test_set
test_labels, test_preds, test_f1 = evaluate_on_loader(
    best_model,
    test_loader,
    name="TEST_SET",
)

# Confusion matrix of true vs. predicted test labels.
cm = confusion_matrix(test_labels, test_preds)
print("\nConfusion matrix (TEST_SET):")
print(cm)


=== Performance on TRAIN_SUB ===
              precision    recall  f1-score   support

           0     0.8626    0.8248    0.8433       274
           1     0.7890    0.7914    0.7902       326
           2     0.8199    0.8650    0.8418       200

    accuracy                         0.8213       800
   macro avg     0.8238    0.8271    0.8251       800
weighted avg     0.8219    0.8213    0.8213       800

Macro F1: 0.8251106038906343

=== Performance on TEST_SET ===
              precision    recall  f1-score   support

           0     0.8286    0.8208    0.8246       106
           1     0.7975    0.7730    0.7850       163
           2     0.5965    0.6667    0.6296        51

    accuracy                         0.7719       320
   macro avg     0.7408    0.7535    0.7464       320
weighted avg     0.7757    0.7719    0.7734       320

Macro F1: 0.7464403027882085

Confusion matrix (TEST_SET):
[[ 87  15   4]
 [ 18 126  19]
 [  0  17  34]]
