In [None]:
# Colab-only setup: mount Google Drive at /content/drive so the CSV files read
# by a later cell (they live under /content/drive/MyDrive) are reachable.
from google.colab import drive
import pandas as pd
drive.mount('/content/drive')
# ================== IMPORT ==================
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW

from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    get_linear_schedule_with_warmup,
)
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, f1_score, confusion_matrix

from tqdm import tqdm

Mounted at /content/drive


In [None]:
# Load the cleaned train and test splits from the mounted Drive folder.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/NLP_Final_Project/train_clean.csv',
        '/content/drive/MyDrive/NLP_Final_Project/test_clean.csv',
    )
)

In [None]:
# Column names used throughout the notebook: the model input text and the
# integer class label.
TEXT_COL, LABEL_COL = "text_clean", "label"

In [None]:
# Run on the GPU when the runtime exposes one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [None]:
# ========= 1.tokenizer & dataset =========
MAX_LEN = 128  # token budget per example; can be raised or lowered

# Checkpoint shared by the tokenizer and the classifier.
# Switch to "bert-base-multilingual-cased" if the data contains Vietnamese.
MODEL_NAME = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# ================== DATASET CLASS ==================
class TextDataset(Dataset):
    """Map-style dataset that tokenizes one DataFrame row per lookup.

    Texts are cast to ``str`` and labels to ``int`` once, at construction.
    Tokenization happens lazily in ``__getitem__``: every example is padded
    or truncated to ``max_len`` tokens.

    Args:
        df: DataFrame holding the examples.
        text_col: Name of the column with the input text.
        label_col: Name of the column with the integer class label.
        tokenizer: Callable accepting ``(text, padding=..., truncation=...,
            max_length=..., return_tensors=...)`` and returning a mapping of
            tensors that carry a leading batch dimension of size 1.
        max_len: Sequence length passed to the tokenizer as ``max_length``.
    """

    def __init__(self, df, text_col, label_col, tokenizer, max_len=128):
        self.tokenizer = tokenizer
        self.max_len = max_len
        text_series = df[text_col].astype(str)
        label_series = df[label_col].astype(int)
        self.texts = text_series.tolist()
        self.labels = label_series.tolist()

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenizer outputs for row ``idx`` plus a ``labels`` tensor."""
        encoded = self.tokenizer(
            self.texts[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )

        # Drop the batch dimension added by return_tensors="pt" so the
        # DataLoader can stack the examples itself.
        sample = {}
        for key, tensor in encoded.items():
            sample[key] = tensor.squeeze(0)
        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample


In [None]:
def make_loader(dataset, batch_size, shuffle=False):
    """Wrap ``dataset`` in a ``DataLoader``.

    Args:
        dataset: Any dataset accepted by ``torch.utils.data.DataLoader``.
        batch_size: Number of examples per batch.
        shuffle: Whether the loader reshuffles the data each epoch
            (off by default).

    Returns:
        The configured ``DataLoader``.
    """
    loader = DataLoader(dataset, shuffle=shuffle, batch_size=batch_size)
    return loader

In [None]:
# ================== TRAIN / VAL + CLASS WEIGHT ==================
# Hold out 20% of the training frame for validation. The split is stratified
# on the label column and uses a fixed random_state so it is repeatable.
train_sub, val_sub = train_test_split(
    train_df, stratify=train_df[LABEL_COL], test_size=0.2, random_state=42
)

In [None]:

# Sorted distinct label ids, taken from the full training frame.
classes = np.sort(train_df[LABEL_COL].unique())
num_labels = len(classes)
print("Classes:", classes)
print("Number of labels:", num_labels)

# The classifier is built with `num_labels` outputs and CrossEntropyLoss indexes
# its weight vector by label id, so the ids must be exactly 0..num_labels-1.
# Fail here with a readable message instead of an opaque indexing error (or a
# CUDA device-side assert) in the middle of training.
assert np.array_equal(classes, np.arange(num_labels)), (
    f"Labels must be encoded as 0..{num_labels - 1}, got {classes.tolist()}"
)

# "balanced" weights are computed from the training subset only, so the
# validation rows do not influence the loss weighting.
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=train_sub[LABEL_COL].values,
)
class_weights = torch.tensor(class_weights, dtype=torch.float)
print("Class weights:", class_weights)


Classes: [0 1 2]
Number of labels: 3
Class weights: tensor([0.9732, 0.8180, 1.3333])


In [None]:
# Build one TextDataset per split; all three share the tokenizer and MAX_LEN.
train_dataset, val_dataset, test_dataset = (
    TextDataset(split_df, TEXT_COL, LABEL_COL, tokenizer, MAX_LEN)
    for split_df in (train_sub, val_sub, test_df)
)

In [None]:
# ================== TRAIN 1 MODEL ==================
def _run_validation(model, val_loader, loss_fn):
    """Score ``model`` on ``val_loader`` without tracking gradients.

    Switches the model to eval mode, feeds each batch's ``input_ids`` and
    ``attention_mask`` through it, and takes the argmax of the logits as the
    prediction.

    Args:
        model: Model whose output exposes ``.logits``.
        val_loader: Iterable of batches with ``input_ids``,
            ``attention_mask`` and ``labels`` entries.
        loss_fn: Callable ``(logits, labels) -> loss tensor``.

    Returns:
        tuple: ``(avg_loss, macro_f1)`` where ``avg_loss`` is the per-batch
        loss averaged over ``len(val_loader)`` and ``macro_f1`` is the
        macro-averaged F1 of the predictions.
    """
    model.eval()
    all_preds = []
    all_labels = []
    loss_sum = 0.0

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            logits = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            ).logits
            loss_sum += loss_fn(logits, labels).item()

            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_loss = loss_sum / len(val_loader)
    macro_f1 = f1_score(all_labels, all_preds, average="macro")
    return avg_loss, macro_f1


def train_one_model(
    train_loader,
    val_loader,
    num_labels,
    lr=2e-5,
    epochs=3,
    class_weights=None,
    weight_decay=0.01,
    warmup_ratio=0.1,
    dropout=0.3,
):
    """Fine-tune a fresh ``MODEL_NAME`` classifier and score it on validation.

    A new ``AutoModelForSequenceClassification`` is loaded on every call, so
    successive calls never share weights. Training uses AdamW with a linear
    warmup followed by linear decay, stepped once per batch. After each epoch
    the validation loss and macro F1 are printed.

    Args:
        train_loader: Loader yielding batches with ``input_ids``,
            ``attention_mask`` and ``labels``.
        val_loader: Loader with the same batch layout, used for validation.
        num_labels: Number of output classes of the classification head.
        lr: Peak learning rate for AdamW.
        epochs: Number of passes over ``train_loader``.
        class_weights: Optional 1-D tensor of per-class loss weights; when
            ``None`` the cross-entropy loss is unweighted.
        weight_decay: AdamW weight decay.
        warmup_ratio: Fraction of the total optimisation steps spent in
            linear warmup.
        dropout: Value used for both ``hidden_dropout_prob`` and
            ``attention_probs_dropout_prob``.

    Returns:
        tuple: ``(model, final_macro_f1)`` - the model after the last epoch
        (left in eval mode) and its validation macro F1.
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=num_labels,
        hidden_dropout_prob=dropout,   # default = 0.1
        attention_probs_dropout_prob=dropout,
    )
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    # The scheduler is stepped once per batch, so its horizon is counted in batches.
    total_steps = len(train_loader) * epochs
    num_warmup_steps = int(warmup_ratio * total_steps)

    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=total_steps,
    )

    if class_weights is not None:
        cw = class_weights.to(device)
        loss_fn = nn.CrossEntropyLoss(weight=cw)
    else:
        loss_fn = nn.CrossEntropyLoss()

    final_macro_f1 = None

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
        # Count steps explicitly: tqdm refreshes `pbar.n` only every
        # `miniters`/`mininterval`, so it can lag behind the real batch count
        # and would inflate the displayed running average.
        for step, batch in enumerate(pbar, start=1):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            optimizer.zero_grad()

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            )
            logits = outputs.logits

            loss = loss_fn(logits, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

            total_loss += loss.item()
            pbar.set_postfix({"loss": total_loss / step})

        avg_train_loss = total_loss / len(train_loader)
        print(f"[Epoch {epoch+1}] Avg train loss: {avg_train_loss:.4f}")

        avg_val_loss, val_macro_f1 = _run_validation(model, val_loader, loss_fn)
        print(
            f"[Epoch {epoch+1}] Val loss: {avg_val_loss:.4f} | "
            f"Val Macro F1: {val_macro_f1:.4f}"
        )
        # The weights do not change after this point, so the last epoch's
        # score already is the final score; no second validation pass needed.
        final_macro_f1 = val_macro_f1

    if final_macro_f1 is None:
        # epochs == 0: nothing was trained; still report a score for the
        # freshly loaded model so the return shape stays the same.
        _, final_macro_f1 = _run_validation(model, val_loader, loss_fn)

    return model, final_macro_f1

In [None]:
import itertools

# Hyper-parameter values to search; the grid is the Cartesian product of the lists.
param_grid = dict(
    lr=[1e-5, 1.5e-5],
    batch_size=[16],
    epochs=[3, 4],
    weight_decay=[0.01, 0.02],
    warmup_ratio=[0.06, 0.08],
)

In [None]:
# Running record of the best trial. The score starts below any attainable F1
# so the first trial always replaces it.
best_params = best_model = None
best_f1 = -1.0

In [None]:
# Every trial restarts from the same RNG state so that the classifier-head
# initialisation, shuffle order and dropout masks are drawn from identical
# streams. Score differences then reflect the hyper-parameters rather than
# seed noise. (GPU kernels may still introduce small run-to-run differences.)
SEED = 42

for lr, batch_size, epochs, weight_decay, warmup_ratio in itertools.product(
    param_grid["lr"],
    param_grid["batch_size"],
    param_grid["epochs"],
    param_grid["weight_decay"],
    param_grid["warmup_ratio"],
):
    print("=" * 60)
    print(
        f"Trying params: lr={lr}, batch_size={batch_size}, "
        f"epochs={epochs}, weight_decay={weight_decay}, "
        f"warmup_ratio={warmup_ratio}"
    )

    # Drop the reference to the previous trial's model before training the
    # next one. Unless it is also `best_model`, this lets its GPU memory be
    # reclaimed instead of sitting next to the new model and the current best.
    model = None
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    np.random.seed(SEED)
    torch.manual_seed(SEED)

    train_loader = make_loader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = make_loader(val_dataset, batch_size=batch_size, shuffle=False)

    model, val_f1 = train_one_model(
        train_loader=train_loader,
        val_loader=val_loader,
        num_labels=num_labels,
        lr=lr,
        epochs=epochs,
        class_weights=class_weights,
        weight_decay=weight_decay,
        warmup_ratio=warmup_ratio,
    )

    print(f"Params -> Val Macro F1 = {val_f1:.4f}")

    # Strict '>' keeps the earliest trial when two trials tie.
    if val_f1 > best_f1:
        best_f1 = val_f1
        best_params = {
            "lr": lr,
            "batch_size": batch_size,
            "epochs": epochs,
            "weight_decay": weight_decay,
            "warmup_ratio": warmup_ratio,
        }
        best_model = model

print("=" * 60)
print("Best params:", best_params)
print("Best Val Macro F1:", best_f1)

Trying params: lr=1e-05, batch_size=16, epochs=3, weight_decay=0.01, warmup_ratio=0.06


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 50/50 [00:17<00:00,  2.79it/s, loss=1.11]


[Epoch 1] Avg train loss: 1.1117
[Epoch 1] Val loss: 1.0919 | Val Macro F1: 0.3227


Epoch 2/3: 100%|██████████| 50/50 [00:17<00:00,  2.86it/s, loss=1.08]


[Epoch 2] Avg train loss: 1.0768
[Epoch 2] Val loss: 1.0683 | Val Macro F1: 0.6324


Epoch 3/3: 100%|██████████| 50/50 [00:17<00:00,  2.82it/s, loss=1.01]


[Epoch 3] Avg train loss: 1.0124
[Epoch 3] Val loss: 1.0121 | Val Macro F1: 0.5288


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.5288
Trying params: lr=1e-05, batch_size=16, epochs=3, weight_decay=0.01, warmup_ratio=0.08


Epoch 1/3: 100%|██████████| 50/50 [00:18<00:00,  2.70it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.1023
[Epoch 1] Val loss: 1.0952 | Val Macro F1: 0.3283


Epoch 2/3: 100%|██████████| 50/50 [00:17<00:00,  2.79it/s, loss=1.09]


[Epoch 2] Avg train loss: 1.0948
[Epoch 2] Val loss: 1.0866 | Val Macro F1: 0.4144


Epoch 3/3: 100%|██████████| 50/50 [00:17<00:00,  2.85it/s, loss=1.08]


[Epoch 3] Avg train loss: 1.0773
[Epoch 3] Val loss: 1.0792 | Val Macro F1: 0.4499
Params -> Val Macro F1 = 0.4499
Trying params: lr=1e-05, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.06


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 50/50 [00:17<00:00,  2.85it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0995
[Epoch 1] Val loss: 1.0925 | Val Macro F1: 0.1692


Epoch 2/3: 100%|██████████| 50/50 [00:18<00:00,  2.73it/s, loss=1.09]


[Epoch 2] Avg train loss: 1.0893
[Epoch 2] Val loss: 1.0824 | Val Macro F1: 0.2851


Epoch 3/3: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.07]


[Epoch 3] Avg train loss: 1.0733
[Epoch 3] Val loss: 1.0740 | Val Macro F1: 0.5195


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.5195
Trying params: lr=1e-05, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.08


Epoch 1/3: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0982
[Epoch 1] Val loss: 1.0902 | Val Macro F1: 0.2357


Epoch 2/3: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.08]


[Epoch 2] Avg train loss: 1.0769
[Epoch 2] Val loss: 1.0693 | Val Macro F1: 0.6557


Epoch 3/3: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.03]


[Epoch 3] Avg train loss: 1.0262
[Epoch 3] Val loss: 1.0377 | Val Macro F1: 0.6418


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.6418
Trying params: lr=1e-05, batch_size=16, epochs=4, weight_decay=0.01, warmup_ratio=0.06


Epoch 1/4: 100%|██████████| 50/50 [00:17<00:00,  2.81it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0973
[Epoch 1] Val loss: 1.0896 | Val Macro F1: 0.1692


Epoch 2/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.08]


[Epoch 2] Avg train loss: 1.0772
[Epoch 2] Val loss: 1.0547 | Val Macro F1: 0.4045


Epoch 3/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.944]


[Epoch 3] Avg train loss: 0.9440
[Epoch 3] Val loss: 0.7796 | Val Macro F1: 0.7099


Epoch 4/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.705]


[Epoch 4] Avg train loss: 0.7049
[Epoch 4] Val loss: 0.6607 | Val Macro F1: 0.7373


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7373
Trying params: lr=1e-05, batch_size=16, epochs=4, weight_decay=0.01, warmup_ratio=0.08


Epoch 1/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.12]


[Epoch 1] Avg train loss: 1.1175
[Epoch 1] Val loss: 1.0960 | Val Macro F1: 0.1333


Epoch 2/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.1]


[Epoch 2] Avg train loss: 1.0958
[Epoch 2] Val loss: 1.0830 | Val Macro F1: 0.4028


Epoch 3/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.03]


[Epoch 3] Avg train loss: 1.0310
[Epoch 3] Val loss: 1.0084 | Val Macro F1: 0.5369


Epoch 4/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=0.798]


[Epoch 4] Avg train loss: 0.7977
[Epoch 4] Val loss: 0.7874 | Val Macro F1: 0.6504


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.6504
Trying params: lr=1e-05, batch_size=16, epochs=4, weight_decay=0.02, warmup_ratio=0.06


Epoch 1/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.1035
[Epoch 1] Val loss: 1.0881 | Val Macro F1: 0.6123


Epoch 2/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.07]


[Epoch 2] Avg train loss: 1.0677
[Epoch 2] Val loss: 1.0276 | Val Macro F1: 0.6736


Epoch 3/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.815]


[Epoch 3] Avg train loss: 0.8151
[Epoch 3] Val loss: 0.6097 | Val Macro F1: 0.7130


Epoch 4/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=0.628]


[Epoch 4] Avg train loss: 0.6283
[Epoch 4] Val loss: 0.5576 | Val Macro F1: 0.7100


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7100
Trying params: lr=1e-05, batch_size=16, epochs=4, weight_decay=0.02, warmup_ratio=0.08


Epoch 1/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.11]


[Epoch 1] Avg train loss: 1.1051
[Epoch 1] Val loss: 1.0907 | Val Macro F1: 0.1673


Epoch 2/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.08]


[Epoch 2] Avg train loss: 1.0786
[Epoch 2] Val loss: 1.0588 | Val Macro F1: 0.6186


Epoch 3/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.929]


[Epoch 3] Avg train loss: 0.9286
[Epoch 3] Val loss: 0.7589 | Val Macro F1: 0.5977


Epoch 4/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=0.701]


[Epoch 4] Avg train loss: 0.7012
[Epoch 4] Val loss: 0.6412 | Val Macro F1: 0.6767


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.6767
Trying params: lr=1.5e-05, batch_size=16, epochs=3, weight_decay=0.01, warmup_ratio=0.06


Epoch 1/3: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.09]


[Epoch 1] Avg train loss: 1.0923
[Epoch 1] Val loss: 1.0822 | Val Macro F1: 0.4133


Epoch 2/3: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.03]


[Epoch 2] Avg train loss: 1.0261
[Epoch 2] Val loss: 0.8590 | Val Macro F1: 0.7648


Epoch 3/3: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.717]


[Epoch 3] Avg train loss: 0.7170
[Epoch 3] Val loss: 0.5860 | Val Macro F1: 0.7284


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7284
Trying params: lr=1.5e-05, batch_size=16, epochs=3, weight_decay=0.01, warmup_ratio=0.08


Epoch 1/3: 100%|██████████| 50/50 [00:17<00:00,  2.85it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.1021
[Epoch 1] Val loss: 1.0865 | Val Macro F1: 0.7079


Epoch 2/3: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.06]


[Epoch 2] Avg train loss: 1.0585
[Epoch 2] Val loss: 0.9951 | Val Macro F1: 0.7306


Epoch 3/3: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=0.785]


[Epoch 3] Avg train loss: 0.7855
[Epoch 3] Val loss: 0.6703 | Val Macro F1: 0.7444


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7444
Trying params: lr=1.5e-05, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.06


Epoch 1/3: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.1015
[Epoch 1] Val loss: 1.0881 | Val Macro F1: 0.4742


Epoch 2/3: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.02]


[Epoch 2] Avg train loss: 1.0220
[Epoch 2] Val loss: 0.8341 | Val Macro F1: 0.7536


Epoch 3/3: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.692]


[Epoch 3] Avg train loss: 0.6922
[Epoch 3] Val loss: 0.5948 | Val Macro F1: 0.7887


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7887
Trying params: lr=1.5e-05, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.08


Epoch 1/3: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=1.11]


[Epoch 1] Avg train loss: 1.1112
[Epoch 1] Val loss: 1.0883 | Val Macro F1: 0.1939


Epoch 2/3: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.05]


[Epoch 2] Avg train loss: 1.0511
[Epoch 2] Val loss: 0.9358 | Val Macro F1: 0.7508


Epoch 3/3: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.703]


[Epoch 3] Avg train loss: 0.7033
[Epoch 3] Val loss: 0.6226 | Val Macro F1: 0.6833


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.6833
Trying params: lr=1.5e-05, batch_size=16, epochs=4, weight_decay=0.01, warmup_ratio=0.06


Epoch 1/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.1022
[Epoch 1] Val loss: 1.0792 | Val Macro F1: 0.4515


Epoch 2/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=0.846]


[Epoch 2] Avg train loss: 0.8461
[Epoch 2] Val loss: 0.5974 | Val Macro F1: 0.7160


Epoch 3/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.561]


[Epoch 3] Avg train loss: 0.5610
[Epoch 3] Val loss: 0.4645 | Val Macro F1: 0.7832


Epoch 4/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=0.447]


[Epoch 4] Avg train loss: 0.4466
[Epoch 4] Val loss: 0.4261 | Val Macro F1: 0.7953


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7953
Trying params: lr=1.5e-05, batch_size=16, epochs=4, weight_decay=0.01, warmup_ratio=0.08


Epoch 1/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.11]


[Epoch 1] Avg train loss: 1.1141
[Epoch 1] Val loss: 1.0907 | Val Macro F1: 0.3905


Epoch 2/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.05]


[Epoch 2] Avg train loss: 1.0522
[Epoch 2] Val loss: 0.9912 | Val Macro F1: 0.5012


Epoch 3/4: 100%|██████████| 50/50 [00:17<00:00,  2.82it/s, loss=0.736]


[Epoch 3] Avg train loss: 0.7361
[Epoch 3] Val loss: 0.5759 | Val Macro F1: 0.6924


Epoch 4/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.586]


[Epoch 4] Avg train loss: 0.5864
[Epoch 4] Val loss: 0.5494 | Val Macro F1: 0.6888


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.6888
Trying params: lr=1.5e-05, batch_size=16, epochs=4, weight_decay=0.02, warmup_ratio=0.06


Epoch 1/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0955
[Epoch 1] Val loss: 1.0825 | Val Macro F1: 0.4968


Epoch 2/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=0.931]


[Epoch 2] Avg train loss: 0.9313
[Epoch 2] Val loss: 0.6060 | Val Macro F1: 0.7366


Epoch 3/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.555]


[Epoch 3] Avg train loss: 0.5549
[Epoch 3] Val loss: 0.4962 | Val Macro F1: 0.7871


Epoch 4/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=0.466]


[Epoch 4] Avg train loss: 0.4658
[Epoch 4] Val loss: 0.4419 | Val Macro F1: 0.7969


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7969
Trying params: lr=1.5e-05, batch_size=16, epochs=4, weight_decay=0.02, warmup_ratio=0.08


Epoch 1/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.1036
[Epoch 1] Val loss: 1.0773 | Val Macro F1: 0.1863


Epoch 2/4: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, loss=0.864]


[Epoch 2] Avg train loss: 0.8644
[Epoch 2] Val loss: 0.5670 | Val Macro F1: 0.7071


Epoch 3/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.55]


[Epoch 3] Avg train loss: 0.5502
[Epoch 3] Val loss: 0.4594 | Val Macro F1: 0.7711


Epoch 4/4: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, loss=0.465]


[Epoch 4] Avg train loss: 0.4654
[Epoch 4] Val loss: 0.4298 | Val Macro F1: 0.8027
Params -> Val Macro F1 = 0.8027
Best params: {'lr': 1.5e-05, 'batch_size': 16, 'epochs': 4, 'weight_decay': 0.02, 'warmup_ratio': 0.08}
Best Val Macro F1: 0.8026659786721706


In [None]:
def evaluate_on_loader(model, loader, name=""):
    """Predict every batch in ``loader`` and print a classification report.

    The model is put in eval mode and run without gradient tracking; the
    predicted class is the argmax of the logits.

    Args:
        model: Model whose output exposes ``.logits``.
        loader: Iterable of batches with ``input_ids``, ``attention_mask``
            and ``labels`` entries.
        name: Label shown in the printed report header.

    Returns:
        tuple: ``(labels, preds, macro_f1)`` - the true labels and predicted
        labels as flat lists in loader order, and the macro-averaged F1.
    """
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch in loader:
            targets = batch["labels"].to(device)
            outputs = model(
                input_ids=batch["input_ids"].to(device),
                attention_mask=batch["attention_mask"].to(device),
            )
            batch_preds = outputs.logits.argmax(dim=1)

            all_preds.extend(batch_preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

    print(f"\n=== Performance on {name} ===")
    report = classification_report(all_labels, all_preds, digits=4)
    print(report)

    macro_f1 = f1_score(all_labels, all_preds, average="macro")
    print("Macro F1:", macro_f1)
    return all_labels, all_preds, macro_f1

In [None]:
batch_size = best_params["batch_size"]

# Unshuffled loaders, so predictions come back in dataset row order.
train_loader_dbg = make_loader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = make_loader(test_dataset, batch_size=batch_size, shuffle=False)

# Training subset: used to check for overfitting.
train_labels, train_preds, train_f1 = evaluate_on_loader(
    best_model, train_loader_dbg, name="TRAIN_SUB"
)

# Test set: the officially reported result.
test_labels, test_preds, test_f1 = evaluate_on_loader(
    best_model, test_loader, name="TEST_SET"
)

cm = confusion_matrix(test_labels, test_preds)
print("\nConfusion matrix (TEST_SET):", cm, sep="\n")


=== Performance on TRAIN_SUB ===
              precision    recall  f1-score   support

           0     0.8421    0.9343    0.8858       274
           1     0.8678    0.6442    0.7394       326
           2     0.7126    0.9050    0.7974       200

    accuracy                         0.8087       800
   macro avg     0.8075    0.8278    0.8075       800
weighted avg     0.8202    0.8087    0.8041       800

Macro F1: 0.80753553223369

=== Performance on TEST_SET ===
              precision    recall  f1-score   support

           0     0.7642    0.8868    0.8210       106
           1     0.9106    0.6871    0.7832       163
           2     0.5946    0.8627    0.7040        51

    accuracy                         0.7812       320
   macro avg     0.7565    0.8122    0.7694       320
weighted avg     0.8117    0.7812    0.7831       320

Macro F1: 0.7693924939689132

Confusion matrix (TEST_SET):
[[ 94   7   5]
 [ 26 112  25]
 [  3   4  44]]


In [None]:
# Persist only the weights (state_dict) of the selected model, written to the
# current working directory.
torch.save(obj=best_model.state_dict(), f='best_model.pth')
print("Model saved successfully to 'best_model.pth'")

Model saved successfully to 'best_model.pth'


In [None]:
# Position of the first correctly classified test example (-1 when none exists).
correct_example_idx = next(
    (
        pos
        for pos, (actual, predicted) in enumerate(zip(test_labels, test_preds))
        if actual == predicted
    ),
    -1,
)
# Position of the first misclassified test example (-1 when none exists).
incorrect_example_idx = next(
    (
        pos
        for pos, (actual, predicted) in enumerate(zip(test_labels, test_preds))
        if actual != predicted
    ),
    -1,
)

# Map numerical labels to descriptive names if possible (assuming 0, 1, 2 correspond to some categories)
# If not, the numerical labels will be displayed.
# For example, if you have a mapping like: label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
label_map = {0: "Label 0", 1: "Label 1", 2: "Label 2"} # Replace with actual label names if available


def _print_example(heading, idx, fallback_msg):
    """Print text, true label and predicted label of test row ``idx``.

    ``idx`` is a position in the test predictions, which line up with the
    rows of ``test_df``. ``fallback_msg`` is printed instead when ``idx``
    is -1.
    """
    print(heading)
    if idx == -1:
        print(fallback_msg)
        return
    actual, predicted = test_labels[idx], test_preds[idx]
    print(f"Text: {test_df.iloc[idx][TEXT_COL]}")
    print(f"Actual Label: {label_map.get(actual, actual)}")
    print(f"Predicted Label: {label_map.get(predicted, predicted)}")


_print_example(
    "\n--- Correct Prediction Example ---",
    correct_example_idx,
    "No correct prediction found in the sampled test set (this is highly unlikely).",
)
_print_example(
    "\n--- Incorrect Prediction Example ---",
    incorrect_example_idx,
    "No incorrect prediction found in the sampled test set (this is highly unlikely with a F1 < 1.0).",
)


--- Correct Prediction Example ---
Text: Verizon's environmental, health and safety management system provides a framework for identifying, controlling, and reducing the risks associated with the environments in which we operate. Besides regular management system assessments, internal and third-party compliance audits and inspections are performed annually at hundreds of facilities worldwide. The goal of these assessments is to identify and correct site-specific issues, and to educate and empower facility managers and supervisors to implement corrective actions. Verizon's environment, health and safety efforts are directed and supported by experienced experts around the world that support our operations and facilities.
Actual Label: Label 1
Predicted Label: Label 1

--- Incorrect Prediction Example ---
Text: Sustainable strategy 'red lines' For our sustainable strategy range, we incorporate a series of proprietary 'red lines' in order to ensure the poorest- performing companies from a