In [None]:
from google.colab import drive
import pandas as pd
drive.mount('/content/drive')
# ================== IMPORT ==================
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup,
)


from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, f1_score, confusion_matrix

from tqdm import tqdm

Mounted at /content/drive


In [None]:
# Load the train and test tables from the mounted Google Drive project folder.
# Both frames are expected to contain the TEXT_COL and LABEL_COL columns used
# by later cells (test_df is only used for the final evaluation).
train_df = pd.read_csv('/content/drive/MyDrive/NLP_Final_Project/train_clean.csv')
test_df  = pd.read_csv('/content/drive/MyDrive/NLP_Final_Project/test_clean.csv')

In [None]:
# Names of the DataFrame columns holding the input text and the class label.
# They are passed to TextDataset and used for stratification / class weighting.
TEXT_COL = "text_clean"
LABEL_COL = "label"

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
# `device` is read as a global by the training and evaluation functions.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [None]:
# ========= 1. tokenizer & dataset =========
# Checkpoint identifier shared by the tokenizer here and by the model that
# train_one_model loads, so both use the same vocabulary.
MODEL_NAME = "climatebert/distilroberta-base-climate-f"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Sequence length handed to TextDataset, which pads/truncates every example
# to exactly this many tokens.
MAX_LEN = 128

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

In [None]:
# ================== DATASET CLASS ==================
class TextDataset(Dataset):
    """Map-style dataset that tokenizes one DataFrame row per lookup.

    Texts are coerced to ``str`` and labels to ``int`` once, at construction.
    Tokenization happens lazily in ``__getitem__`` so nothing is pre-encoded.

    Args:
        df: DataFrame holding the examples.
        text_col: Name of the column with the raw text.
        label_col: Name of the column with the class label.
        tokenizer: Callable accepting ``(text, padding=..., truncation=...,
            max_length=..., return_tensors=...)`` and returning a mapping of
            tensors with a leading batch dimension of size 1.
        max_len: Length every example is padded / truncated to.
    """

    def __init__(self, df, text_col, label_col, tokenizer, max_len=128):
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.texts = df[text_col].astype(str).tolist()
        self.labels = df[label_col].astype(int).tolist()

    def __len__(self):
        """Number of examples in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenizer's tensors for row ``idx`` plus a ``labels`` entry."""
        raw_text = self.texts[idx]
        target = self.labels[idx]

        encoded = self.tokenizer(
            raw_text,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        # The tokenizer returns (1, max_len) tensors; drop the batch axis so
        # the DataLoader can stack examples itself.
        sample = {}
        for key, tensor in encoded.items():
            sample[key] = tensor.squeeze(0)
        sample["labels"] = torch.tensor(target, dtype=torch.long)
        return sample


In [None]:
def make_loader(dataset, batch_size, shuffle=False):
    """Wrap ``dataset`` in a ``DataLoader``.

    Args:
        dataset: Any dataset accepted by ``torch.utils.data.DataLoader``.
        batch_size: Number of examples per batch.
        shuffle: Whether to reshuffle the data each epoch (off by default).

    Returns:
        A ``DataLoader`` using the default collate function and workers.
    """
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return DataLoader(dataset, **loader_options)

In [None]:
# ================== TRAIN / VAL + CLASS WEIGHT ==================
# Hold out 20% of train_df as a validation set for hyper-parameter selection.
# Stratifying on the label column keeps the class proportions the same in both
# parts; the fixed random_state makes the split repeatable.
train_sub, val_sub = train_test_split(
    train_df,
    test_size=0.2,
    random_state=42,
    stratify=train_df[LABEL_COL],
)

In [None]:

# Sorted unique label values of the full training frame; their count is the
# number of output classes the classifier needs.
classes = np.sort(train_df[LABEL_COL].unique())
num_labels = len(classes)
print("Classes:", classes)
print("Number of labels:", num_labels)

# Per-class loss weights from scikit-learn's "balanced" mode, computed on the
# training subset only (validation labels are not used).
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=train_sub[LABEL_COL].values,
)
# Float tensor so it can be given to nn.CrossEntropyLoss(weight=...) in
# train_one_model.
class_weights = torch.tensor(class_weights, dtype=torch.float)
print("Class weights:", class_weights)


Classes: [0 1 2]
Number of labels: 3
Class weights: tensor([0.9732, 0.8180, 1.3333])


In [None]:
# Datasets for the three splits; all share the same tokenizer and MAX_LEN so
# every example is encoded identically.
train_dataset = TextDataset(train_sub, TEXT_COL, LABEL_COL, tokenizer, MAX_LEN)
val_dataset   = TextDataset(val_sub,   TEXT_COL, LABEL_COL, tokenizer, MAX_LEN)
test_dataset  = TextDataset(test_df,   TEXT_COL, LABEL_COL, tokenizer, MAX_LEN)

In [None]:
# ================== TRAIN 1 MODEL ==================
def train_one_model(
    train_loader,
    val_loader,
    num_labels,
    lr=2e-5,
    epochs=3,
    class_weights=None,
    weight_decay=0.01,
    warmup_ratio=0.1,
):
    """Fine-tune a fresh ``MODEL_NAME`` classifier and score it on ``val_loader``.

    A new model is loaded from the ``MODEL_NAME`` checkpoint on every call,
    moved to the global ``device`` and trained with AdamW plus a linear
    warmup/decay schedule that is stepped once per batch. Training loss,
    validation loss and validation macro F1 are printed after every epoch.

    Args:
        train_loader: DataLoader yielding dict batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.
        val_loader: DataLoader with the same batch layout, used for scoring.
        num_labels: Number of output classes for the classification head.
        lr: Peak learning rate for AdamW.
        epochs: Number of passes over ``train_loader``.
        class_weights: Optional 1-D float tensor of per-class loss weights;
            ``None`` means an unweighted cross-entropy loss.
        weight_decay: AdamW weight-decay coefficient.
        warmup_ratio: Fraction of the total optimisation steps spent warming
            the learning rate up.

    Returns:
        ``(model, final_macro_f1)``: the model with its weights as of the last
        epoch, and its macro F1 on ``val_loader``.
    """
    # Keyword arguments other than the checkpoint name override fields of the
    # model config (here: head size and the two dropout rates).
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=num_labels,  # was hard-coded to 3, silently ignoring the argument
        hidden_dropout_prob=0.2,
        attention_probs_dropout_prob=0.2,
    )
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    # The scheduler is stepped after every batch, so its horizon is counted in
    # batches rather than epochs.
    total_steps = len(train_loader) * epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(warmup_ratio * total_steps),
        num_training_steps=total_steps,
    )

    if class_weights is not None:
        loss_fn = nn.CrossEntropyLoss(weight=class_weights.to(device))
    else:
        loss_fn = nn.CrossEntropyLoss()

    final_macro_f1 = None
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
        # Count steps explicitly: tqdm refreshes its own `n` counter lazily, so
        # it is not a reliable denominator for the running mean.
        for step, batch in enumerate(pbar, start=1):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            optimizer.zero_grad()

            # Labels are not passed to the model; the loss is computed here so
            # the optional class weights can be applied.
            logits = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            ).logits

            loss = loss_fn(logits, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

            total_loss += loss.item()
            pbar.set_postfix({"loss": total_loss / step})

        avg_train_loss = total_loss / len(train_loader)
        print(f"[Epoch {epoch+1}] Avg train loss: {avg_train_loss:.4f}")

        avg_val_loss, final_macro_f1 = _run_validation(model, val_loader, loss_fn)
        print(
            f"[Epoch {epoch+1}] Val loss: {avg_val_loss:.4f} | "
            f"Val Macro F1: {final_macro_f1:.4f}"
        )

    # The last epoch's validation pass already scored the returned weights, so
    # a separate pass is only needed when no epoch ran at all.
    if final_macro_f1 is None:
        _, final_macro_f1 = _run_validation(model, val_loader, loss_fn)

    return model, final_macro_f1


def _run_validation(model, val_loader, loss_fn):
    """Score ``model`` on ``val_loader``; return ``(mean batch loss, macro F1)``.

    Puts the model in eval mode and runs without gradient tracking. The caller
    is responsible for switching back to train mode.
    """
    model.eval()
    predictions = []
    targets = []
    loss_sum = 0.0

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            logits = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            ).logits
            loss_sum += loss_fn(logits, labels).item()

            predictions.extend(torch.argmax(logits, dim=1).cpu().numpy())
            targets.extend(labels.cpu().numpy())

    mean_loss = loss_sum / len(val_loader)
    macro_f1 = f1_score(targets, predictions, average="macro")
    return mean_loss, macro_f1

In [None]:
# ================== GRID SEARCH  ==================
import itertools

# Hyper-parameter values to search. The search cell below takes the Cartesian
# product of these lists, so this grid yields 2 * 1 * 2 * 2 * 2 = 16 training
# runs.
param_grid = {
    "lr": [7e-6, 1e-5],
    "batch_size": [16],
    "epochs": [2, 3],
    "weight_decay": [0.02, 0.03],
    "warmup_ratio": [0.06, 0.1],
}

In [None]:
# Trackers for the best trial seen by the grid search.
# best_f1 starts below any achievable F1 so the first trial always wins.
best_f1 = -1.0
best_params = None
best_model = None

In [None]:
# Reset the trackers in this cell too, so re-running the search never compares
# new trials against a stale best score left over from a previous run.
best_f1 = -1.0
best_params = None
best_model = None

# Seed applied before every trial (same value as the train/val split).
GRID_SEARCH_SEED = 42

# Exhaustive search: one full fine-tuning run per hyper-parameter combination,
# ranked by macro F1 on the validation split.
for lr, batch_size, epochs, weight_decay, warmup_ratio in itertools.product(
    param_grid["lr"],
    param_grid["batch_size"],
    param_grid["epochs"],
    param_grid["weight_decay"],
    param_grid["warmup_ratio"],
):
    trial_params = {
        "lr": lr,
        "batch_size": batch_size,
        "epochs": epochs,
        "weight_decay": weight_decay,
        "warmup_ratio": warmup_ratio,
    }

    print("=" * 60)
    print(
        f"Trying params: lr={lr}, batch_size={batch_size}, "
        f"epochs={epochs}, weight_decay={weight_decay}, "
        f"warmup_ratio={warmup_ratio}"
    )

    # Start every trial from the same RNG state. Without this, each trial gets
    # a different random classifier head, dropout pattern and shuffle order,
    # and that noise can outweigh the effect of the hyper-parameters.
    torch.manual_seed(GRID_SEARCH_SEED)

    train_loader = make_loader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = make_loader(val_dataset, batch_size=batch_size, shuffle=False)

    model, val_f1 = train_one_model(
        train_loader=train_loader,
        val_loader=val_loader,
        num_labels=num_labels,
        lr=lr,
        epochs=epochs,
        class_weights=class_weights,
        weight_decay=weight_decay,
        warmup_ratio=warmup_ratio,
    )

    print(f"Params -> Val Macro F1 = {val_f1:.4f}")

    # Strict ">" keeps the earliest trial when two trials tie.
    if val_f1 > best_f1:
        best_f1 = val_f1
        best_params = trial_params
        best_model = model

print("=" * 60)
print("Best params:", best_params)
print("Best Val Macro F1:", best_f1)

Trying params: lr=7e-06, batch_size=16, epochs=2, weight_decay=0.02, warmup_ratio=0.06


config.json:   0%|          | 0.00/752 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/2: 100%|██████████| 50/50 [00:09<00:00,  5.20it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.1008
[Epoch 1] Val loss: 1.0894 | Val Macro F1: 0.4399


Epoch 2/2: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s, loss=1.08]


[Epoch 2] Avg train loss: 1.0783
[Epoch 2] Val loss: 1.0790 | Val Macro F1: 0.4638


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.4638
Trying params: lr=7e-06, batch_size=16, epochs=2, weight_decay=0.02, warmup_ratio=0.1


Epoch 1/2: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s, loss=1.09]


[Epoch 1] Avg train loss: 1.0947
[Epoch 1] Val loss: 1.0831 | Val Macro F1: 0.4447


Epoch 2/2: 100%|██████████| 50/50 [00:08<00:00,  5.66it/s, loss=1.07]


[Epoch 2] Avg train loss: 1.0740
[Epoch 2] Val loss: 1.0708 | Val Macro F1: 0.5357


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.5357
Trying params: lr=7e-06, batch_size=16, epochs=2, weight_decay=0.03, warmup_ratio=0.06


Epoch 1/2: 100%|██████████| 50/50 [00:08<00:00,  5.60it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0954
[Epoch 1] Val loss: 1.0771 | Val Macro F1: 0.4844


Epoch 2/2: 100%|██████████| 50/50 [00:08<00:00,  5.60it/s, loss=1.05]


[Epoch 2] Avg train loss: 1.0546
[Epoch 2] Val loss: 1.0549 | Val Macro F1: 0.7173


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7173
Trying params: lr=7e-06, batch_size=16, epochs=2, weight_decay=0.03, warmup_ratio=0.1


Epoch 1/2: 100%|██████████| 50/50 [00:08<00:00,  5.60it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0953
[Epoch 1] Val loss: 1.0860 | Val Macro F1: 0.4348


Epoch 2/2: 100%|██████████| 50/50 [00:08<00:00,  5.56it/s, loss=1.08]


[Epoch 2] Avg train loss: 1.0760
[Epoch 2] Val loss: 1.0771 | Val Macro F1: 0.5323


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.5323
Trying params: lr=7e-06, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.06


Epoch 1/3: 100%|██████████| 50/50 [00:09<00:00,  5.47it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0968
[Epoch 1] Val loss: 1.0792 | Val Macro F1: 0.4425


Epoch 2/3: 100%|██████████| 50/50 [00:09<00:00,  5.44it/s, loss=1.05]


[Epoch 2] Avg train loss: 1.0550
[Epoch 2] Val loss: 1.0381 | Val Macro F1: 0.7652


Epoch 3/3: 100%|██████████| 50/50 [00:09<00:00,  5.43it/s, loss=0.995]


[Epoch 3] Avg train loss: 0.9947
[Epoch 3] Val loss: 0.9923 | Val Macro F1: 0.7765


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7765
Trying params: lr=7e-06, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.1


Epoch 1/3: 100%|██████████| 50/50 [00:09<00:00,  5.44it/s, loss=1.09]


[Epoch 1] Avg train loss: 1.0948
[Epoch 1] Val loss: 1.0782 | Val Macro F1: 0.4593


Epoch 2/3: 100%|██████████| 50/50 [00:09<00:00,  5.34it/s, loss=1.05]


[Epoch 2] Avg train loss: 1.0469
[Epoch 2] Val loss: 1.0242 | Val Macro F1: 0.7768


Epoch 3/3: 100%|██████████| 50/50 [00:09<00:00,  5.28it/s, loss=0.97]


[Epoch 3] Avg train loss: 0.9704
[Epoch 3] Val loss: 0.9590 | Val Macro F1: 0.8113


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.8113
Trying params: lr=7e-06, batch_size=16, epochs=3, weight_decay=0.03, warmup_ratio=0.06


Epoch 1/3: 100%|██████████| 50/50 [00:09<00:00,  5.30it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0973
[Epoch 1] Val loss: 1.0805 | Val Macro F1: 0.3153


Epoch 2/3: 100%|██████████| 50/50 [00:09<00:00,  5.38it/s, loss=1.04]


[Epoch 2] Avg train loss: 1.0420
[Epoch 2] Val loss: 1.0104 | Val Macro F1: 0.6253


Epoch 3/3: 100%|██████████| 50/50 [00:09<00:00,  5.42it/s, loss=0.951]


[Epoch 3] Avg train loss: 0.9510
[Epoch 3] Val loss: 0.9202 | Val Macro F1: 0.7085


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7085
Trying params: lr=7e-06, batch_size=16, epochs=3, weight_decay=0.03, warmup_ratio=0.1


Epoch 1/3: 100%|██████████| 50/50 [00:09<00:00,  5.38it/s, loss=1.09]


[Epoch 1] Avg train loss: 1.0931
[Epoch 1] Val loss: 1.0789 | Val Macro F1: 0.6114


Epoch 2/3: 100%|██████████| 50/50 [00:09<00:00,  5.39it/s, loss=1.04]


[Epoch 2] Avg train loss: 1.0389
[Epoch 2] Val loss: 1.0125 | Val Macro F1: 0.7239


Epoch 3/3: 100%|██████████| 50/50 [00:09<00:00,  5.37it/s, loss=0.947]


[Epoch 3] Avg train loss: 0.9473
[Epoch 3] Val loss: 0.9292 | Val Macro F1: 0.7702


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7702
Trying params: lr=1e-05, batch_size=16, epochs=2, weight_decay=0.02, warmup_ratio=0.06


Epoch 1/2: 100%|██████████| 50/50 [00:09<00:00,  5.39it/s, loss=1.09]


[Epoch 1] Avg train loss: 1.0940
[Epoch 1] Val loss: 1.0685 | Val Macro F1: 0.5384


Epoch 2/2: 100%|██████████| 50/50 [00:09<00:00,  5.38it/s, loss=1.03]


[Epoch 2] Avg train loss: 1.0260
[Epoch 2] Val loss: 1.0248 | Val Macro F1: 0.7245


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7245
Trying params: lr=1e-05, batch_size=16, epochs=2, weight_decay=0.02, warmup_ratio=0.1


Epoch 1/2: 100%|██████████| 50/50 [00:09<00:00,  5.35it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0954
[Epoch 1] Val loss: 1.0764 | Val Macro F1: 0.3411


Epoch 2/2: 100%|██████████| 50/50 [00:09<00:00,  5.35it/s, loss=1.04]


[Epoch 2] Avg train loss: 1.0424
[Epoch 2] Val loss: 1.0337 | Val Macro F1: 0.5838


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.5838
Trying params: lr=1e-05, batch_size=16, epochs=2, weight_decay=0.03, warmup_ratio=0.06


Epoch 1/2: 100%|██████████| 50/50 [00:09<00:00,  5.36it/s, loss=1.09]


[Epoch 1] Avg train loss: 1.0862
[Epoch 1] Val loss: 1.0618 | Val Macro F1: 0.3677


Epoch 2/2: 100%|██████████| 50/50 [00:09<00:00,  5.40it/s, loss=1.02]


[Epoch 2] Avg train loss: 1.0172
[Epoch 2] Val loss: 0.9944 | Val Macro F1: 0.5704


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.5704
Trying params: lr=1e-05, batch_size=16, epochs=2, weight_decay=0.03, warmup_ratio=0.1


Epoch 1/2: 100%|██████████| 50/50 [00:09<00:00,  5.43it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.0958
[Epoch 1] Val loss: 1.0713 | Val Macro F1: 0.6271


Epoch 2/2: 100%|██████████| 50/50 [00:09<00:00,  5.37it/s, loss=1.04]


[Epoch 2] Avg train loss: 1.0410
[Epoch 2] Val loss: 1.0267 | Val Macro F1: 0.7165


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7165
Trying params: lr=1e-05, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.06


Epoch 1/3: 100%|██████████| 50/50 [00:09<00:00,  5.37it/s, loss=1.08]


[Epoch 1] Avg train loss: 1.0834
[Epoch 1] Val loss: 1.0637 | Val Macro F1: 0.5329


Epoch 2/3: 100%|██████████| 50/50 [00:09<00:00,  5.36it/s, loss=0.97]


[Epoch 2] Avg train loss: 0.9699
[Epoch 2] Val loss: 0.8275 | Val Macro F1: 0.8260


Epoch 3/3: 100%|██████████| 50/50 [00:09<00:00,  5.38it/s, loss=0.706]


[Epoch 3] Avg train loss: 0.7062
[Epoch 3] Val loss: 0.6294 | Val Macro F1: 0.8036


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.8036
Trying params: lr=1e-05, batch_size=16, epochs=3, weight_decay=0.02, warmup_ratio=0.1


Epoch 1/3: 100%|██████████| 50/50 [00:09<00:00,  5.42it/s, loss=1.09]


[Epoch 1] Avg train loss: 1.0884
[Epoch 1] Val loss: 1.0674 | Val Macro F1: 0.5033


Epoch 2/3: 100%|██████████| 50/50 [00:09<00:00,  5.36it/s, loss=0.978]


[Epoch 2] Avg train loss: 0.9780
[Epoch 2] Val loss: 0.8206 | Val Macro F1: 0.7454


Epoch 3/3: 100%|██████████| 50/50 [00:09<00:00,  5.35it/s, loss=0.719]


[Epoch 3] Avg train loss: 0.7185
[Epoch 3] Val loss: 0.6346 | Val Macro F1: 0.7966
Params -> Val Macro F1 = 0.7966
Trying params: lr=1e-05, batch_size=16, epochs=3, weight_decay=0.03, warmup_ratio=0.06


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 50/50 [00:09<00:00,  5.37it/s, loss=1.07]


[Epoch 1] Avg train loss: 1.0740
[Epoch 1] Val loss: 1.0340 | Val Macro F1: 0.5176


Epoch 2/3: 100%|██████████| 50/50 [00:09<00:00,  5.40it/s, loss=0.852]


[Epoch 2] Avg train loss: 0.8520
[Epoch 2] Val loss: 0.6364 | Val Macro F1: 0.7665


Epoch 3/3: 100%|██████████| 50/50 [00:09<00:00,  5.41it/s, loss=0.59]


[Epoch 3] Avg train loss: 0.5905
[Epoch 3] Val loss: 0.5402 | Val Macro F1: 0.7777


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Params -> Val Macro F1 = 0.7777
Trying params: lr=1e-05, batch_size=16, epochs=3, weight_decay=0.03, warmup_ratio=0.1


Epoch 1/3: 100%|██████████| 50/50 [00:09<00:00,  5.37it/s, loss=1.1]


[Epoch 1] Avg train loss: 1.1017
[Epoch 1] Val loss: 1.0662 | Val Macro F1: 0.2266


Epoch 2/3: 100%|██████████| 50/50 [00:09<00:00,  5.34it/s, loss=0.961]


[Epoch 2] Avg train loss: 0.9611
[Epoch 2] Val loss: 0.7562 | Val Macro F1: 0.8015


Epoch 3/3: 100%|██████████| 50/50 [00:09<00:00,  5.36it/s, loss=0.66]


[Epoch 3] Avg train loss: 0.6601
[Epoch 3] Val loss: 0.5718 | Val Macro F1: 0.7782
Params -> Val Macro F1 = 0.7782
Best params: {'lr': 7e-06, 'batch_size': 16, 'epochs': 3, 'weight_decay': 0.02, 'warmup_ratio': 0.1}
Best Val Macro F1: 0.8113414456426699


In [None]:
def evaluate_on_loader(model, loader, name=""):
    """Predict every batch of ``loader`` and print a classification summary.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        loader: DataLoader yielding dict batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.
        name: Label for the split, used only in the printed header.

    Returns:
        ``(labels, preds, macro_f1)``: the gold labels and the predicted class
        ids as flat lists, and the macro-averaged F1 score.
    """
    gold = []
    predicted = []

    model.eval()
    with torch.no_grad():
        for batch in loader:
            inputs = {
                key: batch[key].to(device)
                for key in ("input_ids", "attention_mask")
            }
            targets = batch["labels"].to(device)

            # The highest logit along the class axis is the predicted label.
            batch_preds = model(**inputs).logits.argmax(dim=1)

            predicted.extend(batch_preds.cpu().numpy())
            gold.extend(targets.cpu().numpy())

    print(f"\n=== Performance on {name} ===")
    print(classification_report(gold, predicted, digits=4))
    macro_f1 = f1_score(gold, predicted, average="macro")
    print("Macro F1:", macro_f1)
    return gold, predicted, macro_f1

In [None]:
# Evaluate the model selected by the grid search, using that trial's batch size.
batch_size = best_params["batch_size"]

# Unshuffled loader over the training subset, used only to measure how well
# the model fits the data it was trained on.
train_loader_dbg = make_loader(
    train_dataset,
    batch_size=batch_size,
    shuffle=False
)

# Loader over the held-out test set, which played no part in training or
# hyper-parameter selection.
test_loader = make_loader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False
)

# Performance on the training subset (train_sub).
train_labels, train_preds, train_f1 = evaluate_on_loader(
    best_model, train_loader_dbg, name="TRAIN_SUB"
)

# Performance on the test set.
test_labels, test_preds, test_f1 = evaluate_on_loader(
    best_model, test_loader, name="TEST_SET"
)

# Rows are true labels and columns are predicted labels (scikit-learn's
# confusion_matrix convention).
print("\nConfusion matrix (TEST_SET):")
cm = confusion_matrix(test_labels, test_preds)
print(cm)


=== Performance on TRAIN_SUB ===
              precision    recall  f1-score   support

           0     0.7891    0.9015    0.8416       274
           1     0.7966    0.7086    0.7500       326
           2     0.8122    0.8000    0.8060       200

    accuracy                         0.7975       800
   macro avg     0.7993    0.8033    0.7992       800
weighted avg     0.7979    0.7975    0.7954       800

Macro F1: 0.7992042104540441

=== Performance on TEST_SET ===
              precision    recall  f1-score   support

           0     0.7917    0.8962    0.8407       106
           1     0.8170    0.7669    0.7911       163
           2     0.6596    0.6078    0.6327        51

    accuracy                         0.7844       320
   macro avg     0.7561    0.7570    0.7548       320
weighted avg     0.7835    0.7844    0.7823       320

Macro F1: 0.7548334221108629

Confusion matrix (TEST_SET):
[[ 95   9   2]
 [ 24 125  14]
 [  1  19  31]]
