### Best Model Checkpoint Drive Link: ([BERT](https://drive.google.com/file/d/1NGBUWxXf8YeYLP4f5QpUPyaBLhbVfFhJ/view?usp=drive_link) and [Mental-BERT](https://drive.google.com/file/d/1tTRBfuf99F8jK11G0Q08k_MxIFvXvPfD/view?usp=drive_link)).

In [1]:
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import f1_score
from tqdm import tqdm
from collections import Counter
import math

# Hyperparameters and Global Variables

### We use 70:10:20 train:val:test split as specified in the paper

In [2]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
EPOCHS = 10
LR = 2e-5
TRAIN_RATIO = 0.7  # 70% train
VAL_RATIO = 0.1  # 10% validation
TEST_RATIO = 0.2 # 20% test

LABELS = ["Nervousness", "Lack of Worry Control", "Excessive Worry", 
          "Difficulty Relaxing", "Restlessness", "Impending Doom"]
LABEL_MAP = {label: i for i, label in enumerate(LABELS)}
NUM_CLASSES = len(LABELS)

# Dataset Class

In [3]:
class AnxietyDataset(Dataset):
    def __init__(self, data, tokenizer, label_map, max_len=128):
        self.data = data
        self.tokenizer = tokenizer
        self.label_map = label_map
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        text = sample["ocr_text"]
        label = self.label_map[sample["meme_anxiety_category"]]

        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "label": torch.tensor(label, dtype=torch.long),
        }

# Dataloading and Preprocessing

In [4]:
full_train_data = json.load(open("/kaggle/input/anxiety-dataset/anxiety_train.json", "r"))
test_data = json.load(open("/kaggle/input/anxiety-dataset/anxiety_test.json", "r"))

labels = [LABEL_MAP[item["meme_anxiety_category"]] for item in full_train_data]

In [5]:
# train-val split

train_size = math.ceil(len(full_train_data) * TRAIN_RATIO / (TRAIN_RATIO + VAL_RATIO))
train_data, val_data = train_test_split(
    full_train_data, train_size=train_size, stratify=labels, random_state=42
)

In [6]:
def print_class_distribution(dataset, dataset_name):
    label_counts = Counter([LABEL_MAP[item["meme_anxiety_category"]] for item in dataset])
    print(f"Class distribution in {dataset_name} dataset:")
    for label, count in sorted(label_counts.items()):
        print(f"  {LABELS[label]}: {count}")
    print("-----------------------------------")

print_class_distribution(train_data, "Train")
print_class_distribution(val_data, "Validation")
print_class_distribution(test_data, "Test")

Class distribution in Train dataset:
  Nervousness: 373
  Lack of Worry Control: 331
  Excessive Worry: 322
  Difficulty Relaxing: 356
  Restlessness: 405
  Impending Doom: 366
-----------------------------------
Class distribution in Validation dataset:
  Nervousness: 53
  Lack of Worry Control: 47
  Excessive Worry: 46
  Difficulty Relaxing: 51
  Restlessness: 58
  Impending Doom: 52
-----------------------------------
Class distribution in Test dataset:
  Nervousness: 106
  Lack of Worry Control: 94
  Excessive Worry: 92
  Difficulty Relaxing: 102
  Restlessness: 116
  Impending Doom: 105
-----------------------------------


# Model Training

In [18]:
def train_model(model, model_name, train_data, val_data, epochs, model_save_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    optimizer = optim.AdamW(model.parameters(), lr=LR)
    criterion = nn.CrossEntropyLoss()

    train_dataset = AnxietyDataset(train_data, tokenizer, LABEL_MAP)
    val_dataset = AnxietyDataset(val_data, tokenizer, LABEL_MAP)
    
    print("Train Set Size:", len(train_dataset))
    print("Validation Set Size:", len(val_dataset))
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    best_f1 = 0

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        # Training Phase
        model.train()
        train_preds, train_labels = [], []
        train_loss = 0

        for batch in tqdm(train_loader):
            input_ids, attention_mask, labels = (
                batch["input_ids"].to(DEVICE),
                batch["attention_mask"].to(DEVICE),
                batch["label"].to(DEVICE),
            )

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_preds.extend(torch.argmax(outputs.logits, dim=1).cpu().numpy())
            train_labels.extend(labels.cpu().numpy())

        # Compute train F1 scores
        train_macro_f1 = f1_score(train_labels, train_preds, average="macro")
        train_weighted_f1 = f1_score(train_labels, train_preds, average="weighted")

        print(f"Train Loss: {train_loss/len(train_loader):.4f}")
        print(f"Train Macro-F1: {train_macro_f1:.4f}, Weighted-F1: {train_weighted_f1:.4f}")

        # Validation Phase
        val_loss, val_macro_f1, val_weighted_f1 = evaluate_model(model, val_loader)
        print(f"Validation Loss: {val_loss:.4f}")
        print(f"Validation Macro-F1: {val_macro_f1:.4f}, Weighted-F1: {val_weighted_f1:.4f}")

        f1_hm = 2 * val_macro_f1 * val_weighted_f1 / (val_macro_f1 + val_weighted_f1)

        # Save best model
        if f1_hm > best_f1:
            best_f1 = f1_hm
            torch.save(model.state_dict(), f"{model_save_name}_anxiety_model.pth")
            print("Best model saved!")

    return model, tokenizer

def evaluate_model(model, loader):
    model.eval()
    preds, labels = [], []
    loss = 0

    with torch.no_grad():
        for batch in loader:
            input_ids, attention_mask, labels_batch = (
                batch["input_ids"].to(DEVICE),
                batch["attention_mask"].to(DEVICE),
                batch["label"].to(DEVICE),
            )

            outputs = model(input_ids, attention_mask, labels=labels_batch)
            loss += outputs.loss.item()

            preds.extend(torch.argmax(outputs.logits, dim=1).cpu().numpy())
            labels.extend(labels_batch.cpu().numpy())

    macro_f1 = f1_score(labels, preds, average="macro")
    weighted_f1 = f1_score(labels, preds, average="weighted")

    return loss / len(loader), macro_f1, weighted_f1

## OCR + BERT Model Training

In [8]:
model_name = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=NUM_CLASSES).to(DEVICE)
model_bert, tokenizer_bert = train_model(model, model_name, train_data, val_data, EPOCHS, "bert")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train Set Size: 2153
Validation Set Size: 307

Epoch 1/10


100%|██████████| 135/135 [00:51<00:00,  2.60it/s]


Train Loss: 1.6225
Train Macro-F1: 0.3323, Weighted-F1: 0.3356
Validation Loss: 1.3673
Validation Macro-F1: 0.4738, Weighted-F1: 0.4769
Best model saved!

Epoch 2/10


100%|██████████| 135/135 [00:50<00:00,  2.68it/s]


Train Loss: 1.1457
Train Macro-F1: 0.6061, Weighted-F1: 0.6074
Validation Loss: 1.0931
Validation Macro-F1: 0.5934, Weighted-F1: 0.5931
Best model saved!

Epoch 3/10


100%|██████████| 135/135 [00:51<00:00,  2.65it/s]


Train Loss: 0.7792
Train Macro-F1: 0.7548, Weighted-F1: 0.7556
Validation Loss: 1.0520
Validation Macro-F1: 0.6285, Weighted-F1: 0.6257
Best model saved!

Epoch 4/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.4366
Train Macro-F1: 0.8778, Weighted-F1: 0.8788
Validation Loss: 1.1325
Validation Macro-F1: 0.6252, Weighted-F1: 0.6241

Epoch 5/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.2542
Train Macro-F1: 0.9356, Weighted-F1: 0.9365
Validation Loss: 1.2089
Validation Macro-F1: 0.6486, Weighted-F1: 0.6436
Best model saved!

Epoch 6/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.1527
Train Macro-F1: 0.9649, Weighted-F1: 0.9652
Validation Loss: 1.3599
Validation Macro-F1: 0.5943, Weighted-F1: 0.5928

Epoch 7/10


100%|██████████| 135/135 [00:50<00:00,  2.65it/s]


Train Loss: 0.1119
Train Macro-F1: 0.9751, Weighted-F1: 0.9754
Validation Loss: 1.4118
Validation Macro-F1: 0.6313, Weighted-F1: 0.6289

Epoch 8/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.0881
Train Macro-F1: 0.9806, Weighted-F1: 0.9810
Validation Loss: 1.4800
Validation Macro-F1: 0.6351, Weighted-F1: 0.6330

Epoch 9/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.0697
Train Macro-F1: 0.9808, Weighted-F1: 0.9810
Validation Loss: 1.7132
Validation Macro-F1: 0.6031, Weighted-F1: 0.5997

Epoch 10/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.0590
Train Macro-F1: 0.9836, Weighted-F1: 0.9837
Validation Loss: 1.5479
Validation Macro-F1: 0.6254, Weighted-F1: 0.6238


## OCR + BERT Model Evaluation

In [12]:
test_dataset_bert = AnxietyDataset(test_data, tokenizer_bert, LABEL_MAP)
print("Test Set Size:", len(test_dataset_bert))
test_loader_bert = DataLoader(test_dataset_bert, batch_size=BATCH_SIZE, shuffle=False)

model_bert.load_state_dict(torch.load("bert_anxiety_model.pth", weights_only=True))
test_loss, test_macro_f1, test_weighted_f1 = evaluate_model(model_bert, test_loader_bert)

print(f"\nFinal Test Evaluation:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Macro-F1: {test_macro_f1:.4f}")
print(f"Test Weighted-F1: {test_weighted_f1:.4f}")

Test Set Size: 615

Final Test Evaluation:
Test Loss: 1.2925
Test Macro-F1: 0.6163
Test Weighted-F1: 0.6143


## OCR + Mental-BERT Model Training

In [14]:
model_name = "mental/mental-bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=NUM_CLASSES).to(DEVICE)
model_mental_bert, tokenizer_mental_bert = train_model(model, model_name, train_data, val_data, EPOCHS, "mental_bert")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train Set Size: 2153
Validation Set Size: 307

Epoch 1/10


100%|██████████| 135/135 [00:52<00:00,  2.59it/s]


Train Loss: 1.5044
Train Macro-F1: 0.4281, Weighted-F1: 0.4288
Validation Loss: 1.1532
Validation Macro-F1: 0.5849, Weighted-F1: 0.5821
Best model saved!

Epoch 2/10


100%|██████████| 135/135 [00:50<00:00,  2.67it/s]


Train Loss: 0.9034
Train Macro-F1: 0.6969, Weighted-F1: 0.6958
Validation Loss: 0.9703
Validation Macro-F1: 0.6476, Weighted-F1: 0.6450
Best model saved!

Epoch 3/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.5224
Train Macro-F1: 0.8423, Weighted-F1: 0.8418
Validation Loss: 1.0567
Validation Macro-F1: 0.6361, Weighted-F1: 0.6305

Epoch 4/10


100%|██████████| 135/135 [00:50<00:00,  2.67it/s]


Train Loss: 0.2471
Train Macro-F1: 0.9356, Weighted-F1: 0.9355
Validation Loss: 1.1351
Validation Macro-F1: 0.6473, Weighted-F1: 0.6437

Epoch 5/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.1420
Train Macro-F1: 0.9645, Weighted-F1: 0.9647
Validation Loss: 1.3002
Validation Macro-F1: 0.6335, Weighted-F1: 0.6311

Epoch 6/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.0830
Train Macro-F1: 0.9817, Weighted-F1: 0.9819
Validation Loss: 1.4449
Validation Macro-F1: 0.6310, Weighted-F1: 0.6270

Epoch 7/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.0769
Train Macro-F1: 0.9823, Weighted-F1: 0.9823
Validation Loss: 1.3914
Validation Macro-F1: 0.6315, Weighted-F1: 0.6280

Epoch 8/10


100%|██████████| 135/135 [00:50<00:00,  2.67it/s]


Train Loss: 0.0535
Train Macro-F1: 0.9841, Weighted-F1: 0.9842
Validation Loss: 1.4877
Validation Macro-F1: 0.6149, Weighted-F1: 0.6110

Epoch 9/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.0503
Train Macro-F1: 0.9866, Weighted-F1: 0.9865
Validation Loss: 1.5840
Validation Macro-F1: 0.6055, Weighted-F1: 0.6023

Epoch 10/10


100%|██████████| 135/135 [00:50<00:00,  2.66it/s]


Train Loss: 0.0376
Train Macro-F1: 0.9874, Weighted-F1: 0.9875
Validation Loss: 1.6814
Validation Macro-F1: 0.6217, Weighted-F1: 0.6176


## OCR + Mental-BERT Model Evaluation

In [15]:
test_dataset_mental_bert = AnxietyDataset(test_data, tokenizer_mental_bert, LABEL_MAP)
print("Test Set Size:", len(test_dataset_mental_bert))
test_loader_mental_bert = DataLoader(test_dataset_mental_bert, batch_size=BATCH_SIZE, shuffle=False)

model_mental_bert.load_state_dict(torch.load("mental_bert_anxiety_model.pth", weights_only=True))
test_loss, test_macro_f1, test_weighted_f1 = evaluate_model(model_mental_bert, test_loader_mental_bert)

print(f"\nFinal Test Evaluation:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Macro-F1: {test_macro_f1:.4f}")
print(f"Test Weighted-F1: {test_weighted_f1:.4f}")

Test Set Size: 615

Final Test Evaluation:
Test Loss: 1.0819
Test Macro-F1: 0.6235
Test Weighted-F1: 0.6232
