### Best Model Checkpoint Drive Link: ([Mental-BERT](https://drive.google.com/file/d/1zIHYksEZFY-dE_s6yPTdhSJWXuPSEdis/view?usp=drive_link)).

In [1]:
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import f1_score
from tqdm import tqdm
from collections import Counter
import math

# Hyperparameters and Global Variables

### We use a 70:10:20 train:val:test split, as specified in the paper

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Seed PyTorch's global RNG so that torch-driven randomness (DataLoader
# shuffling, dropout, newly initialised layers) is repeatable across
# Restart & Run All. Some GPU kernels are still non-deterministic, so results
# may not be bit-identical between runs on CUDA.
SEED = 42
torch.manual_seed(SEED)

BATCH_SIZE = 32
EPOCHS = 10
LR = 2e-5
TRAIN_RATIO = 0.7  # 70% train
VAL_RATIO = 0.1  # 10% validation
TEST_RATIO = 0.2  # 20% test
# Catch a mistyped ratio early; the tolerance absorbs float rounding.
assert abs(TRAIN_RATIO + VAL_RATIO + TEST_RATIO - 1.0) < 1e-9, "split ratios must sum to 1"

# Category names in class-id order: LABEL_MAP assigns each name its list index.
LABELS = ["Nervousness", "Lack of Worry Control", "Excessive Worry", 
          "Difficulty Relaxing", "Restlessness", "Impending Doom"]
LABEL_MAP = {label: i for i, label in enumerate(LABELS)}
NUM_CLASSES = len(LABELS)

# Dataset Class

In [9]:
class AnxietyDataset(Dataset):
    """Map-style dataset that tokenizes one record per lookup.

    Each record in ``data`` must provide the keys ``"ocr_text"``,
    ``"figurative_reasoning"`` and ``"meme_anxiety_category"``. The two text
    fields are joined with a single space and tokenized; the category is
    translated to an integer id through ``label_map``.
    """

    def __init__(self, data, tokenizer, label_map, max_len=512):
        self.data, self.tokenizer = data, tokenizer
        self.label_map, self.max_len = label_map, max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``label`` for record ``idx``."""
        record = self.data[idx]
        text = " ".join((record["ocr_text"], record["figurative_reasoning"]))
        class_id = self.label_map[record["meme_anxiety_category"]]

        # Every sample is padded/truncated to max_len tokens.
        tokenizer_options = {
            "padding": "max_length",
            "truncation": True,
            "max_length": self.max_len,
            "return_tensors": "pt",
        }
        encoded = self.tokenizer(text, **tokenizer_options)

        # squeeze(0) removes the leading dimension of the single-text encoding.
        item = {key: encoded[key].squeeze(0) for key in ("input_ids", "attention_mask")}
        item["label"] = torch.tensor(class_id, dtype=torch.long)
        return item

# Dataloading and Preprocessing

In [10]:
# Read both JSON splits inside `with` blocks so each file handle is closed as
# soon as it has been parsed (the bare `json.load(open(...))` form leaks it).
with open("/kaggle/input/anxiety-llava/anxiety_train_llava_dataset.json", "r", encoding="utf-8") as train_file:
    full_train_data = json.load(train_file)
with open("/kaggle/input/anxiety-llava/anxiety_test_llava_dataset.json", "r", encoding="utf-8") as test_file:
    test_data = json.load(test_file)

# Integer class id of every training record; used to stratify the train/val split.
labels = [LABEL_MAP[item["meme_anxiety_category"]] for item in full_train_data]

In [11]:
# Carve the validation set out of the training file. The file holds the
# train + val share of the data, so the train portion of it is
# TRAIN_RATIO / (TRAIN_RATIO + VAL_RATIO), rounded up to a whole sample.
n_labelled = len(full_train_data)
train_size = math.ceil(n_labelled * TRAIN_RATIO / (TRAIN_RATIO + VAL_RATIO))

# Stratify on the class ids so both parts keep the same class proportions.
train_data, val_data = train_test_split(
    full_train_data,
    train_size=train_size,
    stratify=labels,
    random_state=42,
)

In [12]:
def print_class_distribution(dataset, dataset_name):
    """Print the number of samples per anxiety category in ``dataset``.

    Categories are listed in class-id order; a category with no samples in
    ``dataset`` is not listed.
    """
    counts_by_id = Counter()
    for item in dataset:
        counts_by_id[LABEL_MAP[item["meme_anxiety_category"]]] += 1

    print(f"Class distribution in {dataset_name} dataset:")
    for class_id in sorted(counts_by_id):
        print(f"  {LABELS[class_id]}: {counts_by_id[class_id]}")
    print("-----------------------------------")

# Report the per-class counts of each split.
for split_data, split_name in (
    (train_data, "Train"),
    (val_data, "Validation"),
    (test_data, "Test"),
):
    print_class_distribution(split_data, split_name)

Class distribution in Train dataset:
  Nervousness: 373
  Lack of Worry Control: 331
  Excessive Worry: 322
  Difficulty Relaxing: 356
  Restlessness: 405
  Impending Doom: 366
-----------------------------------
Class distribution in Validation dataset:
  Nervousness: 53
  Lack of Worry Control: 47
  Excessive Worry: 46
  Difficulty Relaxing: 51
  Restlessness: 58
  Impending Doom: 52
-----------------------------------
Class distribution in Test dataset:
  Nervousness: 106
  Lack of Worry Control: 94
  Excessive Worry: 92
  Difficulty Relaxing: 102
  Restlessness: 116
  Impending Doom: 105
-----------------------------------


# Model Training

In [13]:
def train_model(model, model_name, train_data, val_data, epochs, model_save_name):
    """Fine-tune ``model`` and checkpoint the epoch with the best validation score.

    The selection score is the harmonic mean of the validation macro-F1 and
    weighted-F1. Whenever it improves, the model's ``state_dict`` is written
    to ``f"{model_save_name}_anxiety_model.pth"``.

    Args:
        model: Sequence-classification model called as
            ``model(input_ids, attention_mask, labels=...)``; its output must
            expose ``.loss`` and ``.logits``. Batches are moved to ``DEVICE``,
            so the caller is expected to have placed the model there already.
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.
        train_data: Records for the training split (see ``AnxietyDataset``).
        val_data: Records for the validation split.
        epochs: Number of full passes over ``train_data``.
        model_save_name: Prefix of the checkpoint file name.

    Returns:
        ``(model, tokenizer)``. ``model`` carries the weights of the LAST
        epoch, not of the best one; load the saved checkpoint to get those.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # No separate criterion is needed: the model returns its own loss when it
    # is called with `labels=`.
    optimizer = optim.AdamW(model.parameters(), lr=LR)

    train_dataset = AnxietyDataset(train_data, tokenizer, LABEL_MAP)
    val_dataset = AnxietyDataset(val_data, tokenizer, LABEL_MAP)
    
    print("Train Set Size:", len(train_dataset))
    print("Validation Set Size:", len(val_dataset))
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    checkpoint_path = f"{model_save_name}_anxiety_model.pth"
    # Start below any reachable score so the first epoch always writes a
    # checkpoint, even if its validation F1 is 0; later cells load this file.
    best_f1 = float("-inf")

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        # Training Phase
        model.train()
        train_preds, train_labels = [], []
        train_loss = 0

        for batch in tqdm(train_loader):
            input_ids, attention_mask, labels = (
                batch["input_ids"].to(DEVICE),
                batch["attention_mask"].to(DEVICE),
                batch["label"].to(DEVICE),
            )

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            # Predictions are collected while the model is in train mode and
            # its weights are still changing, so the train F1 is a running
            # estimate rather than a score of the end-of-epoch model.
            train_preds.extend(torch.argmax(outputs.logits, dim=1).cpu().numpy())
            train_labels.extend(labels.cpu().numpy())

        # Compute train F1 scores
        train_macro_f1 = f1_score(train_labels, train_preds, average="macro")
        train_weighted_f1 = f1_score(train_labels, train_preds, average="weighted")

        print(f"Train Loss: {train_loss/len(train_loader):.4f}")
        print(f"Train Macro-F1: {train_macro_f1:.4f}, Weighted-F1: {train_weighted_f1:.4f}")

        # Validation Phase
        val_loss, val_macro_f1, val_weighted_f1 = evaluate_model(model, val_loader)
        print(f"Validation Loss: {val_loss:.4f}")
        print(f"Validation Macro-F1: {val_macro_f1:.4f}, Weighted-F1: {val_weighted_f1:.4f}")

        # Harmonic mean of the two validation F1 scores. It is undefined when
        # both are 0 (0/0 gives NaN or ZeroDivisionError depending on the
        # float type), so score that case as 0 instead.
        f1_sum = val_macro_f1 + val_weighted_f1
        f1_hm = 2 * val_macro_f1 * val_weighted_f1 / f1_sum if f1_sum > 0 else 0.0

        # Save best model
        if f1_hm > best_f1:
            best_f1 = f1_hm
            torch.save(model.state_dict(), checkpoint_path)
            print("Best model saved!")

    return model, tokenizer

def evaluate_model(model, loader):
    """Score ``model`` on every batch of ``loader`` without tracking gradients.

    Puts the model in eval mode (and leaves it there), moves each batch to
    ``DEVICE`` and collects the argmax prediction for every sample.

    Returns:
        ``(mean_loss, macro_f1, weighted_f1)`` where ``mean_loss`` is the sum
        of the per-batch losses divided by the number of batches.
    """
    model.eval()
    total_loss = 0
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for batch in loader:
            input_ids = batch["input_ids"].to(DEVICE)
            attention_mask = batch["attention_mask"].to(DEVICE)
            targets = batch["label"].to(DEVICE)

            outputs = model(input_ids, attention_mask, labels=targets)
            total_loss += outputs.loss.item()

            batch_preds = torch.argmax(outputs.logits, dim=1)
            all_preds.extend(batch_preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    # Both F1 variants are computed before the loss is averaged.
    scores = {
        averaging: f1_score(all_targets, all_preds, average=averaging)
        for averaging in ("macro", "weighted")
    }

    return total_loss / len(loader), scores["macro"], scores["weighted"]

## OCR + LLAVA Figurative Reasoning + Mental-BERT Model Training

In [16]:
# Load the pretrained Mental-BERT encoder with a NUM_CLASSES-way classification
# head, move it to DEVICE, then fine-tune it on the train/validation splits.
model_name = "mental/mental-bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=NUM_CLASSES,
).to(DEVICE)
model_mental_bert, tokenizer_mental_bert = train_model(
    model=model,
    model_name=model_name,
    train_data=train_data,
    val_data=val_data,
    epochs=EPOCHS,
    model_save_name="mental_bert_llava",
)

config.json:   0%|          | 0.00/639 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/321 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Train Set Size: 2153
Validation Set Size: 307

Epoch 1/10


100%|██████████| 68/68 [03:34<00:00,  3.15s/it]


Train Loss: 1.6380
Train Macro-F1: 0.3208, Weighted-F1: 0.3237
Validation Loss: 1.2551
Validation Macro-F1: 0.4654, Weighted-F1: 0.4625
Best model saved!

Epoch 2/10


100%|██████████| 68/68 [03:41<00:00,  3.26s/it]


Train Loss: 1.1379
Train Macro-F1: 0.5809, Weighted-F1: 0.5816
Validation Loss: 0.9810
Validation Macro-F1: 0.6246, Weighted-F1: 0.6256
Best model saved!

Epoch 3/10


100%|██████████| 68/68 [03:41<00:00,  3.26s/it]


Train Loss: 0.7922
Train Macro-F1: 0.7284, Weighted-F1: 0.7289
Validation Loss: 0.9251
Validation Macro-F1: 0.6360, Weighted-F1: 0.6353
Best model saved!

Epoch 4/10


100%|██████████| 68/68 [03:41<00:00,  3.26s/it]


Train Loss: 0.4711
Train Macro-F1: 0.8611, Weighted-F1: 0.8614
Validation Loss: 0.9852
Validation Macro-F1: 0.6638, Weighted-F1: 0.6633
Best model saved!

Epoch 5/10


100%|██████████| 68/68 [03:41<00:00,  3.26s/it]


Train Loss: 0.2722
Train Macro-F1: 0.9264, Weighted-F1: 0.9270
Validation Loss: 1.1405
Validation Macro-F1: 0.6612, Weighted-F1: 0.6614

Epoch 6/10


100%|██████████| 68/68 [03:41<00:00,  3.25s/it]


Train Loss: 0.1558
Train Macro-F1: 0.9578, Weighted-F1: 0.9582
Validation Loss: 1.2149
Validation Macro-F1: 0.6406, Weighted-F1: 0.6406

Epoch 7/10


100%|██████████| 68/68 [03:41<00:00,  3.25s/it]


Train Loss: 0.0997
Train Macro-F1: 0.9741, Weighted-F1: 0.9744
Validation Loss: 1.3386
Validation Macro-F1: 0.6314, Weighted-F1: 0.6284

Epoch 8/10


100%|██████████| 68/68 [03:41<00:00,  3.26s/it]


Train Loss: 0.0565
Train Macro-F1: 0.9854, Weighted-F1: 0.9856
Validation Loss: 1.4824
Validation Macro-F1: 0.6388, Weighted-F1: 0.6369

Epoch 9/10


100%|██████████| 68/68 [03:41<00:00,  3.26s/it]


Train Loss: 0.0515
Train Macro-F1: 0.9915, Weighted-F1: 0.9916
Validation Loss: 1.3958
Validation Macro-F1: 0.6535, Weighted-F1: 0.6518

Epoch 10/10


100%|██████████| 68/68 [03:41<00:00,  3.26s/it]


Train Loss: 0.0466
Train Macro-F1: 0.9887, Weighted-F1: 0.9889
Validation Loss: 1.4665
Validation Macro-F1: 0.6516, Weighted-F1: 0.6497


## OCR + LLAVA Figurative Reasoning + Mental-BERT Model Evaluation

In [18]:
# Wrap the held-out test split with the tokenizer that was used for training.
test_dataset_mental_bert = AnxietyDataset(test_data, tokenizer_mental_bert, LABEL_MAP)
print("Test Set Size:", len(test_dataset_mental_bert))
test_loader_mental_bert = DataLoader(test_dataset_mental_bert, batch_size=BATCH_SIZE, shuffle=False)

# Restore the checkpoint saved at the best validation epoch (the in-memory
# model holds the last epoch's weights). map_location remaps the stored
# tensors onto the current DEVICE, so a checkpoint written on a GPU also loads
# on a CPU-only machine.
model_mental_bert.load_state_dict(
    torch.load("mental_bert_llava_anxiety_model.pth", map_location=DEVICE, weights_only=True)
)
test_loss, test_macro_f1, test_weighted_f1 = evaluate_model(model_mental_bert, test_loader_mental_bert)

print("\nFinal Test Evaluation:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Macro-F1: {test_macro_f1:.4f}")
print(f"Test Weighted-F1: {test_weighted_f1:.4f}")

Test Set Size: 615

Final Test Evaluation:
Test Loss: 1.1704
Test Macro-F1: 0.6183
Test Weighted-F1: 0.6173
