### Best Model Checkpoint (Google Drive): [Mental-BERT LLaVA](https://drive.google.com/file/d/1ISBSZzCrlp5lDtW7Idnxjk4bfaY_DRVd/view?usp=sharing)

In [1]:
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import f1_score
from tqdm import tqdm
from collections import Counter

# Hyperparameters and Global Variables

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Optimisation settings shared by every training run in this notebook.
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 2e-5

# Category names; a name's position in this list is its class index.
LABELS = [
    "Lack of Interest",
    "Feeling Down",
    "Eating Disorder",
    "Sleeping Disorder",
    "Low Self-Esteem",
    "Concentration Problem",
    "Self-Harm",
]
LABEL_MAP = dict(zip(LABELS, range(len(LABELS))))
NUM_CLASSES = len(LABELS)

# Dataset Class

In [3]:
class DepressionDataset(Dataset):
    """Map-style dataset yielding tokenized text and a multi-hot label vector.

    Every record is read through the keys ``"ocr_text"``,
    ``"figurative_reasoning"`` and ``"meme_depressive_categories"``.
    """

    def __init__(self, data, tokenizer, label_map, max_len=512):
        """Keep references to the records and the tokenization settings.

        Args:
            data: Indexable collection of records.
            tokenizer: Callable invoked with the text and the
                padding/truncation options used in ``__getitem__``.
            label_map: Mapping from category name to class index.
            max_len: Value forwarded to the tokenizer as ``max_length``.
        """
        self.max_len = max_len
        self.label_map = label_map
        self.tokenizer = tokenizer
        self.data = data

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for record ``idx``."""
        record = self.data[idx]

        # The two text fields are joined with a single space.
        joined_text = " ".join((record["ocr_text"], record["figurative_reasoning"]))
        categories = record["meme_depressive_categories"]

        # Multi-hot target: one slot per entry of the label map, 1 where present.
        target = torch.zeros(len(self.label_map))
        for category in categories:
            target[self.label_map[category]] = 1

        encoded = self.tokenizer(
            joined_text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )

        # Drop the leading dimension the tokenizer output carries.
        item = {
            field: encoded[field].squeeze(0)
            for field in ("input_ids", "attention_mask")
        }
        item["labels"] = target
        return item

# Data Loading and Preprocessing

In [4]:
def _read_json(path):
    """Parse the JSON file at ``path`` and close the file handle afterwards.

    The file is decoded as UTF-8 instead of relying on the platform default.
    """
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# The three dataset splits, each parsed from its own JSON file.
train_data = _read_json("/kaggle/input/llava-data/depression_train_llava_dataset.json")
val_data = _read_json("/kaggle/input/llava-data/depression_val_llava_dataset.json")
test_data = _read_json("/kaggle/input/llava-data/depression_test_llava_dataset.json")

In [5]:
# Report how many records each split contains.
for caption, split in (
    ("Size of Training Data:", train_data),
    ("Size of Validation Data:", val_data),
    ("Size of Test Data:", test_data),
):
    print(caption, len(split))

Size of Training Data: 8722
Size of Validation Data: 359
Size of Test Data: 520


In [6]:
def print_class_distribution(dataset, dataset_name):
    """Print how often each category occurs in ``dataset``.

    Args:
        dataset: Iterable of records with a ``"meme_depressive_categories"``
            list; a record adds one to every category it lists.
        dataset_name: Name shown in the heading line.

    Only categories that occur at least once are printed, ordered by class index.
    """
    tally = Counter()
    for record in dataset:
        for category in record["meme_depressive_categories"]:
            tally[LABEL_MAP[category]] += 1

    print(f"Class distribution in {dataset_name} dataset:")
    for label in sorted(tally):
        count = tally[label]
        print(f"  {LABELS[label]}: {count}")
    print("-----------------------------------")

# Show the label balance of every split, in train / validation / test order.
for split, title in (
    (train_data, "Train"),
    (val_data, "Validation"),
    (test_data, "Test"),
):
    print_class_distribution(split, title)

Class distribution in Train dataset:
  Lack of Interest: 471
  Feeling Down: 2085
  Eating Disorder: 1939
  Sleeping Disorder: 1562
  Low Self-Esteem: 855
  Concentration Problem: 595
  Self-Harm: 1516
-----------------------------------
Class distribution in Validation dataset:
  Lack of Interest: 45
  Feeling Down: 195
  Eating Disorder: 49
  Sleeping Disorder: 45
  Low Self-Esteem: 85
  Concentration Problem: 42
  Self-Harm: 61
-----------------------------------
Class distribution in Test dataset:
  Lack of Interest: 71
  Feeling Down: 218
  Eating Disorder: 92
  Sleeping Disorder: 79
  Low Self-Esteem: 114
  Concentration Problem: 66
  Self-Harm: 81
-----------------------------------


# Model Training

In [7]:
def train_model(model, model_name, train_data, val_data, epochs, model_save_name):
    """Fine-tune ``model`` for multi-label classification and checkpoint the best epoch.

    After every epoch the model is scored on the validation split. The weights
    are written to ``"{model_save_name}_depression_model.pth"`` whenever the
    harmonic mean of validation macro-F1 and weighted-F1 improves.

    Args:
        model: Sequence-classification model whose forward pass accepts
            ``input_ids`` / ``attention_mask`` keywords and returns an object
            with a ``logits`` attribute. This function does not move the model;
            only the batches are sent to ``DEVICE``.
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.
        train_data: Training records accepted by ``DepressionDataset``.
        val_data: Validation records accepted by ``DepressionDataset``.
        epochs: Number of passes over the training data.
        model_save_name: Prefix of the checkpoint file name.

    Returns:
        tuple: ``(model, tokenizer)``. The returned model carries the weights
        of the last epoch; load the saved checkpoint to get the best epoch.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    train_dataset = DepressionDataset(train_data, tokenizer, LABEL_MAP)
    val_dataset = DepressionDataset(val_data, tokenizer, LABEL_MAP)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.BCEWithLogitsLoss()

    # Start below every reachable score so the first epoch always writes a
    # checkpoint, even when its validation F1 is exactly 0.
    best_val_f1 = float("-inf")

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        # Training Phase
        model.train()
        train_loss = 0
        train_preds, train_labels = [], []

        for batch in tqdm(train_loader):
            input_ids, attention_mask, labels = (
                batch["input_ids"].to(DEVICE),
                batch["attention_mask"].to(DEVICE),
                batch["labels"].to(DEVICE),
            )

            optimizer.zero_grad()
            # Keyword arguments keep the mask bound to the right parameter for
            # any architecture. Labels are not forwarded because the loss is
            # computed by `criterion` below.
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs.logits, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_preds.extend(torch.sigmoid(outputs.logits).detach().cpu().numpy())
            train_labels.extend(labels.cpu().numpy())

        # Compute train F1 scores
        train_macro_f1, train_weighted_f1 = compute_f1_scores(train_labels, train_preds)
        print(f"Train Loss: {train_loss/len(train_loader):.4f}")
        print(f"Train Macro-F1: {train_macro_f1:.4f}, Weighted-F1: {train_weighted_f1:.4f}")

        # Validation Phase
        val_loss, val_macro_f1, val_weighted_f1 = evaluate_model(model, val_loader)
        print(f"Validation Loss: {val_loss:.4f}")
        print(f"Validation Macro-F1: {val_macro_f1:.4f}, Weighted-F1: {val_weighted_f1:.4f}")

        # Harmonic mean of the two validation scores; defined as 0 when both
        # are 0 so the division cannot fail or produce NaN.
        f1_sum = val_macro_f1 + val_weighted_f1
        f1_hm = 2 * val_macro_f1 * val_weighted_f1 / f1_sum if f1_sum > 0 else 0.0

        # Save best model
        if f1_hm > best_val_f1:
            best_val_f1 = f1_hm
            torch.save(model.state_dict(), f"{model_save_name}_depression_model.pth")
            print("Best model saved!")

    return model, tokenizer

def evaluate_model(model, loader):
    """Score ``model`` on every batch of ``loader`` without tracking gradients.

    Args:
        model: Model whose forward pass accepts ``input_ids`` /
            ``attention_mask`` keywords and returns an object with ``logits``.
        loader: Sized iterable of batches holding ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.

    Returns:
        tuple: ``(mean_loss, macro_f1, weighted_f1)``, where ``mean_loss`` is
        the BCE-with-logits loss averaged over batches.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    model.eval()
    loss = 0
    preds, labels = [], []

    criterion = nn.BCEWithLogitsLoss()

    with torch.no_grad():
        for batch in loader:
            input_ids, attention_mask, labels_batch = (
                batch["input_ids"].to(DEVICE),
                batch["attention_mask"].to(DEVICE),
                batch["labels"].to(DEVICE),
            )

            # Keyword arguments keep the mask bound to the right parameter for
            # any architecture. Labels are not forwarded because the loss is
            # computed by `criterion` on the next line.
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss += criterion(outputs.logits, labels_batch).item()

            preds.extend(torch.sigmoid(outputs.logits).cpu().numpy())
            labels.extend(labels_batch.cpu().numpy())

    macro_f1, weighted_f1 = compute_f1_scores(labels, preds)
    return loss / len(loader), macro_f1, weighted_f1

def compute_f1_scores(true_labels, pred_probs, threshold=0.5):
    """Binarize probabilities and return ``(macro_f1, weighted_f1)``.

    Args:
        true_labels: Ground-truth indicator values, converted with ``np.array``.
        pred_probs: Predicted probabilities, converted with ``np.array``.
        threshold: A probability strictly greater than this counts as positive.

    Returns:
        tuple: F1 with ``average="macro"`` followed by F1 with
        ``average="weighted"``, both computed with ``zero_division=0``.
    """
    targets = np.array(true_labels)
    hard_preds = (np.array(pred_probs) > threshold).astype(int)

    return tuple(
        f1_score(targets, hard_preds, average=averaging, zero_division=0)
        for averaging in ("macro", "weighted")
    )

## OCR + Mental-BERT Model Training

In [9]:
# Identifier used for both the pretrained weights and the tokenizer.
model_name = "mental/mental-bert-base-uncased"

# Build the classifier with one output per category, then move it to DEVICE.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=NUM_CLASSES,
    problem_type="multi_label_classification",
)
model = model.to(DEVICE)

# Fine-tune; checkpoints are written under the "mental_bert_llava" prefix.
model_mental_bert, tokenizer_mental_bert = train_model(
    model=model,
    model_name=model_name,
    train_data=train_data,
    val_data=val_data,
    epochs=EPOCHS,
    model_save_name="mental_bert_llava",
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/10


100%|██████████| 546/546 [14:47<00:00,  1.62s/it]


Train Loss: 0.2885
Train Macro-F1: 0.4916, Weighted-F1: 0.5307
Validation Loss: 0.3607
Validation Macro-F1: 0.5548, Weighted-F1: 0.5101
Best model saved!

Epoch 2/10


100%|██████████| 546/546 [14:46<00:00,  1.62s/it]


Train Loss: 0.1788
Train Macro-F1: 0.7479, Weighted-F1: 0.7664
Validation Loss: 0.3771
Validation Macro-F1: 0.5709, Weighted-F1: 0.5616
Best model saved!

Epoch 3/10


100%|██████████| 546/546 [14:46<00:00,  1.62s/it]


Train Loss: 0.1262
Train Macro-F1: 0.8309, Weighted-F1: 0.8456
Validation Loss: 0.4151
Validation Macro-F1: 0.5814, Weighted-F1: 0.5484

Epoch 4/10


100%|██████████| 546/546 [14:46<00:00,  1.62s/it]


Train Loss: 0.0553
Train Macro-F1: 0.9388, Weighted-F1: 0.9431
Validation Loss: 0.4963
Validation Macro-F1: 0.5898, Weighted-F1: 0.5852
Best model saved!

Epoch 5/10


100%|██████████| 546/546 [14:46<00:00,  1.62s/it]


Train Loss: 0.0504
Train Macro-F1: 0.9485, Weighted-F1: 0.9493
Validation Loss: 0.5656
Validation Macro-F1: 0.5523, Weighted-F1: 0.5539
Best model saved!

Epoch 6/10


100%|██████████| 546/546 [14:46<00:00,  1.62s/it]


Train Loss: 0.0476
Train Macro-F1: 0.9523, Weighted-F1: 0.9564
Validation Loss: 0.6182
Validation Macro-F1: 0.5473, Weighted-F1: 0.5459

Epoch 7/10


100%|██████████| 546/546 [14:46<00:00,  1.62s/it]


Train Loss: 0.0427
Train Macro-F1: 0.9645, Weighted-F1: 0.9640
Validation Loss: 0.8724
Validation Macro-F1: 0.5111, Weighted-F1: 0.5134

Epoch 8/10


100%|██████████| 546/546 [14:46<00:00,  1.62s/it]


Train Loss: 0.0419
Train Macro-F1: 0.9624, Weighted-F1: 0.9623
Validation Loss: 0.9757
Validation Macro-F1: 0.4995, Weighted-F1: 0.4908

Epoch 9/10


100%|██████████| 546/546 [14:46<00:00,  1.62s/it]


Train Loss: 0.0379
Train Macro-F1: 0.9715, Weighted-F1: 0.9713
Validation Loss: 0.9824
Validation Macro-F1: 0.5282, Weighted-F1: 0.5201

Epoch 10/10


100%|██████████| 546/546 [14:46<00:00,  1.62s/it]


Train Loss: 0.0401
Train Macro-F1: 0.9655, Weighted-F1: 0.9665
Validation Loss: 0.9584
Validation Macro-F1: 0.5145, Weighted-F1: 0.5166


## OCR + Mental-BERT Model Evaluation

In [11]:
# Wrap the held-out split; order is kept fixed so evaluation is repeatable.
test_dataset_mental_bert = DepressionDataset(test_data, tokenizer_mental_bert, LABEL_MAP)
test_loader_mental_bert = DataLoader(test_dataset_mental_bert, batch_size=BATCH_SIZE, shuffle=False)

# Restore the best checkpoint written during training. `map_location` remaps
# the stored tensors onto the current DEVICE, so a checkpoint saved from a GPU
# run still loads on a CPU-only runtime.
model_mental_bert.load_state_dict(
    torch.load(
        "mental_bert_llava_depression_model.pth",
        map_location=DEVICE,
        weights_only=True,
    )
)
test_loss, test_macro_f1, test_weighted_f1 = evaluate_model(model_mental_bert, test_loader_mental_bert)

print(f"\nFinal Test Evaluation:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Macro-F1: {test_macro_f1:.4f}")
print(f"Test Weighted-F1: {test_weighted_f1:.4f}")

Final Test Evaluation:
Test Loss: 0.4334
Test Macro-F1: 0.6298
Test Weighted-F1: 0.6263
