### Best Model Checkpoint Drive Link: ([BERT](https://drive.google.com/file/d/1AAQlwZGk0XEMPaMdJWMctXTqUVCKr5pm/view?usp=drive_link) and [Mental-BERT](https://drive.google.com/file/d/1R-WApOMkNu-Hm1IAQ8frI83dJT_A3jgh/view?usp=drive_link)).

In [7]:
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import f1_score
from tqdm import tqdm
from collections import Counter

# Hyperparameters and Global Variables

In [13]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 2e-5

LABELS = ["Lack of Interest", "Feeling Down", "Eating Disorder",
          "Sleeping Disorder", "Low Self-Esteem", "Concentration Problem", "Self-Harm"]
LABEL_MAP = {label: i for i, label in enumerate(LABELS)}
NUM_CLASSES = len(LABELS)

# Dataset Class

In [3]:
class DepressionDataset(Dataset):
    def __init__(self, data, tokenizer, label_map, max_len=128):
        self.data = data
        self.tokenizer = tokenizer
        self.label_map = label_map
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        text = sample["ocr_text"]
        labels = sample["meme_depressive_categories"]

        # Convert label list to one-hot encoding
        label_tensor = torch.zeros(len(self.label_map))
        for label in labels:
            label_tensor[self.label_map[label]] = 1

        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": label_tensor,
        }

# Dataloading and Preprocessing

In [5]:
train_data = json.load(open("/kaggle/input/depression-dataset/depression_train.json", "r"))
val_data = json.load(open("/kaggle/input/depression-dataset/depression_val.json", "r"))
test_data = json.load(open("/kaggle/input/depression-dataset/depression_test.json", "r"))

In [10]:
print("Size of Training Data:", len(train_data))
print("Size of Validation Data:", len(val_data))
print("Size of Test Data:", len(test_data))

Size of Training Data: 8722
Size of Validation Data: 359
Size of Test Data: 520


In [9]:
def print_class_distribution(dataset, dataset_name):
    label_counts = Counter([LABEL_MAP[i] for item in dataset for i in item["meme_depressive_categories"]])
    print(f"Class distribution in {dataset_name} dataset:")
    for label, count in sorted(label_counts.items()):
        print(f"  {LABELS[label]}: {count}")
    print("-----------------------------------")

print_class_distribution(train_data, "Train")
print_class_distribution(val_data, "Validation")
print_class_distribution(test_data, "Test")

Class distribution in Train dataset:
  Lack of Interest: 471
  Feeling Down: 2085
  Eating Disorder: 1939
  Sleeping Disorder: 1562
  Low Self-Esteem: 855
  Concentration Problem: 595
  Self-Harm: 1516
-----------------------------------
Class distribution in Validation dataset:
  Lack of Interest: 45
  Feeling Down: 195
  Eating Disorder: 49
  Sleeping Disorder: 45
  Low Self-Esteem: 85
  Concentration Problem: 42
  Self-Harm: 61
-----------------------------------
Class distribution in Test dataset:
  Lack of Interest: 71
  Feeling Down: 218
  Eating Disorder: 92
  Sleeping Disorder: 79
  Low Self-Esteem: 114
  Concentration Problem: 66
  Self-Harm: 81
-----------------------------------


# Model Training

In [15]:
def train_model(model, model_name, train_data, val_data, epochs, model_save_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    train_dataset = DepressionDataset(train_data, tokenizer, LABEL_MAP)
    val_dataset = DepressionDataset(val_data, tokenizer, LABEL_MAP)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.BCEWithLogitsLoss()

    best_val_f1 = 0

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        # Training Phase
        model.train()
        train_loss = 0
        train_preds, train_labels = [], []

        for batch in tqdm(train_loader):
            input_ids, attention_mask, labels = (
                batch["input_ids"].to(DEVICE),
                batch["attention_mask"].to(DEVICE),
                batch["labels"].to(DEVICE),
            )

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask, labels=labels)
            loss = criterion(outputs.logits, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_preds.extend(torch.sigmoid(outputs.logits).detach().cpu().numpy())
            train_labels.extend(labels.cpu().numpy())

        # Compute train F1 scores
        train_macro_f1, train_weighted_f1 = compute_f1_scores(train_labels, train_preds)
        print(f"Train Loss: {train_loss/len(train_loader):.4f}")
        print(f"Train Macro-F1: {train_macro_f1:.4f}, Weighted-F1: {train_weighted_f1:.4f}")

        # Validation Phase
        val_loss, val_macro_f1, val_weighted_f1 = evaluate_model(model, val_loader)
        print(f"Validation Loss: {val_loss:.4f}")
        print(f"Validation Macro-F1: {val_macro_f1:.4f}, Weighted-F1: {val_weighted_f1:.4f}")

        f1_hm = 2 * val_macro_f1 * val_weighted_f1 / (val_macro_f1 + val_weighted_f1)

        # Save best model
        if f1_hm > best_val_f1:
            best_val_f1 = f1_hm
            torch.save(model.state_dict(), f"{model_save_name}_depression_model.pth")
            print("Best model saved!")

    return model, tokenizer

def evaluate_model(model, loader):
    model.eval()
    loss = 0
    preds, labels = [], []

    criterion = nn.BCEWithLogitsLoss()

    with torch.no_grad():
        for batch in loader:
            input_ids, attention_mask, labels_batch = (
                batch["input_ids"].to(DEVICE),
                batch["attention_mask"].to(DEVICE),
                batch["labels"].to(DEVICE),
            )

            outputs = model(input_ids, attention_mask, labels=labels_batch)
            loss += criterion(outputs.logits, labels_batch).item()

            preds.extend(torch.sigmoid(outputs.logits).cpu().numpy())
            labels.extend(labels_batch.cpu().numpy())

    macro_f1, weighted_f1 = compute_f1_scores(labels, preds)
    return loss / len(loader), macro_f1, weighted_f1

def compute_f1_scores(true_labels, pred_probs, threshold=0.5):
    true_labels = np.array(true_labels)
    pred_probs = np.array(pred_probs)
    pred_labels = (pred_probs > threshold).astype(int)

    macro_f1 = f1_score(true_labels, pred_labels, average="macro", zero_division=0)
    weighted_f1 = f1_score(true_labels, pred_labels, average="weighted", zero_division=0)
    return macro_f1, weighted_f1

## OCR + BERT Model Training

In [16]:
model_name = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=NUM_CLASSES, problem_type="multi_label_classification").to(DEVICE)
model_bert, tokenizer_bert = train_model(model, model_name, train_data, val_data, EPOCHS, "bert")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.3110
Train Macro-F1: 0.4025, Weighted-F1: 0.4765
Validation Loss: 0.3648
Validation Macro-F1: 0.5411, Weighted-F1: 0.5406
Best model saved!

Epoch 2/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.1886
Train Macro-F1: 0.7333, Weighted-F1: 0.7553
Validation Loss: 0.3551
Validation Macro-F1: 0.5465, Weighted-F1: 0.5500
Best model saved!

Epoch 3/10


100%|██████████| 546/546 [03:29<00:00,  2.60it/s]


Train Loss: 0.1323
Train Macro-F1: 0.8193, Weighted-F1: 0.8390
Validation Loss: 0.3932
Validation Macro-F1: 0.6068, Weighted-F1: 0.6143
Best model saved!

Epoch 4/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.0894
Train Macro-F1: 0.8868, Weighted-F1: 0.8998
Validation Loss: 0.4475
Validation Macro-F1: 0.5812, Weighted-F1: 0.5816

Epoch 5/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.0617
Train Macro-F1: 0.9295, Weighted-F1: 0.9368
Validation Loss: 0.5221
Validation Macro-F1: 0.5756, Weighted-F1: 0.5584

Epoch 6/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.0461
Train Macro-F1: 0.9488, Weighted-F1: 0.9530
Validation Loss: 0.5211
Validation Macro-F1: 0.5966, Weighted-F1: 0.6079

Epoch 7/10


100%|██████████| 546/546 [03:28<00:00,  2.62it/s]


Train Loss: 0.0387
Train Macro-F1: 0.9589, Weighted-F1: 0.9616
Validation Loss: 0.5836
Validation Macro-F1: 0.5670, Weighted-F1: 0.5759

Epoch 8/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.0318
Train Macro-F1: 0.9655, Weighted-F1: 0.9680
Validation Loss: 0.5709
Validation Macro-F1: 0.5951, Weighted-F1: 0.5892

Epoch 9/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.0276
Train Macro-F1: 0.9677, Weighted-F1: 0.9709
Validation Loss: 0.6540
Validation Macro-F1: 0.5677, Weighted-F1: 0.5632

Epoch 10/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.0239
Train Macro-F1: 0.9725, Weighted-F1: 0.9744
Validation Loss: 0.6326
Validation Macro-F1: 0.5827, Weighted-F1: 0.5903


## OCR + BERT Model Evaluation

In [19]:
test_dataset_bert = DepressionDataset(test_data, tokenizer_bert, LABEL_MAP)
test_loader_bert = DataLoader(test_dataset_bert, batch_size=BATCH_SIZE, shuffle=False)

model_bert.load_state_dict(torch.load("bert_depression_model.pth", weights_only=True))
test_loss, test_macro_f1, test_weighted_f1 = evaluate_model(model_bert, test_loader_bert)

print(f"Final Test Evaluation:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Macro-F1: {test_macro_f1:.4f}")
print(f"Test Weighted-F1: {test_weighted_f1:.4f}")

Final Test Evaluation:
Test Loss: 0.3503
Test Macro-F1: 0.6355
Test Weighted-F1: 0.6347


## OCR + Mental-BERT Model Training

In [21]:
model_name = "mental/mental-bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=NUM_CLASSES, problem_type="multi_label_classification").to(DEVICE)
model_mental_bert, tokenizer_mental_bert = train_model(model, model_name, train_data, val_data, EPOCHS, "mental_bert")

config.json:   0%|          | 0.00/639 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/321 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]


Epoch 1/10


100%|██████████| 546/546 [03:30<00:00,  2.60it/s]


Train Loss: 0.2761
Train Macro-F1: 0.5307, Weighted-F1: 0.5681
Validation Loss: 0.3870
Validation Macro-F1: 0.5568, Weighted-F1: 0.5435
Best model saved!

Epoch 2/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.1745
Train Macro-F1: 0.7519, Weighted-F1: 0.7705
Validation Loss: 0.3960
Validation Macro-F1: 0.5722, Weighted-F1: 0.5565
Best model saved!

Epoch 3/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.1228
Train Macro-F1: 0.8406, Weighted-F1: 0.8541
Validation Loss: 0.4455
Validation Macro-F1: 0.5944, Weighted-F1: 0.5729
Best model saved!

Epoch 4/10


100%|██████████| 546/546 [03:28<00:00,  2.61it/s]


Train Loss: 0.0813
Train Macro-F1: 0.9032, Weighted-F1: 0.9124
Validation Loss: 0.5153
Validation Macro-F1: 0.5796, Weighted-F1: 0.5719

Epoch 5/10


100%|██████████| 546/546 [03:28<00:00,  2.62it/s]


Train Loss: 0.0543
Train Macro-F1: 0.9384, Weighted-F1: 0.9442
Validation Loss: 0.4937
Validation Macro-F1: 0.5966, Weighted-F1: 0.6014
Best model saved!

Epoch 6/10


100%|██████████| 546/546 [03:28<00:00,  2.61it/s]


Train Loss: 0.0428
Train Macro-F1: 0.9527, Weighted-F1: 0.9565
Validation Loss: 0.5180
Validation Macro-F1: 0.5977, Weighted-F1: 0.6065
Best model saved!

Epoch 7/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.0352
Train Macro-F1: 0.9618, Weighted-F1: 0.9636
Validation Loss: 0.5967
Validation Macro-F1: 0.5766, Weighted-F1: 0.5587

Epoch 8/10


100%|██████████| 546/546 [03:28<00:00,  2.62it/s]


Train Loss: 0.0267
Train Macro-F1: 0.9711, Weighted-F1: 0.9730
Validation Loss: 0.5951
Validation Macro-F1: 0.5902, Weighted-F1: 0.5906

Epoch 9/10


100%|██████████| 546/546 [03:28<00:00,  2.62it/s]


Train Loss: 0.0232
Train Macro-F1: 0.9756, Weighted-F1: 0.9766
Validation Loss: 0.5983
Validation Macro-F1: 0.6035, Weighted-F1: 0.6112
Best model saved!

Epoch 10/10


100%|██████████| 546/546 [03:29<00:00,  2.61it/s]


Train Loss: 0.0203
Train Macro-F1: 0.9770, Weighted-F1: 0.9783
Validation Loss: 0.6248
Validation Macro-F1: 0.5866, Weighted-F1: 0.6004


## OCR + Mental-BERT Model Evaluation

In [22]:
test_dataset_mental_bert = DepressionDataset(test_data, tokenizer_mental_bert, LABEL_MAP)
test_loader_mental_bert = DataLoader(test_dataset_mental_bert, batch_size=BATCH_SIZE, shuffle=False)

model_mental_bert.load_state_dict(torch.load("mental_bert_depression_model.pth", weights_only=True))
test_loss, test_macro_f1, test_weighted_f1 = evaluate_model(model_mental_bert, test_loader_mental_bert)

print(f"\nFinal Test Evaluation:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Macro-F1: {test_macro_f1:.4f}")
print(f"Test Weighted-F1: {test_weighted_f1:.4f}")


Final Test Evaluation:
Test Loss: 0.5416
Test Macro-F1: 0.6313
Test Weighted-F1: 0.6249
