## Dataset Download

In [1]:
!gdown 1LQoyq_ZjHJih7hxJ2yJ_OrZhOIE_BZwB

Downloading...
From (original): https://drive.google.com/uc?id=1LQoyq_ZjHJih7hxJ2yJ_OrZhOIE_BZwB
From (redirected): https://drive.google.com/uc?id=1LQoyq_ZjHJih7hxJ2yJ_OrZhOIE_BZwB&confirm=t&uuid=8aec5ebd-370f-48d8-b4b8-9474e3dd613b
To: /kaggle/working/depression_dataset_complete.zip
100%|████████████████████████████████████████| 652M/652M [00:09<00:00, 68.4MB/s]


In [2]:
!unzip depression_dataset_complete.zip > /dev/null

In [14]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import CLIPImageProcessor, CLIPVisionModel
from sklearn.metrics import precision_recall_fscore_support
import numpy as np
from tqdm import tqdm
from PIL import Image
import json
import math

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 16
LR = 2e-5
FUSION_DIM = 768
LABELS = ["Lack of Interest", "Feeling Down", "Eating Disorder",
          "Sleeping Disorder", "Low Self-Esteem", "Concentration Problem", "Self-Harm"]
LABEL_MAP = {label: i for i, label in enumerate(LABELS)}
NUM_CLASSES = len(LABELS)

## Custom Dataset

In [15]:
class ImageOnlyDepressionDataset(Dataset):
    def __init__(self, data, image_path, image_processor):
        self.data = data
        self.image_processor = image_processor
        self.img_path = image_path
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]

        image_path = os.path.join(self.img_path, sample["sample_id"] + ".jpeg")
        image = Image.open(image_path).convert("RGB")
        image_tensor = self.image_processor(image, return_tensors="pt")

        labels = torch.zeros(NUM_CLASSES)
        for category in sample["meme_depressive_categories"]:
            labels[LABEL_MAP[category]] = 1.0

        return {
            "image": image_tensor,
            "label": labels
        }

def custom_collate_fn(batch):
    labels = torch.stack([item['label'] for item in batch])

    images = {}
    for key in batch[0]['image'].keys():
        if isinstance(batch[0]['image'][key], torch.Tensor):
            images[key] = torch.stack([item['image'][key].squeeze(0) for item in batch])

    return {
        'image': images,
        'label': labels,
    }

## Model Definition

In [16]:
class ImageOnlyModel(nn.Module):
    def __init__(self, num_classes=7, fusion_dim=768):
        super(ImageOnlyModel, self).__init__()

        self.vision_encoder = CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32")
        self.vision_dim = self.vision_encoder.config.hidden_size

        fusion_dim = 1024

        self.vision_projection = nn.Linear(self.vision_dim, fusion_dim)

        self.classifier = nn.Sequential(
            nn.Linear(fusion_dim, fusion_dim),
            nn.LayerNorm(fusion_dim),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(fusion_dim, num_classes)
        )

    def forward(self, image_features):
        vision_outputs = self.vision_encoder(**image_features)
        image_cls = vision_outputs.pooler_output

        vision_features = self.vision_projection(image_cls)

        logits = self.classifier(vision_features)

        return logits

## Training Functions

In [17]:
def train_image_only_model(model, train_data, val_data, img_path, epochs, model_save_name):
    image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")

    train_dataset = ImageOnlyDepressionDataset(train_data, os.path.join(img_path, "train"), image_processor)
    val_dataset = ImageOnlyDepressionDataset(val_data, os.path.join(img_path, "val"), image_processor)

    print("Train Set Size:", len(train_dataset))
    print("Validation Set Size:", len(val_dataset))

    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        collate_fn=custom_collate_fn
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        collate_fn=custom_collate_fn
    )

    optimizer = optim.AdamW(model.parameters(), lr=LR)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=2, verbose=True
    )
    criterion = nn.BCEWithLogitsLoss()

    model = model.to(DEVICE)
    model = nn.DataParallel(model)

    best_f1 = 0
    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        model.train()
        train_loss = 0
        all_train_preds, all_train_labels = [], []

        for batch in tqdm(train_loader, desc="Training"):
            labels = batch["label"].to(DEVICE)
            image_features = {k: v.to(DEVICE) for k, v in batch["image"].items()}

            optimizer.zero_grad()
            logits = model(image_features)
            loss = criterion(logits, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            train_loss += loss.item()
            with torch.no_grad():
                predictions = (torch.sigmoid(logits) > 0.5).float().cpu().numpy()
                all_train_preds.extend(predictions)
                all_train_labels.extend(labels.cpu().numpy())

        train_loss = train_loss / len(train_loader)
        train_metrics = compute_multilabel_metrics(np.array(all_train_labels), np.array(all_train_preds))

        print(f"Train Loss: {train_loss:.4f}")
        print(f"Train Macro-F1: {train_metrics['macro_f1']:.4f}, Weighted-F1: {train_metrics['weighted_f1']:.4f}")

        val_loss, val_metrics = evaluate_image_only_model(
            model, val_loader, criterion
        )

        print(f"Validation Loss: {val_loss:.4f}")
        print(f"Validation Macro-F1: {val_metrics['macro_f1']:.4f}, Weighted-F1: {val_metrics['weighted_f1']:.4f}")

        scheduler.step(val_metrics['macro_f1'])

        scheduler.step(val_metrics['macro_f1'])
        
        f1_hm = 2 * val_metrics["macro_f1"] * val_metrics["weighted_f1"] / (val_metrics["macro_f1"] + val_metrics["weighted_f1"])
        if f1_hm > best_f1:
            best_f1 = f1_hm
            torch.save(model.state_dict(), f"{model_save_name}_depression.pth")
            print("Best model saved!")
        
    return model

def compute_multilabel_metrics(y_true, y_pred):
    macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')
    weighted_precision, weighted_recall, weighted_f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    
    per_class_precision, per_class_recall, per_class_f1, _ = precision_recall_fscore_support(y_true, y_pred, average=None)
    metrics = {
        'macro_precision': macro_precision,
        'macro_recall': macro_recall,
        'macro_f1': macro_f1,
        'weighted_precision': weighted_precision,
        'weighted_recall': weighted_recall,
        'weighted_f1': weighted_f1,
        'per_class_precision': per_class_precision,
        'per_class_recall': per_class_recall,
        'per_class_f1': per_class_f1
    }
    
    return metrics

def evaluate_image_only_model(model, loader, criterion):
    model.eval()
    val_loss = 0
    all_val_preds, all_val_labels = [], []
    
    with torch.no_grad():
        for batch in tqdm(loader, desc="Evaluating"):
            labels = batch["label"].to(DEVICE)
            image_features = {k: v.to(DEVICE) for k, v in batch["image"].items()}
            
            logits = model(image_features)
            loss = criterion(logits, labels)
            
            val_loss += loss.item()
            predictions = (torch.sigmoid(logits) > 0.5).float().cpu().numpy()
            all_val_preds.extend(predictions)
            all_val_labels.extend(labels.cpu().numpy())
    
    val_loss = val_loss / len(loader)
    val_metrics = compute_multilabel_metrics(np.array(all_val_labels), np.array(all_val_preds))
    
    print("\nPer-class metrics:")
    for i, label in enumerate(LABELS):
        print(f"{label}: F1={val_metrics['per_class_f1'][i]:.4f}, "
              f"Precision={val_metrics['per_class_precision'][i]:.4f}, "
              f"Recall={val_metrics['per_class_recall'][i]:.4f}")
    
    return val_loss, val_metrics

## Model Training

In [19]:
train_data = json.load(open("depression_train_llava_dataset.json", "r"))
val_data = json.load(open("depression_val_llava_dataset.json", "r"))
test_data = json.load(open("depression_test_llava_dataset.json", "r"))

image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")

img_path = "depressive_image"

image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")

test_dataset = ImageOnlyDepressionDataset(test_data, os.path.join(img_path, "test"), image_processor)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=custom_collate_fn
)

In [None]:
model = ImageOnlyModel(
    num_classes=NUM_CLASSES,
    fusion_dim=FUSION_DIM
)

trained_model = train_image_only_model(
    model,
    train_data,
    val_data,
    img_path,
    epochs=30,
    model_save_name="only_image"
)

Train Set Size: 8722
Validation Set Size: 359

Epoch 1/30


Training: 100%|██████████| 546/546 [03:55<00:00,  2.31it/s]


Train Loss: 0.3221
Train Macro-F1: 0.3554, Weighted-F1: 0.3887


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.35it/s]



Per-class metrics:
Lack of Interest: F1=0.2963, Precision=0.8889, Recall=0.1778
Feeling Down: F1=0.3857, Precision=0.6353, Recall=0.2769
Eating Disorder: F1=0.4681, Precision=0.4889, Recall=0.4490
Sleeping Disorder: F1=0.6842, Precision=0.8387, Recall=0.5778
Low Self-Esteem: F1=0.0000, Precision=0.0000, Recall=0.0000
Concentration Problem: F1=0.5085, Precision=0.8824, Recall=0.3571
Self-Harm: F1=0.1159, Precision=0.5000, Recall=0.0656
Validation Loss: 0.4472
Validation Macro-F1: 0.3512, Weighted-F1: 0.3270
Best model saved!



Epoch 2/30


Training: 100%|██████████| 546/546 [03:56<00:00,  2.30it/s]


Train Loss: 0.2495
Train Macro-F1: 0.5645, Weighted-F1: 0.5917


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.30it/s]



Per-class metrics:
Lack of Interest: F1=0.2308, Precision=0.8571, Recall=0.1333
Feeling Down: F1=0.3769, Precision=0.7538, Recall=0.2513
Eating Disorder: F1=0.5347, Precision=0.5192, Recall=0.5510
Sleeping Disorder: F1=0.5429, Precision=0.7600, Recall=0.4222
Low Self-Esteem: F1=0.0449, Precision=0.5000, Recall=0.0235
Concentration Problem: F1=0.4688, Precision=0.6818, Recall=0.3571
Self-Harm: F1=0.3038, Precision=0.6667, Recall=0.1967
Validation Loss: 0.4624
Validation Macro-F1: 0.3575, Weighted-F1: 0.3382
Best model saved!



Epoch 3/30


Training: 100%|██████████| 546/546 [04:00<00:00,  2.27it/s]


Train Loss: 0.1849
Train Macro-F1: 0.7005, Weighted-F1: 0.7297


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.27it/s]



Per-class metrics:
Lack of Interest: F1=0.2182, Precision=0.6000, Recall=0.1333
Feeling Down: F1=0.5209, Precision=0.6983, Recall=0.4154
Eating Disorder: F1=0.5319, Precision=0.5556, Recall=0.5102
Sleeping Disorder: F1=0.6000, Precision=0.8400, Recall=0.4667
Low Self-Esteem: F1=0.0000, Precision=0.0000, Recall=0.0000
Concentration Problem: F1=0.4941, Precision=0.4884, Recall=0.5000
Self-Harm: F1=0.3294, Precision=0.5833, Recall=0.2295
Validation Loss: 0.5183
Validation Macro-F1: 0.3849, Weighted-F1: 0.3933
Best model saved!



Epoch 4/30


Training: 100%|██████████| 546/546 [03:55<00:00,  2.32it/s]


Train Loss: 0.1378
Train Macro-F1: 0.7963, Weighted-F1: 0.8178


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.15it/s]



Per-class metrics:
Lack of Interest: F1=0.2642, Precision=0.8750, Recall=0.1556
Feeling Down: F1=0.4043, Precision=0.6552, Recall=0.2923
Eating Disorder: F1=0.3838, Precision=0.3800, Recall=0.3878
Sleeping Disorder: F1=0.5205, Precision=0.6786, Recall=0.4222
Low Self-Esteem: F1=0.2479, Precision=0.4167, Recall=0.1765
Concentration Problem: F1=0.4545, Precision=0.6250, Recall=0.3571
Self-Harm: F1=0.4160, Precision=0.4062, Recall=0.4262
Validation Loss: 0.5730
Validation Macro-F1: 0.3845, Weighted-F1: 0.3803



Epoch 5/30


Training: 100%|██████████| 546/546 [03:55<00:00,  2.32it/s]


Train Loss: 0.0999
Train Macro-F1: 0.8680, Weighted-F1: 0.8775


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.30it/s]



Per-class metrics:
Lack of Interest: F1=0.3158, Precision=0.7500, Recall=0.2000
Feeling Down: F1=0.5552, Precision=0.6643, Recall=0.4769
Eating Disorder: F1=0.4198, Precision=0.5312, Recall=0.3469
Sleeping Disorder: F1=0.5238, Precision=0.5641, Recall=0.4889
Low Self-Esteem: F1=0.1852, Precision=0.4348, Recall=0.1176
Concentration Problem: F1=0.3729, Precision=0.6471, Recall=0.2619
Self-Harm: F1=0.3721, Precision=0.3529, Recall=0.3934
Validation Loss: 0.6094
Validation Macro-F1: 0.3921, Weighted-F1: 0.4228
Best model saved!



Epoch 6/30


Training: 100%|██████████| 546/546 [03:53<00:00,  2.34it/s]


Train Loss: 0.0809
Train Macro-F1: 0.8992, Weighted-F1: 0.9048


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.33it/s]



Per-class metrics:
Lack of Interest: F1=0.2593, Precision=0.7778, Recall=0.1556
Feeling Down: F1=0.6000, Precision=0.6545, Recall=0.5538
Eating Disorder: F1=0.4096, Precision=0.5000, Recall=0.3469
Sleeping Disorder: F1=0.4082, Precision=0.3774, Recall=0.4444
Low Self-Esteem: F1=0.0842, Precision=0.4000, Recall=0.0471
Concentration Problem: F1=0.3571, Precision=0.7143, Recall=0.2381
Self-Harm: F1=0.3636, Precision=0.4737, Recall=0.2951
Validation Loss: 0.6814
Validation Macro-F1: 0.3546, Weighted-F1: 0.4051



Epoch 7/30


Training: 100%|██████████| 546/546 [03:56<00:00,  2.31it/s]


Train Loss: 0.0735
Train Macro-F1: 0.9112, Weighted-F1: 0.9155


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.31it/s]



Per-class metrics:
Lack of Interest: F1=0.2034, Precision=0.4286, Recall=0.1333
Feeling Down: F1=0.5283, Precision=0.6829, Recall=0.4308
Eating Disorder: F1=0.4466, Precision=0.4259, Recall=0.4694
Sleeping Disorder: F1=0.4722, Precision=0.6296, Recall=0.3778
Low Self-Esteem: F1=0.2920, Precision=0.3846, Recall=0.2353
Concentration Problem: F1=0.4590, Precision=0.7368, Recall=0.3333
Self-Harm: F1=0.4308, Precision=0.4058, Recall=0.4590
Validation Loss: 0.7055
Validation Macro-F1: 0.4046, Weighted-F1: 0.4323
Best model saved!



Epoch 8/30


Training: 100%|██████████| 546/546 [03:59<00:00,  2.28it/s]


Train Loss: 0.0617
Train Macro-F1: 0.9293, Weighted-F1: 0.9315


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.31it/s]



Per-class metrics:
Lack of Interest: F1=0.2222, Precision=0.6667, Recall=0.1333
Feeling Down: F1=0.5192, Precision=0.6923, Recall=0.4154
Eating Disorder: F1=0.4444, Precision=0.4068, Recall=0.4898
Sleeping Disorder: F1=0.5102, Precision=0.4717, Recall=0.5556
Low Self-Esteem: F1=0.1714, Precision=0.4500, Recall=0.1059
Concentration Problem: F1=0.4776, Precision=0.6400, Recall=0.3810
Self-Harm: F1=0.3725, Precision=0.4634, Recall=0.3115
Validation Loss: 0.8075
Validation Macro-F1: 0.3882, Weighted-F1: 0.4087



Epoch 10/30


Training: 100%|██████████| 546/546 [03:59<00:00,  2.28it/s]


Train Loss: 0.0575
Train Macro-F1: 0.9307, Weighted-F1: 0.9349


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.15it/s]



Per-class metrics:
Lack of Interest: F1=0.2143, Precision=0.5455, Recall=0.1333
Feeling Down: F1=0.5204, Precision=0.6694, Recall=0.4256
Eating Disorder: F1=0.5036, Precision=0.3889, Recall=0.7143
Sleeping Disorder: F1=0.4854, Precision=0.4310, Recall=0.5556
Low Self-Esteem: F1=0.1765, Precision=0.5294, Recall=0.1059
Concentration Problem: F1=0.3793, Precision=0.6875, Recall=0.2619
Self-Harm: F1=0.3303, Precision=0.3750, Recall=0.2951
Validation Loss: 0.8945
Validation Macro-F1: 0.3728, Weighted-F1: 0.3998



Epoch 11/30


Training: 100%|██████████| 546/546 [03:54<00:00,  2.32it/s]


Train Loss: 0.0567
Train Macro-F1: 0.9343, Weighted-F1: 0.9379


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.23it/s]



Per-class metrics:
Lack of Interest: F1=0.2642, Precision=0.8750, Recall=0.1556
Feeling Down: F1=0.5433, Precision=0.6500, Recall=0.4667
Eating Disorder: F1=0.5000, Precision=0.5106, Recall=0.4898
Sleeping Disorder: F1=0.3711, Precision=0.3462, Recall=0.4000
Low Self-Esteem: F1=0.2560, Precision=0.4000, Recall=0.1882
Concentration Problem: F1=0.5789, Precision=0.6471, Recall=0.5238
Self-Harm: F1=0.3542, Precision=0.4857, Recall=0.2787
Validation Loss: 0.8090
Validation Macro-F1: 0.4097, Weighted-F1: 0.4343



Epoch 12/30


Training: 100%|██████████| 546/546 [03:55<00:00,  2.32it/s]


Train Loss: 0.0274
Train Macro-F1: 0.9722, Weighted-F1: 0.9723


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.19it/s]



Per-class metrics:
Lack of Interest: F1=0.2222, Precision=0.6667, Recall=0.1333
Feeling Down: F1=0.6000, Precision=0.6774, Recall=0.5385
Eating Disorder: F1=0.4800, Precision=0.3947, Recall=0.6122
Sleeping Disorder: F1=0.4634, Precision=0.5135, Recall=0.4222
Low Self-Esteem: F1=0.2056, Precision=0.5000, Recall=0.1294
Concentration Problem: F1=0.4308, Precision=0.6087, Recall=0.3333
Self-Harm: F1=0.4000, Precision=0.6207, Recall=0.2951
Validation Loss: 0.9103
Validation Macro-F1: 0.4003, Weighted-F1: 0.4432



Epoch 13/30


Training: 100%|██████████| 546/546 [03:53<00:00,  2.34it/s]


Train Loss: 0.0176
Train Macro-F1: 0.9833, Weighted-F1: 0.9827


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.18it/s]



Per-class metrics:
Lack of Interest: F1=0.2642, Precision=0.8750, Recall=0.1556
Feeling Down: F1=0.5316, Precision=0.6942, Recall=0.4308
Eating Disorder: F1=0.5000, Precision=0.4328, Recall=0.5918
Sleeping Disorder: F1=0.4842, Precision=0.4600, Recall=0.5111
Low Self-Esteem: F1=0.1443, Precision=0.5833, Recall=0.0824
Concentration Problem: F1=0.4390, Precision=0.4500, Recall=0.4286
Self-Harm: F1=0.3962, Precision=0.4667, Recall=0.3443
Validation Loss: 1.0338
Validation Macro-F1: 0.3942, Weighted-F1: 0.4152



Epoch 14/30


Training: 100%|██████████| 546/546 [03:58<00:00,  2.29it/s]


Train Loss: 0.0129
Train Macro-F1: 0.9866, Weighted-F1: 0.9861


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.22it/s]



Per-class metrics:
Lack of Interest: F1=0.2593, Precision=0.7778, Recall=0.1556
Feeling Down: F1=0.4934, Precision=0.6881, Recall=0.3846
Eating Disorder: F1=0.4615, Precision=0.3971, Recall=0.5510
Sleeping Disorder: F1=0.4615, Precision=0.4565, Recall=0.4667
Low Self-Esteem: F1=0.1905, Precision=0.5000, Recall=0.1176
Concentration Problem: F1=0.4578, Precision=0.4634, Recall=0.4524
Self-Harm: F1=0.3423, Precision=0.3800, Recall=0.3115
Validation Loss: 1.0649
Validation Macro-F1: 0.3809, Weighted-F1: 0.3976



Epoch 15/30


Training: 100%|██████████| 546/546 [03:59<00:00,  2.28it/s]


Train Loss: 0.0082
Train Macro-F1: 0.9901, Weighted-F1: 0.9902


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.17it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5638, Precision=0.6690, Recall=0.4872
Eating Disorder: F1=0.5472, Precision=0.5088, Recall=0.5918
Sleeping Disorder: F1=0.5366, Precision=0.5946, Recall=0.4889
Low Self-Esteem: F1=0.1584, Precision=0.5000, Recall=0.0941
Concentration Problem: F1=0.5238, Precision=0.5238, Recall=0.5238
Self-Harm: F1=0.3853, Precision=0.4375, Recall=0.3443
Validation Loss: 0.9724
Validation Macro-F1: 0.4287, Weighted-F1: 0.4458



Epoch 16/30


Training: 100%|██████████| 546/546 [03:59<00:00,  2.28it/s]


Train Loss: 0.0063
Train Macro-F1: 0.9917, Weighted-F1: 0.9914


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.29it/s]



Per-class metrics:
Lack of Interest: F1=0.2807, Precision=0.6667, Recall=0.1778
Feeling Down: F1=0.5209, Precision=0.6983, Recall=0.4154
Eating Disorder: F1=0.4793, Precision=0.4028, Recall=0.5918
Sleeping Disorder: F1=0.4364, Precision=0.3692, Recall=0.5333
Low Self-Esteem: F1=0.1837, Precision=0.6923, Recall=0.1059
Concentration Problem: F1=0.5195, Precision=0.5714, Recall=0.4762
Self-Harm: F1=0.3750, Precision=0.5143, Recall=0.2951
Validation Loss: 1.0663
Validation Macro-F1: 0.3994, Weighted-F1: 0.4169



Epoch 17/30


Training: 100%|██████████| 546/546 [03:56<00:00,  2.31it/s]


Train Loss: 0.0060
Train Macro-F1: 0.9923, Weighted-F1: 0.9918


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.28it/s]



Per-class metrics:
Lack of Interest: F1=0.2807, Precision=0.6667, Recall=0.1778
Feeling Down: F1=0.5409, Precision=0.6992, Recall=0.4410
Eating Disorder: F1=0.4912, Precision=0.4308, Recall=0.5714
Sleeping Disorder: F1=0.4583, Precision=0.4314, Recall=0.4889
Low Self-Esteem: F1=0.2243, Precision=0.5455, Recall=0.1412
Concentration Problem: F1=0.6000, Precision=0.6316, Recall=0.5714
Self-Harm: F1=0.3519, Precision=0.4043, Recall=0.3115
Validation Loss: 1.0467
Validation Macro-F1: 0.4210, Weighted-F1: 0.4378



Epoch 18/30


Training: 100%|██████████| 546/546 [03:56<00:00,  2.31it/s]


Train Loss: 0.0050
Train Macro-F1: 0.9915, Weighted-F1: 0.9916


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.28it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5477, Precision=0.6846, Recall=0.4564
Eating Disorder: F1=0.4870, Precision=0.4242, Recall=0.5714
Sleeping Disorder: F1=0.4565, Precision=0.4468, Recall=0.4667
Low Self-Esteem: F1=0.1942, Precision=0.5556, Recall=0.1176
Concentration Problem: F1=0.5854, Precision=0.6000, Recall=0.5714
Self-Harm: F1=0.3366, Precision=0.4250, Recall=0.2787
Validation Loss: 1.0543
Validation Macro-F1: 0.4133, Weighted-F1: 0.4323



Epoch 19/30


Training: 100%|██████████| 546/546 [03:56<00:00,  2.31it/s]


Train Loss: 0.0045
Train Macro-F1: 0.9925, Weighted-F1: 0.9927


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.26it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.4800, Precision=0.6857, Recall=0.3692
Eating Disorder: F1=0.4828, Precision=0.4179, Recall=0.5714
Sleeping Disorder: F1=0.4386, Precision=0.3623, Recall=0.5556
Low Self-Esteem: F1=0.1748, Precision=0.5000, Recall=0.1059
Concentration Problem: F1=0.5385, Precision=0.5833, Recall=0.5000
Self-Harm: F1=0.3654, Precision=0.4419, Recall=0.3115
Validation Loss: 1.1174
Validation Macro-F1: 0.3951, Weighted-F1: 0.4015



Epoch 20/30


Training: 100%|██████████| 546/546 [03:52<00:00,  2.35it/s]


Train Loss: 0.0048
Train Macro-F1: 0.9921, Weighted-F1: 0.9920


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.28it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5646, Precision=0.6812, Recall=0.4821
Eating Disorder: F1=0.4865, Precision=0.4355, Recall=0.5510
Sleeping Disorder: F1=0.4681, Precision=0.4490, Recall=0.4889
Low Self-Esteem: F1=0.1942, Precision=0.5556, Recall=0.1176
Concentration Problem: F1=0.5854, Precision=0.6000, Recall=0.5714
Self-Harm: F1=0.3400, Precision=0.4359, Recall=0.2787
Validation Loss: 1.0604
Validation Macro-F1: 0.4178, Weighted-F1: 0.4400



Epoch 21/30


Training: 100%|██████████| 546/546 [03:54<00:00,  2.32it/s]


Train Loss: 0.0041
Train Macro-F1: 0.9928, Weighted-F1: 0.9927


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.35it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5017, Precision=0.7212, Recall=0.3846
Eating Disorder: F1=0.4706, Precision=0.4000, Recall=0.5714
Sleeping Disorder: F1=0.4946, Precision=0.4792, Recall=0.5111
Low Self-Esteem: F1=0.1633, Precision=0.6154, Recall=0.0941
Concentration Problem: F1=0.5714, Precision=0.5714, Recall=0.5714
Self-Harm: F1=0.3853, Precision=0.4375, Recall=0.3443
Validation Loss: 1.1261
Validation Macro-F1: 0.4104, Weighted-F1: 0.4164



Epoch 22/30


Training: 100%|██████████| 546/546 [03:55<00:00,  2.32it/s]


Train Loss: 0.0039
Train Macro-F1: 0.9928, Weighted-F1: 0.9928


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.32it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5083, Precision=0.7130, Recall=0.3949
Eating Disorder: F1=0.4912, Precision=0.4308, Recall=0.5714
Sleeping Disorder: F1=0.4466, Precision=0.3966, Recall=0.5111
Low Self-Esteem: F1=0.1782, Precision=0.5625, Recall=0.1059
Concentration Problem: F1=0.5185, Precision=0.5385, Recall=0.5000
Self-Harm: F1=0.3495, Precision=0.4286, Recall=0.2951
Validation Loss: 1.1194
Validation Macro-F1: 0.3969, Weighted-F1: 0.4107



Epoch 23/30


Training: 100%|██████████| 546/546 [03:54<00:00,  2.32it/s]


Train Loss: 0.0040
Train Macro-F1: 0.9917, Weighted-F1: 0.9919


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.27it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5304, Precision=0.7034, Recall=0.4256
Eating Disorder: F1=0.4696, Precision=0.4091, Recall=0.5510
Sleeping Disorder: F1=0.5287, Precision=0.5476, Recall=0.5111
Low Self-Esteem: F1=0.1633, Precision=0.6154, Recall=0.0941
Concentration Problem: F1=0.5610, Precision=0.5750, Recall=0.5476
Self-Harm: F1=0.3636, Precision=0.4082, Recall=0.3279
Validation Loss: 1.1131
Validation Macro-F1: 0.4146, Weighted-F1: 0.4266



Epoch 24/30


Training: 100%|██████████| 546/546 [03:53<00:00,  2.34it/s]


Train Loss: 0.0037
Train Macro-F1: 0.9935, Weighted-F1: 0.9930


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.32it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5256, Precision=0.7009, Recall=0.4205
Eating Disorder: F1=0.4696, Precision=0.4091, Recall=0.5510
Sleeping Disorder: F1=0.4742, Precision=0.4423, Recall=0.5111
Low Self-Esteem: F1=0.1600, Precision=0.5333, Recall=0.0941
Concentration Problem: F1=0.5366, Precision=0.5500, Recall=0.5238
Self-Harm: F1=0.3495, Precision=0.4286, Recall=0.2951
Validation Loss: 1.1173
Validation Macro-F1: 0.4002, Weighted-F1: 0.4160



Epoch 25/30


Training: 100%|██████████| 546/546 [03:52<00:00,  2.35it/s]


Train Loss: 0.0037
Train Macro-F1: 0.9931, Weighted-F1: 0.9930


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.32it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.4966, Precision=0.7184, Recall=0.3795
Eating Disorder: F1=0.4590, Precision=0.3836, Recall=0.5714
Sleeping Disorder: F1=0.4842, Precision=0.4600, Recall=0.5111
Low Self-Esteem: F1=0.1782, Precision=0.5625, Recall=0.1059
Concentration Problem: F1=0.5250, Precision=0.5526, Recall=0.5000
Self-Harm: F1=0.3429, Precision=0.4091, Recall=0.2951
Validation Loss: 1.1460
Validation Macro-F1: 0.3960, Weighted-F1: 0.4063



Epoch 26/30


Training: 100%|██████████| 546/546 [03:52<00:00,  2.35it/s]


Train Loss: 0.0037
Train Macro-F1: 0.9935, Weighted-F1: 0.9933


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.32it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5321, Precision=0.7094, Recall=0.4256
Eating Disorder: F1=0.4655, Precision=0.4030, Recall=0.5510
Sleeping Disorder: F1=0.4742, Precision=0.4423, Recall=0.5111
Low Self-Esteem: F1=0.1600, Precision=0.5333, Recall=0.0941
Concentration Problem: F1=0.5316, Precision=0.5676, Recall=0.5000
Self-Harm: F1=0.3529, Precision=0.4390, Recall=0.2951
Validation Loss: 1.1270
Validation Macro-F1: 0.4003, Weighted-F1: 0.4180



Epoch 27/30


Training: 100%|██████████| 546/546 [03:55<00:00,  2.32it/s]


Train Loss: 0.0035
Train Macro-F1: 0.9936, Weighted-F1: 0.9937


Evaluating: 100%|██████████| 23/23 [00:07<00:00,  3.26it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5180, Precision=0.7182, Recall=0.4051
Eating Disorder: F1=0.4667, Precision=0.3944, Recall=0.5714
Sleeping Disorder: F1=0.4742, Precision=0.4423, Recall=0.5111
Low Self-Esteem: F1=0.1600, Precision=0.5333, Recall=0.0941
Concentration Problem: F1=0.5316, Precision=0.5676, Recall=0.5000
Self-Harm: F1=0.3462, Precision=0.4186, Recall=0.2951
Validation Loss: 1.1407
Validation Macro-F1: 0.3975, Weighted-F1: 0.4121



Epoch 28/30


Training: 100%|██████████| 546/546 [03:52<00:00,  2.34it/s]


Train Loss: 0.0035
Train Macro-F1: 0.9938, Weighted-F1: 0.9935


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.34it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5084, Precision=0.7308, Recall=0.3897
Eating Disorder: F1=0.4793, Precision=0.4028, Recall=0.5918
Sleeping Disorder: F1=0.4694, Precision=0.4340, Recall=0.5111
Low Self-Esteem: F1=0.1600, Precision=0.5333, Recall=0.0941
Concentration Problem: F1=0.5316, Precision=0.5676, Recall=0.5000
Self-Harm: F1=0.3429, Precision=0.4091, Recall=0.2951
Validation Loss: 1.1525
Validation Macro-F1: 0.3968, Weighted-F1: 0.4089



Epoch 29/30


Training: 100%|██████████| 546/546 [03:53<00:00,  2.34it/s]


Train Loss: 0.0035
Train Macro-F1: 0.9937, Weighted-F1: 0.9935


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.29it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5212, Precision=0.7143, Recall=0.4103
Eating Disorder: F1=0.4754, Precision=0.3973, Recall=0.5918
Sleeping Disorder: F1=0.4742, Precision=0.4423, Recall=0.5111
Low Self-Esteem: F1=0.1600, Precision=0.5333, Recall=0.0941
Concentration Problem: F1=0.5316, Precision=0.5676, Recall=0.5000
Self-Harm: F1=0.3462, Precision=0.4186, Recall=0.2951
Validation Loss: 1.1439
Validation Macro-F1: 0.3992, Weighted-F1: 0.4141



Epoch 30/30


Training: 100%|██████████| 546/546 [03:54<00:00,  2.32it/s]


Train Loss: 0.0033
Train Macro-F1: 0.9939, Weighted-F1: 0.9943


Evaluating: 100%|██████████| 23/23 [00:06<00:00,  3.32it/s]



Per-class metrics:
Lack of Interest: F1=0.2857, Precision=0.7273, Recall=0.1778
Feeling Down: F1=0.5246, Precision=0.7273, Recall=0.4103
Eating Disorder: F1=0.4793, Precision=0.4028, Recall=0.5918
Sleeping Disorder: F1=0.4792, Precision=0.4510, Recall=0.5111
Low Self-Esteem: F1=0.1600, Precision=0.5333, Recall=0.0941
Concentration Problem: F1=0.5316, Precision=0.5676, Recall=0.5000
Self-Harm: F1=0.3429, Precision=0.4091, Recall=0.2951
Validation Loss: 1.1460
Validation Macro-F1: 0.4005, Weighted-F1: 0.4158


## Inference

In [20]:
!gdown 14hFndYWBAr5sHKU1PTb1V0jCcnJmdxj6

Downloading...
From (original): https://drive.google.com/uc?id=14hFndYWBAr5sHKU1PTb1V0jCcnJmdxj6
From (redirected): https://drive.google.com/uc?id=14hFndYWBAr5sHKU1PTb1V0jCcnJmdxj6&confirm=t&uuid=7b981daa-ae0c-4ccc-899f-a0eadcbcc2d8
To: /kaggle/working/only_image_depression.pth
100%|████████████████████████████████████████| 357M/357M [00:08<00:00, 43.8MB/s]


In [21]:
def inference(test_data, model_path):
    model = ImageOnlyModel(
        num_classes=NUM_CLASSES,
        fusion_dim=FUSION_DIM
    )
        
    model = model.to(DEVICE)

    weights = torch.load(model_path, map_location=DEVICE, weights_only=True)
    weights_single = {k.replace("module.", ""): v for k, v in weights.items()}

    model.load_state_dict(weights_single)

    image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")

    test_dataset = ImageOnlyDepressionDataset(test_data, os.path.join(img_path, "test"), image_processor)
    test_loader = DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        collate_fn=custom_collate_fn
    )

    loss, metrics = evaluate_image_only_model(
        model, test_loader, nn.BCEWithLogitsLoss()
    )

    print(f"Test Loss: {loss:.4f}")
    print(f"Test Macro-F1: {metrics['macro_f1']:.4f}, Weighted-F1: {metrics['weighted_f1']:.4f}")

In [23]:
inference(test_data, "only_image_depression.pth")

Evaluating: 100%|██████████| 33/33 [00:10<00:00,  3.14it/s]


Per-class metrics:
Lack of Interest: F1=0.2222, Precision=0.5263, Recall=0.1408
Feeling Down: F1=0.5492, Precision=0.6310, Recall=0.4862
Eating Disorder: F1=0.4113, Precision=0.5918, Recall=0.3152
Sleeping Disorder: F1=0.5037, Precision=0.6071, Recall=0.4304
Low Self-Esteem: F1=0.2152, Precision=0.3864, Recall=0.1491
Concentration Problem: F1=0.7009, Precision=0.8039, Recall=0.6212
Self-Harm: F1=0.3284, Precision=0.2750, Recall=0.4074
Test Loss: 0.7051
Test Macro-F1: 0.4187, Weighted-F1: 0.4307



