In [1]:
import zipfile
import os

# Archive to unpack and the directory that receives its contents
zip_path = "/Users/lykofos/Desktop/clubs/GDG/MSA/assignment-3/Multimodal_dataset_assignment3/images.zip"
extract_path = "/Users/lykofos/Desktop/clubs/GDG/MSA/assignment-3/Multimodal_dataset_assignment3/images"

# Make sure the target directory is there before unpacking (no error if it already exists)
os.makedirs(extract_path, exist_ok=True)

# Unpack every archive member into the target directory
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

print("Extraction Complete! Files are in:", extract_path)


Extraction Complete! Files are in: /Users/lykofos/Desktop/clubs/GDG/MSA/assignment-3/Multimodal_dataset_assignment3/images


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from transformers import BertModel, BertTokenizer
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from PIL import Image
import os

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Tokenizer for the "bert-base-uncased" checkpoint
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
from PIL import Image, UnidentifiedImageError, ImageFile

# Pillow-wide switch: load image files whose data is truncated instead of raising on them
ImageFile.LOAD_TRUNCATED_IMAGES = True


class HumorMemeDataset(Dataset):
    """Meme dataset pairing each caption with its image and three binary labels.

    Every item is a dict with:
        ``input_ids`` / ``attention_mask``: the tokenized ``text_corrected`` value,
            padded/truncated to ``max_length`` tokens.
        ``image``: the RGB image (after ``transform`` when one is given).
        ``labels``: float tensor ``[humour, sarcasm, offensive]`` of 0/1 values.

    Args:
        csv_file: Path of the labels CSV. Column names are lower-cased on load;
            the columns ``text_corrected``, ``image_name``, ``humour``,
            ``sarcasm`` and ``offensive`` are read.
        img_dir: Directory that the ``image_name`` values are relative to.
        transform: Optional callable applied to the loaded PIL image.
        tokenizer: Optional tokenizer callable. When ``None`` (default) the
            module-level ``tokenizer`` is looked up each time an item is read.
        max_length: Length every caption is padded/truncated to (default 128).
    """

    # Size of the blank stand-in image used when an image file cannot be read.
    PLACEHOLDER_SIZE = (224, 224)

    def __init__(self, csv_file, img_dir, transform=None, tokenizer=None, max_length=128):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.tokenizer = tokenizer
        self.max_length = max_length

        # Lower-case the column names so lookups do not depend on the CSV's header casing
        self.data.columns = self.data.columns.str.lower()

        # Label mappings (binary classification). Values are matched after lower-casing.
        # NOTE(review): any label string that is not a key below silently becomes 0 in
        # __getitem__ -- confirm these keys cover the vocabulary actually used in the CSV.
        self.humour_mapping = {
            "not_humorous": 0,
            "humorous": 1,
            "funny": 1,
            "hilarious": 1
        }

        self.sarcasm_mapping = {
            "not_sarcastic": 0,
            "sarcastic": 1,
            "twisted_meaning": 1,
            "very_twisted": 1
        }

        self.offensive_mapping = {
            "not_offensive": 0,
            "slight": 1,
            "very_offensive": 1,
            "hateful_offensive": 1
        }

    def __len__(self):
        """Return the number of rows in the labels CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Build the sample dict for row ``idx``.

        An image that cannot be opened or decoded is reported on stdout and
        replaced by a blank RGB image, so every index yields a complete sample
        (returning ``None`` here would make the default DataLoader collate fail).
        """
        # Look the row up once instead of once per column
        row = self.data.iloc[idx]

        text = row["text_corrected"]
        if not isinstance(text, str):
            # A missing caption is read as NaN; str(NaN) would feed the literal
            # word "nan" to the tokenizer, so use an empty caption instead.
            text = "" if pd.isna(text) else str(text)

        img_path = os.path.join(self.img_dir, row["image_name"])

        # Unknown label strings fall back to 0
        humour_label = self.humour_mapping.get(str(row["humour"]).lower(), 0)
        sarcasm_label = self.sarcasm_mapping.get(str(row["sarcasm"]).lower(), 0)
        offensive_label = self.offensive_mapping.get(str(row["offensive"]).lower(), 0)

        # Load image; the context manager closes the underlying file once it is decoded
        try:
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
        except (UnidentifiedImageError, OSError) as e:
            print(f"Error loading image {img_path}: {e}")
            # Blank placeholder goes through the same transform as real images
            image = Image.new("RGB", self.PLACEHOLDER_SIZE)

        if self.transform:
            image = self.transform(image)

        # Tokenize text with the injected tokenizer, or the module-level one when none was given
        active_tokenizer = self.tokenizer if self.tokenizer is not None else tokenizer
        encoded_text = active_tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        return {
            # return_tensors="pt" adds a leading batch dimension of 1; drop it
            "input_ids": encoded_text["input_ids"].squeeze(0),
            "attention_mask": encoded_text["attention_mask"].squeeze(0),
            "image": image,
            "labels": torch.tensor([humour_label, sarcasm_label, offensive_label], dtype=torch.float)
        }

In [None]:
# Target image size and the per-channel statistics handed to Normalize
IMAGE_SIZE = (224, 224)
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: resize, convert to tensor, normalize
preprocessing_steps = [
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]
transform = transforms.Compose(preprocessing_steps)


In [None]:
# Locations of the labels file and the image directory
csv_path = "/content/labels.csv"
img_dir = "/content/images/images"

# Wrap the data in the dataset class and serve it in shuffled batches of 16
dataset = HumorMemeDataset(csv_path, img_dir, transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)


In [None]:
class MultimodalHumorModel(nn.Module):
    """Late-fusion classifier over BERT text features and ResNet-50 image features.

    The pooled BERT output and the flattened ResNet-50 features (classification
    head removed) are concatenated and mapped by one linear layer to
    ``num_labels`` logits. The outputs are raw logits; no sigmoid is applied here.

    Args:
        num_labels: Number of output logits (default 3: humor, sarcasm, offensive).
    """

    def __init__(self, num_labels=3):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")

        # `pretrained=True` is deprecated in torchvision in favour of `weights=`;
        # IMAGENET1K_V1 is the weight set `pretrained=True` selected. Fall back to
        # the legacy flag on torchvision versions that predate the weights enums.
        resnet_weights = getattr(models, "ResNet50_Weights", None)
        if resnet_weights is not None:
            backbone = models.resnet50(weights=resnet_weights.IMAGENET1K_V1)
        else:
            backbone = models.resnet50(pretrained=True)

        # Remove the last classification layer of ResNet
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])

        # Fusion layer sized from the backbones themselves:
        # 768 (BERT hidden size) + 2048 (input width of the removed ResNet head)
        fused_dim = self.bert.config.hidden_size + backbone.fc.in_features
        self.fc = nn.Linear(fused_dim, num_labels)

    def forward(self, input_ids, attention_mask, image):
        """Return logits of shape (batch_size, num_labels) for a batch of text/image pairs."""
        # Text features from BERT
        text_features = self.bert(input_ids, attention_mask=attention_mask).pooler_output  # (batch_size, 768)

        # Image features from ResNet
        image_features = self.resnet(image)  # (batch_size, 2048, 1, 1)
        image_features = torch.flatten(image_features, 1)  # Flatten -> (batch_size, 2048)

        # Concatenate text and image features
        combined_features = torch.cat((text_features, image_features), dim=1)

        # Predict Humor, Sarcasm, Offensive
        return self.fc(combined_features)


In [None]:
# Build the network and place its parameters on the selected device
model = MultimodalHumorModel()
model = model.to(device)

# One independent sigmoid/binary-cross-entropy term per label (multi-label classification)
criterion = nn.BCEWithLogitsLoss()

# Adam over all model parameters
optimizer = optim.Adam(params=model.parameters(), lr=2e-5)




In [None]:
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

def _label_scores(y_true, y_pred):
    """Return (macro F1, accuracy, macro precision, macro recall) for one label column."""
    return (
        f1_score(y_true, y_pred, average="macro"),
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average="macro", zero_division=0),
        recall_score(y_true, y_pred, average="macro", zero_division=0),
    )


num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    # Per-sample targets and thresholded predictions gathered over the epoch
    all_labels = []
    all_preds = []

    for batch in dataloader:
        # Move the batch onto the training device
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        images = batch["image"].to(device)
        labels = batch["labels"].to(device)  # Multi-label tensor

        # Forward pass, loss, backward pass, parameter update
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask, images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # Threshold the sigmoid probabilities at 0.5 to get 0/1 predictions
        probs = torch.sigmoid(outputs)
        preds = (probs > 0.5).int().cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.cpu().numpy())

    # Stack the per-sample rows into (num_samples, 3) arrays
    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)

    # Column 0 = humor, 1 = sarcasm, 2 = offensive
    humor_f1, humor_acc, humor_prec, humor_rec = _label_scores(all_labels[:, 0], all_preds[:, 0])
    sarcasm_f1, sarcasm_acc, sarcasm_prec, sarcasm_rec = _label_scores(all_labels[:, 1], all_preds[:, 1])
    offensive_f1, offensive_acc, offensive_prec, offensive_rec = _label_scores(all_labels[:, 2], all_preds[:, 2])
    avg_f1 = (humor_f1 + sarcasm_f1 + offensive_f1) / 3

    # Epoch report: mean batch loss, then one line per label
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(dataloader):.4f}")
    print(f"  - Humor    -> F1: {humor_f1:.4f}, Acc: {humor_acc:.4f}, Prec: {humor_prec:.4f}, Rec: {humor_rec:.4f}")
    print(f"  - Sarcasm  -> F1: {sarcasm_f1:.4f}, Acc: {sarcasm_acc:.4f}, Prec: {sarcasm_prec:.4f}, Rec: {sarcasm_rec:.4f}")
    print(f"  - Offensive-> F1: {offensive_f1:.4f}, Acc: {offensive_acc:.4f}, Prec: {offensive_prec:.4f}, Rec: {offensive_rec:.4f}")
    print(f"  - Avg F1 Score: {avg_f1:.4f}\n")




Epoch [1/5], Loss: 0.6526
  - Humor    -> F1: 0.4521, Acc: 0.5463, Prec: 0.5124, Rec: 0.5061
  - Sarcasm  -> F1: 0.4199, Acc: 0.7224, Prec: 0.6112, Rec: 0.5002
  - Offensive-> F1: 0.3955, Acc: 0.6086, Prec: 0.5093, Rec: 0.5007
  - Avg F1 Score: 0.4225





Epoch [2/5], Loss: 0.6077
  - Humor    -> F1: 0.6006, Acc: 0.6360, Prec: 0.6420, Rec: 0.6102
  - Sarcasm  -> F1: 0.4422, Acc: 0.7254, Prec: 0.6892, Rec: 0.5092
  - Offensive-> F1: 0.5098, Acc: 0.6396, Prec: 0.6405, Rec: 0.5520
  - Avg F1 Score: 0.5175





Epoch [3/5], Loss: 0.4580
  - Humor    -> F1: 0.7838, Acc: 0.7895, Prec: 0.7905, Rec: 0.7812
  - Sarcasm  -> F1: 0.7119, Acc: 0.8098, Prec: 0.8113, Rec: 0.6863
  - Offensive-> F1: 0.7607, Acc: 0.7852, Prec: 0.7885, Rec: 0.7512
  - Avg F1 Score: 0.7521





Epoch [4/5], Loss: 0.2240
  - Humor    -> F1: 0.9255, Acc: 0.9266, Prec: 0.9270, Rec: 0.9243
  - Sarcasm  -> F1: 0.9055, Acc: 0.9273, Prec: 0.9264, Rec: 0.8893
  - Offensive-> F1: 0.9186, Acc: 0.9236, Prec: 0.9247, Rec: 0.9139
  - Avg F1 Score: 0.9165





Epoch [5/5], Loss: 0.1123
  - Humor    -> F1: 0.9722, Acc: 0.9725, Prec: 0.9728, Rec: 0.9716
  - Sarcasm  -> F1: 0.9720, Acc: 0.9777, Prec: 0.9760, Rec: 0.9681
  - Offensive-> F1: 0.9657, Acc: 0.9675, Prec: 0.9678, Rec: 0.9637
  - Avg F1 Score: 0.9699



In [None]:
# Persist only the learned parameters (state dict), not the whole module object
torch.save(obj=model.state_dict(), f="multimodal_humor_model.pth")
print("Training completed and model saved successfully!")


Training completed and model saved successfully!
