In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
#warnings.filterwarnings('ignore')
import cv2


In [None]:
import cv2
import os
import pandas as pd
from PIL import Image
import numpy as np
import imghdr
import re

def load_images_to_dataframe(image_dir):
    """Load the images found in ``image_dir`` into a DataFrame of RGB arrays.

    Every file whose extension looks like an image is decoded (OpenCV first,
    PIL as a fallback), converted to RGB, resized to 224x224 and passed
    through a fixed brightness/contrast adjustment.  Files that cannot be
    decoded or processed are reported on stdout and skipped.

    Args:
        image_dir: Directory to scan (only its direct entries are inspected).

    Returns:
        pandas.DataFrame with the columns ``image_name`` (file name including
        its extension) and ``image_data`` (the processed array).  Both columns
        exist even when no image was loaded, so a later merge on
        ``image_name`` does not fail on an empty directory.  Rows are ordered
        by file name.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.webp')
    target_size = (224, 224)
    # Gain / offset used by the brightness-contrast adjustment below.
    alpha = 1.2
    beta = 20

    data = []

    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(image_dir)):
        # Only consider files with an image extension.
        if not file_name.lower().endswith(image_extensions):
            continue

        file_path = os.path.join(image_dir, file_name)

        try:
            # Try reading the image using OpenCV
            img = cv2.imread(file_path, cv2.IMREAD_UNCHANGED)

            if img is None:
                # If OpenCV fails, try PIL; the context manager closes the
                # underlying file handle once the pixels have been copied.
                print(f"⚠️ OpenCV failed to read {file_name}, trying PIL...")
                with Image.open(file_path) as pil_img:
                    img = np.array(pil_img.convert("RGB").resize(target_size))
            else:
                # IMREAD_UNCHANGED keeps the stored channel layout, so pick
                # the conversion that matches it instead of assuming BGR
                # (a single-channel image makes COLOR_BGR2RGB raise).
                if img.ndim == 2:
                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                elif img.shape[2] == 4:
                    img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)  # drops alpha
                else:
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)

            # Apply brightness/contrast adjustment
            img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta)

            # Keep the full filename including extension (e.g., 1549.jpg)
            data.append({'image_name': file_name, 'image_data': img})

        except Exception as e:
            # Best effort: report the problem and carry on with the next file.
            print(f"⚠️ Skipping {file_name}: {e}")
            continue

    # Explicit columns keep the schema stable when `data` is empty.
    return pd.DataFrame(data, columns=['image_name', 'image_data'])

In [None]:
# Decode and preprocess the images of each split (train / test / dev) from the
# Kaggle input directories; each call yields one DataFrame row per image.
df_train = load_images_to_dataframe('/kaggle/input/misogyny-meme-detection/train-20250320T200243Z-001/train')
df_test = load_images_to_dataframe('/kaggle/input/misogyny-meme-detection/test-20250327T173652Z-001/test')
df_dev = load_images_to_dataframe('/kaggle/input/misogyny-meme-detection/dev-20250321T154338Z-001/dev')

In [None]:
# Sanity check: print the frame of loaded training images.
print(df_train)

In [None]:
# For every split: read its annotation CSV, then inner-join it with the
# decoded images on `image_name`, so only rows that have both an annotation
# and a successfully loaded image survive.
train_csv = pd.read_csv('/kaggle/input/misogyny-meme-detection/train.csv')
train = train_csv.merge(df_train, on='image_name', how='inner')

dev_csv = pd.read_csv('/kaggle/input/misogyny-meme-detection/dev.csv')
dev = dev_csv.merge(df_dev, on='image_name', how='inner')

test_csv = pd.read_csv('/kaggle/input/misogyny-meme-detection/test.csv')
test = test_csv.merge(df_test, on='image_name', how='inner')

In [None]:
# Sanity check: print the merged training frame (CSV columns plus image arrays).
print(train)

In [None]:
from PIL import Image
from torchvision import transforms
import numpy as np
import pandas as pd

# Oversample the positive class: every training row labelled as misogynous
# gets AUGMENTED_COPIES randomly perturbed copies appended to `train`.
# NOTE: this cell rebinds `train`; re-running it without re-running the merge
# cell first augments the already augmented frame again.
AUGMENTED_COPIES = 2

augmented_data = []
cnt = 0  # number of source rows that were augmented

# Random photometric perturbations; each call draws fresh random parameters.
img_augmentations = transforms.Compose([
    transforms.ColorJitter(brightness=0.5),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomPosterize(bits=4),
])

for idx, row in train.iterrows():
    image_data = row['image_data']
    text = row['transcriptions']
    label = row['labels']
    image_name = row['image_name']

    # The positive class may be stored as the integer 1 or as the string
    # 'Misogyny' (other cells of this notebook compare `labels` against that
    # string).  Accept both so the oversampling is never silently skipped.
    if not (label == 1 or label == 'Misogyny'):
        continue

    cnt += 1
    image = Image.fromarray(image_data.astype('uint8'))

    # The transforms return new images, so the same source image can be
    # reused for every augmented copy.
    for _ in range(AUGMENTED_COPIES):
        img_aug = np.array(img_augmentations(image))
        augmented_data.append({
            'image_data': img_aug,
            'labels': label,
            'image_name': image_name,
            'transcriptions': text
        })

# Append the augmented rows and renumber the index 0..n-1.
augmented_df = pd.DataFrame(augmented_data)
train = pd.concat([train, augmented_df], ignore_index=True)


In [None]:
# Display the training frame after the augmented rows were appended.
train

ResNet

In [None]:
import torch
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import numpy as np

# Custom Dataset class for images
class ImageDataset(Dataset):
    """Labelled image dataset backed by a pandas DataFrame.

    The frame must provide an ``image_data`` column (arrays that are cast to
    uint8 and handed to ``Image.fromarray``) and a ``labels`` column.  On
    construction a ``numeric_labels`` column (1 = Misogyny, 0 = anything
    else) is written into the frame that was passed in.

    Args:
        df: DataFrame with ``image_data`` and ``labels`` columns.
        transform: Optional callable applied to the PIL image of each item.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

        # Encode the labels as 0/1 regardless of how the CSV stores them.
        self.df['numeric_labels'] = self.df['labels'].apply(self._encode_label)

    @staticmethod
    def _encode_label(value):
        """Return 1 for the positive class ('Misogyny' or numeric 1), else 0."""
        if isinstance(value, str):
            return 1 if value == 'Misogyny' else 0
        # Numeric encoding: comparing to the string above would map every
        # integer label (including 1) to class 0.
        return 1 if value == 1 else 0

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Positional access: samplers produce 0..len-1, which only coincides
        # with index labels when the frame has a default RangeIndex.
        image_data = self.df['image_data'].iloc[idx]  # Image array
        label = self.df['numeric_labels'].iloc[idx]  # 0 or 1

        # Convert numpy array to PIL Image for transformation
        image = Image.fromarray(image_data.astype('uint8'))

        if self.transform:
            image = self.transform(image)

        # CrossEntropyLoss expects integer (long) class indices.
        return image, torch.tensor(int(label), dtype=torch.long)

# Compute device: CUDA when it is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Per-image preprocessing: resize to 224x224, convert to a tensor, then
# normalise each channel with the mean/std constants below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Wrap the `train` / `dev` frames and batch them; only the training batches
# are shuffled.
train_dataset = ImageDataset(train, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
dev_dataset = ImageDataset(dev, transform=transform)
dev_loader = DataLoader(dev_dataset, batch_size=16)

# Pretrained ResNet-50 whose final fully connected layer is swapped for a
# fresh 2-way linear head: Misogyny (1) vs Not-Misogyny (0).
num_classes = 2
resnet_model = models.resnet50(pretrained=True)
resnet_model.fc = nn.Linear(resnet_model.fc.in_features, num_classes)
resnet_model.to(device)

# Cross-entropy over the two logits, optimised with Adam on all parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet_model.parameters(), lr=1e-4)

# Fine-tune for 30 passes over `train_loader`; after each pass print the loss
# averaged over its batches.
for epoch in range(30):
    resnet_model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Put the batch on the same device as the model.
        images, labels = images.to(device), labels.to(device)

        # One optimisation step: clear gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = resnet_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Score the fine-tuned model on the dev split: count how many predicted
# classes match the labels and report the percentage.
resnet_model.eval()
correct = 0
total = 0

with torch.no_grad():  # inference only, no gradients needed
    for images, labels in dev_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = resnet_model(images)
        # Index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total}%")

In [None]:
from sklearn.metrics import classification_report, f1_score
import numpy as np
import torch


# Collect dev-set labels and predictions, then report accuracy, macro F1 and
# the per-class classification report.
resnet_model.eval()
y_true = []
y_pred = []

with torch.no_grad():
    for batch in dev_loader:
        # Each batch is an (images, labels) pair.
        images, labels = batch[0].to(device), batch[1].to(device)

        outputs = resnet_model(images)
        _, predicted = torch.max(outputs, 1)

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

# The metrics only need the collected values, so they are computed outside
# the no_grad context.
accuracy = 100 * sum(np.array(y_true) == np.array(y_pred)) / len(y_true)
f1 = f1_score(y_true, y_pred, average='macro')
class_report = classification_report(y_true, y_pred)

print(f"Accuracy: {accuracy:.2f}%")
print(f"F1-score: {f1:.4f}")
print("Classification Report:")
print(class_report)

In [None]:
# Rebuild the test frame: decode the test images, read the test CSV and
# inner-join the two on `image_name`.
# NOTE(review): these three statements repeat, with the same paths, work that
# earlier cells of this notebook already perform.
df_test = load_images_to_dataframe('/kaggle/input/misogyny-meme-detection/test-20250327T173652Z-001/test')
test_csv = pd.read_csv('/kaggle/input/misogyny-meme-detection/test.csv')
test = pd.merge(test_csv, df_test, on='image_name', how='inner')

# Disabled step; `text_preprocessing` is not defined in the visible part of
# this notebook.
# test['transcriptions'] = test['transcriptions'].apply(text_preprocessing)
test

In [None]:
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import torch
import numpy as np
from PIL import Image
from torchvision import transforms


class TestImageDataset(Dataset):
    """Unlabelled image dataset backed by a pandas DataFrame.

    Args:
        df: DataFrame with an ``image_data`` column holding arrays that are
            cast to uint8 and handed to ``Image.fromarray``.
        transform: Optional callable applied to the PIL image of each item.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image at position ``idx``."""
        # Positional access: samplers produce 0..len-1, which only coincides
        # with index labels when the frame has a default RangeIndex.
        image_data = self.df['image_data'].iloc[idx]
        image = Image.fromarray(image_data.astype('uint8'))

        if self.transform:
            image = self.transform(image)

        return image

# Same preprocessing as used for training: resize, tensor conversion and
# per-channel normalisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Unshuffled loader, so predictions come out in the row order of `test`.
test_dataset = TestImageDataset(test, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=16)

resnet_model.eval()
test_predictions = []

with torch.no_grad():
    for batch in test_loader:
        images = batch.to(device)
        outputs = resnet_model(images)
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        test_predictions += list(predicted.cpu().numpy())

test_predictions = np.array(test_predictions)

# Keep the predictions next to the test rows, then export only the image
# name and the predicted class.
test['predictions'] = test_predictions
predictions_df = test[['image_name', 'predictions']].copy()

predictions_df.to_csv('ResNet_Predictions.csv', index=False)
print("Predictions saved")

In [None]:
# Read the ResNet prediction file back and show its first rows to confirm it
# was written as expected.
# NOTE(review): the file is written with the relative path
# 'ResNet_Predictions.csv'; this read assumes the working directory is
# /kaggle/working.
df_loaded = pd.read_csv('/kaggle/working/ResNet_Predictions.csv')
print(df_loaded.head())  # Display the first few rows of the CSV

DenseNet

In [None]:
import torch
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import numpy as np
from sklearn.metrics import classification_report, f1_score
import pandas as pd

# Custom Dataset class for images (same as before)
class ImageDataset(Dataset):
    """Labelled image dataset backed by a pandas DataFrame.

    The frame must provide an ``image_data`` column (arrays that are cast to
    uint8 and handed to ``Image.fromarray``) and a ``labels`` column.  On
    construction a ``numeric_labels`` column (1 = Misogyny, 0 = anything
    else) is written into the frame that was passed in.

    Args:
        df: DataFrame with ``image_data`` and ``labels`` columns.
        transform: Optional callable applied to the PIL image of each item.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        # Encode the labels as 0/1 regardless of how the CSV stores them.
        self.df['numeric_labels'] = self.df['labels'].apply(self._encode_label)

    @staticmethod
    def _encode_label(value):
        """Return 1 for the positive class ('Misogyny' or numeric 1), else 0."""
        if isinstance(value, str):
            return 1 if value == 'Misogyny' else 0
        # Numeric encoding: comparing to the string above would map every
        # integer label (including 1) to class 0.
        return 1 if value == 1 else 0

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Positional access: samplers produce 0..len-1, which only coincides
        # with index labels when the frame has a default RangeIndex.
        image_data = self.df['image_data'].iloc[idx]
        label = self.df['numeric_labels'].iloc[idx]
        image = Image.fromarray(image_data.astype('uint8'))
        if self.transform:
            image = self.transform(image)
        # CrossEntropyLoss expects integer (long) class indices.
        return image, torch.tensor(int(label), dtype=torch.long)

# Test Dataset class (same as before)
class TestImageDataset(Dataset):
    """Unlabelled image dataset backed by a pandas DataFrame.

    Args:
        df: DataFrame with an ``image_data`` column holding arrays that are
            cast to uint8 and handed to ``Image.fromarray``.
        transform: Optional callable applied to the PIL image of each item.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image at position ``idx``."""
        # Positional access: samplers produce 0..len-1, which only coincides
        # with index labels when the frame has a default RangeIndex.
        image_data = self.df['image_data'].iloc[idx]
        image = Image.fromarray(image_data.astype('uint8'))
        if self.transform:
            image = self.transform(image)
        return image

# Compute device: CUDA when it is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Per-image preprocessing: resize to 224x224, convert to a tensor, then
# normalise each channel with the mean/std constants below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Wrap the `train` / `dev` frames and batch them; only the training batches
# are shuffled.
train_dataset = ImageDataset(train, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
dev_dataset = ImageDataset(dev, transform=transform)
dev_loader = DataLoader(dev_dataset, batch_size=16)

# Pretrained DenseNet-121; its head lives in `.classifier`, which is replaced
# by a fresh 2-way linear layer with the same number of input features.
num_classes = 2
densenet_model = models.densenet121(pretrained=True)
num_features = densenet_model.classifier.in_features
densenet_model.classifier = nn.Linear(num_features, num_classes)
densenet_model.to(device)

# Cross-entropy over the two logits, optimised with Adam on all parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(densenet_model.parameters(), lr=1e-4)

# Fine-tune for 30 passes over `train_loader`; after each pass print the loss
# averaged over its batches.
for epoch in range(30):
    densenet_model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Put the batch on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step: clear gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = densenet_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Score the fine-tuned DenseNet on the dev split: accuracy, macro F1 and the
# per-class classification report.
densenet_model.eval()
y_true = []
y_pred = []
correct = 0
total = 0

with torch.no_grad():  # inference only, no gradients needed
    for images, labels in dev_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = densenet_model(images)
        # Index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

# The metrics only need the collected values, so they are computed outside
# the no_grad context.
accuracy = 100 * correct / total
f1 = f1_score(y_true, y_pred, average='macro')
print(f"Accuracy: {accuracy:.2f}%")
print(f"F1-score: {f1:.4f}")
print(classification_report(y_true, y_pred))

# Predict the class of every test image with the fine-tuned DenseNet.
# The loader is not shuffled, so predictions follow the row order of `test`.
test_dataset = TestImageDataset(test, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=16)

densenet_model.eval()
test_predictions = []

with torch.no_grad():
    for batch in test_loader:
        images = batch.to(device)
        outputs = densenet_model(images)
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        test_predictions += list(predicted.cpu().numpy())

# Export the image name together with its predicted class.
predictions_df = test[['image_name']].assign(predictions=test_predictions)
predictions_df.to_csv('DenseNet_Predictions.csv', index=False)
print("DenseNet predictions saved")

In [None]:
# Read the DenseNet prediction file back and show its first rows to confirm
# it was written as expected.
# NOTE(review): the file is written with the relative path
# 'DenseNet_Predictions.csv'; this read assumes the working directory is
# /kaggle/working.
df_loaded = pd.read_csv('/kaggle/working/DenseNet_Predictions.csv')
print(df_loaded.head())  # Display the first few rows of the CSV

InceptionV3

In [None]:
import torch
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import numpy as np
from sklearn.metrics import classification_report, f1_score
import pandas as pd

# Custom Dataset class (same as before)
class ImageDataset(Dataset):
    """Labelled image dataset backed by a pandas DataFrame.

    The frame must provide an ``image_data`` column (arrays that are cast to
    uint8 and handed to ``Image.fromarray``) and a ``labels`` column.  On
    construction a ``numeric_labels`` column (1 = Misogyny, 0 = anything
    else) is written into the frame that was passed in.

    Args:
        df: DataFrame with ``image_data`` and ``labels`` columns.
        transform: Optional callable applied to the PIL image of each item.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        # Encode the labels as 0/1 regardless of how the CSV stores them.
        self.df['numeric_labels'] = self.df['labels'].apply(self._encode_label)

    @staticmethod
    def _encode_label(value):
        """Return 1 for the positive class ('Misogyny' or numeric 1), else 0."""
        if isinstance(value, str):
            return 1 if value == 'Misogyny' else 0
        # Numeric encoding: comparing to the string above would map every
        # integer label (including 1) to class 0.
        return 1 if value == 1 else 0

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Positional access: samplers produce 0..len-1, which only coincides
        # with index labels when the frame has a default RangeIndex.
        image_data = self.df['image_data'].iloc[idx]
        label = self.df['numeric_labels'].iloc[idx]
        image = Image.fromarray(image_data.astype('uint8'))
        if self.transform:
            image = self.transform(image)
        # CrossEntropyLoss expects integer (long) class indices.
        return image, torch.tensor(int(label), dtype=torch.long)

# Test Dataset (same as before)
class TestImageDataset(Dataset):
    """Unlabelled image dataset backed by a pandas DataFrame.

    Args:
        df: DataFrame with an ``image_data`` column holding arrays that are
            cast to uint8 and handed to ``Image.fromarray``.
        transform: Optional callable applied to the PIL image of each item.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image at position ``idx``."""
        # Positional access: samplers produce 0..len-1, which only coincides
        # with index labels when the frame has a default RangeIndex.
        image_data = self.df['image_data'].iloc[idx]
        image = Image.fromarray(image_data.astype('uint8'))
        if self.transform:
            image = self.transform(image)
        return image

# Preprocessing for InceptionV3: images are resized to 299x299 (instead of
# 224x224), converted to tensors and normalised per channel.
transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Wrap the `train` / `dev` frames and batch them; only the training batches
# are shuffled.
train_dataset = ImageDataset(train, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
dev_dataset = ImageDataset(dev, transform=transform)
dev_loader = DataLoader(dev_dataset, batch_size=16)

# Load the pretrained InceptionV3 with its default auxiliary classifier.
# Passing aux_logits=False together with pretrained weights is rejected by
# newer torchvision releases ("The parameter 'aux_logits' expected value True
# but got False instead"), because the checkpoint contains the auxiliary head.
inception_model = models.inception_v3(pretrained=True)

# Detach the auxiliary head after the weights are loaded, so the forward pass
# returns a plain logits tensor in training mode as well.
inception_model.aux_logits = False
inception_model.AuxLogits = None

num_classes = 2

# Replace the final fully connected layer with a fresh 2-way linear head.
num_features = inception_model.fc.in_features
inception_model.fc = nn.Linear(num_features, num_classes)

# Cross-entropy over the two logits, optimised with Adam on all parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(inception_model.parameters(), lr=1e-4)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
inception_model.to(device)

# Fine-tune for 30 passes over `train_loader`; after each pass print the loss
# averaged over its batches.
for epoch in range(30):
    inception_model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Put the batch on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step: clear gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = inception_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Score the fine-tuned InceptionV3 on the dev split: accuracy, macro F1 and
# the per-class classification report.
inception_model.eval()
y_true = []
y_pred = []
correct = 0
total = 0

with torch.no_grad():  # inference only, no gradients needed
    for images, labels in dev_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = inception_model(images)
        # Index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

# The metrics only need the collected values, so they are computed outside
# the no_grad context.
accuracy = 100 * correct / total
f1 = f1_score(y_true, y_pred, average='macro')
print(f"Accuracy: {accuracy:.2f}%")
print(f"F1-score: {f1:.4f}")
print(classification_report(y_true, y_pred))

# Predict the class of every test image with the fine-tuned InceptionV3.
# The loader is not shuffled, so predictions follow the row order of `test`.
test_dataset = TestImageDataset(test, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=16)

test_predictions = []

with torch.no_grad():
    for batch in test_loader:
        images = batch.to(device)
        outputs = inception_model(images)
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        test_predictions += list(predicted.cpu().numpy())

# Export the image name together with its predicted class.
predictions_df = test[['image_name']].assign(predictions=test_predictions)
predictions_df.to_csv('InceptionV3_Predictions.csv', index=False)
print("InceptionV3 predictions saved")