In [3]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import numpy as np
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Select the compute device: CUDA when PyTorch can see a GPU, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Custom Dataset
class SarcasmDataset(Dataset):
    """Image/label dataset driven by a CSV annotation file.

    Every CSV row describes one sample. Columns are addressed by position:
    ``IMAGE_COLUMN`` holds the image file name (joined onto ``img_dir``) and
    ``LABEL_COLUMN`` holds the numeric image label.

    Args:
        csv_file: Path of the annotation CSV.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB image before it is
            returned.

    Raises:
        ValueError: If the CSV cannot be decoded with any candidate encoding.
    """

    # Positional indices of the columns read by __getitem__
    IMAGE_COLUMN = 1
    LABEL_COLUMN = 3

    def __init__(self, csv_file, img_dir, transform=None):
        # Try different encodings, first one that decodes wins.
        # NOTE(review): iso-8859-1 maps every byte value, so it cannot raise
        # UnicodeDecodeError and 'cp1252' is never actually attempted.
        encodings = ['utf-8', 'iso-8859-1', 'cp1252']
        for encoding in encodings:
            try:
                self.data = pd.read_csv(csv_file, encoding=encoding)
                print(f"Successfully read the CSV file with {encoding} encoding.")
                break
            except UnicodeDecodeError:
                print(f"Failed to read with {encoding} encoding. Trying next...")
        else:
            # Loop finished without a break: no encoding worked
            raise ValueError("Failed to read the CSV file with any of the attempted encodings.")

        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is converted to RGB and passed through ``transform`` when
        one was supplied; the label is returned as a float32 scalar tensor.
        """
        img_path = os.path.join(self.img_dir, self.data.iloc[idx, self.IMAGE_COLUMN])
        # Open inside a context manager so the file handle is released as soon
        # as the pixels are decoded; convert() hands back a separate image, so
        # it stays usable after the source file is closed.
        with Image.open(img_path) as source:
            image = source.convert('RGB')  # Convert all images to RGB
        label = self.data.iloc[idx, self.LABEL_COLUMN]  # Image_label

        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.float32)

# Data Transformations with Augmentation
# Input resolution and per-channel normalisation statistics shared by both
# pipelines (the values match the ImageNet statistics quoted in torchvision's docs).
_INPUT_SIZE = (224, 224)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Final two steps are identical for training and evaluation
_to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]

# Training: resize, then random flip / rotation / colour jitter, then normalise
train_transform = transforms.Compose(
    [
        transforms.Resize(_INPUT_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    ]
    + _to_normalized_tensor
)

# Validation / test: deterministic resize and normalisation only
val_transform = transforms.Compose(
    [transforms.Resize(_INPUT_SIZE)] + _to_normalized_tensor
)

# Create Datasets
# All three splits live under one root and share a single image directory
_DATA_ROOT = '/kaggle/input/sarcasm-detectiondata/SarcNet Image-Text'
_IMAGE_DIR = f'{_DATA_ROOT}/Image'
_BATCH_SIZE = 32

# Only the training split uses the augmenting transform
train_dataset = SarcasmDataset(
    csv_file=f'{_DATA_ROOT}/SarcNetTrain.csv',
    img_dir=_IMAGE_DIR,
    transform=train_transform,
)
val_dataset = SarcasmDataset(
    csv_file=f'{_DATA_ROOT}/SarcNetVal.csv',
    img_dir=_IMAGE_DIR,
    transform=val_transform,
)
test_dataset = SarcasmDataset(
    csv_file=f'{_DATA_ROOT}/SarcNetTest.csv',
    img_dir=_IMAGE_DIR,
    transform=val_transform,
)

# Create DataLoaders
# Shuffle only the training data; evaluation order stays fixed
train_loader = DataLoader(train_dataset, batch_size=_BATCH_SIZE, shuffle=True)
val_loader, test_loader = (
    DataLoader(split, batch_size=_BATCH_SIZE, shuffle=False)
    for split in (val_dataset, test_dataset)
)


# MobileNetV3 Model
def get_mobilenetv3_model():
    """Build a pretrained MobileNetV3-Large with a single-logit classifier head.

    The network is loaded with pretrained weights, printed, mostly frozen,
    and its classifier is replaced by Linear -> ReLU -> Dropout(0.5) -> Linear
    ending in one output unit.

    Returns:
        The modified ``torchvision`` MobileNetV3 model (not yet moved to a device).
    """
    # `pretrained=True` is deprecated since torchvision 0.13; the explicit
    # weights enum selects the same checkpoint without the deprecation warning.
    # Older torchvision releases have no enum, so fall back to the legacy flag.
    weights_enum = getattr(models, "MobileNet_V3_Large_Weights", None)
    if weights_enum is not None:
        model = models.mobilenet_v3_large(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.mobilenet_v3_large(pretrained=True)

    # Print model structure
    print(model)

    # Input width of the stock classifier's first Linear layer, i.e. the
    # number of features produced by the convolutional backbone
    last_channel = model.classifier[0].in_features

    # Freeze everything except the last ten parameter tensors.
    # NOTE(review): the slice is taken before the classifier is replaced, so
    # it counts the stock classifier's tensors, which are discarded below;
    # confirm how many backbone tensors are meant to stay trainable.
    for param in list(model.parameters())[:-10]:
        param.requires_grad = False

    # Replace the classifier; the new layers are trainable by default
    model.classifier = nn.Sequential(
        nn.Linear(last_channel, 256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, 1)
    )

    return model

# Build the network and place it on the selected device
model = get_mobilenetv3_model()
model = model.to(device)
print(f"Model moved to {device}")

# Report the replacement head that was attached to the backbone
_head = model.classifier
print(f"Number of features in the last convolutional layer: {_head[0].in_features}")
print(f"Classifier structure: {_head}")

# Loss and Optimizer
# The model emits one raw logit per sample, so the sigmoid lives inside the loss
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# mode='max' because the scheduler is stepped with a score where higher is better.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases; confirm
# against the installed version before upgrading.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.1,
    patience=3,
    verbose=True,
)

# Training Function
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler,
                num_epochs=20,
                checkpoint_path='best_sarcasm_detection_model_mobilenetv3_gpu.pth'):
    """Train a single-logit binary classifier and checkpoint the best epoch.

    Each epoch runs one pass over ``train_loader``, evaluates on
    ``val_loader`` (loss, accuracy, precision, recall, F1 at a 0.5 threshold),
    steps ``scheduler`` with the validation F1, prints the metrics, and saves
    ``model.state_dict()`` whenever the validation F1 improves.

    Args:
        model: Network producing one logit per sample.
        train_loader: Loader yielding ``(images, labels)`` training batches.
        val_loader: Loader yielding ``(images, labels)`` validation batches.
        criterion: Loss taking ``(logits, labels)``.
        optimizer: Optimizer over the model's parameters.
        scheduler: Scheduler whose ``step`` accepts the validation F1.
        num_epochs: Number of epochs to run.
        checkpoint_path: Where the best ``state_dict`` is written.

    Returns:
        None. Progress is printed and the best weights are saved to disk.
    """
    # Follow the device the model lives on instead of relying on a notebook global
    run_device = next(model.parameters()).device
    # Start below any reachable F1 so the first epoch always writes a
    # checkpoint; otherwise a run whose F1 never exceeds 0 saves nothing.
    best_f1 = float('-inf')

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            # reshape(-1) keeps the batch axis even for a one-sample batch; a
            # bare squeeze() would give a 0-d tensor that no longer matches labels
            outputs = model(images).reshape(-1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch figure is a per-sample mean
            train_loss += loss.item() * images.size(0)

        train_loss = train_loss / len(train_loader.dataset)

        # Validation
        model.eval()
        val_loss = 0.0
        val_preds = []
        val_true = []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(run_device), labels.to(run_device)
                outputs = model(images).reshape(-1)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * images.size(0)
                val_preds.extend(torch.sigmoid(outputs).cpu().numpy())
                val_true.extend(labels.cpu().numpy())

        val_loss = val_loss / len(val_loader.dataset)
        # Threshold the probabilities into hard 0/1 predictions
        val_preds = (np.array(val_preds) > 0.5).astype(int)
        val_accuracy = accuracy_score(val_true, val_preds)
        # zero_division=0 yields the same value as the default but without
        # a warning when no positive is predicted
        val_precision = precision_score(val_true, val_preds, zero_division=0)
        val_recall = recall_score(val_true, val_preds, zero_division=0)
        val_f1 = f1_score(val_true, val_preds, zero_division=0)

        # Learning rate scheduler step
        scheduler.step(val_f1)

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
        print(f'Val Accuracy: {val_accuracy:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1: {val_f1:.4f}')

        # Save best model
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), checkpoint_path)
            print("Saved best model!")

# Train the model
# Runs for the function's default number of epochs
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
)

Using device: cuda
Failed to read with utf-8 encoding. Trying next...
Successfully read the CSV file with iso-8859-1 encoding.
Failed to read with utf-8 encoding. Trying next...
Successfully read the CSV file with iso-8859-1 encoding.
Failed to read with utf-8 encoding. Trying next...
Successfully read the CSV file with iso-8859-1 encoding.
MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          



Epoch 1/20:
Train Loss: 0.5965, Val Loss: 0.5458
Val Accuracy: 0.7273, Precision: 0.7158, Recall: 0.3036, F1: 0.4263
Saved best model!




Epoch 2/20:
Train Loss: 0.5293, Val Loss: 0.5514
Val Accuracy: 0.7377, Precision: 0.6935, Recall: 0.3839, F1: 0.4943
Saved best model!




Epoch 3/20:
Train Loss: 0.5078, Val Loss: 0.5673
Val Accuracy: 0.7183, Precision: 0.6966, Recall: 0.2768, F1: 0.3962




Epoch 4/20:
Train Loss: 0.4752, Val Loss: 0.6566
Val Accuracy: 0.7079, Precision: 0.7258, Recall: 0.2009, F1: 0.3147




Epoch 5/20:
Train Loss: 0.4386, Val Loss: 0.5718
Val Accuracy: 0.7243, Precision: 0.6242, Recall: 0.4375, F1: 0.5144
Saved best model!




Epoch 6/20:
Train Loss: 0.4157, Val Loss: 0.5940
Val Accuracy: 0.7362, Precision: 0.6424, Recall: 0.4732, F1: 0.5450
Saved best model!




Epoch 7/20:
Train Loss: 0.3700, Val Loss: 0.6336
Val Accuracy: 0.7377, Precision: 0.6600, Recall: 0.4420, F1: 0.5294




Epoch 8/20:
Train Loss: 0.3555, Val Loss: 0.6171
Val Accuracy: 0.7347, Precision: 0.6223, Recall: 0.5223, F1: 0.5680
Saved best model!




Epoch 9/20:
Train Loss: 0.3068, Val Loss: 0.6650
Val Accuracy: 0.7273, Precision: 0.5990, Recall: 0.5536, F1: 0.5754
Saved best model!




Epoch 10/20:
Train Loss: 0.3076, Val Loss: 0.6820
Val Accuracy: 0.7273, Precision: 0.6062, Recall: 0.5223, F1: 0.5612




Epoch 11/20:
Train Loss: 0.2755, Val Loss: 0.7140
Val Accuracy: 0.7139, Precision: 0.5755, Recall: 0.5446, F1: 0.5596




Epoch 12/20:
Train Loss: 0.2643, Val Loss: 0.7657
Val Accuracy: 0.7452, Precision: 0.6963, Recall: 0.4196, F1: 0.5237




Epoch 13/20:
Train Loss: 0.2402, Val Loss: 0.9490
Val Accuracy: 0.7258, Precision: 0.6613, Recall: 0.3661, F1: 0.4713




Epoch 14/20:
Train Loss: 0.1934, Val Loss: 0.8195
Val Accuracy: 0.7362, Precision: 0.6424, Recall: 0.4732, F1: 0.5450




Epoch 15/20:
Train Loss: 0.1740, Val Loss: 0.8262
Val Accuracy: 0.7273, Precision: 0.6185, Recall: 0.4777, F1: 0.5390




Epoch 16/20:
Train Loss: 0.1634, Val Loss: 0.8528
Val Accuracy: 0.7332, Precision: 0.6347, Recall: 0.4732, F1: 0.5422




Epoch 17/20:
Train Loss: 0.1527, Val Loss: 0.8525
Val Accuracy: 0.7258, Precision: 0.6124, Recall: 0.4866, F1: 0.5423




Epoch 18/20:
Train Loss: 0.1566, Val Loss: 0.8603
Val Accuracy: 0.7288, Precision: 0.6221, Recall: 0.4777, F1: 0.5404




Epoch 19/20:
Train Loss: 0.1545, Val Loss: 0.8644
Val Accuracy: 0.7288, Precision: 0.6193, Recall: 0.4866, F1: 0.5450




Epoch 20/20:
Train Loss: 0.1526, Val Loss: 0.8635
Val Accuracy: 0.7273, Precision: 0.6158, Recall: 0.4866, F1: 0.5436


In [4]:
# Load best model and evaluate on test set
# Read the checkpoint from the same relative path the training loop saves to,
# so the cell does not depend on the working directory being /kaggle/working.
best_checkpoint = 'best_sarcasm_detection_model_mobilenetv3_gpu.pth'
# weights_only=True restricts unpickling to tensors and plain containers (and
# silences the FutureWarning); map_location makes a GPU-saved file load on CPU.
state_dict = torch.load(best_checkpoint, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.eval()
test_preds = []
test_true = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        # reshape(-1) keeps a 1-D batch axis even when the last batch has one
        # sample; squeeze() would produce a 0-d tensor that extend() cannot iterate
        outputs = model(images).reshape(-1)
        test_preds.extend(torch.sigmoid(outputs).cpu().numpy())
        test_true.extend(labels.cpu().numpy())

# Threshold the probabilities into hard 0/1 predictions
test_preds = (np.array(test_preds) > 0.5).astype(int)
test_accuracy = accuracy_score(test_true, test_preds)
# zero_division=0 gives the same value as the default without the warning
test_precision = precision_score(test_true, test_preds, zero_division=0)
test_recall = recall_score(test_true, test_preds, zero_division=0)
test_f1 = f1_score(test_true, test_preds, zero_division=0)

print("Test Set Results:")
print(f'Accuracy: {test_accuracy:.4f}')
print(f'Precision: {test_precision:.4f}')
print(f'Recall: {test_recall:.4f}')
print(f'F1 Score: {test_f1:.4f}')

  model.load_state_dict(torch.load('/kaggle/working/best_sarcasm_detection_model_mobilenetv3_gpu.pth'))


Test Set Results:
Accuracy: 0.7102
Precision: 0.5636
Recall: 0.5611
F1 Score: 0.5624
