In [None]:
import csv

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch import ToTensorV2
import timm  # For Swin Transformer and other models

# Custom Dataset Class
class EmotionDataset(Dataset):
    """Labelled face dataset backed by a DataFrame of pixel strings.

    Every row supplies a ``pixels`` entry (whitespace-separated intensities
    that are reshaped to 48x48) and an ``emotion`` entry returned as the label.
    """

    def __init__(self, df, transform=None):
        self.df, self.transform = df, transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, ids):
        """Return ``(image, label)`` for the row at position ``ids``."""
        pixel_string = self.df['pixels'].iloc[ids]
        gray = np.array(pixel_string.split(), dtype='float32').reshape(48, 48)
        # float array -> 8-bit single-channel image -> 3-channel RGB image
        picture = Image.fromarray(gray).convert('L').convert("RGB")

        if self.transform:
            # The transform is called keyword-style and returns a mapping
            # whose 'image' entry is the processed sample.
            picture = self.transform(image=np.array(picture))['image']
        return picture, self.df['emotion'].iloc[ids]


# Mixup Function
def mixup_data(x, y, alpha=1.0):
    """Blend every sample of a batch with a randomly chosen partner from the same batch.

    Args:
        x: Batch of inputs; dimension 0 is the batch dimension.
        y: Targets aligned with ``x`` along dimension 0.
        alpha: Parameter of the Beta(alpha, alpha) distribution the mixing
            coefficient is sampled from. For ``alpha <= 0`` the coefficient
            is fixed at 1, i.e. the inputs come back unmixed.

    Returns:
        ``(mixed_x, y_a, y_b, lambd)`` where ``y_a`` is ``y`` itself, ``y_b``
        holds the targets of the partner samples and ``lambd`` is the weight
        given to the original sample.
    """
    lambd = np.random.beta(alpha, alpha) if alpha > 0 else 1

    n_samples = x.size(0)
    # Random pairing of samples, placed on the same device as the inputs.
    partner = torch.randperm(n_samples).to(x.device)

    mixed_x = lambd * x + (1 - lambd) * x[partner, :]
    return mixed_x, y, y[partner], lambd

# Mixup loss function
def mixup_criterion(criterion, pred, y_a, y_b, lambd):
    """Return the ``lambd``-weighted combination of the losses against both target sets."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lambd * loss_a + (1 - lambd) * loss_b


# Augmentation pipeline for the training images (centered, aligned faces).
# Transforms run in the listed order; normalization and tensor conversion come last.
train_transform = A.Compose([
    A.Resize(224, 224, interpolation=cv2.INTER_CUBIC),  # Upscale to 224x224 with bicubic interpolation
    A.HorizontalFlip(p=0.5),  # Mirror left/right for half of the samples
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=35, p=0.7),  # Random shift / scale / rotation within the given limits
    A.GaussNoise(var_limit=(10.0, 30.0), p=0.7),  # Add Gaussian noise
    A.GaussianBlur(blur_limit=3, p=0.2),  # Occasionally blur the image
    A.RandomGamma(gamma_limit=(80, 120), p=0.7),  # Random gamma adjustment
    A.RandomBrightnessContrast(p=0.7),  # Random brightness and contrast
    # Single-value mean/std; the datasets in this notebook pass 3-channel RGB arrays,
    # so this presumably relies on broadcasting over channels — TODO confirm.
    A.Normalize(mean=(0.5,), std=(0.5,)),
    ToTensorV2(),  # Convert to tensor
])

# Deterministic pipeline for validation/test: same resize and normalization, no augmentation.
val_transform = A.Compose([
    A.Resize(224, 224, interpolation=cv2.INTER_CUBIC),  # Upscale to 224x224 with bicubic interpolation
    A.Normalize(mean=(0.5,), std=(0.5,)),
    ToTensorV2(),
])


# Factory that builds the optimizer selected by name
def get_optimizer(optimizer_name, model, lr):
    """Build a torch optimizer for ``model`` chosen by name.

    Args:
        optimizer_name: Case-insensitive key; one of ``'adam'``, ``'adamw'``
            or ``'sgd'``.
        model: Module whose ``parameters()`` will be optimized.
        lr: Learning rate handed to the optimizer.

    Returns:
        The constructed optimizer instance.

    Raises:
        ValueError: If ``optimizer_name`` is not a supported key. Failing here
            avoids handing ``None`` to a scheduler or training loop later on.
    """
    factories = {
        'adam': optim.Adam,
        'adamw': optim.AdamW,
        'sgd': optim.SGD,
    }
    key = str(optimizer_name).lower()
    if key not in factories:
        supported = ', '.join(sorted(factories))
        raise ValueError(
            f"Unsupported optimizer '{optimizer_name}'; expected one of: {supported}"
        )
    return factories[key](model.parameters(), lr=lr)


# Training loop with Mixup
def train_model_with_mixup(model, optimizer, scheduler, num_epochs=10, mixup_prob=0.5):
    """Train ``model`` for ``num_epochs`` epochs, applying mixup to a random subset of batches.

    Batches are read from the module-level ``train_loader``, tensors are moved
    to the module-level ``device`` and the loss is computed with the
    module-level ``criterion``.

    Args:
        model: Network to train; its parameters are updated in place.
        optimizer: Optimizer that steps the model's parameters.
        scheduler: Learning-rate scheduler, stepped once at the end of each epoch.
        num_epochs: Number of passes over ``train_loader``.
        mixup_prob: Per-batch probability of training on mixed samples.

    Returns:
        The same ``model`` object after training.
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        running_corrects = 0.0
        total_samples = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Decide per batch whether to train on mixed samples.
            use_mixup = np.random.rand() < mixup_prob
            if use_mixup:
                inputs, targets_a, targets_b, lambd = mixup_data(inputs, labels, alpha=1.0)
                outputs = model(inputs)
                loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lambd)
            else:
                # No mixup: plain forward pass against the true labels.
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            # Accumulate loss and correct predictions.
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            if use_mixup:
                # A mixed input carries two targets, so score the prediction
                # against both, weighted exactly like the loss. Comparing with
                # the un-mixed labels alone misreports accuracy on these batches.
                hits_a = torch.sum(preds == targets_a).item()
                hits_b = torch.sum(preds == targets_b).item()
                running_corrects += lambd * hits_a + (1 - lambd) * hits_b
            else:
                running_corrects += torch.sum(preds == labels).item()
            total_samples += labels.size(0)

        # One scheduler step per epoch.
        scheduler.step()

        # Average loss and accuracy for the epoch; an empty loader reports zeros
        # rather than raising ZeroDivisionError.
        denominator = max(total_samples, 1)
        epoch_loss = running_loss / denominator
        epoch_acc = running_corrects / denominator

        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

    return model

# Load the training CSV and hold out 5% of the rows for validation.
# The split is stratified on the 'emotion' column and seeded (random_state=42).
train_df = pd.read_csv('data_cv/train_dataset_cv.csv')
train_df, val_df = train_test_split(train_df, test_size=0.05, stratify=train_df['emotion'], random_state=42)

# Datasets: augmentation pipeline for training, deterministic pipeline for validation
train_dataset = EmotionDataset(df=train_df, transform=train_transform)
val_dataset = EmotionDataset(df=val_df, transform=val_transform)

# Data loaders: only the training loader shuffles.
# `train_loader` is read as a global by train_model_with_mixup.
train_loader = DataLoader(train_dataset, batch_size=95, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=95, shuffle=False)

# Device setup: first CUDA device when available, otherwise CPU.
# `device` is read as a global by the training and inference code.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Per-class loss weights for handling class imbalance (one weight per class, 7 in total).
# NOTE(review): the values are hard-coded; how they were derived is not shown here.
# `criterion` is read as a global by train_model_with_mixup.
class_weights = torch.FloatTensor([1.0, 2.0, 1.0, 0.5, 1.5, 0.7, 1.0]).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Model setup (Swin Transformer): pretrained weights, classification head sized for 7 classes
swin_Model = timm.create_model('swin_base_patch4_window7_224', pretrained=True, num_classes=7)
swin_Model = swin_Model.to(device)

# Optimizer and scheduler.
# get_optimizer must recognise the 'adamw' key for this to yield a usable optimizer.
# StepLR scales the learning rate by gamma=0.1 every 7 scheduler steps
# (the training loop steps the scheduler once per epoch).
optimizer_swin = get_optimizer('adamw', swin_Model, lr=1e-4)
scheduler_swin = lr_scheduler.StepLR(optimizer_swin, step_size=7, gamma=0.1)

# Train the model: 20 epochs, mixup applied to a batch with probability 0.2
swin_Model = train_model_with_mixup(swin_Model, optimizer_swin, scheduler_swin, num_epochs=20, mixup_prob=0.2)

# Validation accuracy: fraction of held-out samples whose top-scoring class matches the label
swin_Model.eval()
total = len(val_dataset)
correct = 0
with torch.no_grad():  # inference only, no gradient tracking
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = swin_Model(inputs)
        preds = torch.max(outputs, 1)[1]  # index of the largest logit per sample
        correct += (preds == labels).sum().item()

accuracy = correct / total
print(f"Validation Accuracy: {accuracy:.4f}")

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()
  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)


Epoch 1/20 - Loss: 1.5629, Accuracy: 0.3902
Epoch 2/20 - Loss: 1.3452, Accuracy: 0.4984
Epoch 3/20 - Loss: 1.2593, Accuracy: 0.5387
Epoch 4/20 - Loss: 1.1248, Accuracy: 0.5904
Epoch 5/20 - Loss: 1.0440, Accuracy: 0.6224
Epoch 6/20 - Loss: 0.9297, Accuracy: 0.6709
Epoch 7/20 - Loss: 0.8294, Accuracy: 0.6973
Epoch 8/20 - Loss: 0.5908, Accuracy: 0.7947
Epoch 9/20 - Loss: 0.5338, Accuracy: 0.8224
Epoch 10/20 - Loss: 0.4754, Accuracy: 0.8269
Epoch 11/20 - Loss: 0.4708, Accuracy: 0.8000
Epoch 12/20 - Loss: 0.4163, Accuracy: 0.8327
Epoch 13/20 - Loss: 0.4602, Accuracy: 0.8184
Epoch 14/20 - Loss: 0.4803, Accuracy: 0.8256
Epoch 15/20 - Loss: 0.4081, Accuracy: 0.8564
Epoch 16/20 - Loss: 0.3694, Accuracy: 0.8769
Epoch 17/20 - Loss: 0.3784, Accuracy: 0.8498
Epoch 18/20 - Loss: 0.3758, Accuracy: 0.8398
Epoch 19/20 - Loss: 0.3472, Accuracy: 0.8762
Epoch 20/20 - Loss: 0.3532, Accuracy: 0.8596
Validation Accuracy: 0.6580


In [None]:


# Custom Test Dataset Class without labels
class TestEmotionDataset(Dataset):
    """Unlabelled face dataset: yields ``(image, id)`` pairs instead of ``(image, label)``.

    Every row supplies a ``pixels`` entry (whitespace-separated intensities
    that are reshaped to 48x48) and an ``id`` entry that is returned alongside
    the image.
    """

    def __init__(self, df, transform=None):
        self.transform = transform
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, ids):
        """Return ``(image, id)`` for the row at position ``ids``."""
        values = self.df['pixels'].iloc[ids].split()
        face = np.array(values, dtype='float32').reshape(48, 48)
        face = Image.fromarray(face).convert('L')  # 8-bit single channel
        face = face.convert("RGB")  # replicate to three channels

        if self.transform:
            augmented = self.transform(image=np.array(face))
            face = augmented['image']
        return face, self.df['id'].iloc[ids]

# Load the unlabelled test set
test_df = pd.read_csv('data_cv/test_dataset_cv.csv')

# Test dataset and loader: reuse the deterministic validation transform and keep
# shuffle=False so batches follow the row order of the CSV.
test_dataset = TestEmotionDataset(df=test_df, transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)

# Inference on test data
def predict_test(model, test_loader, device=None):
    """Run ``model`` over ``test_loader`` and collect the predicted class per sample.

    Args:
        model: Trained network; switched to eval mode before inference.
        test_loader: Iterable yielding ``(inputs, img_ids)`` batches.
        device: Device the inputs are moved to. When omitted, the device of
            the model's first parameter is used (CPU if it has no parameters),
            so the function does not depend on a module-level ``device``.

    Returns:
        ``(ids, predictions)``: two parallel lists in loader order.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    predictions = []
    ids = []
    with torch.no_grad():
        for inputs, img_ids in test_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            predictions.extend(preds.cpu().numpy())
            # Numeric ids arrive as a tensor after batching; extending a list
            # with it would store 0-d tensors that later serialize as
            # "tensor(…)". Convert them to plain Python scalars first.
            if torch.is_tensor(img_ids):
                img_ids = img_ids.cpu().tolist()
            ids.extend(img_ids)
    return ids, predictions

# Run inference on the test set; `ids` and `predictions` are parallel lists in loader order
ids, predictions = predict_test(swin_Model, test_loader)





In [None]:
import pandas as pd
# Pair each test id with its predicted class in a two-column frame ('id', 'emotion')
df = pd.DataFrame({'id': ids, 'emotion': predictions})

# Write the frame to 'predictions.csv' (path relative to the working directory), without the index column
df.to_csv('predictions.csv', index=False)