Resnet Implementation

In [27]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models, transforms
import pandas as pd
import numpy as np
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
# from util import split_dataset

In [22]:
# load and preprocess data
data = pd.read_csv('fer2013.csv')
width, height = 48, 48
datapoints = data['pixels'].tolist()
faces = []
for pixel_sequence in datapoints:
    face = [int(pixel) for pixel in pixel_sequence.split(' ')]
    face = np.asarray(face).reshape(width, height)
    faces.append(face.astype('float32'))
faces = np.asarray(faces)
faces = np.expand_dims(faces, -1)
emotions = data['emotion'].values

print(faces.shape)
print(emotions)

(35887, 48, 48, 1)
[0 0 2 ... 0 3 2]


In [23]:
# split data into training, validation, and test sets
# Determine the minimum number of samples across all emotion classes
class_counts = np.bincount(emotions)
min_samples = np.min(class_counts)

# Undersample the classes to match the minimum number of samples
undersampler = RandomUnderSampler(sampling_strategy={i: min_samples for i in range(len(class_counts))}, random_state=42)
X_undersampled, y_undersampled = undersampler.fit_resample(faces.reshape(len(faces), -1), emotions)
X_undersampled = X_undersampled.reshape(-1, 1, 48, 48)  # Reshape back to the original shape

# Split the undersampled data into training, testing, and evaluation sets
train_ratio = 0.8
test_ratio = 0.1
eval_ratio = 0.1

# First, split the data into training and remaining sets
X_train, X_rem, y_train, y_rem = train_test_split(X_undersampled, y_undersampled, test_size=1-train_ratio, stratify=y_undersampled, random_state=42)

# Then, split the remaining data into testing and evaluation sets
test_ratio_adjusted = test_ratio / (test_ratio + eval_ratio)
X_test, X_eval, y_test, y_eval = train_test_split(X_rem, y_rem, test_size=test_ratio_adjusted, stratify=y_rem, random_state=42)

# Oversample the minority classes in the training set
oversampler = RandomOverSampler(sampling_strategy='not majority', random_state=42)
X_train_oversampled, y_train_oversampled = oversampler.fit_resample(X_train.reshape(len(X_train), -1), y_train)
X_train_oversampled = X_train_oversampled.reshape(-1, 1, 48, 48)  # Reshape back to the original shape

In [24]:
# define data transformations
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

test_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

In [25]:
# data loaders!
train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train_oversampled), torch.tensor(y_train_oversampled))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = torch.utils.data.TensorDataset(torch.tensor(X_test), torch.tensor(y_test))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

eval_dataset = torch.utils.data.TensorDataset(torch.tensor(X_eval), torch.tensor(y_eval))
eval_loader = DataLoader(eval_dataset, batch_size=64, shuffle=False)

In [30]:
# define the model
model = models.resnet50(pretrained=False)
num_features = model.fc.in_features
num_classes = 7
model.fc = nn.Linear(num_features, num_classes)

# move to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [31]:
# Training loop
max_epochs = 100
eps = 1e-7
iters = 0
train_losses = []

while iters < max_epochs:
    model.train()
    train_loss = 0.0
    train_correct = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        train_correct += (preds == labels).sum().item()

    train_loss = train_loss / len(train_loader.dataset)
    train_acc = train_correct / len(train_loader.dataset)
    train_losses.append(train_loss)

    model.eval()
    test_loss = 0.0
    test_correct = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            test_correct += (preds == labels).sum().item()

            

    print(f'Epoch {iters+1}/{max_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}')


RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 1, 48, 48] to have 3 channels, but got 1 channels instead