In [5]:
# %%bash
# Install dependencies
!pip install numpy pandas matplotlib torch torchvision scikit-learn
!pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
   ---------------------------------------- 0.0/39.5 MB ? eta -:--:--
   --- ------------------------------------ 3.9/39.5 MB 26.2 MB/s eta 0:00:02
   ----------- ---------------------------- 11.5/39.5 MB 31.4 MB/s eta 0:00:01
   ------------------- -------------------- 19.7/39.5 MB 34.5 MB/s eta 0:00:01
   -------------------------- ------------- 26.5/39.5 MB 34.3 MB/s eta 0:00:01
   --------------------------------- ------ 33.6/39.5 MB 33.3 MB/s eta 0:00:01
   ---------------------------------------  39.1/39.5 MB 32.3 MB/s eta 0:00:01
   ---------------------------------------- 39.5/39.5 MB 30.6 MB/s eta 0:00:00
Installing collected packages: opencv-python
Successfully installed opencv-python-4.11.0.86


In [None]:
# Import required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image

import os
import cv2
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from image_dataloader import SocialSignalDataset
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [31]:
# Create training and testing dataset
# Transform images

training_path = 'data/train'
testing_path = 'data/testing'

img_transform = transforms.Compose([
    transforms.Grayscale(3), # RGB
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

train_dataset = SocialSignalDataset(root_dir=training_path, transform=img_transform)
# change batch size accordingly (my pc can only run up to 16)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True) 

# Check input size for forward feed network
for inputs, labels in train_loader:
    print("Input shape:", inputs.shape)
    print("Labels shape:", labels.shape)
    break  # We only want to check the first batch

Input shape: torch.Size([16, 3, 112, 112])
Labels shape: torch.Size([16])


In [36]:
class SocialSignalModel(nn.Module):
    def __init__(self, num_classes=2):
        super(SocialSignalModel, self).__init__()
        """"
        input_channel: 3 or 4 // test which one gets better inputs
            - 3: for RGB
            - 4: +1 for Depth from camera
        num_classes: 2 
            0 for surprise, 1 for fear
        """
        # TODO: IMPLEMENT
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1), 
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ) # Input size 128 * 14 * 14
        self.in_features = 128 * 14 * 14 #Flattened size after 3 Conv2d Layers and Pooling
        self.fc = nn.Linear(self.in_features, num_classes) # in_features = batch_size

    def forward(self, x):
        # TODO: IMPLEMENT
        # CNN --> Conv2d(N,Cin,H,W) --> (N, C_out, H_out,W_out)
        x = self.cnn(x)

        # Flatten 
        x = x.view(-1,self.in_features)

        # Fully Connected Layer
        x = self.fc(x)
        
        return x

In [38]:
# Set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the model
model = SocialSignalModel().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()  # For multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001) 

In [40]:
# From Assignment 3 Training
num_epochs =  30# set the number of epochs for training
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    # Loop over each batch
    for i, (sequences, labels) in enumerate(train_loader):
        sequences, labels = sequences.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(sequences)

        # Compute loss
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Optimize the model
        optimizer.step()

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        running_loss += loss.item()

    # Print statistics for the epoch
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

    # if epoch%5 == 0: # change the saving frequency as you want
    #     model_path = os.path.join('./model_configs', f'image_lstm_model_{epoch}.pth')
    #     torch.save(model.state_dict(), model_path)

Epoch [1/30], Loss: 0.6339, Accuracy: 65.71%
Epoch [2/30], Loss: 0.5305, Accuracy: 72.86%
Epoch [3/30], Loss: 0.5036, Accuracy: 67.14%
Epoch [4/30], Loss: 0.5028, Accuracy: 75.71%
Epoch [5/30], Loss: 0.4288, Accuracy: 81.43%
Epoch [6/30], Loss: 0.4205, Accuracy: 84.29%
Epoch [7/30], Loss: 0.3022, Accuracy: 87.14%
Epoch [8/30], Loss: 0.3248, Accuracy: 85.71%
Epoch [9/30], Loss: 0.1837, Accuracy: 94.29%
Epoch [10/30], Loss: 0.1481, Accuracy: 94.29%
Epoch [11/30], Loss: 0.0988, Accuracy: 97.14%
Epoch [12/30], Loss: 0.0677, Accuracy: 98.57%
Epoch [13/30], Loss: 0.0667, Accuracy: 98.57%
Epoch [14/30], Loss: 0.1216, Accuracy: 94.29%
Epoch [15/30], Loss: 0.0769, Accuracy: 97.14%
Epoch [16/30], Loss: 0.0266, Accuracy: 98.57%
Epoch [17/30], Loss: 0.0321, Accuracy: 98.57%
Epoch [18/30], Loss: 0.0214, Accuracy: 100.00%
Epoch [19/30], Loss: 0.0116, Accuracy: 100.00%
Epoch [20/30], Loss: 0.0066, Accuracy: 100.00%
Epoch [21/30], Loss: 0.0065, Accuracy: 100.00%
Epoch [22/30], Loss: 0.0020, Accuracy: 