CMD section

In [42]:
%pip install torch torchvision torchaudio

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


Import section

In [43]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import cv2

# for build nural network
import torch.nn as nn

# for adjust the weights of model
import torch.optim as optim

# for convert convert pytorch data to useable data 
# for perform mathematical operations
import numpy as np

Data Preprocess and Load

In [44]:
# declare transform for image
data_transform = transforms.Compose([
    # Ensure all images are the same size as required by the model
    transforms.Resize((256, 256)), 
    
    # convert the image to a multidimensional array which good for gpu
    transforms.ToTensor(),
    
    # convert image value from 0-255 to 0-1 for improve stability of model
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [45]:
# apply transform to dataset
datasets = datasets.ImageFolder(root='../data/train', transform=data_transform)

Split data

In [46]:
# define size of each dataset
train_size = int(0.7 * len(datasets))
val_size = int(0.2 * len(datasets))
test_size = len(datasets) - train_size - val_size

In [47]:
# split data into batches
train_dataset, val_dataset, test_dataset = random_split(datasets, [train_size, val_size, test_size])

Setup

In [48]:
# create data loaders
# each time it will load 32 images for balance between speed and memory usage
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [49]:
# Visualization of a batch
def show_batch(loader):
    batch = next(iter(loader))
    images, labels = batch
    fig, ax = plt.subplots(ncols=4, figsize=(20, 20))
    for idx, img in enumerate(images[:4]):
        ax[idx].imshow(img.permute(1, 2, 0).numpy() * 0.5 + 0.5)
        ax[idx].title.set_text(f'Label: {labels[idx].item()}')
    plt.show()

In [50]:
# model architecture
class EmotionCNN(nn.Module):
    def __init__(self):
        super(EmotionCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            # Kernel_size is a size od window that will slide over the image
            # Padding is the number of pixels that will be added to the image for the kernel to slide over
            # MaxPool2d is a function that will reduce the size of the image by taking the n maximum value of a window
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            
            nn.Conv2d(32, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        
        self.classifier = nn.Sequential(
            # Flatten is a function that will convert a multidimensional array to a one-dimensional array to be used by the linear layer
            # linear layer is a layer to make large number of input to small number of output
            nn.Flatten(),
            nn.Linear(16*32*32, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        # pass the image through the convolutional layers
        x = self.conv_layers(x)
        # pass the output of the convolutional layers to the classifier
        x = self.classifier(x)
        return x

In [51]:
# training setup
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = EmotionCNN().to(device)
# Loss function is a function that will calculate the difference between the predicted value and the actual value
criterion = nn.BCELoss()
# Optimizer is a function that will adjust the weights of the model to minimize the loss
optimizer = optim.Adam(model.parameters())

Training

In [52]:
# Training loop
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=20):
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.float().to(device).unsqueeze(1)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.float().to(device).unsqueeze(1)
                
                outputs = model(images)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
    
    print(f'Epoch {epoch+1}/{epochs} - Train Loss: {train_loss/len(train_loader)} - Val Loss: {val_loss/len(val_loader)}')

In [53]:
# Evaluation
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    true_positive = 0
    false_positive = 0
    false_negative = 0
    true_negative = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            predicted = (outputs > 0.5).float()
            
            total += labels.size(0)
            correct += (predicted.squeeze() == labels).sum().item()
            true_positive += ((predicted == 1) & (labels == 1)).sum().item()
            false_positive += ((predicted == 1) & (labels == 0)).sum().item()
            false_negative += ((predicted == 0) & (labels == 1)).sum().item()
            true_negative += ((predicted == 0) & (labels == 0)).sum().item()
    
    precision = true_positive / (true_positive + false_positive) if true_positive + false_positive > 0 else 0
    recall = true_positive / (true_positive + false_negative) if true_positive + false_negative > 0 else 0
    accuracy = correct / total

    print(f'Accuracy: {accuracy * 100:.2f}% - Precision: {precision:.2f} - Recall: {recall:.2f}')

Save & Load

In [54]:
# Save Model
def save_model(model, path):
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")

In [55]:
# Load Model
def load_model(model, path):
    model.load_state_dict(torch.load(path))
    model.eval()  # Set the model to evaluation mode
    print(f"Model loaded from {path}")
    return model

Test

In [56]:
# Prediction a single image
def predict_image(model, image_path):
    img = cv2.imread(image_path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_resized = cv2.resize(img_rgb, (256, 256))
    
    img_tensor = transforms.ToTensor()(img_resized)
    img_normalized = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(img_tensor)
    
    model.eval()
    with torch.no_grad():
        img_batch = img_normalized.unsqueeze(0).to(device)
        prediction = model(img_batch)
    
    if prediction.item() < 0.5:
        print('Happy')
    else:
        print('Sad')

Run

In [57]:
# Train the model
train_model(model, train_loader, val_loader, criterion, optimizer)

Epoch 20/20 - Train Loss: 0.004167568795310217 - Val Loss: 1.389678943157196


In [58]:
# Save the model
save_model(model, '../saved_models/emotion_cnn.pth')

Model saved to ../saved_models/emotion_cnn.pth


In [59]:
# Evaluate the model
evaluate_model(model, test_loader)

Accuracy: 75.71% - Precision: 0.47 - Recall: 0.56


In [61]:
# Load the model (for future inference or training)
model = EmotionCNN().to(device)
load_model(model, '../saved_models/emotion_cnn.pth')

Model loaded from ../saved_models/emotion_cnn.pth


  model.load_state_dict(torch.load(path))


EmotionCNN(
  (conv_layers): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=16384, out_features=256, bias=True)
    (2): ReLU()
    (3): Linear(in_features=256, out_features=1, bias=True)
    (4): Sigmoid()
  )
)

In [62]:
# Predict on a test image
predict_image(model, '../data/test/Laugh_face.jpg')

Happy
