## prepare dataset

In [11]:
import kagglehub

# Download the latest version of the dataset; the call returns the local
# directory that holds the files (under the kagglehub cache, per the output below).
data_root = kagglehub.dataset_download("aadityasinghal/facial-expression-dataset")

print("Path to dataset files:", data_root)

Path to dataset files: /home/ubuntu/.cache/kagglehub/datasets/aadityasinghal/facial-expression-dataset/versions/1


In [None]:
from pathlib import Path

# Wrap the download location in a Path so later cells can join
# sub-directories with the `/` operator.
data_root = Path(data_root)
data_root

PosixPath('/home/ubuntu/.cache/kagglehub/datasets/aadityasinghal/facial-expression-dataset/versions/1')

## build data transforms

In [None]:
from torchvision import transforms

# Per-channel normalisation constants shared by both pipelines
# (these are the standard ImageNet mean / std values).
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Final two steps of every pipeline: PIL image -> float tensor -> normalised tensor.
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]

# Training: resize, then apply random augmentation before tensor conversion.
train_transformer = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224),  # 224x224
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=15),
        # NOTE(review): vertical flips yield upside-down faces, which is an
        # unusual augmentation for facial expressions - confirm it is intended.
        transforms.RandomVerticalFlip(),
        *to_normalized_tensor,
    ]
)

# Validation: deterministic resize + centre crop, no augmentation.
val_transformer = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        *to_normalized_tensor,
    ]
)

## build dataset

In [None]:
from torchvision.datasets import ImageFolder

# Each split is nested one level deep: <root>/train/train and <root>/test/test.
train_dir = data_root / "train" / "train"
val_dir = data_root / "test" / "test"

train_dataset = ImageFolder(root=train_dir, transform=train_transformer)
# The dataset's "test" split serves as the validation set in this notebook.
val_dataset = ImageFolder(root=val_dir, transform=val_transformer)

In [20]:
# Show the dataset summary: number of samples, root folder and transform pipeline.
train_dataset

Dataset ImageFolder
    Number of datapoints: 28709
    Root location: /home/ubuntu/.cache/kagglehub/datasets/aadityasinghal/facial-expression-dataset/versions/1/train/train
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=True)
               RandomCrop(size=(224, 224), padding=None)
               RandomHorizontalFlip(p=0.5)
               RandomRotation(degrees=[-15.0, 15.0], interpolation=nearest, expand=False, fill=0)
               RandomVerticalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [22]:
# Class names found by ImageFolder; integer labels index into this list.
train_dataset.classes

['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

In [25]:
# Fetch the first sample once: every index access re-loads the image and
# re-runs the random transforms, so indexing twice doubles the work.
sample_image, sample_label = train_dataset[0]
sample_image.shape, sample_label

(torch.Size([3, 224, 224]), 0)

## build dataloader

In [None]:
from torch.utils.data import DataLoader

batch_size = 16

# Training batches are reshuffled every epoch; validation keeps a fixed order.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
val_dataloader = DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    shuffle=False,
)

## build model

In [30]:
# Some of the code below is copied from Ultralytics.

In [None]:
import torch
import torch.nn as nn

In [None]:
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """
    Compute the padding that gives 'same'-shaped convolution outputs.

    Args:
        k (int | sequence of int): Kernel size, scalar or per-dimension.
        p (int | sequence of int, optional): Explicit padding. When given it
            is returned unchanged.
        d (int): Dilation. For d > 1 the kernel size is first expanded to its
            effective size d * (k - 1) + 1.

    Returns:
        (int | list[int]): `p` if it was supplied, otherwise half of the
            (effective) kernel size, floored, per dimension.
    """
    if d > 1:
        # Replace the nominal kernel size with the dilated (effective) one.
        if isinstance(k, int):
            k = d * (k - 1) + 1
        else:
            k = [d * (size - 1) + 1 for size in k]

    if p is not None:
        return p

    # No padding requested: derive it from the kernel size.
    if isinstance(k, int):
        return k // 2
    return [size // 2 for size in k]

In [None]:
class Conv(nn.Module):
    """
    Convolution block: Conv2d (no bias) -> BatchNorm2d -> activation.

    Attributes:
        conv (nn.Conv2d): The convolution, created without a bias term.
        bn (nn.BatchNorm2d): Batch normalization over the output channels.
        act (nn.Module): Activation applied last (may be nn.Identity).
        default_act (nn.Module): Class-level default activation (SiLU),
            used by every instance constructed with act=True.
    """

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """
        Build the conv / batch-norm / activation layers.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size.
            s (int): Stride.
            p (int, optional): Padding; when None it is derived by autopad().
            g (int): Number of groups.
            d (int): Dilation.
            act (bool | nn.Module): True selects the default activation, an
                nn.Module is used as-is, anything else disables activation.
        """
        super().__init__()
        padding = autopad(k, p, d)
        self.conv = nn.Conv2d(
            c1,
            c2,
            kernel_size=k,
            stride=s,
            padding=padding,
            groups=g,
            dilation=d,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(c2)

        # Resolve the activation: default, caller-supplied module, or none.
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """
        Run the full block: convolution, batch normalization, activation.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def forward_fuse(self, x):
        """
        Run convolution followed by activation, skipping batch normalization.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        convolved = self.conv(x)
        return self.act(convolved)