In [566]:
import torch
import torch.optim as optim
import torch.nn as nn
import lightning as L
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, Dataset
from torchmetrics.classification import MulticlassAccuracy, MulticlassF1Score
import torch.nn.functional as F
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
from sklearn.model_selection import train_test_split
import random
torch.set_float32_matmul_precision('medium')

In [567]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the CPU generator, the current CUDA device and all CUDA devices), then
    sets cuDNN to deterministic mode with benchmarking disabled.

    Args:
        seed: Value handed unchanged to each seeding function.
    """
    seeders = (
        random.seed,                 # Python standard library
        np.random.seed,              # NumPy global generator
        torch.manual_seed,           # PyTorch CPU generator
        torch.cuda.manual_seed,      # current CUDA device
        torch.cuda.manual_seed_all,  # every CUDA device (multi-GPU)
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True  # request deterministic cuDNN algorithms
    cudnn.benchmark = False     # no autotuning of algorithms between runs


# Seed once up front so the following cells start from a known RNG state.
set_seed(42)

In [568]:
# Per-channel statistics fed to every Normalize transform in this notebook
# (three values each, matching the RGB images produced by ExpDataset).
# NOTE(review): presumably measured on this dataset — confirm provenance.
dataset_mean = [0.54148953, 0.42486119, 0.37428667]
dataset_std = [0.23021227, 0.2072772, 0.1976669 ]

class ExpDataset(Dataset):
    """Image/label dataset backed by a folder of JPEG files and a CSV index.

    Every CSV row must provide an ``image_name`` column (file name without
    the ``.jpg`` extension) and an ``expression_label`` column.

    Args:
        imgs_path: Directory that contains the ``<image_name>.jpg`` files.
        csv_path: Path of the CSV file, read with ``pandas.read_csv``.
        transform: Optional callable applied to the RGB ``PIL.Image``. When
            it is omitted (or falsy) a default pipeline is applied instead:
            resize to 224x224, convert to a tensor, normalise with
            ``dataset_mean`` / ``dataset_std``.
    """

    def __init__(self, imgs_path, csv_path, transform=None):
        super().__init__()
        self.imgs_path = imgs_path
        self.csv_data = pd.read_csv(csv_path)
        self.transform = transform
        # Fallback pipeline, built once here instead of on every item fetch.
        # Resize itself is the callable step; its `.interpolation` attribute
        # is only an enum value and cannot be used as a transform.
        self._default_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=dataset_mean, std=dataset_std),
        ])

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.csv_data)

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index: Positional row index into the CSV (used with ``iloc``).

        Returns:
            ``(img, label)`` where ``img`` is the transformed image and
            ``label`` is the row's ``expression_label`` value.
        """
        record = self.csv_data.iloc[index]

        img_name = record['image_name']
        img_path = os.path.join(self.imgs_path, img_name + ".jpg")
        # convert() returns a new in-memory image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(img_path) as img_file:
            img = img_file.convert("RGB")

        # Use the caller-supplied transform when there is one, otherwise the
        # default resize/tensor/normalise pipeline.
        transform = self.transform if self.transform else self._default_transform
        img = transform(img)

        label = record['expression_label']
        return img, label

# Dataset location on disk (paths are relative to the working directory).
imgs_path = "../expW/origin_cleaned"
labels_path = "../expW/new_label.csv"

# Untransformed instance: only its length and label column are used here.
exp_dataset = ExpDataset(imgs_path, labels_path)
N = len(exp_dataset)
dataset_range = range(N)

# Split row positions 80/20, stratified on the expression label so both
# subsets keep the label distribution; random_state fixes the shuffle.
train_indices, val_indices = train_test_split(
    dataset_range,
    stratify=exp_dataset.csv_data['expression_label'],
    train_size=0.8,
    random_state=42,
)

In [569]:
# Training pipeline: light augmentation, then tensor conversion and
# normalisation. No Resize step is applied.
# NOTE(review): presumably the files in origin_cleaned already share one
# size, otherwise default batching would fail — confirm.
train_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.25),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
        ),
        transforms.RandomAffine(
            degrees=2.5,
            translate=(0.05, 0.05),
            scale=(1.05, 1.05),
        ),
        transforms.ToTensor(),
        transforms.Normalize(mean=dataset_mean, std=dataset_std),
    ]
)

# Validation pipeline: no augmentation, only tensor conversion + normalisation.
val_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=dataset_mean, std=dataset_std),
    ]
)

# Each split wraps its own ExpDataset so it gets its own transform pipeline;
# Subset restricts the dataset to the row positions chosen by the split.
train_dataset = Subset(
    ExpDataset(imgs_path, labels_path, transform=train_transforms),
    train_indices,
)
val_dataset = Subset(
    ExpDataset(imgs_path, labels_path, transform=val_transforms),
    val_indices,
)

# Fetch one training sample; the bare `img` makes the cell display the tensor.
img, label = train_dataset[36]
img

tensor([[[-2.3521, -2.1307, -2.1307,  ..., -2.1307, -2.1307, -2.1307],
         [-2.3521, -2.1307, -2.1307,  ..., -2.1307, -2.1307, -2.1307],
         [-2.3521, -2.1307, -2.1307,  ..., -2.1307, -2.1307, -2.1307],
         ...,
         [-2.3521, -2.3521, -2.3521,  ..., -2.1307, -2.1307, -2.1307],
         [-2.3521, -2.3521, -2.3521,  ..., -2.1307, -2.1307, -2.1307],
         [-2.3521, -2.3521, -2.3521,  ..., -2.1307, -2.1307, -2.1307]],

        [[-2.0497, -1.8038, -1.8038,  ..., -1.8038, -1.8038, -1.8038],
         [-2.0497, -1.8038, -1.8038,  ..., -1.8038, -1.8038, -1.8038],
         [-2.0497, -1.8038, -1.8038,  ..., -1.8038, -1.8038, -1.8038],
         ...,
         [-2.0497, -2.0497, -2.0497,  ..., -1.8038, -1.8038, -1.8038],
         [-2.0497, -2.0497, -2.0497,  ..., -1.8038, -1.8038, -1.8038],
         [-2.0497, -2.0497, -2.0497,  ..., -1.8038, -1.8038, -1.8038]],

        [[-1.8935, -1.6356, -1.6356,  ..., -1.6356, -1.6356, -1.6356],
         [-1.8935, -1.6356, -1.6356,  ..., -1

In [570]:
# Batches of 32 samples; only the training loader reshuffles every epoch,
# validation keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [571]:
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers joined with an additive shortcut.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution and of the projection
            shortcut; the second convolution always uses stride 1.
        padding: Padding used by both 3x3 convolutions.
        dropout: When true, ``Dropout2d(0.05)`` follows each conv/batch-norm
            stage; otherwise those slots are ``nn.Identity``.
    """

    def __init__(self, in_channels, out_channels, stride=1, padding=1, dropout=False):
        super().__init__()

        def make_dropout():
            # Either a light spatial dropout or a pass-through placeholder.
            return nn.Dropout2d(0.05) if dropout else nn.Identity()

        # Main path, first stage: this convolution carries the block's stride.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=padding, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.dropout1 = make_dropout()
        self.relu = nn.ReLU(inplace=True)

        # Main path, second stage: stride is fixed to 1.
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=padding, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.dropout2 = make_dropout()

        # Shortcut path: a 1x1 conv + batch-norm projection whenever the
        # channel count or the stride differs, a plain identity otherwise.
        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_channels, out_channels,
                    kernel_size=1, stride=stride, padding=0, bias=False,
                ),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Run the main path, add the shortcut of ``x`` and apply ReLU."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout1(out)

        out = self.dropout2(self.bn2(self.conv2(out)))

        out += self.shortcut(x)
        return self.relu(out)


In [572]:
class Resnet3(nn.Module):
    """Small ResNet-style classifier: a stem, three residual stages, a head.

    Args:
        num_classes: Size of the final linear layer's output.
        in_channels: Number of channels of the input images.
    """

    def __init__(self, num_classes=7, in_channels=3):
        super().__init__()

        # Stem: strided 7x7 convolution followed by a strided max-pool, i.e.
        # two halvings of the spatial size (224 -> 112 -> 56 for 224x224 input).
        stem_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=16,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )
        self.initial_layer = nn.Sequential(
            stem_conv,
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        # Residual stages: each doubles the channels and uses stride 2
        # (56 -> 28 -> 14 -> 7 for 224x224 input). Only the last uses dropout.
        self.layer1 = ResidualBlock(in_channels=16, out_channels=32, stride=2)
        self.layer2 = ResidualBlock(in_channels=32, out_channels=64, stride=2)
        self.layer3 = ResidualBlock(in_channels=64, out_channels=128, stride=2, dropout=True)

        # Head: global average pooling to 1x1, then a linear classifier.
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the class logits produced for the image batch ``x``."""
        features = self.initial_layer(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)

        pooled = self.avg_pool(features)
        return self.fc(torch.flatten(pooled, 1))

In [None]:
from torchmetrics.classification import MulticlassAccuracy 

class Resnet3Lightning(L.LightningModule):
    """Lightning wrapper that trains ``Resnet3`` with cross-entropy loss.

    Args:
        num_classes: Number of target classes; sizes both the classifier
            head and the accuracy metrics.
        in_channels: Number of channels of the input images.
        learning_rate: Learning rate passed to AdamW.
        weight_decay: Weight decay passed to AdamW.
    """

    def __init__(self, num_classes=7, in_channels=3, learning_rate=1e-3, weight_decay=1e-4):
        super().__init__()
        self.save_hyperparameters()
        self.model = Resnet3(num_classes=num_classes, in_channels=in_channels)
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.criterion = nn.CrossEntropyLoss()
        # The metrics must follow the configured class count rather than a
        # hard-coded 7, otherwise a model built with another num_classes
        # gets metrics that disagree with its head.
        self.train_acc = MulticlassAccuracy(num_classes=num_classes, average='micro')
        self.val_acc = MulticlassAccuracy(num_classes=num_classes, average='micro')

    def forward(self, x):
        """Return the logits of the wrapped ``Resnet3``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute the loss for one training batch and log loss/accuracy."""
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.train_acc.update(preds, y)

        self.log('train_loss', loss, prog_bar=True, on_epoch=True)
        # Logging the Metric object hands its epoch-level compute()/reset()
        # over to Lightning.
        self.log('train_acc', self.train_acc, prog_bar=True, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log loss/accuracy for one validation batch."""
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)

        preds = torch.argmax(logits, dim=1)
        self.val_acc.update(preds, y)

        self.log('val_loss', loss, prog_bar=True, on_epoch=True)
        self.log('val_acc', self.val_acc, prog_bar=True, on_epoch=True)

    @staticmethod
    def _format_metric(value):
        """Format a logged scalar with four decimals, or "n/a" when missing."""
        return "n/a" if value is None else f"{float(value):.4f}"

    def _print_epoch_summary(self, stage, acc_label, loss_key, acc_key):
        """Print the epoch-level loss and accuracy Lightning has aggregated."""
        # Both values come from the logged metrics, so the Metric objects are
        # not computed or reset by hand in addition to being logged.
        metrics = self.trainer.callback_metrics
        loss_text = self._format_metric(metrics.get(loss_key))
        acc_text = self._format_metric(metrics.get(acc_key))
        print(f"Epoch {self.current_epoch+1} {stage} loss: {loss_text} | {acc_label} Accuracy: {acc_text}")

    def on_train_epoch_end(self):
        """Print the training summary for the epoch that just finished."""
        self._print_epoch_summary("TRAIN", "Train", 'train_loss', 'train_acc')

    def on_validation_epoch_end(self):
        """Print the validation summary for the epoch that just finished."""
        self._print_epoch_summary("VALIDATION", "Val", 'val_loss', 'val_acc')

    def configure_optimizers(self):
        """Return AdamW plus a per-epoch cosine-annealing LR schedule."""
        optimizer = optim.AdamW(self.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay)
        # T_max=15 equals the max_epochs=15 given to the Trainer in this
        # notebook; keep the two values in sync when changing either.
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15, eta_min=1e-6)
        return {
            "optimizer": optimizer,
            'lr_scheduler': {
                "scheduler": lr_scheduler,
                "interval": "epoch",
                "frequency": 1
            }
        }

In [574]:
from lightning.pytorch.callbacks import TQDMProgressBar
class CustomProgressBar(TQDMProgressBar):
    """TQDM progress bar whose train/validation bars carry custom labels."""

    @staticmethod
    def _with_description(progress, text):
        # Set the bar's label and hand the same bar back to the caller.
        progress.set_description(text)
        return progress

    def init_train_tqdm(self):
        """Create the parent's training bar and relabel it."""
        return self._with_description(super().init_train_tqdm(), "🔥 Training is running!")

    def init_validation_tqdm(self):
        """Create the parent's validation bar and relabel it."""
        return self._with_description(super().init_validation_tqdm(), "💧 Validating on validation set")

In [None]:
# The Trainer places the module on the selected accelerator itself, so the
# model is created without a manual .to("cuda") call.
model = Resnet3Lightning()

# Single-GPU run with 16-bit mixed precision for 15 epochs; metrics are logged
# every step and one validation batch is run as a sanity check before training.
# NOTE(review): logger=None is the Trainer's default argument, so it presumably
# keeps the default logger; use logger=False if logging should be off — confirm.
trainer = L.Trainer(
    accelerator='gpu',
    log_every_n_steps=1,
    max_epochs=15,
    precision="16-mixed",
    devices=1,
    logger=None,
    num_sanity_val_steps=1,
    callbacks=[CustomProgressBar()]
)
trainer.fit(
    model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader
)

Using 16bit Automatic Mixed Precision (AMP)
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | model     | Resnet3            | 305 K  | train
1 | criterion | CrossEntropyLoss   | 0      | train
2 | train_acc | MulticlassAccuracy | 0      | train
3 | val_acc   | MulticlassAccuracy | 0      | train
---------------------------------------------------------
305 K     Trainable params
0         Non-trainable params
305 K     Total params
1.223     Total estimated model params size (MB)
44        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

d:\Coding\emotion_project\venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 1 VALIDATION loss: 1.9761 | Val Accuracy: 0.0000


d:\Coding\emotion_project\venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1 VALIDATION loss: 1.2142 | Val Accuracy: 0.5690
Epoch 1 TRAIN loss: 1.3762 | Train Accuracy: 0.4765


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 2 VALIDATION loss: 1.0704 | Val Accuracy: 0.6256
Epoch 2 TRAIN loss: 1.1405 | Train Accuracy: 0.5981


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 3 VALIDATION loss: 1.0071 | Val Accuracy: 0.6479
Epoch 3 TRAIN loss: 1.0510 | Train Accuracy: 0.6321


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 4 VALIDATION loss: 0.9911 | Val Accuracy: 0.6554
Epoch 4 TRAIN loss: 1.0041 | Train Accuracy: 0.6481


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 5 VALIDATION loss: 0.9638 | Val Accuracy: 0.6647
Epoch 5 TRAIN loss: 0.9730 | Train Accuracy: 0.6587
