# DAEN 429 Final Project
Sydney Flake, Maddie Bird, Jade Winebright

## Phase 0: Setup + ResNet-18

In [16]:
import os
import random
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models
from torchvision.models import resnet18, ResNet18_Weights


from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Seed shared by every random number generator used in this notebook
SEED = 429

# Prefer a CUDA GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Seed Python, NumPy and PyTorch so shuffling and weight init are repeatable
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if device.type == "cuda":
    # Seed the generators of all visible GPUs as well
    torch.cuda.manual_seed_all(SEED)

# Let cuDNN auto-tune its convolution algorithms for speed (optional).
# NOTE(review): benchmark mode may select different kernels from run to run,
# so GPU results are not guaranteed to be bit-for-bit identical despite the seeds.
torch.backends.cudnn.benchmark = True

Using device: cpu


In [None]:
# One-time download of the ASL Alphabet dataset from Kaggle. The call is left
# commented out, presumably because the data has already been fetched (see the
# captured output of this cell). To re-run it, uncomment the lines below and
# add `import kagglehub` first: kagglehub is not imported in the cells shown here.
# Download latest version
#path = kagglehub.dataset_download("grassknoted/asl-alphabet")

#print("Path to dataset files:", path)

Resuming download from 11534336 bytes (1089352698 bytes left)...
Resuming download from https://www.kaggle.com/api/v1/datasets/download/grassknoted/asl-alphabet?dataset_version_number=1 (11534336/1100887034) bytes left.


100%|██████████| 1.03G/1.03G [03:12<00:00, 5.65MB/s]

Extracting files...





Path to dataset files: /Users/flake/.cache/kagglehub/datasets/grassknoted/asl-alphabet/versions/1


In [9]:
# Image preprocessing pipelines for the training and validation splits

# Per-channel ImageNet statistics (what ResNet-18 expects)
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]


def _tensor_and_normalize():
    """Return the final two steps shared by both pipelines: to-tensor, then ImageNet normalization."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]


# Training: resize, then random crop / flip / color jitter as augmentation
train_transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    ]
    + _tensor_and_normalize()
)

# Validation: deterministic resize + center crop, no augmentation
val_transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
    ]
    + _tensor_and_normalize()
)

In [11]:
# Load the full ASL dataset

# Root folder holding one sub-folder of images per class. Set the ASL_DATA_ROOT
# environment variable to use a copy of the dataset stored elsewhere; the
# default is the original author's local path, so existing runs are unchanged.
DATA_ROOT = os.environ.get(
    "ASL_DATA_ROOT",
    "/Users/flake/Documents/DAEN429/project/Datasets/asl_alphabet_train/asl_alphabet_train",
)

# transform=None here: the base dataset applies no transform of its own, so
# each split can attach its own pipeline later.
full_dataset = datasets.ImageFolder(root=DATA_ROOT, transform=None)
print("Total images:", len(full_dataset))
print("Number of classes:", len(full_dataset.classes))
print("Classes:", full_dataset.classes[:10], "...")


Total images: 87000
Number of classes: 29
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'] ...


In [12]:
# ---- Stratified 80/20 split, seeded with the notebook-wide SEED (429) ----
indices = np.arange(len(full_dataset))
labels = np.array(full_dataset.targets)  # class indices 0..(num_classes-1)

# Stratify on the labels so both splits keep the same class proportions.
# random_state uses SEED rather than repeating the literal, so the split
# cannot silently drift from the seed used for everything else.
train_idx, val_idx = train_test_split(
    indices,
    test_size=0.2,
    stratify=labels,
    random_state=SEED,
)

print("Train size (indices):", len(train_idx))
print("Val size (indices):", len(val_idx))

Train size (indices): 69600
Val size (indices): 17400


In [13]:
# ---- Subset wrapper that allows per-split transforms ----
class SubsetWithTransform(Subset):
    """
    A ``Subset`` that applies its own transform to the image of each sample.

    The wrapped ``dataset`` is expected to yield ``(img, label)`` pairs and to
    apply no transform itself, so that different subsets of the same base
    dataset can use different preprocessing.

    Args:
        dataset: base dataset indexed by integer position.
        indices: positions in ``dataset`` that belong to this subset.
        transform: optional callable applied to ``img``; ``None`` returns the
            image unchanged.
    """

    def __init__(self, dataset, indices, transform=None):
        super().__init__(dataset, indices)
        self.transform = transform

    def __getitem__(self, idx):
        """Return ``(transformed_img, label)`` for position ``idx`` of the subset."""
        real_idx = self.indices[idx]
        img, label = self.dataset[real_idx]  # base dataset has transform=None
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __getitems__(self, indices):
        """
        Batched fetch used by ``DataLoader`` when the dataset provides it.

        Recent PyTorch versions define ``Subset.__getitems__``, which reads
        straight from the base dataset and therefore skips the ``__getitem__``
        override above (the transform would never run). Routing the batched
        path through ``__getitem__`` keeps both access paths consistent.
        """
        return [self[idx] for idx in indices]

# The base dataset must return untransformed images; each split wrapper below
# applies its own pipeline
full_dataset.transform = None

# Pair each split's indices with its transform and build both wrappers the same way
train_dataset, val_dataset = (
    SubsetWithTransform(full_dataset, split_indices, transform=split_transform)
    for split_indices, split_transform in (
        (train_idx, train_transform),
        (val_idx, val_transform),
    )
)

print("Train size:", len(train_dataset))
print("Val size:", len(val_dataset))

Train size: 69600
Val size: 17400


In [None]:
# ---- DataLoaders ----
BATCH_SIZE = 64

# pin_memory is only switched on when the selected device is a CUDA GPU
pin = device.type == "cuda"

# Settings shared by both loaders; num_workers=0 loads data in the main process
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=0, pin_memory=pin)

# Training batches are reshuffled every epoch; validation order stays fixed
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)

print("Num train batches:", len(train_loader))
print("Num val batches:", len(val_loader))

Num train batches: 1088
Num val batches: 272


## Phase 1

In [15]:
# The size of the classifier head is the number of classes in the base dataset
class_names = full_dataset.classes
num_classes = len(class_names)
print("Number of classes:", num_classes)
print("Classes:", class_names)

Number of classes: 29
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']


In [17]:
def create_resnet18_model(num_classes, pretrained=True):
    """
    Build a ResNet-18 whose final fully connected layer outputs ``num_classes`` logits.

    Args:
        num_classes: number of output units for the replacement ``fc`` layer.
        pretrained: if truthy, start from the ``IMAGENET1K_V1`` weights
            (for T-A, T-B, T-C); otherwise use random initialization (for S-A).

    Returns:
        The model with its ``fc`` layer replaced by a fresh ``nn.Linear``.
    """
    chosen_weights = ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
    net = resnet18(weights=chosen_weights)

    if chosen_weights is not None:
        print("Loaded ResNet-18 with ImageNet pretrained weights.")
    else:
        print("Loaded ResNet-18 from scratch (no pretrained weights).")

    # Swap the stock classifier for one sized to our label set; the new layer
    # is always randomly initialized, even when the backbone is pretrained
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    return net

In [18]:
# Freezing/unfreezing policies
def apply_freezing_policy(model, policy):
    """
    Set ``requires_grad`` on every parameter of ``model`` according to ``policy``.

    policy: one of {"T-A", "T-B", "T-C", "S-A"}

    T-A: Head-only, freeze all backbone, train fc.
    T-B: Freeze stem + layer1 + layer2 + layer3; train layer4 + fc.
    T-C: Freeze stem + layer1 + layer2; train layer3 + layer4 + fc.
    S-A: From scratch, train all layers (no freezing).

    The policy name is validated before the model is touched, so an unknown
    policy raises without leaving the model fully frozen.

    BatchNorm2d layers whose parameters are all frozen are switched to eval
    mode. Note that a later ``model.train()`` call puts them back in training
    mode, so a training loop that wants fixed statistics must re-apply this.

    Raises:
        ValueError: if ``policy`` is not one of the four names above.
    """
    # Top-level submodules that stay trainable under each policy
    # (None means "train every parameter").
    trainable_blocks = {
        "T-A": ("fc",),
        "T-B": ("layer4", "fc"),
        "T-C": ("layer3", "layer4", "fc"),
        "S-A": None,
    }
    if not isinstance(policy, str) or policy not in trainable_blocks:
        raise ValueError(f"Unknown policy: {policy}")

    unfrozen = trainable_blocks[policy]

    # Start from a uniform state: everything trainable for S-A, everything
    # frozen otherwise ...
    for param in model.parameters():
        param.requires_grad = unfrozen is None

    # ... then re-enable gradients for the blocks the policy trains.
    for block_name in unfrozen or ():
        for param in getattr(model, block_name).parameters():
            param.requires_grad = True

    # Optional: set BatchNorm layers in frozen parts to eval mode
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            # if all params in this BN are frozen, keep it in eval
            if not any(p.requires_grad for p in m.parameters()):
                m.eval()

    # Print a quick summary
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Policy {policy}: trainable params = {trainable_params}/{total_params}")


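Reusable training utilities shared by all experiments: the loss function, an optimizer factory, the per-epoch training and evaluation loops, and the full training driver.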

In [20]:
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score

# Loss: cross-entropy over the class logits. Defined once at module level and
# read as a global by the training and evaluation loops in the following cells.
criterion = nn.CrossEntropyLoss()

def get_optimizer(model, lr=1e-3, weight_decay=1e-4, optimizer_name="Adam"):
    """
    Build an optimizer that updates ONLY the parameters with ``requires_grad=True``.

    Args:
        model: module whose trainable parameters are optimized.
        lr: learning rate.
        weight_decay: L2 penalty coefficient passed to the optimizer.
        optimizer_name: "Adam" or "SGD" (SGD uses momentum 0.9).

    Raises:
        ValueError: for any other ``optimizer_name``.
    """
    trainable = [weight for weight in model.parameters() if weight.requires_grad]

    if optimizer_name == "Adam":
        return optim.Adam(trainable, lr=lr, weight_decay=weight_decay)
    if optimizer_name == "SGD":
        return optim.SGD(trainable, lr=lr, momentum=0.9, weight_decay=weight_decay)

    raise ValueError(f"Unknown optimizer: {optimizer_name}")


In [21]:
def train_one_epoch(model, dataloader, optimizer, device):
    """
    Run one optimization pass over ``dataloader`` and report epoch metrics.

    Uses the module-level ``criterion`` as the loss function.

    Args:
        model: network to train (updated in place).
        dataloader: yields ``(images, labels)`` batches; must expose ``.dataset``
            so the loss can be averaged per sample.
        optimizer: optimizer stepping the model's trainable parameters.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, epoch_f1)``: mean per-sample loss, accuracy
        and macro-F1 of the predictions made during the epoch.
    """
    model.train()
    # model.train() puts EVERY submodule in training mode, including BatchNorm
    # layers whose parameters are frozen. Left that way, their running
    # mean/variance would keep being updated from the new data even though the
    # layer is meant to be frozen, so switch those layers back to eval mode.
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d) and not any(
            p.requires_grad for p in module.parameters()
        ):
            module.eval()

    running_loss = 0.0

    all_preds = []
    all_targets = []

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss is a batch mean; weight by batch size so the epoch average is per sample
        running_loss += loss.item() * images.size(0)

        preds = outputs.argmax(dim=1)
        all_preds.append(preds.detach().cpu())
        all_targets.append(labels.detach().cpu())

    all_preds = torch.cat(all_preds).numpy()
    all_targets = torch.cat(all_targets).numpy()

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = accuracy_score(all_targets, all_preds)
    epoch_f1 = f1_score(all_targets, all_preds, average="macro")

    return epoch_loss, epoch_acc, epoch_f1


In [22]:
def evaluate(model, dataloader, device):
    """
    Score ``model`` on ``dataloader`` without updating any weights.

    The model is put in eval mode and gradients are disabled. The loss comes
    from the module-level ``criterion``.

    Returns:
        ``(loss, accuracy, macro_f1, predictions, targets)``, where the loss is
        averaged over ``len(dataloader.dataset)`` samples and the last two
        items are NumPy arrays covering every sample seen.
    """
    model.eval()
    total_loss = 0.0
    pred_chunks, target_chunks = [], []

    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)

            # batch_loss is a mean over the batch; re-weight by batch size
            total_loss += batch_loss.item() * batch_images.size(0)

            pred_chunks.append(logits.argmax(dim=1).detach().cpu())
            target_chunks.append(batch_labels.detach().cpu())

    y_pred = torch.cat(pred_chunks).numpy()
    y_true = torch.cat(target_chunks).numpy()

    return (
        total_loss / len(dataloader.dataset),
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average="macro"),
        y_pred,
        y_true,
    )


In [23]:
def train_model(
    model,
    train_loader,
    val_loader,
    device,
    num_epochs=10,
    lr=1e-3,
    weight_decay=1e-4,
    optimizer_name="Adam",
    experiment_name="exp"
):
    """
    Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: network to train; only parameters with ``requires_grad=True``
            are handed to the optimizer.
        train_loader / val_loader: loaders for the training and validation splits.
        device: device used for the forward and backward passes.
        num_epochs: number of passes over ``train_loader``.
        lr, weight_decay, optimizer_name: forwarded to ``get_optimizer``.
        experiment_name: label used as a prefix in the progress printout.

    Returns:
        ``(model, history, best_state)``:
        * ``model`` carries the weights of the LAST epoch, not the best one;
          load ``best_state["model_state_dict"]`` to restore the best weights.
        * ``history`` maps each metric name to a list with one value per epoch.
        * ``best_state`` is a snapshot (model + optimizer state, epoch, val
          metrics) taken at the epoch with the highest validation macro-F1,
          or ``None`` if no epoch ran.
    """
    import copy  # local import: only needed for the best-epoch snapshot

    optimizer = get_optimizer(model, lr=lr, weight_decay=weight_decay, optimizer_name=optimizer_name)

    history = {
        "train_loss": [],
        "train_acc": [],
        "train_f1": [],
        "val_loss": [],
        "val_acc": [],
        "val_f1": [],
    }

    best_val_f1 = -1.0
    best_state = None

    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc, train_f1 = train_one_epoch(model, train_loader, optimizer, device)
        val_loss, val_acc, val_f1, _, _ = evaluate(model, val_loader, device)

        history["train_loss"].append(train_loss)
        history["train_acc"].append(train_acc)
        history["train_f1"].append(train_f1)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)
        history["val_f1"].append(val_f1)

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            # state_dict() returns references to the live tensors, which later
            # epochs keep updating in place. Deep-copy so the snapshot really
            # holds the weights of this epoch rather than of the final one.
            best_state = {
                "model_state_dict": copy.deepcopy(model.state_dict()),
                "optimizer_state_dict": copy.deepcopy(optimizer.state_dict()),
                "epoch": epoch,
                "val_f1": val_f1,
                "val_acc": val_acc,
            }

        print(
            f"[{experiment_name}] Epoch {epoch:02d}/{num_epochs:02d} | "
            f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}, F1: {train_f1:.4f} | "
            f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}, F1: {val_f1:.4f}"
        )

    print(f"\n[{experiment_name}] Best val macro-F1: {best_val_f1:.4f}")
    return model, history, best_state


### T-A: Head Only

In [None]:
# ===== T-A: Head-only finetuning =====
# Pretrained backbone, everything frozen except the new classifier head
model_TA = create_resnet18_model(num_classes=num_classes, pretrained=True)
apply_freezing_policy(model_TA, policy="T-A")
model_TA = model_TA.to(device)

# Hyperparameters for this run, gathered in one place so they are easy to tweak
_ta_settings = dict(
    num_epochs=10,          # you can adjust
    lr=1e-3,
    weight_decay=1e-4,
    optimizer_name="Adam",
    experiment_name="T-A_head_only",
)

model_TA, history_TA, best_TA = train_model(
    model_TA, train_loader, val_loader, device, **_ta_settings
)


### T-B: Last Block Unfrozen

### T-C: Progressive Unfreezing

### S-A: Train From Scratch