<a href="https://colab.research.google.com/github/madelyn-redick/LearningASL/blob/cnn/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision.models import ResNet50_Weights
from torch.utils.data import DataLoader

In [None]:
class ASL_CNN(nn.Module):
  '''
  Convolutional Neural Network for ASL sign recognition.
  Extends pytorch's nn.Module.

  Architecture:
    - BatchNorm2d over the 3 input channels for input normalization
    - Transfer learning from resnet50 (IMAGENET1K_V2 weights)
    - Freeze all layers except the last residual block (layer4)
    - Replace fully connected layer with
      - Linear -> ReLU -> Dropout -> Linear
    - Output has num_classes classes (default 28: A-Z, DEL, SPACE)
  '''

  def __init__(self, num_classes=28, dropout=0.4):
    '''
    Initializes the CNN Model.

    Parameters
    num_classes : int
      Number of output classes of the final Linear layer. Defaults to 28.
    dropout : float
      Dropout probability used in the replacement classifier head. Defaults to 0.4.
    '''
    super().__init__()

    # Batch normalization as the first layer for input normalization
    self.batch_norm = nn.BatchNorm2d(3)

    self._base_model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

    # Freeze the whole backbone first ...
    for param in self._base_model.parameters():
      param.requires_grad = False

    # ... then re-enable gradients for the last residual block only.
    for param in self._base_model.layer4.parameters():
      param.requires_grad = True

    # The replacement head is built from new layers, so it is trainable by default.
    num_features = self._base_model.fc.in_features # 2048
    self._base_model.fc = nn.Sequential(
        nn.Linear(num_features, 512),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(512, num_classes)
    )

  def forward(self, x):
    '''
    Executes a forward pass of the CNN model.

    Parameters
    x : Tensor
      The input image batch (3 channels, as required by the BatchNorm2d(3) input layer).

    Returns a tensor of the model's prediction (one score per class).
    '''
    x = self.batch_norm(x)
    return self._base_model(x)


In [None]:
def train_model(model, loss_fn, optimizer, scheduler, dataloader, num_epochs=20,
                target_device=None, params_path=None):
  '''
  Function to train a model.

  Parameters
  model : nn.Module
    A pytorch neural network model. It is moved to the target device before training.
  loss_fn: (Tensor, Tensor) => Tensor (scalar)
    A criterion function to calculate loss given the predictions and labels.
  optimizer: torch.optim.Optimizer
    A pytorch optimizer.
  scheduler: torch.optim.lr_scheduler
    A pytorch learning rate scheduler. It is stepped once per epoch, after the training phase.
  dataloader: dict
    A dictionary containing the DataLoaders for 'train' and 'validation' phases.
  num_epochs: int
    The number of epochs to train the model for.
  target_device: torch.device, optional
    Device to train on. Defaults to the notebook-level `device`.
  params_path: str, optional
    File the best weights are checkpointed to. Defaults to the notebook-level
    `best_model_params_path`.

  Returns the model with weights updated from the best epoch run.
  '''
  # Fall back to the notebook-level settings when no override is supplied.
  if target_device is None:
    target_device = device
  if params_path is None:
    params_path = best_model_params_path

  # Batches are sent to target_device below, so the weights must live there too;
  # otherwise the forward pass fails with a device mismatch.
  model.to(target_device)

  # Save an initial checkpoint so one exists even if no epoch beats 0.0 accuracy.
  torch.save(model.state_dict(), params_path)
  best_accuracy = 0.0

  for epoch in range(num_epochs):
    print(f'Epoch {epoch}/{num_epochs - 1}')
    print('-' * 10)

    for phase in ['train', 'validation']:
      is_training = phase == 'train'
      model.train(is_training)  # train(False) is equivalent to eval()

      cumulative_loss = 0.0
      cumulative_corrects = 0
      dataset_size = len(dataloader[phase].dataset)

      for inputs, labels in dataloader[phase]:
        inputs = inputs.to(target_device)
        labels = labels.to(target_device)

        if is_training:
          optimizer.zero_grad()

        # Gradients are only tracked during the training phase.
        with torch.set_grad_enabled(is_training):
          outputs = model(inputs)
          _, predictions = torch.max(outputs, 1)
          loss = loss_fn(outputs, labels)

          if is_training:
            loss.backward()
            optimizer.step()

        # loss is scaled back by the batch size so the epoch value is divided
        # by the dataset size below.
        cumulative_loss += loss.item() * inputs.size(0)
        cumulative_corrects += (predictions == labels).sum().item()

      if is_training:
        scheduler.step()

      epoch_loss = cumulative_loss / dataset_size
      epoch_accuracy = cumulative_corrects / dataset_size

      print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_accuracy:.4f}')

      # Checkpoint only when the validation accuracy strictly improves.
      if phase == 'validation' and epoch_accuracy > best_accuracy:
        best_accuracy = epoch_accuracy
        torch.save(model.state_dict(), params_path)

  model.load_state_dict(torch.load(params_path, map_location=target_device, weights_only=True))
  return model


In [None]:
def evaluate_model(model, loss_fn, test_dataloader, target_device=None):
    '''
    Function to evaluate a model.

    Parameters
    model : nn.Module
      A pytorch neural network model. It is not moved here, so it must already
      be on the device the batches are sent to.
    loss_fn: (Tensor, Tensor) => Tensor (scalar)
      A criterion function to calculate loss given the predictions and labels.
    test_dataloader: DataLoader
      The DataLoader for the test dataset.
    target_device: torch.device, optional
      Device the batches are moved to. Defaults to the notebook-level `device`.

    Returns the test loss and the test accuracy, in that order.
    '''
    # Fall back to the notebook-level device when no override is supplied.
    if target_device is None:
        target_device = device

    model.eval()

    cumulative_loss = 0.0
    # Tensor accumulator: keeps the returned accuracy a tensor and stays valid
    # even when the loader yields no batches.
    cumulative_corrects = torch.zeros((), dtype=torch.long, device=target_device)
    test_dataset_size = len(test_dataloader.dataset)

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)

            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            loss = loss_fn(outputs, labels)

            # loss is scaled back by the batch size so the total is divided
            # by the dataset size below.
            cumulative_loss += loss.item() * inputs.size(0)
            cumulative_corrects += torch.sum(predictions == labels)

    test_loss = cumulative_loss / test_dataset_size
    test_accuracy = cumulative_corrects.double() / test_dataset_size

    print(f'Test Loss: {test_loss:.4f} Test Accuracy: {test_accuracy:.4f}')
    return test_loss, test_accuracy


In [None]:
import sys
# Add path to Python's list of directories to search for modules (in order to find data_preprocessing file).
# Guarded so that re-running this cell does not append duplicate entries.
if '../LearningASL' not in sys.path:
  sys.path.append('../LearningASL')

In [None]:
import data_preprocessing

# Pull the three prepared dataset splits out of the preprocessing module
letter_train, letter_val, letter_test = (
    data_preprocessing.letter_train,
    data_preprocessing.letter_val,
    data_preprocessing.letter_test,
)

# Sanity check: report the type and size of the training split
print(f"Type of letter_train: {type(letter_train)}")
print(f"Length of letter_train: {len(letter_train)}")


In [None]:
# Training hyperparameters
batch_size = 32
epochs = 30

# Optimizer / scheduler settings.
# The learning rate is kept small because we are using transfer learning and
# do not want to mess up the pretrained weights.
learning_rate = 0.00001
momentum = 0.9
lr_gamma = 0.9

In [None]:
#GPU/CPU and Path Definition
best_model_params_path = './best_cnn_params.pth'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Create DataLoader instances for each phase in training
dataloader = {
    'train': DataLoader(letter_train, batch_size=batch_size, shuffle=True, num_workers=4),
    'validation': DataLoader(letter_val, batch_size=batch_size, shuffle=False, num_workers=4)
}

# Create DataLoader for test set
test_dataloader = DataLoader(letter_test, batch_size=batch_size, shuffle=False, num_workers=4)

# Model Instance
# Moved to `device` right away: the training loop sends every batch to `device`,
# so the weights must live there too, and this happens before the optimizer is built.
model = ASL_CNN().to(device)

In [None]:
# Hand the optimizer only the parameters left unfrozen (requires_grad == True)
optimizer = optim.SGD(
    (p for p in model.parameters() if p.requires_grad),
    lr=learning_rate,
    momentum=momentum,
)
# Exponential learning-rate decay, multiplying by lr_gamma at each scheduler step
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=lr_gamma)
loss = nn.CrossEntropyLoss()

train_model(model, loss, optimizer, scheduler, dataloader, num_epochs=epochs)

In [None]:
# To evaluate model, first load best model weights.
# map_location remaps the checkpoint tensors onto the current device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only runtime.
loaded_model = ASL_CNN()
loaded_model.load_state_dict(torch.load(best_model_params_path, map_location=device, weights_only=True))
loaded_model = loaded_model.to(device)

test_loss, test_accuracy = evaluate_model(loaded_model, loss, test_dataloader)