# Method 1: Convolutional Neural Network

For the first method, we decided to use a CNN, which is well suited to image input. By applying convolutional filters (kernels), we can reduce the dimensionality of our images.

## 1. Getting Started

First, we import the libraries we are going to use to train our Convolutional Neural Network. We will also define the environment's relative paths and some utility/auxiliary functions.

### 1.1 Imports

In [1]:
import os
import sys
import csv
import cv2
import numpy as np
import torch.nn as nn

### 1.2 Environment Configuration

In [2]:
# Decide where the training dataset lives and make the project's src/ folder importable.

# The notebook is considered "local" whenever the JupyterHub user variable is not set.
running_local = os.getenv('JUPYTERHUB_USER') is None

# Fallback location, kept when no branch below overrides it
DATASET_PATH = "../data/sign_lang_train"

if not running_local:
    # On the Jupyter hub the data folder is already available; nothing has to be uploaded.
    DATASET_PATH = "/data/mlproject22/sign_lang_train"
else:
    # On a local machine, point this at your copy of the sign_lang_train folder.
    local_path = "../data/sign_lang_train"
    if os.path.exists(local_path):
        DATASET_PATH = local_path

# Register <parent of the working directory>/src on the import path, once.
src_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
if src_path not in sys.path:
    sys.path.append(src_path)

### 1.3 Auxiliary Functions

In [3]:
# Auxiliary function
def read_csv(csv_file):
    """Read a CSV file and return its rows.

    Args:
        csv_file (string): Path of the CSV file to open.

    Returns:
        list: One list of string fields per row, in file order.
    """
    # newline='' leaves line-ending handling to the csv module
    with open(csv_file, newline='') as handle:
        return list(csv.reader(handle))

### 1.4 Load the Dataset (Sign-Languages)

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader

from string import ascii_lowercase

class SignLangDataset(Dataset):
    """Sign language dataset.

    Samples are described by the rows of a CSV annotation file: column 0 holds
    the class name and column 1 the image file name relative to ``root_dir``.
    """

    def __init__(self, csv_file, root_dir, class_index_map=None, transform=None):
        """
        Args:
            csv_file (string): Name of the csv file with annotations; it is joined onto ``root_dir``.
            root_dir (string): Directory with all the images.
            class_index_map (optional): Stored on the instance unchanged; not read by this class.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.data = read_csv(os.path.join(root_dir, csv_file))
        self.root_dir = root_dir
        self.class_index_map = class_index_map
        self.transform = transform
        # List of class names in order: '0'..'9' followed by 'a'..'z'
        self.class_names = [str(digit) for digit in range(10)] + list(ascii_lowercase)

    def __len__(self):
        """
        Returns the number of annotation rows, i.e. the number of samples.
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        Returns one sample (dict consisting of an image and its label).

        The image is read as grayscale and gets a leading axis of size 1; the
        label is the position of the row's class name in ``self.class_names``.
        If a transform was given, its return value is returned instead.

        Raises:
            FileNotFoundError: If the image file of the row does not exist.
            OSError: If the file exists but could not be read as an image.
            ValueError: If the row's class name is not in ``self.class_names``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Read the image and labels
        image_path = os.path.join(self.root_dir, self.data[idx][1])
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: '{image_path}'")
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # without this check np.expand_dims would silently wrap the None.
            raise OSError(f"Could not read image: '{image_path}'")
        # Prepend a channel axis, so the result is channel-first with C=1
        image = np.expand_dims(image, 0)
        # The label is the index of the class name in the list ['0','1',...,'9','a','b',...'z']
        # because we should have integer labels in the range 0-35 (for 36 classes)
        label = self.class_names.index(self.data[idx][0])

        sample = {'image': image, 'label': label}
        if self.transform:
            sample = self.transform(sample)
        return sample

### 1.5 Import the Network

In [5]:
from src.conv_nn import ConvNN

#ConvNN?
#ConvNN??
#help(ConvNN)

# Build the network with float parameters and switch it to training mode.
cnn = ConvNN().float()
cnn.train()

# Checkpoint holding the initial weights: written on the first run, reused afterwards
filename = "../models/cnn_weights.pt"

if not os.path.exists(filename):
    # Make sure the target folder exists before saving, so a fresh checkout does not fail here
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    cnn.init_weights()
    torch.save(cnn.state_dict(), filename)
    print(f"Initialized weights saved to '{filename}'")
else:
    print(f"File '{filename}' already exists, skipping save.")

# Always start from the weights stored on disk (same path as above, not a second literal)
cnn.load_state_dict(torch.load(filename))

print("ConvNN imported successfully!")

Initialized weights saved to '../models/cnn_weights.pt'
ConvNN imported successfully!


### 1.6 Split the Data

To test authentically whether our model performs well on new data (data that was never seen during training), we need a test set. We also need to make sure that we do not overfit during training, which we monitor with a separate validation set. The validation set additionally lets us perform early stopping and serves as an estimate of the generalization error.

In [6]:
from torch.utils.data import random_split, DataLoader

# Load the full labelled dataset and split it into training / test / validation subsets.
dataset = SignLangDataset(csv_file='labels.csv', root_dir=DATASET_PATH)
N = len(dataset)

# 10% each for test and validation; training receives everything that is left (~80%).
# random_split requires the lengths to add up to exactly N, which three independently
# truncated int(...) values do not guarantee (e.g. N=15 gives 12 + 1 + 1 = 14).
test_size = int(0.1*N)
validation_size = int(0.1*N)
train_size = N - test_size - validation_size

training_dataset, test_dataset, validation_dataset = random_split(dataset, [train_size, test_size, validation_size])

## 2. Train the Model

### 2.1 Use dataloader to create batches for split dataset

First, we will load our dataset by using a dataloader and test if everything works well by printing a random batch.

In [7]:
# One DataLoader per split, each producing mini-batches of 32 samples.
# Only the training split is shuffled; test and validation keep their order.
train_loader = DataLoader(
    training_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
)
validation_loader = DataLoader(
    validation_dataset,
    batch_size=32,
    shuffle=False,
)

In [8]:
# Layout settings read by the console-reporting helpers further down.
total_width = 100   # width the finished status line is centred in
column_width = 70   # width of each of the two status columns

### 2.2 Hyperparameters

In [9]:
# Training hyperparameters.
PATIENCE = 5      # consecutive epochs without a new best validation accuracy before stopping
EPOCHS = 100      # upper bound on the number of passes over the training data
BATCH_SIZE = 32   # NOTE: the DataLoaders in 2.1 use a literal 32; keep both in sync

In [10]:
# Cross-entropy loss for the classification task
criterion = nn.CrossEntropyLoss()

# Adam over all parameters of the network, learning rate 1e-3
optimizer = torch.optim.Adam(
    params=cnn.parameters(),
    lr=1e-3,
)

In [11]:
def print_progress_bar(iteration, total, epo, length=40):
    """Redraw a one-line text progress bar for the whole training run on stdout.

    Progress is measured over all epochs: ``epo * total + iteration`` steps out
    of ``total * EPOCHS`` (``EPOCHS`` is read from module scope).

    Args:
        iteration: Number of steps completed within the current epoch.
        total: Number of steps per epoch.
        epo: Zero-based index of the current epoch.
        length: Width of the bar in characters.
    """
    steps_overall = total * EPOCHS
    steps_done = epo * total + iteration

    percent = 100 * (steps_done / steps_overall)
    n_filled = int(length * steps_done // steps_overall)
    bar = ''.join(('█' * n_filled, '-' * (length - n_filled)))

    # The trailing carriage return lets the next call overwrite this line
    sys.stdout.write(f'Progress: |{bar}| {percent:.1f}% Complete\r')
    sys.stdout.flush()

    # Terminate the line once the very last step has been reported
    if steps_done == steps_overall:
        sys.stdout.write('\n')


def print_train_details(epoch, iteration, loss, batch_acc, epoch_acc, total_batches):
    """Print one training status line, a separator rule and the progress bar.

    Args:
        epoch: Index of the current epoch.
        iteration: Index of the current batch within the epoch.
        loss: Loss of the current batch; must provide ``.item()``.
        batch_acc: Accuracy of the current batch, in percent.
        epoch_acc: Accuracy over the epoch so far, in percent.
        total_batches: Number of batches per epoch, forwarded to the progress bar.
    """
    left = f"Epoch {epoch},\tBatch {iteration},\tloss: {loss.item():.4f}"
    right = f"Batch Accuracy: {batch_acc:.4f}%,\tEpoch Accuracy: {epoch_acc:.4f}%"

    # Loss details go in the left column, accuracies right-aligned in the right column
    width = int(column_width)
    row = left.ljust(width) + right.rjust(width)
    print(row.center(total_width))

    # Separator rule spanning both columns
    print('[' + '-' * (column_width * 2) + ']')
    print_progress_bar(iteration + 1, total_batches, epo=epoch, length=40)

def print_validation_details(val_acc, best_acc, loss):
    """Print a one-line validation summary.

    The current patience level is taken from the module-level ``patience_counter``.

    Args:
        val_acc: Validation accuracy of the current epoch, in percent.
        best_acc: Best validation accuracy seen so far, in percent.
        loss: Validation loss; must provide ``.item()``.
    """
    left = f"loss: {loss.item():.4f},\tPatience-Level: {patience_counter}"
    right = f"Best Accuracy so far: {best_acc:.4f}%,\t Validation Accuracy: {val_acc:.4f}%"

    # Same two-column layout as the training status line
    width = int(column_width)
    row = left.ljust(width) + right.rjust(width)
    print(row.center(total_width))

In [12]:
# Train for at most EPOCHS epochs. After every epoch the model is evaluated on the
# validation set; training stops early once the validation accuracy has failed to
# reach a new best for PATIENCE consecutive epochs.
best_val = float('-inf')
patience_counter = 0   # also read by print_validation_details
for epoch in range(EPOCHS):
    text = "=====Training - Tracker====="
    print(text.center(column_width*2))
    cnn.train()

    epoch_total = 0   # samples seen so far in this epoch
    trn_corr = 0      # correctly classified samples so far in this epoch
    total_batches = len(train_loader)

    for i, batch in enumerate(train_loader):
        images = batch['image'].float()
        labels = batch['label']
        batch_size = images.size(0)

        y_pred = cnn(images)
        loss = criterion(y_pred, labels)

        # Predicted class = index of the largest output per sample
        predicted = torch.max(y_pred, 1)[1]
        batch_corr = (predicted == labels).sum()
        trn_corr += batch_corr
        epoch_total += batch_size

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report every 50th batch only, to keep the output readable
        if i % 50 == 0:
            batch_acc = batch_corr / batch_size * 100
            epoch_acc = trn_corr / epoch_total * 100
            print_train_details(epoch, i, loss, batch_acc, epoch_acc, total_batches)

    text = "=====Validation - Analysis====="
    print(text.center(column_width*2))

    # Evaluation mode is set once for the whole validation pass
    cnn.eval()
    val_corr = 0
    num_samples = 0
    total_batches = len(validation_loader)
    # Kept as a tensor because print_validation_details calls loss.item()
    loss = torch.zeros(())
    # No gradients are needed for evaluation, so skip building the autograd graph
    with torch.no_grad():
        for batch in validation_loader:
            images = batch['image'].float()
            labels = batch['label']

            y_pred = cnn(images)
            # Accumulate over all batches; overwriting here would keep only the last batch
            loss = loss + criterion(y_pred, labels)

            predicted = torch.max(y_pred, 1)[1]
            val_corr += (predicted == labels).sum()
            num_samples += images.size(0)
    # Mean of the per-batch losses; max(..., 1) guards against an empty validation set
    loss = loss / max(total_batches, 1)
    val_acc = val_corr / max(num_samples, 1) * 100

    improved = val_acc > best_val
    if improved:
        patience_counter = 0
        best_val = val_acc
    else:
        patience_counter += 1

    # Printed before the early-stopping check so the last epoch is reported as well
    print_validation_details(val_acc, best_val, loss)

    if not improved and patience_counter >= PATIENCE:
        print("Patience exceeded! Breaking training...")
        break


                                                        =====Training - Tracker=====                                                        
Epoch 0,	Batch 0,	loss: 23.1910                                                             Batch Accuracy: 6.2500%,	Epoch Accuracy: 6.2500%
[--------------------------------------------------------------------------------------------------------------------------------------------]
Epoch 0,	Batch 50,	loss: 2.8540                                                           Batch Accuracy: 21.8750%,	Epoch Accuracy: 13.3578%
[--------------------------------------------------------------------------------------------------------------------------------------------]
Epoch 0,	Batch 100,	loss: 1.9268                                                          Batch Accuracy: 46.8750%,	Epoch Accuracy: 27.0421%
[--------------------------------------------------------------------------------------------------------------------------------------------]
Epoch 0