ASL classification model

Library Loading

In [1]:
import torch.nn as nn
import pandas as pd
import torch
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader

In [2]:
# Error handling: with suppress_errors enabled, a failure while torch.compile
# (TorchDynamo) is compiling the model falls back to eager execution instead
# of raising, so the notebook keeps running on machines without a working
# compiler backend.
import torch._dynamo
torch._dynamo.config.suppress_errors = True

Download and import the American Sign Language dataset available on Kaggle

In [3]:
# Load the pre-split training and validation CSVs (paths are relative to the
# notebook's working directory). Each frame is expected to carry a `label`
# column plus the flattened pixel columns consumed by MyDataset.
training_df = pd.read_csv("data/asl_dataset/sign_training.csv")
validation_df = pd.read_csv("data/asl_dataset/sign_validation.csv")

In [None]:
'''Explore dataframes using training_df.head() and validation_df.head()'''

Custom Dataset: reshaping data into 28x28 pixel format. 

In [4]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.is_available()  # last expression: displayed as the cell output

False

In [5]:
IMG_CHS = 1       # number of image channels
IMG_HEIGHT = 28   # image height in pixels
IMG_WIDTH = 28    # image width in pixels

class MyDataset(Dataset):
    """In-memory image dataset built from a dataframe of flattened pixels.

    The dataframe must contain a `label` column; every other column is
    treated as one pixel value. Pixels are scaled by 1/255 and reshaped to
    (N, IMG_CHS, IMG_HEIGHT, IMG_WIDTH). Both tensors are moved to the
    notebook-level `device` once, up front, so `__getitem__` is a plain
    tensor index.
    """

    def __init__(self, base_df):
        """Convert `base_df` into the image tensor `xs` and label tensor `ys`.

        Args:
            base_df: pandas DataFrame with a `label` column and
                IMG_CHS * IMG_HEIGHT * IMG_WIDTH pixel columns. It is copied
                before the label column is removed, so the caller's frame is
                left untouched.

        Raises:
            KeyError: if `base_df` has no `label` column.
            ValueError: if the pixel columns cannot be reshaped to the
                configured image size.
        """
        x_df = base_df.copy()
        y_df = x_df.pop('label')
        x_df = x_df.values / 255
        x_df = x_df.reshape(-1, IMG_CHS, IMG_HEIGHT, IMG_WIDTH)
        self.xs = torch.tensor(x_df).float().to(device)
        # Hand torch a NumPy array rather than the Series itself: a Series is
        # otherwise walked as a generic Python sequence, which is slow and
        # relies on label-based indexing (fragile when the frame's index is
        # not the default 0..N-1, e.g. after filtering or splitting).
        self.ys = torch.tensor(y_df.to_numpy()).to(device)

    def __getitem__(self, idx):
        """Return the `(image, label)` pair stored at position `idx`."""
        x = self.xs[idx]
        y = self.ys[idx]
        return x, y

    def __len__(self):
        """Return the number of samples held by the dataset."""
        return len(self.xs)

Creating DataLoader

In [6]:
BATCH_SIZE = 32

# Training split: wrap the frame, record its size, serve shuffled mini-batches.
training_data = MyDataset(training_df)
training_N = len(training_data)
training_loader = DataLoader(training_data, shuffle=True, batch_size=BATCH_SIZE)

# Validation split: same batch size, samples served in their stored order.
validation_data = MyDataset(validation_df)
validation_N = len(validation_data)
validation_loader = DataLoader(validation_data, batch_size=BATCH_SIZE)

In [None]:
'''
training_loader
batch = next(iter(training_loader))
batch 
'''

Building model: Convolutional Neural Network

In [None]:
'''24 categories because 2 of 26 sign letters require movement to be shown'''

In [11]:
image_size = 75*3*3  # flattened feature count fed to the first Linear layer (75 channels x 3 x 3)
kernel_size = 3  # side length of the square kernel used by every convolution
n_categories = 24 # number of output classes

In [13]:
def _conv_stage(in_chs, out_chs, dropout_p=None):
    """Return the layers of one stage: Conv -> BatchNorm -> ReLU -> [Dropout] -> MaxPool.

    The convolution uses the notebook-level `kernel_size` with stride 1 and
    padding 1, so it keeps the spatial size; the 2x2 max-pool then halves it
    (rounding down). Dropout is inserted only when `dropout_p` is given.
    """
    layers = [
        nn.Conv2d(in_chs, out_chs, kernel_size, stride=1, padding=1),
        nn.BatchNorm2d(out_chs),
        nn.ReLU(),
    ]
    if dropout_p is not None:
        layers.append(nn.Dropout(dropout_p))
    layers.append(nn.MaxPool2d(2, stride=2))
    return layers


# The stages are unpacked into one flat Sequential so layer indices (0..17)
# match a hand-written layer list.
model = nn.Sequential(
    # First convolution: 25 x 28 x 28, pooled to 25 x 14 x 14
    *_conv_stage(IMG_CHS, 25),
    # Second convolution: 50 x 14 x 14, pooled to 50 x 7 x 7
    *_conv_stage(25, 50, dropout_p=.2),
    # Third convolution: 75 x 7 x 7, pooled to 75 x 3 x 3
    *_conv_stage(50, 75),
    # Flatten to Dense
    nn.Flatten(),
    nn.Linear(image_size, 512),
    nn.Dropout(.3),
    nn.ReLU(),
    nn.Linear(512, n_categories)
)

# Move to the selected device first, then compile; the optimizer is built
# from the compiled module's parameters.
model = torch.compile(model.to(device))
optimizer = Adam(model.parameters())
loss_function = nn.CrossEntropyLoss()
model

OptimizedModule(
  (_orig_mod): Sequential(
    (0): Conv2d(1, 25, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(25, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(25, 50, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2, inplace=False)
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(50, 75, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): BatchNorm2d(75, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (13): Flatten(start_dim=1, end_dim=-1)
    (14): Linear(in_features=675, out_features=512, bias=True)
    (15): Dropout

Defining functions for training and validation

In [14]:
def training():
    """Run one optimisation pass over `training_loader` and print the totals.

    The printed loss is the sum of the per-batch losses (not an average over
    batches). The printed accuracy is the sum of each batch's
    correct / training_N, i.e. the fraction of the whole training set that
    was classified correctly during the pass.
    """
    running_loss = 0
    running_accuracy = 0

    model.train()  # training mode: dropout active, batch-norm uses batch statistics
    for inputs, labels in training_loader:
        # Clear gradients left over from the previous step before the new backward pass.
        optimizer.zero_grad()
        logits = model(inputs)
        step_loss = loss_function(logits, labels)
        step_loss.backward()
        optimizer.step()

        running_loss += step_loss.item()
        running_accuracy += get_batch_accuracy(logits, labels, training_N)
    print('Train - Loss: {:.4f} Accuracy: {:.4f}'.format(running_loss, running_accuracy))
    
def validation():
    """Evaluate the model on `validation_loader` and print the totals.

    No gradients are tracked and no weights are updated. The printed loss is
    the sum of the per-batch losses; the printed accuracy is the sum of each
    batch's correct / validation_N, i.e. the fraction of the whole validation
    set that was classified correctly.
    """
    running_loss = 0
    running_accuracy = 0

    model.eval()  # inference mode: dropout off, batch-norm uses running statistics
    with torch.no_grad():
        for inputs, labels in validation_loader:
            logits = model(inputs)
            step_loss = loss_function(logits, labels)

            running_loss += step_loss.item()
            running_accuracy += get_batch_accuracy(logits, labels, validation_N)
    print('Valid - Loss: {:.4f} Accuracy: {:.4f}'.format(running_loss, running_accuracy))

def get_batch_accuracy(output, y, N):
    """Return this batch's share of the overall accuracy.

    Args:
        output: model scores with the class dimension at dim 1.
        y: target class indices, one per sample in the batch.
        N: size of the full dataset the accuracy is measured over.

    Returns:
        The number of samples whose highest-scoring class equals the target,
        divided by `N`. Summing this over every batch of a dataset of size
        `N` gives the dataset-level accuracy.
    """
    predicted = output.argmax(dim=1)
    hits = (predicted == y.view_as(predicted)).sum().item()
    return hits / N

Training

In [15]:
epochs = 20  # number of full passes over the training data

# Alternate one training pass with one validation pass so the two printed
# lines per epoch can be compared directly.
for epoch in range(epochs):
    print('Epoch: {}'.format(epoch))
    training()    # updates the weights; prints summed loss and accuracy
    validation()  # no weight updates; prints summed loss and accuracy

Epoch: 0


Train - Loss: 263.1308 Accuracy: 0.9101
Valid - Loss: 27.6110 Accuracy: 0.9642
Epoch: 1
Train - Loss: 21.0076 Accuracy: 0.9933
Valid - Loss: 17.1617 Accuracy: 0.9752
Epoch: 2
Train - Loss: 10.5446 Accuracy: 0.9965
Valid - Loss: 33.2016 Accuracy: 0.9526
Epoch: 3
Train - Loss: 6.0426 Accuracy: 0.9983
Valid - Loss: 10.8950 Accuracy: 0.9766
Epoch: 4
Train - Loss: 12.8118 Accuracy: 0.9955
Valid - Loss: 23.4554 Accuracy: 0.9650
Epoch: 5
Train - Loss: 9.3466 Accuracy: 0.9969
Valid - Loss: 31.5810 Accuracy: 0.9534
Epoch: 6
Train - Loss: 2.3197 Accuracy: 0.9993
Valid - Loss: 20.8554 Accuracy: 0.9739
Epoch: 7
Train - Loss: 12.4656 Accuracy: 0.9960
Valid - Loss: 18.9023 Accuracy: 0.9784
Epoch: 8
Train - Loss: 4.0436 Accuracy: 0.9987
Valid - Loss: 25.7996 Accuracy: 0.9587
Epoch: 9
Train - Loss: 3.9236 Accuracy: 0.9988
Valid - Loss: 15.3980 Accuracy: 0.9791
Epoch: 10
Train - Loss: 0.2225 Accuracy: 0.9999
Valid - Loss: 11.3071 Accuracy: 0.9791
Epoch: 11
Train - Loss: 9.5604 Accuracy: 0.9969
Valid - 