## Baseline CNN Model

In [None]:
!pip install opendatasets --upgrade --quiet # To download the opendatasets library

In [None]:
import opendatasets as od

from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder

import os 
import shutil

import torch.utils.data as td
import random, time
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
import torchvision.transforms as tt
from torch.autograd import Variable
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision import models

from tqdm.notebook import tqdm, trange
import matplotlib.pyplot as plt
import numpy as np
import copy
%matplotlib inline

In [None]:
# Kaggle page of the ASL alphabet dataset.
dataset_url = 'https://www.kaggle.com/grassknoted/asl-alphabet'

# Drop any earlier copy so the download below always starts from an empty location.
_previous_copy = './asl-alphabet'
if os.path.exists(_previous_copy):
    shutil.rmtree(_previous_copy)

# Prompts for the Kaggle username and the public API key, then downloads the dataset.
od.download(dataset_url)

In [None]:
TRAIN_DATA_PATH = './asl-alphabet/asl_alphabet_train/asl_alphabet_train'

# Fixed seed so the train/validation split is identical on every run.
SPLIT_SEED = 42

# Preprocessing applied to training images.
TRANSFORM_IMG = transforms.Compose([
    transforms.Resize(128),      # resize the image to the size 128
    transforms.RandomCrop(128),  # apply random cropping
    transforms.Grayscale(1),     # convert the image to single-channel grayscale
    transforms.ToTensor(),       # convert the PIL Image object to a torch tensor
    # One-element tuples: "(0.485)" is just a float, "(0.485,)" is the per-channel sequence.
    transforms.Normalize((0.485,), (0.229,)),
])

# Preprocessing applied to validation images: same pipeline, but with a
# deterministic centre crop so validation metrics do not depend on random cropping.
TRANSFORM_IMG_VALID = transforms.Compose([
    transforms.Resize(128),
    transforms.CenterCrop(128),
    transforms.Grayscale(1),
    transforms.ToTensor(),
    transforms.Normalize((0.485,), (0.229,)),
])

# Load the dataset and attach the training transformation.
train_data_l = datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=TRANSFORM_IMG)

# 80% of the samples go to training, the remainder to validation.
n_train_examples = int(len(train_data_l) * 0.8)
n_valid_examples = len(train_data_l) - n_train_examples

train_data, valid_data = data.random_split(
    train_data_l,
    [n_train_examples, n_valid_examples],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Use the deterministic transform for validation. The deep copy gives the
# validation subset its own dataset object, so replacing its transform does not
# change the transform used by the training subset.
valid_data = copy.deepcopy(valid_data)
valid_data.dataset.transform = TRANSFORM_IMG_VALID

In [None]:
# Class names reported by the training ImageFolder; indexed by predicted class index further down.
class_list = train_data_l.classes # List of all the classes

In [None]:
# Relative to the working directory, like TRAIN_DATA_PATH, so the notebook does
# not depend on being run from /content.
TEST_DATA_PATH = './asl-alphabet/asl_alphabet_test'

# Test-time preprocessing: deterministic (centre crop instead of random crop)
# so repeated evaluations of the same image give the same input tensor.
TRANSFORM_IMG_TEST = transforms.Compose([
    transforms.Resize(128),
    transforms.CenterCrop(128),
    transforms.Grayscale(1),
    transforms.ToTensor(),
    # One-element tuples: one mean / std value for the single grayscale channel.
    transforms.Normalize((0.485,), (0.229,)),
])

test_data_l = datasets.ImageFolder(root=TEST_DATA_PATH, transform=TRANSFORM_IMG_TEST)

# Ground-truth class name of each test image, compared against the predictions at the end.
# NOTE(review): assumes the ImageFolder sample order matches this list — confirm against test_data_l.samples.
test_labels = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','nothing','space']

In [None]:
# Mini-batch size used by the data loaders of this notebook.
BATCH_SIZE = 32

# No shuffle argument is given, so test samples are served in dataset order.
test_iterator = data.DataLoader(dataset=test_data_l, batch_size=BATCH_SIZE)

Check the number of samples in each dataset

In [None]:
# Report the size of every split. The test dataset is named `test_data_l`
# (there is no `test_data` variable in this notebook).
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data_l)}')

Define a function to plot images

In [None]:
def plot_images(images, labels, classes, normalize=False):
    """Draw every image of ``images`` in a grid, titled with its class name.

    Args:
        images: Sequence of image tensors. Each tensor is permuted with
            ``permute(1, 2, 0)`` and index 0 of the resulting last axis is
            shown in grayscale.
        labels: Sequence of class indices, aligned with ``images``.
        classes: Sequence mapping a class index to its display name.
        normalize: When True, each image is rescaled to the [0, 1) range from
            its own minimum and maximum before being displayed.

    The input tensors are never modified: normalisation works on a new tensor.
    """
    n_images = len(images)

    fig = plt.figure(figsize=(10, 10))

    if n_images == 0:
        # Nothing to draw; leave the empty figure.
        return

    # Near-square grid that is large enough for *all* images
    # (a rows x rows grid would silently drop images when n is not a perfect square).
    rows = int(np.sqrt(n_images))
    cols = -(-n_images // rows)  # ceiling division

    for i in range(n_images):

        ax = fig.add_subplot(rows, cols, i + 1)

        image = images[i]

        if normalize:
            image_min = image.min()
            image_max = image.max()
            # Out-of-place arithmetic so the caller's tensor is left untouched;
            # the small epsilon avoids dividing by zero for constant images.
            image = (image - image_min) / (image_max - image_min + 1e-5)

        ax.imshow(image.permute(1, 2, 0).cpu().numpy()[:, :, 0], cmap='gray')
        ax.set_title(classes[labels[i]])
        ax.axis('off')

In [None]:
N_IMAGES = 30

# Fetch the first N_IMAGES (image, label) pairs of the training split,
# then separate them into a tuple of images and a tuple of labels.
_preview_samples = [train_data[idx] for idx in range(N_IMAGES)]
images, labels = zip(*_preview_samples)

classes = train_data_l.classes

plot_images(images, labels, classes, normalize=True)

Define the model: a small convolutional neural network (CNN)

In [None]:
output_dim = 29


class Net(nn.Module):
    """Baseline CNN: two conv / batch-norm / ReLU / max-pool stages and one linear layer.

    Args:
        num_classes: Number of output logits. Defaults to the module-level
            ``output_dim`` (read when the model is constructed).
        image_size: Spatial size of the single-channel input images, either an
            int (square images) or a ``(height, width)`` pair. Defaults to 128,
            which yields the original ``4 * 32 * 32`` input features for the
            linear layer.
    """

    def __init__(self, num_classes=None, image_size=128):
        super(Net, self).__init__()

        if num_classes is None:
            num_classes = output_dim

        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        self.cnn_layers = nn.Sequential(
            # First 2D convolution stage: 1 -> 4 channels, spatial size halved by the pool
            nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Second 2D convolution stage: 4 -> 4 channels, spatial size halved again
            nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # The convolutions keep the spatial size (3x3 kernel, padding 1); each of
        # the two 2x2 / stride-2 pools halves it, rounding down.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2

        self.linear_layers = nn.Sequential(
            nn.Linear(4 * pooled_height * pooled_width, num_classes)
        )

    # Defining the forward pass
    def forward(self, x):
        """Return the class logits for a batch ``x`` of single-channel images."""
        x = self.cnn_layers(x)
        # Flatten everything except the batch dimension (also works on non-contiguous tensors).
        x = torch.flatten(x, start_dim=1)
        x = self.linear_layers(x)
        return x

Create the training and validation iterators (using the batch size defined above), instantiate the model, and initialize its parameters

In [None]:
# Training batches are drawn in shuffled order; validation batches are not shuffled.
train_iterator = data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_iterator = data.DataLoader(dataset=valid_data, batch_size=BATCH_SIZE)

# Number of classes found in the training folder (Net() below is built without it).
OUTPUT_DIM = len(train_data_l.classes)

# Instantiate the baseline CNN.
model = Net()

def initialize_parameters(m):
    """Initialise one module in place; meant to be passed to ``model.apply``.

    * ``nn.Conv2d``: Kaiming-normal weights (ReLU non-linearity).
    * ``nn.Linear``: Xavier-normal weights scaled with the ReLU gain.
    * In both cases the bias, when the layer has one, is set to zero.
    * Any other module type is left untouched.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
    elif isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight, gain=nn.init.calculate_gain('relu'))
    else:
        return

    # Layers created with bias=False have `bias is None`; skip them instead of crashing.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0)

# Run initialize_parameters on the model via nn.Module.apply.
model.apply(initialize_parameters) # Initializing the model parameters

In [None]:
# Find what device to run the model on: the GPU when available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model and the loss to the device *before* constructing the optimizer,
# as recommended by the torch.optim documentation.
model = model.to(device)
criterion = nn.CrossEntropyLoss()  # Cross entropy loss for calculating the loss
criterion = criterion.to(device)

# Adam optimizer with a learning rate of 0.001
optimizer = optim.Adam(model.parameters(), lr=1e-3)

Define a function to calculate the accuracy

In [None]:
def calculate_accuracy(y_pred, y):
    """Return the share of predictions that match the targets.

    Args:
        y_pred: Score tensor; the predicted class is the arg-max along dim 1.
        y: Target tensor holding the expected class indices.

    Returns:
        A float tensor: number of matches divided by ``y.shape[0]``.
    """
    predicted = y_pred.argmax(dim=1)
    n_correct = (predicted == y.view_as(predicted)).sum()
    return n_correct.float() / y.shape[0]

Define the training process

In [None]:
def train(model, iterator, optimizer, criterion, device):
    """Run one training epoch and return the average loss and accuracy.

    Args:
        model: Network to optimise; put into training mode here.
        iterator: Iterable of ``(x, y)`` batches.
        optimizer: Optimizer stepping the model's parameters.
        criterion: Loss called as ``criterion(y_pred, y)``. The per-sample
            averaging below assumes it returns the mean over the batch.
        device: Device the batches are moved to.

    Returns:
        ``(loss, accuracy)`` averaged over all samples seen in the epoch.
        Batches are weighted by their size, so a smaller final batch does not
        bias the result.

    Raises:
        ZeroDivisionError: If ``iterator`` yields no samples.
    """
    total_loss = 0.0
    total_acc = 0.0
    n_samples = 0

    model.train()

    for (x, y) in tqdm(iterator, desc="Training", leave=False):

        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()

        y_pred = model(x)

        loss = criterion(y_pred, y)

        acc = calculate_accuracy(y_pred, y)

        loss.backward()

        optimizer.step()

        # Weight the batch means by the batch size to obtain true per-sample averages.
        batch_size = y.shape[0]
        total_loss += loss.item() * batch_size
        total_acc += acc.item() * batch_size
        n_samples += batch_size

    return total_loss / n_samples, total_acc / n_samples

def evaluate(model, iterator, criterion, device):
    """Evaluate the model without gradient tracking; return average loss and accuracy.

    Args:
        model: Network to evaluate; put into evaluation mode here.
        iterator: Iterable of ``(x, y)`` batches.
        criterion: Loss called as ``criterion(y_pred, y)``. The per-sample
            averaging below assumes it returns the mean over the batch.
        device: Device the batches are moved to.

    Returns:
        ``(loss, accuracy)`` averaged over all samples. Batches are weighted by
        their size, so a smaller final batch does not bias the result.

    Raises:
        ZeroDivisionError: If ``iterator`` yields no samples.
    """
    total_loss = 0.0
    total_acc = 0.0
    n_samples = 0

    # Evaluation mode: layers such as BatchNorm switch to their inference behaviour.
    model.eval()

    with torch.no_grad():

        for (x, y) in tqdm(iterator, desc="Evaluating", leave=False):

            x = x.to(device)
            y = y.to(device)

            y_pred = model(x)

            loss = criterion(y_pred, y)

            acc = calculate_accuracy(y_pred, y)

            # Weight the batch means by the batch size to obtain true per-sample averages.
            batch_size = y.shape[0]
            total_loss += loss.item() * batch_size
            total_acc += acc.item() * batch_size
            n_samples += batch_size

    return total_loss / n_samples, total_acc / n_samples

In [None]:
def epoch_time(start_time, end_time):
    """Split the span between two timestamps (in seconds) into minutes and seconds.

    Args:
        start_time: Timestamp taken at the start, in seconds.
        end_time: Timestamp taken at the end, in seconds.

    Returns:
        ``(minutes, seconds)`` as integers, both truncated towards zero.
    """
    duration = end_time - start_time
    minutes = int(duration / 60)
    seconds = int(duration - (minutes * 60))
    return minutes, seconds

In [None]:
EPOCHS = 20

# Lowest validation loss seen so far; decides when a checkpoint is written.
best_valid_loss = float('inf')

# Per-epoch history of the metrics, used for the curves plotted afterwards.
list_training_loss, list_val_loss = [], []
list_training_acc, list_val_acc = [], []

for epoch in trange(EPOCHS, desc="Epochs"):

    start_time = time.monotonic()

    # One pass over the training split, then one over the validation split.
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)

    list_training_loss.append(train_loss)
    list_training_acc.append(train_acc)
    list_val_loss.append(valid_loss)
    list_val_acc.append(valid_acc)

    # Keep only the weights of the epoch with the best validation loss.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'baseline_cnn-model.pt')

    end_time = time.monotonic()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

In [None]:
# Accuracy per epoch.
plt.plot(list_training_acc)
plt.plot(list_val_acc)
plt.legend(["Training Acc", 'Validation Acc'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Baseline CNN Accuracy performance')
plt.show()

# Loss per epoch (the legend previously mislabelled these curves as accuracy).
plt.plot(list_training_loss)
plt.plot(list_val_loss)
plt.legend(["Training Loss", 'Validation Loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Baseline CNN Loss')
plt.show()

Test the model

In [None]:
# Restore the best checkpoint from the same relative path the training loop saved it to;
# map_location places the loaded tensors on the active device.
model.load_state_dict(torch.load('baseline_cnn-model.pt', map_location=device))

# NOTE(review): this scores the predictions against the labels ImageFolder assigned to the
# test images. The notebook later compares predictions with the hand-written `test_labels`
# instead, which suggests the ImageFolder labels may not be the true classes — confirm
# before relying on this number.
test_loss, test_acc = evaluate(model, test_iterator, criterion, device)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

In [None]:
def get_predictions(model, iterator, device):
    """Run the model over every batch and collect the inputs and class probabilities.

    Args:
        model: Network to run; put into evaluation mode here.
        iterator: Iterable of ``(x, label)`` batches. The labels are ignored.
        device: Device the batches are moved to for the forward pass.

    Returns:
        ``(images, probs)``: the input batches and the softmax of the model
        outputs (taken over the last dimension), each concatenated along
        dim 0 and moved to the CPU.
    """
    model.eval()

    images = []
    probs = []

    with torch.no_grad():

        # Iterate over the batches only. Zipping them with the per-sample
        # `test_labels` list would stop after len(test_labels) batches and tie
        # this function to a module-level variable it never uses.
        for (x, _) in iterator:

            x = x.to(device)

            y_pred = model(x)

            y_prob = F.softmax(y_pred, dim=-1)

            images.append(x.cpu())
            probs.append(y_prob.cpu())

    images = torch.cat(images, dim=0)
    probs = torch.cat(probs, dim=0)

    return images, probs

In [None]:
# Class probabilities for every test image, then the most probable class index per image.
images, probs = get_predictions(model, test_iterator, device)
pred_labels = probs.argmax(dim=1)

In [None]:


# Translate every predicted class index into its class name.
pred_labels_f = [class_list[label_idx] for label_idx in pred_labels]

In [None]:
def acc(true, pred):
    """Return the fraction of positions at which ``true`` and ``pred`` agree.

    Args:
        true: Sequence of expected values.
        pred: Sequence of predicted values; must be as long as ``true``.

    Returns:
        Number of equal pairs divided by ``len(true)``.
    """
    assert len(true) == len(pred)
    matches = sum(1 for expected, predicted in zip(true, pred) if expected == predicted)
    return matches / len(true)

In [None]:
# Test accuracy: share of predicted class names that equal the expected ones.
acc(true=test_labels, pred=pred_labels_f)