# Sign Language Classification using a PyTorch CNN
![image](https://storage.googleapis.com/kagglesdsdata/datasets/3258/5337/amer_sign2.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20210413%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20210413T174110Z&X-Goog-Expires=172799&X-Goog-SignedHeaders=host&X-Goog-Signature=b69ad067b5ff1071d329cd7922d620bda295ba78680512b24c4284bc23d71b785a40e83cf1ddb08e3370b23174393b0b76f7122507ded87d340a5e38432995f3ddde3bbc41ac3ff5267f123b870bdc6c9d9a214c7ffd564b04935d42386100d8fa1a0ca4564e5ecb51cfa4dfb7f973edc71b77276abe409768d4f21705fe440cae4b4b83a21155a60057d50b1508bf68c1576a50579298c5bb435ac574a28ee0f88a4ed5fd3a4a5f03a4cd2a211716c9161d517d3af76a8f8922cc2718b8f3f94bd06266df257361908ab698c9f6e9fdaaa2d91189c2c9e404d0fe1590a93f0eeca9120598e79e2adeca7cb665101d54ca8ca0f20692d2cac5d4532406bce603)
### Data Info
The dataset is stored as a CSV file in which each image is described by 784 pixel values, i.e. an image of size 28 * 28 * 1 with a single color channel.


In [None]:
!pip -q install torchsummary

In [None]:
# imports
import string
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import make_grid
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from torchsummary import summary
from tqdm import tqdm

# notebook-wide matplotlib defaults:
#   - white figure background
#   - 15 x 7 default figure size
matplotlib.rcParams.update({
    'figure.facecolor': '#ffffff',
    'figure.figsize': (15, 7),
})

In [None]:
# load the train and test splits from their CSV files
train_csv = "../input/sign-language-mnist/sign_mnist_train/sign_mnist_train.csv"
test_csv = "../input/sign-language-mnist/sign_mnist_test/sign_mnist_test.csv"
train_df, test_df = pd.read_csv(train_csv), pd.read_csv(test_csv)

Each row in the data represents one image; the first column holds the image's label.

In [None]:
# checkout data: preview the first rows of the training table
train_df.head()

In [None]:
# summary statistics for every column of the training table
train_df.describe()

In [None]:
# column dtypes, non-null counts and memory usage of the training table
train_df.info()

In [None]:
# same overview for the test table
test_df.info()

In [None]:
# lookup table from class index to lowercase letter: 0 -> 'a', 1 -> 'b', ... 25 -> 'z'
alpha_dict = dict(enumerate(string.ascii_lowercase))
alpha_dict

In [None]:
# class distribution of the training labels,
# shown per letter rather than per numeric class index
alpha_labels = train_df["label"].map(alpha_dict.__getitem__)
sns.countplot(x=alpha_labels)
plt.show()

In [None]:
# create custom pytorch dataset class
class SignDataset(Dataset):
    """Dataset serving ``(image, label)`` tensor pairs built from pixel rows.

    Args:
        img: Table/array of pixel values with 784 values per row; each row
            is reshaped to one 28 x 28 x 1 image. Values are divided by 255.0.
        label: Class index for every row of ``img``.

    Attributes:
        classes: ``label`` as a NumPy array, aligned with the image rows.
        img: Scaled images as a float32 array of shape ``(N, 28, 28, 1)``.
        transform: torchvision pipeline applied to each image on access.
    """

    def __init__(self, img, label):
        self.classes = np.array(label)
        # Divide in the input's own precision first, then store as float32.
        # The items are served as float32 anyway, so casting once here yields
        # the same values while halving the memory held by the dataset.
        self.img = np.array(img / 255.0, dtype=np.float32).reshape(-1, 28, 28, 1)
        self.transform = T.Compose([
            T.ToTensor()
        ])

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img)

    def __getitem__(self, index):
        """Return sample ``index`` as ``(image, label)``.

        Returns:
            A float32 image tensor produced by ``self.transform`` and a
            one-element int64 tensor holding the class index.
        """
        img = self.transform(self.img[index]).float()
        # one-element label tensor, so a collated batch has shape (batch, 1)
        label = torch.tensor([self.classes[index]], dtype=torch.long)
        return img, label

In [None]:
# wrap both tables in SignDataset: pixel columns become the images,
# the 'label' column becomes the targets
train_set = SignDataset(img=train_df.drop(columns='label'), label=train_df['label'])
test_set = SignDataset(img=test_df.drop(columns='label'), label=test_df['label'])

In [None]:
# show a single image
def show_image(img, label, dataset):
    """Plot one sample and title it with its class index and letter.

    Args:
        img: Channels-first image tensor; it is permuted to channels-last
            before being handed to ``plt.imshow``.
        label: Class index of the sample, either a one-element tensor or a
            plain integer.
        dataset: Dataset the sample came from. Unused; kept so existing
            calls of the form ``show_image(*ds[i], ds)`` keep working.
    """
    # `label` already is the class index. It must not be used to index
    # `dataset.classes`, which is ordered by sample position, not by class.
    class_idx = int(label)
    plt.imshow(img.permute(1, 2, 0))
    print(img.shape)
    plt.axis('off')
    plt.title(f"Label: {class_idx}\nAlpha Label: {alpha_dict[class_idx]}")

In [None]:
import cv2
import os
# read the custom photo with imread flag 0 (grayscale per the OpenCV docs),
# then shrink it to 28 x 28 with area interpolation
raw_fist = cv2.imread('../input/test-fist/test_fist.JPG', 0)
timg = cv2.resize(raw_fist, (28, 28), interpolation=cv2.INTER_AREA)
plt.imshow(timg)

In [None]:
# shape of the custom image once reshaped to (N, 28, 28, 1)
timg.reshape(-1,28, 28, 1).shape

In [None]:
# display sample 4 of the test set
show_image(*test_set[4], test_set)

In [None]:
# display sample 45 of the training set
show_image(*train_set[45], train_set)

In [None]:
batch_size = 128
# Reshuffle the training samples every epoch so consecutive batches are not
# always made of the same rows in the same order.
train_dl = DataLoader(train_set, batch_size=batch_size, shuffle=True)
# Keep the test order fixed: predictions collected batch by batch then line
# up with the indices of test_set.
test_dl = DataLoader(test_set, batch_size=batch_size)

In [None]:
# visualize a batch of images
def show_batch(dl):
    """Draw the first batch yielded by ``dl`` as one image grid, 16 per row.

    Does nothing when ``dl`` yields no batch.
    """
    first_batch = next(iter(dl), None)
    if first_batch is None:
        return
    images, _ = first_batch
    fig, ax = plt.subplots(figsize=(20, 8))
    ax.set_xticks([])
    ax.set_yticks([])
    # make_grid returns a channels-first image; imshow wants channels-last
    grid = make_grid(images, nrow=16)
    ax.imshow(grid.permute(1, 2, 0))

In [None]:
# show the first batch of the training loader as a grid (128 images)
show_batch(train_dl)

In [None]:
# convolutional block with batchnorm, optional max pooling and optional dropout
def conv_block(in_channels, out_channels, pool=False, drop=False):
    """Build a Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU stage.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the convolution.
        pool: Append a ``MaxPool2d(2)`` after the activation.
        drop: Append a default ``Dropout`` as the last layer.

    Returns:
        An ``nn.Sequential`` holding the layers in the order listed above.
    """
    convolution = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
    normalisation = nn.BatchNorm2d(out_channels)
    activation = nn.ReLU(inplace=True)

    stages = [convolution, normalisation, activation]
    stages += [nn.MaxPool2d(2)] if pool else []
    stages += [nn.Dropout()] if drop else []
    return nn.Sequential(*stages)

In [None]:
# network architecture
class SignConvNet(nn.Module):
    """Three stacked conv blocks followed by a flatten + linear classifier.

    Args:
        in_channels: Number of channels of the input images.
        out_classes: Number of scores produced per image.
    """

    def __init__(self, in_channels, out_classes):
        super().__init__()
        # feature extractor: channels grow in_channels -> 16 -> 32 -> 64,
        # the last two stages pool, the last one also applies dropout
        self.conv1 = conv_block(in_channels, 16)
        self.conv2 = conv_block(16, 32, pool=True)
        self.conv3 = conv_block(32, 64, pool=True, drop=True)
        # classifier head: expects 7 * 7 * 64 flattened features per image
        flatten = nn.Flatten()
        classifier = nn.Linear(7 * 7 * 64, out_classes)
        self.fc = nn.Sequential(flatten, classifier)

    def forward(self, img):
        """Run ``img`` through the conv stages and return the class scores."""
        features = img
        for stage in (self.conv1, self.conv2, self.conv3):
            features = stage(features)
        return self.fc(features)

In [None]:
# one output class per entry of the index -> letter mapping
num_classes = len(alpha_dict)

# run on the GPU when one is available, otherwise on the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# network (single input channel), loss and optimizer
model = SignConvNet(1, num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
optim = torch.optim.Adam(model.parameters(), lr=1e-3)

# print per-layer output shapes and memory usage for a 1 x 28 x 28 input
summary(model, (1, 28, 28))

In [None]:
epochs = 10
losses = []  # per epoch: sum of the batch losses
for epoch in range(epochs):
    # Make sure dropout / batch norm are in training mode, even if the model
    # was switched to eval mode by an earlier cell run.
    model.train()
    epoch_loss = 0
    # for custom progress bar
    with tqdm(train_dl, unit="batch") as tepoch:
        tepoch.set_description(f"Epoch {epoch + 1}")
        for data, target in tepoch:
            data, target = data.to(device), target.to(device) # move input to GPU
            optim.zero_grad()
            out = model(data)
            # Targets are batched as (batch, 1). Drop only that trailing axis:
            # a bare squeeze() would also drop the batch axis of a batch of
            # size 1 and make the loss call fail.
            loss = criterion(out, target.squeeze(1))
            loss.backward()
            optim.step()
            batch_loss = loss.item()
            epoch_loss += batch_loss
            tepoch.set_postfix(loss = batch_loss) # show loss per batch of data
    losses.append(epoch_loss)

In [None]:
# line chart of the recorded loss, one point per epoch
sns.set_style("dark")
loss_ax = sns.lineplot(data=losses)
loss_ax.set(
    title="loss change during training",
    xlabel="epoch",
    ylabel="loss",
)
plt.show()

In [None]:
# predict on the testing data; per-batch predictions and true labels are
# collected so overall metrics can be computed afterwards
y_pred_list = []
y_true_list = []
# Inference mode: disables dropout and makes batch norm use its running
# statistics. Without it the predictions are randomly perturbed.
model.eval()
with torch.no_grad():
    with tqdm(test_dl, unit="batch") as tepoch:
        for inp, labels in tepoch:
            inp, labels = inp.to(device), labels.to(device)
            y_test_pred = model(inp)
            # index of the highest score along the class axis = predicted class
            _, y_pred_tag = torch.max(y_test_pred, dim = 1)
            y_pred_list.append(y_pred_tag.cpu().numpy())
            y_true_list.append(labels.cpu().numpy())

In [None]:
# show a single image
def show_image(img, label, dataset):
    """Plot one sample and title it with its class index and letter.

    Args:
        img: Channels-first image tensor; it is permuted to channels-last
            before being handed to ``plt.imshow``.
        label: Class index of the sample, either a one-element tensor or a
            plain integer.
        dataset: Dataset the sample came from. Unused; kept so existing
            calls of the form ``show_image(*ds[i], ds)`` keep working.
    """
    # `label` already is the class index. It must not be used to index
    # `dataset.classes`, which is ordered by sample position, not by class.
    class_idx = int(label)
    plt.imshow(img.permute(1, 2, 0))
    print(img.shape)
    plt.axis('off')
    plt.title(f"Label: {class_idx}\nAlpha Label: {alpha_dict[class_idx]}")

In [None]:
#plt.imshow(train_set[4][0].permute(1,2,0))
#show_image(*train_set[45], train_set)
# shape of one training image after adding a leading batch axis
train_set[4][0].unsqueeze(0).shape

In [None]:
# predict a single training sample on the CPU
t = train_set[4][0].unsqueeze(0)  # add the batch axis the model expects
# Tensor.to returns the moved tensor, so the result has to be kept
t = t.to('cpu')
model.to('cpu')
# inference mode: no dropout, batch norm uses running statistics
model.eval()
with torch.no_grad():
    y_pred = model(t)
# (max score, predicted class index) along the class axis
torch.max(y_pred, dim = 1)

In [None]:
# merge the per-batch prediction / label arrays into two flat lists,
# one entry per test sample
flat_pred = []
flat_true = []
for pred_batch, true_batch in zip(y_pred_list, y_true_list):
    flat_pred.extend(pred_batch)
    flat_true.extend(true_batch)

print(f"number of testing samples results: {len(flat_pred)}")

In [None]:
# overall accuracy across all test samples, printed as a percentage
test_accuracy = accuracy_score(flat_true, flat_pred)
print(f"Testing accuracy is: {test_accuracy * 100:.2f}%")

In [None]:
# Display 15 random pictures of the test set with their true and predicted labels
inds = np.random.randint(len(test_set), size=15)
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(15, 7),
                        subplot_kw={'xticks': [], 'yticks': []})

for i, ax in zip(inds, axes.flat):
    img, label = test_set[i]
    ax.imshow(img.permute(1, 2, 0))
    # `label` and `flat_pred[i]` already are class indices. Using them to
    # index `test_set.classes` (ordered by sample position) would show the
    # labels of unrelated samples.
    true_idx = int(label)
    pred_idx = int(flat_pred[i])
    ax.set_title(f"True: {true_idx}, {alpha_dict[true_idx]}\nPredicted: {pred_idx}, {alpha_dict[pred_idx]}")
plt.tight_layout()
plt.show()

In [None]:
# per-class text report for the test predictions
report_text = classification_report(flat_true, flat_pred)
print(report_text)

In [None]:
# plot confusion matrix
# Row/column k of the matrix stands for the k-th class id that actually
# occurs, not for class id k. Pass the occurring ids explicitly and name the
# axes from them, so letters stay correct when some class id is absent.
cm_true = np.ravel(flat_true)
cm_pred = np.ravel(flat_pred)
cm_labels = np.unique(np.concatenate([cm_true, cm_pred]))
cm_names = [alpha_dict[int(class_idx)] for class_idx in cm_labels]
confusion_matrix_df = pd.DataFrame(
    confusion_matrix(cm_true, cm_pred, labels=cm_labels),
    index=cm_names,
    columns=cm_names,
)
plt.figure(figsize=(20, 10))
sns.heatmap(confusion_matrix_df, annot=True, fmt='').set(title="confusion matrix", xlabel="Predicted Label", ylabel="True Label")
plt.show()

In [None]:
# Turn the custom image into a (1, 1, 28, 28) float batch.
# The training inputs were divided by 255.0, so the same scaling is applied
# here; assumes timg holds 0-255 intensities - TODO confirm.
nimg = torch.tensor(timg.reshape(-1, 1, 28, 28)).float() / 255.0
print(nimg.shape)
plt.imshow(nimg.reshape(28, 28, 1))

In [None]:
# score the custom image in inference mode: no dropout, batch norm uses
# its running statistics, and no gradients are tracked
model.eval()
with torch.no_grad():
    pred = model(nimg)

In [None]:
# move the model to the CPU
model = model.cpu()

In [None]:
# move the custom image tensor to the CPU
nimg = nimg.cpu()

In [None]:
# predicted class of the custom image: index of the highest score per row
answer = torch.max(pred, dim=1).indices
print(answer.numpy())

In [None]:
# predict the first batch of the test loader on the CPU; `batch`, `label`
# and `y_pred_tag` are inspected by the following cells
model.eval()  # inference mode: no dropout, batch norm uses running statistics
for batch, label in test_dl:
    print(batch.shape, label.shape)
    batch, label = batch.to('cpu'), label.to('cpu')
    with torch.no_grad():
        y_pred = model(batch)
    # index of the highest score along the class axis = predicted class
    _, y_pred_tag = torch.max(y_pred, dim = 1)
    print(y_pred_tag)
    print("**")
    break  # only the first batch is needed
    

In [None]:
# first 20 predicted class indices of the batch
y_pred_tag[:20]

In [None]:
# first 20 true labels of the batch, for comparison with the predictions
label[:20]

In [None]:
# look at one sample of the batch: draw the image and print its true label
index = 4
sample_img, sample_label = batch[index], label[index]
plt.imshow(sample_img.reshape(28, 28))
print(sample_label)

In [None]:
# prediction for the first sample of the batch, as a NumPy value
y_pred_tag[0].numpy()

In [None]:
# 8 x 4 grid of the first 32 images of the batch, each titled with its
# true label and the predicted class
fig = plt.figure(figsize=(30, 30))
limit = 32
actual_np = label.numpy()
predicted_np = y_pred_tag.numpy()
for i in range(limit):
    plt.subplot(8, 4, i + 1)
    plt.title(f"actual: {actual_np[i][0]} prediction: {predicted_np[i]}")
    plt.imshow(batch[i].reshape(28, 28))
plt.show()