In [1]:
import cv2
import os
import torch, torchvision
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
from PIL import Image
import torchvision.transforms as transforms
from tqdm import tqdm
import matplotlib.pyplot as plt
from torchinfo import summary
import numpy as np

In [2]:
# !pip install git+https://github.com/davidbau/baukit

In [3]:
from baukit import show

## Cuting the frame into 48x48

In [None]:
# # Setting up the directory to transfer the data to
# directory = 'SignImage48x48'
# if not os.path.exists(directory):
#     os.mkdir(directory)
# if not os.path.exists(f'{directory}/blank'):
#     os.mkdir(f'{directory}/blank')

In [None]:
# # range 65 to 91 is just the alphabet from A to Z when transform from into character
# for i in range(65, 91):
#     letter = chr(i)
#     if not os.path.exists(f'{directory}/{letter}'):
#         os.mkdir(f'{directory}/{letter}')

In [None]:
# cap = cv2.VideoCapture(0)
# while True:
#     _, frame = cap.read()
#     count = {}

## ASL CNN MODEL

In [4]:
class ASL_Model(nn.Module):
    def __init__(self, num_classes):
        super(ASL_Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 128, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.dropout = nn.Dropout(p=0.4)
        self.conv2 = nn.Conv2d(128, 256, kernel_size=3)
        self.conv3 = nn.Conv2d(256, 512, kernel_size=3)
        self.conv4 = nn.Conv2d(512, 512, kernel_size=3)
        
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128, 512)  # Adjust the size accordingly
        self.dropout1 = nn.Dropout(p=0.4)
        self.fc2 = nn.Linear(512, 64)
        self.dropout2 = nn.Dropout(p=0.2)
        self.fc3 = nn.Linear(64, 256)
        self.dropout3 = nn.Dropout(p=0.3)
        self.fc4 = nn.Linear(256, 64)
        self.dropout4 = nn.Dropout(p=0.2)
        self.fc5 = nn.Linear(64, 256)
        self.dropout5 = nn.Dropout(p=0.3)
        self.fc6 = nn.Linear(256, 6)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = self.dropout(x)
        
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = self.dropout(x)
        
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        
        x = F.relu(self.conv4(x))
        x = self.pool(x)
        x = self.dropout(x)
        
        x = self.flatten(x)
        
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = F.relu(self.fc3(x))
        x = self.dropout3(x)
        x = F.relu(self.fc4(x))
        x = self.dropout4(x)
        x = F.relu(self.fc5(x))
        x = self.dropout5(x)
        
        x = self.fc6(x)
        return F.softmax(x, dim=num_classes)


## Loading the ASL Dataset

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [29]:
train_path = "aslsigndataset/splitdataset48x48/train"
val_path = "aslsigndataset/splitdataset48x48/val"

In [30]:
train_set = ImageFolder(train_path, transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
#     transforms.Resize((48, 48)),
transforms.ToTensor()]))

val_set = ImageFolder(val_path, transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
#     transforms.Resize((48, 48)),
transforms.ToTensor()]))

print("Number of images in the training set =", len(train_set))

Number of images in the training set = 1473


In [18]:
idx = 1470
item = train_set[idx]
print(f"{idx}th item is a pair", item)

1470th item is a pair (tensor([[[0.8863, 0.7529, 0.7843,  ..., 0.3765, 0.3373, 0.3804],
         [0.8706, 0.7843, 0.6863,  ..., 0.2392, 0.2431, 0.3137],
         [0.8000, 0.7412, 0.8196,  ..., 0.2392, 0.2275, 0.3020],
         ...,
         [0.4000, 0.2667, 0.2902,  ..., 0.6196, 0.6275, 0.6549],
         [0.3922, 0.2627, 0.2392,  ..., 0.3765, 0.3804, 0.4314],
         [0.4471, 0.3294, 0.2706,  ..., 0.4471, 0.4588, 0.5373]]]), 5)


In [19]:
train_set[1470][0].shape

torch.Size([1, 48, 48])

In [31]:
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size = 128,
    shuffle = True,
    num_workers=2,
    pin_memory = True
)

val_loader = torch.utils.data.DataLoader(
    val_set,
    batch_size = 128,
    shuffle = True,
    num_workers=2,
    pin_memory = True
)

In [21]:
images, labels = next(train_loader.__iter__())
print(f"{images.shape=}, {labels.shape=}")

images.shape=torch.Size([128, 1, 48, 48]), labels.shape=torch.Size([128])


In [22]:
def train_model(model, train_loader, loss_fn, optimizer):
    model.train()
    # initiate a loss monitor
    train_loss = []
    correct_predictions = 0

    for images, labels in train_loader:
        # predict the class
        images, labels = images.to(device), labels.to(device)
        predicted = model(images)
        loss = loss_fn(predicted, labels)
        correct_predictions += (predicted.argmax(dim=1) == labels).sum().item()
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss.append(loss.item())
    
    return np.mean(train_loss), correct_predictions / len(train_loader.dataset)

In [36]:
def evaluate_model(model, val_loader, loss_fn, return_confusion_matrix = False):
    model.eval()
    val_loss = []
    correct_predictions = 0
    
    if return_confusion_matrix:
        confusion_matrix = torch.zeros(
            len(val_loader.dataset.classes), len(val_loader.dataset.classes))
        
    for images, labels in val_loader:
            # predict the class
            images, labels = images.to(device), labels.to(device)
            predicted = model(images)
            loss = loss_fn(predicted, labels)
            correct_predictions += (predicted.argmax(dim=1) == labels).sum().item() 
            val_loss.append(loss.item())
            
    val_loss = np.mean(val_loss)
    val_acc = correct_predictions/ len(val_loader.dataset)
    
    if return_confusion_matrix:
        return val_loss, val_acc, confusion_matrix
    else:
        return val_loss, val_acc

In [23]:
model = nn.Sequential(
            nn.Conv2d(in_channels = 1, out_channels = 128, kernel_size=3),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(p=0.4),
            nn.Conv2d(128, 256, kernel_size=3),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(256, 512, kernel_size=3),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(512, 512, kernel_size=3),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.AdaptiveAvgPool2d(output_size = (1,1)),
            nn.Flatten(),
            nn.Linear(in_features = 512, out_features = 512),  # Adjust the size accordingly
            nn.ReLU(),
            nn.Dropout(p=0.4),
            nn.Linear(512, 64),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(64, len(train_set.classes)),
            nn.Softmax()
        ).to(device)

In [24]:
def view_network_parameters(model):
    tensor_list = list(model.state_dict().items())
    total_parameters = 0
    print('ModelSummary\n')
    for layer_tensor_name, ternsor in tensor_list:
        total_parameters += int(torch.numel(tensor))
        print('{}: {} elements'.format(layer_tensor_name, torch.numel(tensor)))
    print(f'\nTotal Trainable Parameters: {total_parameters})!')

def view_network_shapes(model, input_shape):
    print(summary(model, input_size = input_shape))

In [25]:
input_shape = (1,1,48,48)
view_network_shapes(model, torch.randn(input_shape).shape)

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [1, 6]                    --
├─Conv2d: 1-1                            [1, 128, 46, 46]          1,280
├─BatchNorm2d: 1-2                       [1, 128, 46, 46]          256
├─ReLU: 1-3                              [1, 128, 46, 46]          --
├─MaxPool2d: 1-4                         [1, 128, 23, 23]          --
├─Dropout: 1-5                           [1, 128, 23, 23]          --
├─Conv2d: 1-6                            [1, 256, 21, 21]          295,168
├─BatchNorm2d: 1-7                       [1, 256, 21, 21]          512
├─ReLU: 1-8                              [1, 256, 21, 21]          --
├─MaxPool2d: 1-9                         [1, 256, 10, 10]          --
├─Conv2d: 1-10                           [1, 512, 8, 8]            1,180,160
├─BatchNorm2d: 1-11                      [1, 512, 8, 8]            1,024
├─ReLU: 1-12                             [1, 512, 8, 8]          

  return self._call_impl(*args, **kwargs)


In [27]:
# model = ASL_Model(len(train_set.classes)).to(device)
epochs = 120
learning_rate = 0.001
weight_decay = 0.001
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay = weight_decay)
training_losses = []


for epoch in tqdm(range(epochs)):
    train_loss, train_acc = train_model(model, train_loader, loss_fn, optimizer)
    training_losses.append(train_loss)
    print(f"epoch {epoch+1}/{epochs} | train loss={np.mean(train_loss):.4f}, {train_acc=:.4f}")

  1%|          | 1/100 [00:06<10:43,  6.50s/it]

epoch 1/100 | train loss=1.1000, train_acc=0.9443


  2%|▏         | 2/100 [00:13<10:38,  6.52s/it]

epoch 2/100 | train loss=1.0917, train_acc=0.9579


  3%|▎         | 3/100 [00:19<10:36,  6.56s/it]

epoch 3/100 | train loss=1.0901, train_acc=0.9572


  4%|▍         | 4/100 [00:26<10:24,  6.50s/it]

epoch 4/100 | train loss=1.0885, train_acc=0.9593


  5%|▌         | 5/100 [00:32<10:15,  6.47s/it]

epoch 5/100 | train loss=1.0907, train_acc=0.9552


  6%|▌         | 6/100 [00:39<10:13,  6.52s/it]

epoch 6/100 | train loss=1.0797, train_acc=0.9667


  7%|▋         | 7/100 [00:45<10:07,  6.54s/it]

epoch 7/100 | train loss=1.0860, train_acc=0.9586


  8%|▊         | 8/100 [00:52<10:04,  6.57s/it]

epoch 8/100 | train loss=1.0814, train_acc=0.9647


  9%|▉         | 9/100 [00:58<09:59,  6.59s/it]

epoch 9/100 | train loss=1.0861, train_acc=0.9599


 10%|█         | 10/100 [01:05<09:51,  6.57s/it]

epoch 10/100 | train loss=1.0842, train_acc=0.9599


 11%|█         | 11/100 [01:12<09:47,  6.60s/it]

epoch 11/100 | train loss=1.0713, train_acc=0.9742


 12%|█▏        | 12/100 [01:18<09:45,  6.66s/it]

epoch 12/100 | train loss=1.0818, train_acc=0.9661


 13%|█▎        | 13/100 [01:25<09:36,  6.63s/it]

epoch 13/100 | train loss=1.0837, train_acc=0.9627


 14%|█▍        | 14/100 [01:32<09:29,  6.62s/it]

epoch 14/100 | train loss=1.0748, train_acc=0.9722


 15%|█▌        | 15/100 [01:38<09:27,  6.67s/it]

epoch 15/100 | train loss=1.0704, train_acc=0.9749


 16%|█▌        | 16/100 [01:45<09:22,  6.70s/it]

epoch 16/100 | train loss=1.0688, train_acc=0.9769


 17%|█▋        | 17/100 [01:52<09:16,  6.70s/it]

epoch 17/100 | train loss=1.0694, train_acc=0.9756


 18%|█▊        | 18/100 [01:59<09:14,  6.77s/it]

epoch 18/100 | train loss=1.0718, train_acc=0.9742


 19%|█▉        | 19/100 [02:06<09:09,  6.78s/it]

epoch 19/100 | train loss=1.0681, train_acc=0.9783


 20%|██        | 20/100 [02:12<08:59,  6.74s/it]

epoch 20/100 | train loss=1.0724, train_acc=0.9749


 21%|██        | 21/100 [02:19<08:53,  6.75s/it]

epoch 21/100 | train loss=1.0684, train_acc=0.9790


 22%|██▏       | 22/100 [02:26<08:43,  6.71s/it]

epoch 22/100 | train loss=1.0716, train_acc=0.9749


 23%|██▎       | 23/100 [02:33<08:47,  6.84s/it]

epoch 23/100 | train loss=1.0742, train_acc=0.9735


 24%|██▍       | 24/100 [02:40<08:43,  6.89s/it]

epoch 24/100 | train loss=1.0690, train_acc=0.9783


 25%|██▌       | 25/100 [02:47<08:36,  6.89s/it]

epoch 25/100 | train loss=1.0725, train_acc=0.9735


 26%|██▌       | 26/100 [02:54<08:31,  6.91s/it]

epoch 26/100 | train loss=1.0735, train_acc=0.9742


 27%|██▋       | 27/100 [03:00<08:20,  6.86s/it]

epoch 27/100 | train loss=1.0693, train_acc=0.9776


 28%|██▊       | 28/100 [03:07<08:03,  6.72s/it]

epoch 28/100 | train loss=1.0689, train_acc=0.9790


 29%|██▉       | 29/100 [03:13<07:56,  6.72s/it]

epoch 29/100 | train loss=1.0669, train_acc=0.9790


 30%|███       | 30/100 [03:20<07:50,  6.72s/it]

epoch 30/100 | train loss=1.0673, train_acc=0.9776


 31%|███       | 31/100 [03:27<07:39,  6.66s/it]

epoch 31/100 | train loss=1.0641, train_acc=0.9837


 32%|███▏      | 32/100 [03:33<07:28,  6.59s/it]

epoch 32/100 | train loss=1.0649, train_acc=0.9790


 33%|███▎      | 33/100 [03:39<07:17,  6.53s/it]

epoch 33/100 | train loss=1.0653, train_acc=0.9823


 34%|███▍      | 34/100 [03:46<07:08,  6.49s/it]

epoch 34/100 | train loss=1.0699, train_acc=0.9776


 35%|███▌      | 35/100 [03:52<07:00,  6.47s/it]

epoch 35/100 | train loss=1.0637, train_acc=0.9810


 36%|███▌      | 36/100 [03:59<06:51,  6.44s/it]

epoch 36/100 | train loss=1.0635, train_acc=0.9823


 37%|███▋      | 37/100 [04:05<06:44,  6.42s/it]

epoch 37/100 | train loss=1.0674, train_acc=0.9790


 38%|███▊      | 38/100 [04:12<06:38,  6.44s/it]

epoch 38/100 | train loss=1.0680, train_acc=0.9783


 39%|███▉      | 39/100 [04:18<06:31,  6.42s/it]

epoch 39/100 | train loss=1.0622, train_acc=0.9844


 40%|████      | 40/100 [04:24<06:24,  6.41s/it]

epoch 40/100 | train loss=1.0641, train_acc=0.9803


 41%|████      | 41/100 [04:31<06:17,  6.40s/it]

epoch 41/100 | train loss=1.0682, train_acc=0.9803


 42%|████▏     | 42/100 [04:37<06:11,  6.40s/it]

epoch 42/100 | train loss=1.0634, train_acc=0.9837


 43%|████▎     | 43/100 [04:43<06:04,  6.39s/it]

epoch 43/100 | train loss=1.0617, train_acc=0.9857


 44%|████▍     | 44/100 [04:50<05:59,  6.42s/it]

epoch 44/100 | train loss=1.0659, train_acc=0.9817


 45%|████▌     | 45/100 [04:56<05:51,  6.40s/it]

epoch 45/100 | train loss=1.0634, train_acc=0.9830


 46%|████▌     | 46/100 [05:03<05:45,  6.39s/it]

epoch 46/100 | train loss=1.0665, train_acc=0.9817


 47%|████▋     | 47/100 [05:09<05:39,  6.41s/it]

epoch 47/100 | train loss=1.0659, train_acc=0.9796


 48%|████▊     | 48/100 [05:15<05:32,  6.39s/it]

epoch 48/100 | train loss=1.0646, train_acc=0.9844


 49%|████▉     | 49/100 [05:22<05:26,  6.41s/it]

epoch 49/100 | train loss=1.0610, train_acc=0.9864


 50%|█████     | 50/100 [05:28<05:19,  6.39s/it]

epoch 50/100 | train loss=1.0615, train_acc=0.9864


 51%|█████     | 51/100 [05:35<05:12,  6.39s/it]

epoch 51/100 | train loss=1.0620, train_acc=0.9851


 52%|█████▏    | 52/100 [05:41<05:07,  6.40s/it]

epoch 52/100 | train loss=1.0595, train_acc=0.9891


 53%|█████▎    | 53/100 [05:47<05:00,  6.40s/it]

epoch 53/100 | train loss=1.0594, train_acc=0.9871


 54%|█████▍    | 54/100 [05:54<04:54,  6.40s/it]

epoch 54/100 | train loss=1.0621, train_acc=0.9851


 55%|█████▌    | 55/100 [06:00<04:47,  6.39s/it]

epoch 55/100 | train loss=1.0617, train_acc=0.9871


 56%|█████▌    | 56/100 [06:07<04:42,  6.42s/it]

epoch 56/100 | train loss=1.0626, train_acc=0.9851


 57%|█████▋    | 57/100 [06:13<04:37,  6.46s/it]

epoch 57/100 | train loss=1.0646, train_acc=0.9817


 58%|█████▊    | 58/100 [06:20<04:34,  6.53s/it]

epoch 58/100 | train loss=1.0560, train_acc=0.9905


 59%|█████▉    | 59/100 [06:26<04:27,  6.52s/it]

epoch 59/100 | train loss=1.0611, train_acc=0.9857


 60%|██████    | 60/100 [06:33<04:18,  6.47s/it]

epoch 60/100 | train loss=1.0593, train_acc=0.9864


 61%|██████    | 61/100 [06:40<04:17,  6.60s/it]

epoch 61/100 | train loss=1.0574, train_acc=0.9891


 62%|██████▏   | 62/100 [06:46<04:12,  6.65s/it]

epoch 62/100 | train loss=1.0571, train_acc=0.9885


 62%|██████▏   | 62/100 [06:49<04:11,  6.61s/it]


KeyboardInterrupt: 

In [39]:
torch.save(model.state_dict(), "C:/Users/Chem's bbi/Downloads/ASL-Sign-Language")


RuntimeError: File archive cannot be opened.