# Static ASL Interpreter Using RGB Images

In [1]:
# Requirements
# import numpy as np
# import cv2
# from matplotlib import pyplot as plt
# import torch

In [2]:
from tqdm import tqdm
import os
from shutil import copy

for dirname, _, filenames in os.walk('/kaggle/input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/'):
    for filename in tqdm(filenames):
        src_path = os.path.join(dirname, filename)
        label = filename.split('_')[0]
        dest_path = os.path.join("/kaggle/working/asl_alphabet_test/", label, filename)
        os.makedirs('/kaggle/working/asl_alphabet_test/' + label)
        copy(src_path, dest_path)

100%|██████████| 28/28 [00:00<00:00, 173.07it/s]


In [3]:
from enum import Enum

class Alphabet(Enum):
    A = 0
    B = 1
    C = 2
    D = 3
    E = 4
    F = 5
    G = 6
    H = 7
    I = 8
    J = 9
    K = 10
    L = 11
    M = 12
    N = 13
    O = 14
    P = 15
    Q = 16
    R = 17
    S = 18
    T = 19
    U = 20
    V = 21
    W = 22
    X = 23
    Y = 24
    Z = 25
    DEL = 26
    NOTHING = 27
    SPACE = 28

In [4]:
# # CONSTANTS
# IMG_SIZE = 50

In [5]:
# import os
# import numpy as np
# from tqdm import tqdm
# import cv2
# import matplotlib.pyplot as plt

# class StaticASLRGB():
#     TOTAL = 29
#     T_TO_V_RATIO = 0.1
#     train_count = 0
#     validate_count = 0
#     t_count = 0
#     set = []
        
#     def make(self):
#         self.train_count = 0
#         for dirname, _, filenames in os.walk('/kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train/'):
#             label = dirname.split('/')[-1]
#             if(label != ""):
#                 for filename in tqdm(filenames):
#                     path = os.path.join(dirname, filename)
#                     img = cv2.imread(path)
#                     self.train_data.append([np.array(img, dtype=np.uint8), np.eye(29)[Alphabet[label.upper()].value]])
#                     self.train_count += 1
#             else:
#                 pass
                    
#     def shuffle_training(self):
#         shuffled = np.random.shuffle(self.train_data)
#         print("Shuffle Completed!")
#         print(shuffled)
#         return shuffled
    
#     def print_train_set()

In [6]:
# staticASLRGB = StaticASLRGB()
# # x = staticASLRGB.make_train_data()

In [7]:
import torch
from torchvision import datasets, transforms

train_transforms = transforms.Compose([transforms.Resize(224),
                                       transforms.RandomRotation(30),
#                                        transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.5, 0.5, 0.5], 
                                                            [0.5, 0.5, 0.5])])

train_dataset = datasets.ImageFolder('/kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train/', transform=train_transforms)
# print(dataset)

In [8]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2, pin_memory=True)

In [9]:
test_transforms = transforms.Compose([transforms.Resize(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.5, 0.5, 0.5], 
                                                            [0.5, 0.5, 0.5])
                                     ])

test_dataset = datasets.ImageFolder('/kaggle/working/asl_alphabet_test/', transform=test_transforms)

test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [10]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Executing on: {}".format(DEVICE))

Executing on: cuda:0


In [11]:
IMG_SIZE = None
for image, label in train_dataloader:
    IMG_SIZE = image.shape
    print(IMG_SIZE)
    break

torch.Size([64, 3, 224, 224])


In [12]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    _to_linear = None
    def __init__(self):
        super(Net, self).__init__()
        
        # define NET
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.conv2 = nn.Conv2d(64, 128, 3)
        self.conv3 = nn.Conv2d(128, 256, 3)
        self.conv4 = nn.Conv2d(256, 512, 3)
        
        x = torch.randn(1, IMG_SIZE[1], IMG_SIZE[2], IMG_SIZE[3])
        self._to_linear = None
        self.convs(x)
        
        self.fc1 = nn.Linear(self._to_linear, 29)
#         self.fc2 = nn.Linear(4048, 1024)
#         self.fc3 = nn.Linear(1024, 29)
        
        
    
    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv4(x)), (2,2))
                              
        if self._to_linear is None:
            self._to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x
    
    def forward(self, x):
        x = self.convs(x)
        
        x = torch.flatten(x, 1)
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
        x = F.softmax(self.fc1(x), dim=1)
        return x
    
net = Net()
net.to(DEVICE)
print(net)

Net(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=73728, out_features=29, bias=True)
)


In [13]:
import torch.optim as optim

LR = 1e-4
loss_func = nn.CrossEntropyLoss()
optimizer = optim.AdamW(net.parameters(), lr=LR)

In [14]:
def count(outputs, labels):
    _, predicted = torch.max(outputs.data, 1)
    total = labels.size(0)
    correct = (predicted == labels).sum().item()
    return [total, correct]

def print_count(total, correct):
    print('Total   : {}'.format(total))
    print('Correct : {}'.format(correct))
    print('Accuracy:  %.3f' %(100 * correct / total))

In [15]:
def train(net,dataloader, loss_fun, optimizer, epochs=10, info=False):
    total, correct = 0, 0
    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(tqdm(dataloader)):
            inputs, labels = data[0].to(DEVICE), data[1].to(DEVICE)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            c = count(outputs, labels)
            total += c[0]
            correct += c[1]
        if(info):
            print('[epoch: %d] loss: %.3f%%' %(epoch + 1, running_loss))
            print_count(total, correct)
            running_loss = 0.0
            total, correct = 0, 0

    print("Training Completed...")

In [16]:
train(net, train_dataloader, loss_func, optimizer, 60, True)

100%|██████████| 1360/1360 [07:16<00:00,  3.12it/s]


[epoch: 1] loss: 4404.215%
Total   : 87000
Correct : 16085
Accuracy:  18.489


100%|██████████| 1360/1360 [05:19<00:00,  4.26it/s]


[epoch: 2] loss: 4112.126%
Total   : 87000
Correct : 35081
Accuracy:  40.323


100%|██████████| 1360/1360 [05:22<00:00,  4.21it/s]


[epoch: 3] loss: 3967.285%
Total   : 87000
Correct : 44367
Accuracy:  50.997


100%|██████████| 1360/1360 [05:23<00:00,  4.20it/s]


[epoch: 4] loss: 3862.409%
Total   : 87000
Correct : 51062
Accuracy:  58.692


100%|██████████| 1360/1360 [05:25<00:00,  4.18it/s]


[epoch: 5] loss: 3767.457%
Total   : 87000
Correct : 57151
Accuracy:  65.691


100%|██████████| 1360/1360 [05:26<00:00,  4.16it/s]


[epoch: 6] loss: 3715.458%
Total   : 87000
Correct : 60456
Accuracy:  69.490


100%|██████████| 1360/1360 [05:30<00:00,  4.12it/s]


[epoch: 7] loss: 3658.771%
Total   : 87000
Correct : 64112
Accuracy:  73.692


100%|██████████| 1360/1360 [05:32<00:00,  4.08it/s]


[epoch: 8] loss: 3613.554%
Total   : 87000
Correct : 66989
Accuracy:  76.999


100%|██████████| 1360/1360 [05:25<00:00,  4.18it/s]


[epoch: 9] loss: 3568.734%
Total   : 87000
Correct : 69863
Accuracy:  80.302


100%|██████████| 1360/1360 [05:35<00:00,  4.05it/s]


[epoch: 10] loss: 3531.203%
Total   : 87000
Correct : 72244
Accuracy:  83.039


100%|██████████| 1360/1360 [05:30<00:00,  4.12it/s]


[epoch: 11] loss: 3503.778%
Total   : 87000
Correct : 74005
Accuracy:  85.063


100%|██████████| 1360/1360 [05:30<00:00,  4.11it/s]


[epoch: 12] loss: 3487.859%
Total   : 87000
Correct : 75003
Accuracy:  86.210


100%|██████████| 1360/1360 [05:27<00:00,  4.15it/s]


[epoch: 13] loss: 3479.414%
Total   : 87000
Correct : 75503
Accuracy:  86.785


100%|██████████| 1360/1360 [05:31<00:00,  4.10it/s]


[epoch: 14] loss: 3476.297%
Total   : 87000
Correct : 75691
Accuracy:  87.001


100%|██████████| 1360/1360 [05:31<00:00,  4.10it/s]


[epoch: 15] loss: 3472.553%
Total   : 87000
Correct : 75884
Accuracy:  87.223


100%|██████████| 1360/1360 [05:45<00:00,  3.93it/s]


[epoch: 16] loss: 3469.947%
Total   : 87000
Correct : 76033
Accuracy:  87.394


100%|██████████| 1360/1360 [05:45<00:00,  3.94it/s]


[epoch: 17] loss: 3457.270%
Total   : 87000
Correct : 76876
Accuracy:  88.363


100%|██████████| 1360/1360 [05:39<00:00,  4.00it/s]


[epoch: 18] loss: 3422.132%
Total   : 87000
Correct : 79135
Accuracy:  90.960


100%|██████████| 1360/1360 [05:35<00:00,  4.05it/s]


[epoch: 19] loss: 3404.779%
Total   : 87000
Correct : 80226
Accuracy:  92.214


100%|██████████| 1360/1360 [05:42<00:00,  3.98it/s]


[epoch: 20] loss: 3396.874%
Total   : 87000
Correct : 80746
Accuracy:  92.811


100%|██████████| 1360/1360 [05:41<00:00,  3.98it/s]


[epoch: 21] loss: 3390.214%
Total   : 87000
Correct : 81146
Accuracy:  93.271


100%|██████████| 1360/1360 [05:42<00:00,  3.97it/s]


[epoch: 22] loss: 3383.686%
Total   : 87000
Correct : 81569
Accuracy:  93.757


100%|██████████| 1360/1360 [05:45<00:00,  3.93it/s]


[epoch: 23] loss: 3380.931%
Total   : 87000
Correct : 81737
Accuracy:  93.951


100%|██████████| 1360/1360 [05:36<00:00,  4.04it/s]


[epoch: 24] loss: 3381.180%
Total   : 87000
Correct : 81724
Accuracy:  93.936


100%|██████████| 1360/1360 [05:33<00:00,  4.08it/s]


[epoch: 25] loss: 3379.409%
Total   : 87000
Correct : 81820
Accuracy:  94.046


100%|██████████| 1360/1360 [05:28<00:00,  4.14it/s]


[epoch: 26] loss: 3378.843%
Total   : 87000
Correct : 81849
Accuracy:  94.079


100%|██████████| 1360/1360 [05:28<00:00,  4.14it/s]


[epoch: 27] loss: 3378.729%
Total   : 87000
Correct : 81865
Accuracy:  94.098


100%|██████████| 1360/1360 [05:34<00:00,  4.07it/s]


[epoch: 28] loss: 3377.487%
Total   : 87000
Correct : 81940
Accuracy:  94.184


100%|██████████| 1360/1360 [05:31<00:00,  4.10it/s]


[epoch: 29] loss: 3376.448%
Total   : 87000
Correct : 82011
Accuracy:  94.266


100%|██████████| 1360/1360 [05:35<00:00,  4.05it/s]


[epoch: 30] loss: 3375.994%
Total   : 87000
Correct : 82037
Accuracy:  94.295


100%|██████████| 1360/1360 [05:52<00:00,  3.86it/s]


[epoch: 31] loss: 3371.198%
Total   : 87000
Correct : 82336
Accuracy:  94.639


100%|██████████| 1360/1360 [05:42<00:00,  3.97it/s]


[epoch: 32] loss: 3371.022%
Total   : 87000
Correct : 82322
Accuracy:  94.623


100%|██████████| 1360/1360 [05:36<00:00,  4.05it/s]


[epoch: 33] loss: 3369.390%
Total   : 87000
Correct : 82447
Accuracy:  94.767


100%|██████████| 1360/1360 [05:25<00:00,  4.18it/s]


[epoch: 34] loss: 3368.642%
Total   : 87000
Correct : 82491
Accuracy:  94.817


100%|██████████| 1360/1360 [05:39<00:00,  4.00it/s]


[epoch: 35] loss: 3367.418%
Total   : 87000
Correct : 82573
Accuracy:  94.911


100%|██████████| 1360/1360 [05:30<00:00,  4.11it/s]


[epoch: 36] loss: 3368.421%
Total   : 87000
Correct : 82504
Accuracy:  94.832


100%|██████████| 1360/1360 [05:26<00:00,  4.16it/s]


[epoch: 37] loss: 3367.435%
Total   : 87000
Correct : 82568
Accuracy:  94.906


100%|██████████| 1360/1360 [05:29<00:00,  4.13it/s]


[epoch: 38] loss: 3367.612%
Total   : 87000
Correct : 82552
Accuracy:  94.887


100%|██████████| 1360/1360 [05:34<00:00,  4.07it/s]


[epoch: 39] loss: 3365.660%
Total   : 87000
Correct : 82675
Accuracy:  95.029


100%|██████████| 1360/1360 [05:29<00:00,  4.12it/s]


[epoch: 40] loss: 3363.563%
Total   : 87000
Correct : 82816
Accuracy:  95.191


100%|██████████| 1360/1360 [05:25<00:00,  4.18it/s]


[epoch: 41] loss: 3359.929%
Total   : 87000
Correct : 83051
Accuracy:  95.461


100%|██████████| 1360/1360 [05:25<00:00,  4.17it/s]


[epoch: 42] loss: 3360.284%
Total   : 87000
Correct : 83029
Accuracy:  95.436


100%|██████████| 1360/1360 [05:26<00:00,  4.17it/s]


[epoch: 43] loss: 3359.754%
Total   : 87000
Correct : 83061
Accuracy:  95.472


100%|██████████| 1360/1360 [05:27<00:00,  4.15it/s]


[epoch: 44] loss: 3358.055%
Total   : 87000
Correct : 83160
Accuracy:  95.586


100%|██████████| 1360/1360 [05:45<00:00,  3.93it/s]


[epoch: 45] loss: 3359.388%
Total   : 87000
Correct : 83068
Accuracy:  95.480


100%|██████████| 1360/1360 [05:36<00:00,  4.04it/s]


[epoch: 46] loss: 3357.995%
Total   : 87000
Correct : 83162
Accuracy:  95.589


100%|██████████| 1360/1360 [05:38<00:00,  4.02it/s]


[epoch: 47] loss: 3356.932%
Total   : 87000
Correct : 83218
Accuracy:  95.653


100%|██████████| 1360/1360 [05:38<00:00,  4.02it/s]


[epoch: 48] loss: 3348.216%
Total   : 87000
Correct : 83794
Accuracy:  96.315


100%|██████████| 1360/1360 [05:38<00:00,  4.01it/s]


[epoch: 49] loss: 3327.983%
Total   : 87000
Correct : 85088
Accuracy:  97.802


100%|██████████| 1360/1360 [05:34<00:00,  4.07it/s]


[epoch: 50] loss: 3319.097%
Total   : 87000
Correct : 85661
Accuracy:  98.461


100%|██████████| 1360/1360 [05:25<00:00,  4.17it/s]


[epoch: 51] loss: 3315.915%
Total   : 87000
Correct : 85854
Accuracy:  98.683


100%|██████████| 1360/1360 [05:29<00:00,  4.13it/s]


[epoch: 52] loss: 3314.959%
Total   : 87000
Correct : 85919
Accuracy:  98.757


100%|██████████| 1360/1360 [05:35<00:00,  4.05it/s]


[epoch: 53] loss: 3314.218%
Total   : 87000
Correct : 85961
Accuracy:  98.806


100%|██████████| 1360/1360 [05:26<00:00,  4.17it/s]


[epoch: 54] loss: 3312.821%
Total   : 87000
Correct : 86054
Accuracy:  98.913


100%|██████████| 1360/1360 [05:27<00:00,  4.16it/s]


[epoch: 55] loss: 3313.621%
Total   : 87000
Correct : 85994
Accuracy:  98.844


100%|██████████| 1360/1360 [05:37<00:00,  4.03it/s]


[epoch: 56] loss: 3312.887%
Total   : 87000
Correct : 86046
Accuracy:  98.903


100%|██████████| 1360/1360 [05:35<00:00,  4.05it/s]


[epoch: 57] loss: 3312.713%
Total   : 87000
Correct : 86055
Accuracy:  98.914


100%|██████████| 1360/1360 [05:28<00:00,  4.14it/s]


[epoch: 58] loss: 3311.187%
Total   : 87000
Correct : 86157
Accuracy:  99.031


100%|██████████| 1360/1360 [05:29<00:00,  4.13it/s]


[epoch: 59] loss: 3312.917%
Total   : 87000
Correct : 86041
Accuracy:  98.898


100%|██████████| 1360/1360 [05:29<00:00,  4.13it/s]

[epoch: 60] loss: 3311.083%
Total   : 87000
Correct : 86168
Accuracy:  99.044
Training Completed...





In [17]:
PATH = '/kaggle/working/static_asl_rgb.pth'
torch.save(net.state_dict(), PATH)

In [18]:
def test(net, dataloader):
    correct = 0
    total = 0

    with torch.no_grad():
        for data in dataloader:
            images, labels = data[0].to(DEVICE), data[1].to(DEVICE)

            outputs = net(images)

            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print_count(total, correct)
    print('Better than random: {}'.format(100 * 1 / 28 < (100 * correct / total)))

In [19]:
test(net, test_dataloader)

Total   : 28
Correct : 26
Accuracy:  92.857
Better than random: True
