In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from tqdm import tqdm
import matplotlib.pyplot as plt
import random

In [3]:
IMAGE_SIZE = 100

In [1]:
label_mappings = {
    0: 'A',
    1: 'B',
    2: 'C',
    3: 'D',
    4: 'E',
    5: 'F',
    6: 'G',
    7: 'H',
    8: 'I',
#     9: 'J',
    10: 'K',
    11: 'L',
    12: 'M',
    13: 'N',
    14: 'O',
    15: 'P',
    16: 'Q',
    17: 'R',
    18: 'S',
    19: 'T',
    20: 'U',
    21: 'V',
    22: 'W',
    23: 'X',
    24: 'Y',
#     25: 'Z',
#     26: 'del',
#     27: 'space',
#     28: 'nothing'
}


In [4]:
class Net(nn.Module):
    def __init__(self, kernel_size = 5):
        super().__init__() # just run the init of parent class (nn.Module)
        self.kernel_size = kernel_size
        
        self.conv1 = nn.Conv2d(1, 32, self.kernel_size ) # input is 1 image, 32 output channels, 5x5 kernel / window
        self.conv2 = nn.Conv2d(32, 64, self.kernel_size ) # input is 32, bc the first layer output 32. Then we say the output will be 64 channels, 5x5 kernel / window
        self.norm1 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, self.kernel_size )
        self.conv4 = nn.Conv2d(128, 256, self.kernel_size )
        self.conv5 = nn.Conv2d(256, 512, self.kernel_size )
        self.norm2 = nn.BatchNorm2d(512)
  

        x = torch.randn(IMAGE_SIZE,IMAGE_SIZE).view(-1,1,IMAGE_SIZE,IMAGE_SIZE)
        self._to_linear = None
        self.convs(x)

        
        self.dropout = nn.Dropout(0.4)
        self.fc1 = nn.Linear(self._to_linear, 512) #flattening.
        self.fc2 = nn.Linear(512, 256) 
        self.norm1d = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256,len(label_mappings))

    def convs(self, x):
        # average pooling over 2x2
        x = F.avg_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.avg_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = self.norm1(x)
        x = F.avg_pool2d(F.relu(self.conv3(x)), (2, 2))
        x = F.avg_pool2d(F.relu(self.conv4(x)), (2, 2))
        x = F.avg_pool2d(F.relu(self.conv5(x)), (2, 2))
        x = self.norm2(x)
        


        
        
        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x

    def forward(self, x):
        x = self.convs(x)
        x = self.dropout(x)
        x = x.view(-1, self._to_linear)  # .view is reshape ... this flattens X before 
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x)) # bc this is our output layer. No activation here.
        x = self.norm1d(x)
        x = self.fc3(x)
        return F.softmax(x, dim=1)
    
net = Net(kernel_size=1)

In [5]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
torch.cuda.get_device_name(0)
net.to(device)

cuda:0


Net(
  (conv1): Conv2d(1, 32, kernel_size=(1, 1), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
  (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
  (conv5): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1))
  (norm2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.4, inplace=False)
  (fc1): Linear(in_features=4608, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (norm1d): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=256, out_features=24, bias=True)
)

In [6]:
training_data = np.load('mediapipe_all_data.npy',allow_pickle=True)            
np.random.shuffle(training_data)

In [7]:
torch.cuda.empty_cache()   
import gc
try: 
    del X
    del y
except NameError:
    pass
gc.collect()

115

In [8]:
torch.Tensor(training_data[0][0]).view(-1,1,IMAGE_SIZE,IMAGE_SIZE).shape


torch.Size([1, 1, 100, 100])

In [9]:
import torch.optim as optim
X = torch.Tensor([i[0] for i in training_data]).view(-1,1,IMAGE_SIZE,IMAGE_SIZE)
X= X/255.0
y = torch.Tensor([i[1] for i in training_data])

torch.save(X, 'piped_X_tensor.pt')
torch.save(y, 'piped_y_tensor.pt')
# X = torch.load('X_tensor.pt')
# y = torch.load('y_tensor.pt')
# VAL_PCT = 0.33
# val_size = int(len(X)*VAL_PCT)
# print(val_size)

In [30]:
X = X.to(device)
y = y.to(device)

RuntimeError: CUDA out of memory. Tried to allocate 4.38 GiB (GPU 0; 8.00 GiB total capacity; 5.13 GiB already allocated; 1019.62 MiB free; 5.15 GiB reserved in total by PyTorch)

In [None]:
train_X = X[:-val_size]
train_y = y[:-val_size]
test_X = X[-val_size:]
test_y = y[-val_size:]
print(len(train_X),len(test_X))

In [None]:
train_X[0].shape

In [None]:
test_y.shape

In [None]:
71567/128

In [None]:
sum = 0.0
for x in range(560):
    sum+=0.9
print(sum)

In [None]:
def test(x_test, y_test):
    outputs = net(x_test)
   
    acc = acc_score(outputs,y_test)
    loss = loss_function(outputs,y_test)
    return acc,loss
def acc_score(outputs, y_test):
    matches  = [torch.argmax(i)==torch.argmax(j) for i, j in zip(outputs, y_test)]
    acc = matches.count(True)/len(matches)
    return acc


In [31]:
optimizer = optim.Adam(net.parameters(),lr=0.001)
loss_function = nn.BCELoss()
BATCH_SIZE = 128
EPOCHS = 3
VAL_BATCH = 128
for epoch in range(EPOCHS):
    for i in range(0,len(train_X),BATCH_SIZE):
        batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,IMAGE_SIZE,IMAGE_SIZE)
        batch_y = train_y[i:i+BATCH_SIZE]
        
        net.zero_grad()
        outputs = net(batch_X)
        loss = loss_function(outputs ,batch_y)
        loss.backward()
        optimizer.step()
        acc = acc_score(outputs,batch_y)
    print(f'epoch:{epoch} ====> Train_loss:{round(loss.item(),4)} ====>  Train_acc:{round(acc,4)}')
    if epoch % 1 == 0:
        rand = random.randint(0,len(test_X)-VAL_BATCH)
        val_acc,val_loss = test(test_X[rand:rand+VAL_BATCH],test_y[rand:rand+VAL_BATCH])
        print(f'epoch:{epoch} ====> val_loss:{round(val_loss.item(),4) } ====>  val_acc:{round(val_acc,4)}')
            

epoch:0 ====> Train_loss:0.0294 ====>  Train_acc:0.8189
epoch:0 ====> val_loss:0.0352 ====>  val_acc:0.7734
epoch:1 ====> Train_loss:0.0185 ====>  Train_acc:0.9291
epoch:1 ====> val_loss:0.0189 ====>  val_acc:0.8984
epoch:2 ====> Train_loss:0.0114 ====>  Train_acc:0.9528
epoch:2 ====> val_loss:0.0156 ====>  val_acc:0.9375


In [36]:
#Accuracy P
correct = 0 
total = 0 
net.eval()
with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_class = torch.argmax(test_y[i])
        net_out = net(test_X[i].view(-1,1,IMAGE_SIZE,IMAGE_SIZE))[0]
        predicted_class = torch.argmax(net_out)
        if predicted_class == real_class:
            correct+=1
        total+=1
    print('Accuracy:',round(correct/total,3))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 38771/38771 [00:29<00:00, 1311.44it/s]

Accuracy: 0.795





In [None]:
len(test_X[0:2])

In [None]:
net(test_X[0].view(-1,1,IMAGE_SIZE,IMAGE_SIZE))

In [None]:
net(test_X[0:2].view(-1,1,IMAGE_SIZE,IMAGE_SIZE))[0]

In [None]:
# Accuracy (class-wise)
from collections import Counter
outputs = []
correct_outputs = []
with torch.no_grad():
    for test_case in tqdm(range(len(test_X))):
        real_class = torch.argmax(test_y[test_case])
        real_label = label_mappings[real_class.item()]
        
        model_output = net(test_X[test_case].view(-1,1, IMAGE_SIZE,IMAGE_SIZE))
        predicted_label = label_mappings[torch.argmax(model_output).item()]
        outputs.append(real_label)
        if predicted_label == real_label:
            correct_outputs.append(predicted_label)
            
class_wise_accuracies = Counter(correct_outputs)
class_counts = Counter(outputs)
average_accuracy = 0
for key, corr_otpt_cnt in class_wise_accuracies.items():
    class_accuracy = corr_otpt_cnt
    class_count = class_counts[key]
    average_acc = class_accuracy / class_count
    average_accuracy+=average_acc
print(f"Class wise accuracy : {round((average_accuracy / len(label_mappings)),3)}")

In [None]:
with torch.no_grad():
    preds = [net(i.view(-1,1,IMAGE_SIZE,IMAGE_SIZE).to(device)).cpu().detach().numpy() for i in test_X]

In [None]:
y_test = test_y.cpu().detach().numpy()

In [None]:
np.argmax(preds[0])

In [None]:
np.argmax(y_test[0])

In [None]:
from sklearn.metrics import f1_score,accuracy_score
f1  = f1_score(np.argmax(preds),np.argmax(y_test))
# acc  = accuracy_score(preds[:100],y_test[:100])
    
print(f'F1 Score for this model : {f1}')
# print(f'accuracy Score for this model : {acc}')


In [None]:
# class_wise_accuracies

In [None]:
# class_counts.most_common()

In [None]:
torch.save(net.state_dict(),'fully_connected_all_data_test.pt')

In [None]:
import cv2
import numpy as np
import random 
index_to_test = random.randint(0,24)
label_to_show = test_y[index_to_test]
img_to_show = test_X[index_to_test][0]
img_to_show_cpu = torch.Tensor.cpu(img_to_show)

inference = torch.argmax(net(img_to_show.view(-1,1,IMAGE_SIZE,IMAGE_SIZE)))


print(f'label_to_show: {label_mappings[torch.argmax(label_to_show).item()]}')
print(f'inference: {inference}')
print(f'inference_label: {label_mappings[inference.item()]}')
cv2.imshow("re",np.array(img_to_show_cpu))
cv2.waitKey(0)
cv2.destroyAllWindows()


In [None]:
import torch

x = torch.arange(10)

In [None]:
x

In [None]:
x.view(-1,1)