# Custom Datasets

Dataset: Sign Language MNIST (Kaggle) — https://www.kaggle.com/datamunge/sign-language-mnist

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

%matplotlib inline

import torch
from torch.autograd import Variable
from __future__ import print_function
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import save_image

In [None]:
# Map each class index (as a string) to its letter: index i stands for the
# i-th letter of the alphabet, with index 9 ('J') left out and the table
# stopping at 24 ('Y').
signs = {str(idx): chr(ord('A') + idx) for idx in range(25) if idx != 9}

In [None]:
# Read the Sign Language MNIST training CSV into a DataFrame.
# NOTE(review): no other cell shown here references this module-level variable;
# SignsLanguageDataset reads the CSV files itself.
signs_lang_dataset = pd.read_csv('dataset/sign_mnist_train/sign_mnist_train.csv')

In [None]:
class SignsLanguageDataset(Dataset):
    """Sign-language image dataset backed by a CSV file.

    The CSV is read with pandas; its first column is used as the label and the
    remaining columns as pixel values, which are reshaped to ``(1, 28, 28)``
    per sample and divided by 255.

    Args:
        train: When true (default) the training CSV is loaded, otherwise the
            test CSV.
        csv_path: Optional explicit path to a CSV with the same layout. When
            given it is used instead of the default path selected by
            ``train``.

    Attributes set on the instance: ``train``, ``signs_lang_dataset`` (the raw
    DataFrame), ``X_set`` (scaled images) and ``y_set`` (labels).
    """

    # Default CSV locations, relative to the working directory.
    TRAIN_CSV = 'dataset/sign_mnist_train/sign_mnist_train.csv'
    TEST_CSV = 'dataset/sign_mnist_test/sign_mnist_test.csv'

    def __init__(self, train=True, csv_path=None):
        self.train = train

        if csv_path is None:
            csv_path = self.TRAIN_CSV if train else self.TEST_CSV
        self.signs_lang_dataset = pd.read_csv(csv_path)

        pixels = self.signs_lang_dataset.iloc[:, 1:].values
        labels = self.signs_lang_dataset.iloc[:, 0].values

        # One channel of 28x28 per row; dividing by 255 rescales 0-255 pixel
        # values to the 0-1 range.
        self.X_set = np.reshape(pixels, (pixels.shape[0], 1, 28, 28)) / 255
        self.y_set = np.array(labels)

    def __getitem__(self, index):
        """Return ``{'image_sign': image, 'label': label}`` for ``index``."""
        return {'image_sign': self.X_set[index], 'label': self.y_set[index]}

    def __len__(self):
        """Return the number of samples."""
        return len(self.X_set)

In [None]:
class Net(nn.Module):
    """Small convolutional classifier producing 25 log-probabilities.

    Two stages of 5x5 convolution -> batch norm -> ReLU -> 2x2 max-pool are
    followed by two fully connected layers and a log-softmax. The first fully
    connected layer expects 320 features, which is what a single-channel
    28x28 input yields after both stages (20 channels * 4 * 4).
    """

    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=40, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=40, out_channels=20, kernel_size=5)

        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.batch_norm1 = nn.BatchNorm2d(num_features=40)
        self.batch_norm2 = nn.BatchNorm2d(num_features=20)

        self.fc1 = nn.Linear(in_features=320, out_features=100)
        self.fc2 = nn.Linear(in_features=100, out_features=25)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 25)``."""
        stage1 = self.pool1(F.relu(self.batch_norm1(self.conv1(x))))
        stage2 = self.pool2(F.relu(self.batch_norm2(self.conv2(stage1))))

        # Flatten everything except the batch dimension for the dense layers.
        flat = stage2.view(stage2.size(0), -1)

        hidden = F.relu(self.fc1(flat))
        return F.log_softmax(self.fc2(hidden), dim=1)

In [None]:
def train(model, optimizer, epoch, device, train_loader, log_interval):
    """Run one optimisation pass over ``train_loader``.

    Each batch is a dict with an ``'image_sign'`` tensor and a ``'label'``
    tensor. Images are cast to float and labels to long before being moved to
    ``device``. The loss is the negative log-likelihood of the model output,
    so the model is expected to return log-probabilities.

    Args:
        model: Module to train; it is put into training mode.
        optimizer: Optimizer stepping the model's parameters.
        epoch: Epoch number, used only in the progress message.
        device: Device the batches are moved to.
        train_loader: Iterable of batches exposing ``__len__`` and ``dataset``.
        log_interval: A progress line is printed for every batch whose index
            is a multiple of this value.
    """
    model.train()
    progress_template = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'

    for batch_idx, batch in enumerate(train_loader):
        inputs = batch['image_sign'].type(torch.FloatTensor).to(device)
        labels = batch['label'].type(torch.LongTensor).to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval != 0:
            continue

        samples_seen = batch_idx * len(inputs)
        percent_done = 100. * batch_idx / len(train_loader)
        print(progress_template.format(
            epoch, samples_seen, len(train_loader.dataset),
            percent_done, loss.item()))

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for batch_idx, data in enumerate(test_loader):
            
            img = data['image_sign']
            img = img.type(torch.FloatTensor).to(device)
            target = data['label']
            target = target.type(torch.LongTensor).to(device)
            
            output = model(img)
            test_loss += F.nll_loss(output, target).item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [None]:
batch_size_train = 5
batch_size_test = 4

# Each dataset reads its own CSV on construction.
dataset_train = SignsLanguageDataset(train=True)
dataset_test = SignsLanguageDataset(train=False)

# Reshuffle the training samples every epoch so SGD does not see the batches
# in the same fixed file order each time; evaluation order does not matter,
# so the test loader stays sequential.
train_loader = DataLoader(dataset=dataset_train, batch_size=batch_size_train, shuffle=True)
test_loader = DataLoader(dataset=dataset_test, batch_size=batch_size_test)


In [None]:
# Fix the RNG seed so weight initialisation is reproducible.
torch.manual_seed(123)

use_cuda = False
device = torch.device("cuda" if use_cuda else "cpu")

learning_rate = 0.001
num_epochs = 7
# Place the parameters on the same device the batches are moved to; without
# this, switching use_cuda to True would feed CUDA tensors to a CPU model.
# The optimizer is built afterwards so it references the moved parameters.
model = Net().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.2, weight_decay=0.002)

# train() prints progress for batches whose index is a multiple of this value;
# batch 0 always matches, later batches only if an epoch is longer than this.
log_interval = 27455


In [None]:
# Alternate one training pass and one evaluation pass per epoch
# (epoch numbers start at 1).
for epoch in range(1, num_epochs + 1):
    train(model=model, optimizer=optimizer, epoch=epoch, device=device,
          train_loader=train_loader, log_interval=log_interval)
    test(model=model, device=device, test_loader=test_loader)

#### Save model

In [None]:
# Serialise the whole trained module object (not only its state_dict) to disk.
# NOTE(review): a file saved this way is unpickled on load, so the Net class
# definition has to be available wherever it is loaded — confirm for the
# real-time cell below.
torch.save(model, 'model_trained.pt')

# Real-time prediction

In [None]:
import numpy as np
import cv2
import torch

In [None]:
# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)

# Request a 700x480 capture size; the named constants are the property ids
# 3 (frame width) and 4 (frame height).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 700)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

In [None]:
# Load the trained network. The file holds a fully pickled module, so it must
# be unpickled in full; recent PyTorch releases default to weights_only=True
# and would refuse it.
# NOTE: unpickling executes arbitrary code — only load checkpoints you trust.
try:
    modelo = torch.load('model_trained.pt', map_location='cpu', weights_only=False)
except TypeError:
    # Older PyTorch releases do not accept the weights_only keyword.
    modelo = torch.load('model_trained.pt', map_location='cpu')
modelo.eval()

# Class index (as a string) -> letter; indices 9 and 25 have no entry.
signs = {'0': 'A', '1': 'B', '2': 'C', '3': 'D', '4': 'E', '5': 'F', '6': 'G', '7': 'H', '8': 'I',
        '10': 'K', '11': 'L', '12': 'M', '13': 'N', '14': 'O', '15': 'P', '16': 'Q', '17': 'R',
        '18': 'S', '19': 'T', '20': 'U', '21': 'V', '22': 'W', '23': 'X', '24': 'Y' }

font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera missing or stream ended).
            break

        # Region of the frame the sample is taken from
        img = frame[20:250, 20:250]

        res = cv2.resize(img, dsize=(28, 28), interpolation=cv2.INTER_CUBIC)
        res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)

        # Same layout and 0-1 scaling the network was trained with.
        res1 = np.reshape(res, (1, 1, 28, 28)) / 255
        res1 = torch.from_numpy(res1).type(torch.FloatTensor)

        # Inference only: skip gradient bookkeeping.
        with torch.no_grad():
            out = modelo(res1)

        # The network outputs log-probabilities; exp() turns them back into
        # probabilities, and max() gives the best class with its probability.
        prob, pred = out.exp().max(1)
        prob = float(prob)
        # .get() avoids a KeyError for class indices without a letter.
        letter = signs.get(str(int(pred)))

        if prob < 0.5 or letter is None:
            texto_mostrar = 'Signo no detectado'
        else:
            # prob is a 0-1 fraction; scale it so the '%' sign is truthful.
            texto_mostrar = '{}: {:.2f}%'.format(letter, 100 * prob)

        frame = cv2.putText(frame, texto_mostrar, (60, 285), font, 1, (255, 0, 0), 2, cv2.LINE_AA)

        # Outline the sampled region on the preview.
        frame = cv2.rectangle(frame, (20, 20), (250, 250), (0, 255, 0), 3)

        cv2.imshow('Cam', frame)

        # Quit when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()