# Training Model for Sign Language Classification and Recognition 💡💡
-----------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------

In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import seaborn as sns
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
import numpy as np

In [2]:
# Loading dataset
# Each row is later split into image columns (all but the last) and a label
# (last column).
dataset = np.load('signLanguage-Dataset-Alpha.npy')

In [3]:
# Class names, looked up by the integer class index the model predicts:
# indices 0-25 are the letters A-Z, 26-28 are the control gestures.
labels = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
labels += ['SPACE', 'BACKSPACE', 'NEXT']

In [4]:
# Split every row into its flattened image (all columns but the last) and
# its class label (last column).
X, y = dataset[:, :-1], dataset[:, -1]

# Show which class ids are present and how many samples each one has.
np.unique(y, return_counts=True)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24]),
 array([ 15,  15,  16,  18,  18,  26,  27,  20,  35,  23,  25,  45,  53,
        113, 141,  89, 131,  89,  83, 134,  94, 151,  89, 176], dtype=int64))

In [5]:
# Reshape each flattened row to a 300x300 single-channel image and divide by
# 255 (maps to the 0-1 range, assuming 8-bit pixel values).
X_train = X.reshape(X.shape[0], 300, 300, 1).astype('float32') / 255

In [6]:
# Building Model
# A plain fully connected classifier: the image is flattened, passed through
# two ReLU hidden layers, and mapped to 29 softmax outputs (one per entry of
# the labels list).
model = keras.Sequential()
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(380, activation='relu'))
model.add(keras.layers.Dense(200, activation='relu'))
model.add(keras.layers.Dense(29, activation='softmax'))

# Sparse categorical cross-entropy takes integer class ids directly, so the
# labels do not need one-hot encoding.
model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [7]:
# Fitting Data into Model
# batch_size=32: 32 images are supplied to the model at a time.
# epochs=5: the whole training set is passed through the model 5 times.
model.fit(X_train, y, epochs=5, batch_size=32)
              

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x227ddbd71f0>

In [8]:
# Load the evaluation data (a different file from the one used for training)
# and shuffle its rows in place.
# NOTE(review): no random seed is set, so the row order differs between runs.
testData = np.load('signLanguage-Dataset.npy')
np.random.shuffle(testData)

In [9]:
# Inspect the shuffled rows.
testData

array([[ 0,  0,  0, ...,  0,  0, 27],
       [ 0,  0,  0, ...,  0,  0, 15],
       [ 0,  0,  0, ...,  0,  0, 10],
       ...,
       [ 0,  0,  0, ...,  0,  0, 16],
       [ 0,  0,  0, ...,  0,  0, 18],
       [ 0,  0,  0, ...,  0,  0, 17]])

In [10]:
# Use the first 200 shuffled rows for evaluation: image columns are
# everything but the last column, the label is the last column.
X_test, y_test = testData[:200, :-1], testData[:200, -1]

In [11]:
# Sanity-check the shapes of the evaluation images and labels, one per line.
print(X_test.shape, y_test.shape, sep='\n')

(200, 90000)
(200,)


In [12]:
# Same preprocessing as the training images: reshape each row to a 300x300
# single-channel image and divide by 255 (0-1 range, assuming 8-bit pixels).
X_test = X_test.reshape(X_test.shape[0], 300, 300, 1).astype('float32') / 255

In [13]:
# Evaluate on the held-out slice; returns the loss followed by the compiled
# 'accuracy' metric.
# NOTE(review): the recorded outputs show test labels 26-28 that do not occur
# in the training label counts - confirm the two datasets share a label set.
model.evaluate(X_test, y_test)



[0.7532983422279358, 0.9399999976158142]

In [14]:
# Per-class scores for every evaluation image ...
predicted = model.predict(X_test)
# ... reduced to the index of the highest-scoring class per image.
predicted.argmax(axis=1)

array([24, 15, 10, 23, 15, 23, 23,  6, 24, 24, 20, 24, 14, 14,  1, 17, 15,
       18, 13, 17, 16, 24, 15, 13,  4, 23, 18, 23, 18, 18, 18, 19, 18, 18,
       18,  2, 14, 23, 13, 24, 14, 18, 15, 19,  7, 20, 18, 24, 19, 23, 11,
       24, 20, 24, 16, 12, 14, 24, 13, 17,  5, 24, 21, 21,  6, 22, 14, 12,
       20, 15,  2, 17, 20, 15, 20, 15, 17, 18, 20, 24, 24,  0, 21, 24, 14,
       21, 18, 15, 22, 13, 15, 22, 19, 24, 18, 14, 14,  3, 20, 19, 14, 24,
       18, 19, 16, 10, 16,  1, 18,  5, 24, 23, 24, 22, 24, 20, 18, 16, 18,
       22, 17, 24, 24, 22, 19, 23, 20,  8, 14, 24, 14, 13, 21, 23, 17, 21,
       18, 22, 20,  5, 16, 24, 22, 17, 11, 23, 15, 12, 24, 24, 19,  3, 18,
        1, 14, 17, 22, 15, 19,  0, 17, 19, 24, 16, 12, 16, 11, 17,  5, 24,
       14, 18, 15, 24, 15, 13, 13, 16, 17,  6, 17, 15, 15, 17, 17, 20, 20,
       22, 17, 17, 14, 18, 24, 22,  8, 15, 17, 20, 16,  2], dtype=int64)

In [15]:
# Ground-truth labels, for comparison with the predicted class indices.
y_test

array([27, 15, 10, 23, 15, 23, 23,  6, 24, 24, 20, 24, 14, 14,  1, 17, 15,
       18, 13, 17, 16, 24, 15, 13,  4, 23, 18, 23, 18, 28, 28, 19, 18, 18,
       18,  2, 14, 23, 13, 24, 14, 26, 15, 19,  7, 20, 18, 24, 19, 23, 11,
       24, 20, 24, 16, 12, 14, 24, 13, 17,  5, 24, 21, 21,  6, 22, 14, 12,
       20, 15,  2, 17, 20, 15, 20, 15, 17, 18, 20, 27, 24,  0, 21, 24, 14,
       21, 18, 15, 22, 13, 15, 22, 19, 26, 28, 14, 14,  3, 20, 19, 14, 24,
       18, 19, 16, 10, 16,  1, 18,  5, 24, 23, 24, 22, 24, 20, 18, 16, 28,
       22, 17, 24, 24, 22, 19, 23, 20,  8, 14, 24, 14, 13, 21, 23, 17, 21,
       18, 22, 20,  5, 16, 24, 22, 17, 11, 23, 15, 12, 26, 26, 19,  3, 18,
        1, 14, 17, 22, 15, 19,  0, 17, 19, 24, 16, 12, 16, 11, 17,  5, 24,
       14, 18, 15, 26, 15, 13, 13, 16, 17,  6, 17, 15, 15, 17, 17, 20, 20,
       22, 17, 17, 14, 27, 24, 22,  8, 15, 17, 20, 16,  2])

In [16]:
# Slicing (rather than indexing) keeps the leading batch axis that
# model.predict expects.
X_test[:1].shape

(1, 300, 300, 1)

In [17]:
# Classify the first evaluation image and translate the winning class index
# into its label name.
pred = labels[np.argmax(model.predict(X_test[:1]), axis=1)[0]]

In [18]:
# Buffer that the webcam loop appends preprocessed 300x300x1 frames to.
# NOTE(review): np.empty leaves this single placeholder frame uninitialised,
# so index 0 holds arbitrary bytes rather than a real capture.
ImagesCaptured = np.empty((1, 300, 300, 1), dtype='uint8')

In [19]:
# State shared with getPred: number of calls since the last real prediction,
# and the label returned by that prediction ('A' until the first one runs).
lapse, prevPred = 0, 'A'

In [20]:
def getPred(img):
    """Return the label for ``img``, running the model only on every 7th call.

    The module-level counter ``lapse`` is incremented on each call. While it
    is 5 or less, the label cached in ``prevPred`` is returned without
    touching the model. Once it exceeds 5 the counter is reset, ``img`` is
    passed to ``model.predict``, and the highest-scoring class is looked up
    in ``labels``, cached in ``prevPred`` and returned.

    Args:
        img: Batch passed straight to ``model.predict``; only the first
            item's prediction is used.

    Returns:
        The label string from ``labels`` (fresh or cached).
    """
    global lapse, prevPred

    # Throttle: reuse the previous answer between real predictions.
    if lapse <= 5:
        lapse += 1
        return prevPred

    lapse = 0
    best_class = np.argmax(model.predict(img), axis=1)[0]
    prevPred = labels[best_class]
    return prevPred

In [21]:
import mediapipe as mp
import cv2
import numpy as np
import uuid
import os
import matplotlib.pyplot as plt
import pandas as pd

In [22]:
# Short aliases for the MediaPipe drawing helpers and the hand-tracking solution.
mp_drawing, mp_hands = mp.solutions.drawing_utils, mp.solutions.hands

In [24]:
# Live recognition loop.
#
# Reads frames from the default webcam, detects hands with MediaPipe, draws
# the detected landmarks on a black canvas, crops a 300x300 window around
# the hand centre, converts it to a normalised grayscale image and asks
# getPred for a label, which is overlaid on the preview window.
# Press 'q' in the preview window to stop.
cap = cv2.VideoCapture(0)

# Half the side length of the 300x300 crop the classifier is fed.
crop_half = 150

# Drawing styles never change, so build them once instead of once per frame.
preview_landmark_spec = mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4)
preview_connection_spec = mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=4)
model_landmark_spec = mp_drawing.DrawingSpec(color=(255,0,0), thickness=2, circle_radius=4)
model_connection_spec = mp_drawing.DrawingSpec(color=(0,255,0), thickness=2, circle_radius=4)

# Frames captured during this run. They are merged into ImagesCaptured once,
# after the loop, instead of re-copying the whole buffer on every frame.
# NOTE(review): the buffer still grows by one 300x300 float32 frame per
# classified frame; drop it if nothing consumes ImagesCaptured afterwards.
new_frames = []

try:
    with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unplugged, stream ended):
                # frame is unusable, so stop instead of crashing in cvtColor.
                break

            # MediaPipe expects RGB input; OpenCV delivers BGR.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Marked read-only around the detection call, as in the MediaPipe
            # sample code, then made writeable again for drawing.
            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True

            # Back to BGR for drawing and display with OpenCV.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            frame_h, frame_w = image.shape[:2]
            img = None  # landmark-only 300x300 crop for the classifier, if any

            # multi_hand_landmarks is None when no hand is detected.
            for hand in results.multi_hand_landmarks or []:
                # Landmark coordinates are normalised; scale them to pixels.
                # (Named xs/ys so the training labels `y` are not overwritten.)
                xs = [landmark.x for landmark in hand.landmark]
                ys = [landmark.y for landmark in hand.landmark]
                cx = int(np.mean(xs) * frame_w)
                cy = int(np.mean(ys) * frame_h)

                left, top = cx - crop_half, cy - crop_half
                right, bottom = cx + crop_half, cy + crop_half

                # Preview overlay: hand centre, crop window and landmarks.
                cv2.circle(image, (cx, cy), 10, (255,0,0), 1)
                cv2.rectangle(image, (left, top), (right, bottom), (255,0,0), 1)
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          preview_landmark_spec, preview_connection_spec)

                if img is not None:
                    # Only the first hand with a usable crop is classified.
                    continue
                if left < 0 or top < 0 or right > frame_w or bottom > frame_h:
                    # The window leaves the frame, so a full 300x300 crop is
                    # impossible; skip this hand explicitly.
                    continue

                # Each hand gets its own blank canvas so that one hand's
                # landmarks never end up in another hand's crop.
                canvas = np.zeros((frame_h, frame_w, 3), dtype=np.uint8)
                mp_drawing.draw_landmarks(canvas, hand, mp_hands.HAND_CONNECTIONS,
                                          model_landmark_spec, model_connection_spec)
                img = canvas[top:bottom, left:right, :]

            if img is not None:
                # Same preprocessing as the training images: grayscale,
                # 300x300x1, float32, divided by 255.
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                gray = gray.reshape(300, 300, 1).astype('float32') / 255

                new_frames.append(gray)
                out = getPred(gray[np.newaxis])  # add the batch axis

                image = cv2.putText(image, out, (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                                    1, (255, 0, 0), 2, cv2.LINE_AA)

            cv2.imshow('Hand Tracking', image)

            # Mask to the low byte so modifier bits reported on some
            # platforms do not hide the 'q' key.
            k = cv2.waitKey(10) & 0xFF
            if k == ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()
    if new_frames:
        ImagesCaptured = np.append(ImagesCaptured, new_frames, axis=0)