# 1. Introduction

The purpose of this notebook is to build a simple Convolutional Neural Network with TensorFlow that recognizes sign-language hand gestures. We shall then try to use it on a live camera feed with OpenCV.

## 1.1. Imports

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Conv2D, Flatten, Input
import cv2
import matplotlib.pyplot as plt
import random as rd

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for input_path in (
    os.path.join(folder, entry)
    for folder, _subfolders, entries in os.walk('/kaggle/input')
    for entry in entries
):
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## 1.2. Dataset

We import the train dataset and show a random image and its label. 

In [None]:
# Location of the training split of the Sign Language MNIST dataset on Kaggle.
train_csv_path = "/kaggle/input/sign-language-mnist/sign_mnist_train/sign_mnist_train.csv"

# Read the CSV into a DataFrame; leaving the frame as the last expression
# makes the notebook render it as the cell output.
df_train = pd.read_csv(train_csv_path)
df_train

In [None]:
# Letters indexed by class id: alphabet[0] == 'a', ..., alphabet[25] == 'z'.
alphabet = list("abcdefghijklmnopqrstuvwxyz")

# Draw a random row *position* and fetch that row positionally.
n = rd.randrange(df_train.shape[0])
sample_row = df_train.iloc[n]

# Read the label by column name and the pixels as "everything except the
# label". Integer keys such as row[0] on a Series indexed by column names rely
# on a positional fallback that pandas has deprecated.
ar = sample_row.drop("label").to_numpy().reshape((28, 28))

plt.imshow(ar, cmap='gray')
plt.title(alphabet[sample_row["label"]])
plt.show()

# 2. Data Preprocessing

First we separate our labels from our data. We then have to normalize the data of all images, and create a simple one hot encoding of the labels.

In [None]:
# Integer class ids, one per row of the training frame.
y = df_train["label"].to_numpy()

# Every remaining column is a pixel value. Dividing by 255 rescales them
# (presumably 8-bit grey levels in [0, 255]) before reshaping each row into a
# 28x28 single-channel image.
X = df_train.drop(columns=['label']).to_numpy() / 255
X = X.reshape((-1, 28, 28, 1))

# One-hot encoding of the labels, laid out as (n_classes, n_samples):
# column i holds a single 1 in row y[i]. Built with one vectorised assignment
# instead of a Python loop.
# NOTE: the training call in this notebook passes the integer labels `y`
# (sparse categorical cross-entropy), so Y is kept for reference only.
Y = np.zeros((len(alphabet), df_train.shape[0]))
Y[y, np.arange(len(y))] = 1

# 3. Convolutional Neural Network

In [None]:
# CNN classifier for 28x28 single-channel images.
# Two identical-shaped stages (conv -> conv -> max-pool -> dropout) with 32 and
# then 64 filters, followed by a dense head that ends in a 26-way softmax
# (one output per entry of the alphabet).
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which Keras discourages for Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28, 1)),

    # Stage 1: 32 filters.
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2), strides=None, padding='same'),
    tf.keras.layers.Dropout(0.2),

    # Stage 2: 64 filters.
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2), strides=None, padding='same'),
    tf.keras.layers.Dropout(0.2),

    # Classification head.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(556, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(26, activation='softmax'),
])

model.summary()

# The sparse loss takes integer class ids as targets, so no one-hot encoding
# of the labels is required at training time.
model.compile(loss="sparse_categorical_crossentropy", optimizer='adam', metrics=["accuracy"])

In [None]:
# Training: 3 epochs, mini-batches of 64 samples, with a 0.2 fraction of the
# supplied data set aside by Keras for validation metrics.
history = model.fit(
    x=X,
    y=y,
    batch_size=64,
    epochs=3,
    validation_split=0.2,
)

The predictions are remarkably good from the very first epoch.

# 4. Validation

I decided to use the test dataset as validation.

In [None]:
df_valid = pd.read_csv("/kaggle/input/sign-language-mnist/sign_mnist_test/sign_mnist_test.csv")

# Preprocessing: same steps as for the training data, but stored under
# *_valid names. Reusing X / y here would overwrite the training arrays, and
# re-running the training cell afterwards would then fit on the test split.
y_valid = df_valid["label"].to_numpy()
X_valid = df_valid.drop(columns=['label']).to_numpy() / 255
X_valid = X_valid.reshape((-1, 28, 28, 1))

# One-hot labels laid out as (n_classes, n_samples), built in one vectorised
# assignment; kept for parity with the training preprocessing.
Y_valid = np.zeros((26, df_valid.shape[0]))
Y_valid[y_valid, np.arange(len(y_valid))] = 1

# Pick one random test sample and compare the model's answer with the truth.
n = rd.randrange(df_valid.shape[0])
# Slicing n:n+1 keeps the leading batch axis, giving shape (1, 28, 28, 1).
class_scores = model.predict(X_valid[n:n + 1])
predicted_letter = alphabet[np.argmax(class_scores)]
# The true label comes from the label array instead of an integer key on a
# label-indexed Series row, which pandas has deprecated.
expected_letter = alphabet[y_valid[n]]

plt.imshow(X_valid[n, :, :, 0], cmap='gray')
plt.title(f"Prediction :  {predicted_letter} | had to predict {expected_letter}")
plt.show()

# 5. Using Webcam with OpenCV 

*Warning: this section does not work in an online (hosted) notebook; it only works when run on the user's own machine.*

In [None]:
# Live demo: show the webcam feed with a green square marking the region of
# interest (ROI).
#   - press 'q'  : classify the current ROI and print the predicted letter
#   - press Esc  : leave the loop
# The capture device and the window are always released, even on error.
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing, busy or disconnected).
            print("Could not read a frame from the webcam.")
            break

        # Crop and shrink the ROI *before* drawing the rectangle, so the green
        # border never ends up in the image fed to the network.
        roi = frame[100:300, 100:300]
        f = cv2.resize(roi, (28, 28))
        gray = cv2.cvtColor(f, cv2.COLOR_BGR2GRAY)

        cv2.rectangle(frame, (100, 100), (300, 300), (0, 255, 0), 0)
        cv2.imshow('frame', frame)

        key = cv2.waitKey(10) & 0xFF
        if key == ord('q'):
            print("____")
            # Scale to [0, 1] like the training data, and add the batch and
            # channel axes expected by the model.
            net_input = gray.astype("float32").reshape(1, 28, 28, 1) / 255
            print(alphabet[np.argmax(model.predict(net_input))])
        elif key == 27:  # Esc
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

Using the webcam reveals a problem: the solid gray background used throughout the dataset makes it difficult for the CNN to generalize to real scenes. One way to solve this issue would be to extract the hand from the background with OpenCV and obtain a "binary image".  *TO BE CONTINUED...*