# Identifying Sign Language Digits with CNN

We'll build a ConvNet that can differentiate between 6 sign language digits, going from 0 to 5. 

## Packages

In [None]:
import math
import scipy
import h5py

import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as tfl
import matplotlib.pyplot as plt

from matplotlib.pyplot import imread
from tensorflow.python.framework import ops
from PIL import Image

from cnn_utils import *

%matplotlib inline

<a name='1'></a>
## 1 - Dataset

<a name='1.1'></a>
### 1.1 - Loading the dataset

In [None]:
# Unpack the five values returned by load_signs_dataset() (SIGNS data)
(
    X_train_orig,
    Y_train_orig,
    X_test_orig,
    Y_test_orig,
    classes,
) = load_signs_dataset()

<center><img src="images/SIGNS.png" width="60%" alt="Examples of the sign language digits in the SIGNS dataset"></center>

In [None]:
# Preview one training image, using its label as the figure title
index = 0  # position of the example to display
plt.imshow(X_train_orig[index])
# Column `index` of Y_train_orig holds this example's label; squeeze drops the
# leftover singleton axis before it is turned into text
plt.title("y = " + str(Y_train_orig[:, index].squeeze()))
plt.show()

<a name='1-2'></a>
### 1.2 - Data treatment

In [None]:
# Scale the inputs by 1/255 (assumes 8-bit pixel values -- TODO confirm)
X_train = X_train_orig / 255.0
X_test = X_test_orig / 255.0

# One-hot encode the labels over 6 classes, then transpose the result
Y_train = convert_to_one_hot(Y_train_orig, 6).T
Y_test = convert_to_one_hot(Y_test_orig, 6).T

# Report the dataset sizes and the array shapes
print(f"number of training examples = {X_train.shape[0]}")
print(f"number of test examples = {X_test.shape[0]}")
print()
print(f"X_train shape: {X_train.shape}")
print(f"Y_train shape: {Y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"Y_test shape: {Y_test.shape}")

<a name='2'></a>
## 2 - Building the model

<a name='2.1'></a>
### 2.1 - Model architecture

We'll implement the following architecture to build our model:

`Conv2D -> ReLU -> MaxPool -> Conv2D -> ReLU -> MaxPool -> Flatten -> Dense`

In [None]:
def convolutional_model(input_shape, parameters):
    """
    Builds the Keras model:
    Conv2D -> ReLU -> MaxPool -> Conv2D -> ReLU -> MaxPool -> Flatten -> Dense.

    Arguments:
    input_shape -- shape of one input example, forwarded to tf.keras.Input
    parameters -- dict containing the cnn layers parameters:
        'n1', 'n2'       -- number of filters of the 1st / 2nd Conv2D
        'f1', 'f2'       -- kernel size of the 1st / 2nd Conv2D
        'mp_f1', 'mp_f2' -- side of the (square) window of the 1st / 2nd MaxPooling2D
        'mp_s1', 'mp_s2' -- strides of the 1st / 2nd MaxPooling2D
        'n_s'            -- number of units of the final softmax Dense layer

    Returns:
    model -- TF Keras model
    """

    # Read every hyperparameter up front, so a missing key raises KeyError
    # before any layer is created
    keys = ('n1', 'n2', 'f1', 'f2', 'mp_f1', 'mp_f2', 'mp_s1', 'mp_s2', 'n_s')
    (filters_1, filters_2, kernel_1, kernel_2,
     pool_1, pool_2, stride_1, stride_2, n_units) = (parameters[k] for k in keys)

    # Input
    input_img = tf.keras.Input(shape=input_shape)

    # The two convolutional blocks share the same structure and differ only
    # in their hyperparameters
    block_settings = (
        (filters_1, kernel_1, pool_1, stride_1),
        (filters_2, kernel_2, pool_2, stride_2),
    )
    features = input_img
    for filters, kernel, pool, stride in block_settings:
        features = tfl.Conv2D(filters, kernel, padding='same')(features)
        features = tfl.ReLU()(features)
        features = tfl.MaxPooling2D(
            pool_size=(pool, pool), strides=stride, padding='same'
        )(features)

    # Output
    features = tfl.Flatten()(features)
    outputs = tfl.Dense(n_units, activation='softmax')(features)

    return tf.keras.Model(inputs=input_img, outputs=outputs)

<a name='2-2'></a>
### 2.2 - Training the Model

In [None]:
# Wrap each (inputs, labels) pair as a tf.data.Dataset split into minibatches of 64
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((X_train, Y_train))
    .batch(64)
)
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices((X_test, Y_test))
    .batch(64)
)

In [None]:
# Hyperparameters consumed by convolutional_model, grouped by block
parameters = {
    # block 1
    'n1': 8,
    'f1': 4,
    'mp_f1': 8,
    'mp_s1': 8,

    # block 2
    'n2': 16,
    'f2': 2,
    'mp_f2': 4,
    'mp_s2': 4,

    # output: one unit per column of the one-hot label matrix
    'n_s': Y_train.shape[1],
}

# The input shape is the shape of a single example (leading axis dropped)
model = convolutional_model(X_train.shape[1:], parameters)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# accuracy tracked as a metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fitting the model for 500 epochs; test_dataset is passed as validation data,
# and the returned History object is kept for the plots
history = model.fit(
    train_dataset,
    epochs=500,
    validation_data=test_dataset,
)

In [None]:
# Checking history: history.history becomes a DataFrame with one column per
# tracked quantity
df_loss_acc = pd.DataFrame(history.history)

# Keep the loss columns and rename them so they can be used as legend labels
df_loss = df_loss_acc[['loss', 'val_loss']].rename(
    columns={'loss': 'train', 'val_loss': 'validation'}
)

# Same treatment for the accuracy columns
df_acc = df_loss_acc[['accuracy', 'val_accuracy']].rename(
    columns={'accuracy': 'train', 'val_accuracy': 'validation'}
)

# Plotting loss
plt.figure(figsize=(5,4))
loss_lines = plt.plot(df_loss)
# plt.plot returns one line per column; label each with its column name so
# the train and validation curves can be told apart
plt.legend(loss_lines, list(df_loss.columns), fontsize=10)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.xlabel('Epochs', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.title('Loss per epoch')

# Plotting accuracy
plt.figure(figsize=(5,4))
acc_lines = plt.plot(df_acc)
plt.legend(acc_lines, list(df_acc.columns), fontsize=10)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.xlabel('Epochs', fontsize=12)
plt.ylabel('Accuracy', fontsize=12)
plt.title('Accuracy per epoch')

plt.show()

<a name='3'></a>
## 3 - Results

Each image below is labeled with the model's prediction. A red label indicates that the model wasn't able to correctly predict the sign language digit.

In [None]:
# Size of the grid of test images to display
n_rows = 4
n_cols = 5

# Selecting random instances from X_test.
# np.random.randint excludes `high`, so high=X_test.shape[0] yields indices in
# [0, X_test.shape[0] - 1]; a larger bound could index past the end of X_test.
random = list(np.random.randint(0, high=X_test.shape[0], size=n_rows*n_cols, dtype=int))
test = X_test[random,:,:,:]

# Predicting the output from above selection: argmax over the last axis turns
# the model output into one predicted class index per image
pred = model.predict(test)
pred = np.argmax(pred, axis=-1)

# Comparing the results
# squeeze=False always returns a 2-D array of axes, so flattening also works
# when the grid has a single row or column
fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(4*n_cols, 4*n_rows), squeeze=False)
ax = ax.ravel()

for i in range(n_rows*n_cols):
    ax[i].imshow(test[i,:,:,:])

    # The label shows the predicted class: black when it matches the true
    # label of the sampled test example, red otherwise
    is_correct = int(np.squeeze(Y_test_orig[0, random[i]])) == pred[i]
    ax[i].set_xlabel(pred[i], fontsize=14, color='k' if is_correct else 'r')

    ax[i].grid(False)
    ax[i].set_xticks([])
    ax[i].set_yticks([])

plt.show()