In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from matplotlib import pyplot
import string
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv2D,Flatten,MaxPooling2D,Dropout
from sklearn.metrics import classification_report,confusion_matrix

import tensorflow as tf
import timeit

# Ask TensorFlow for the name of the GPU device it can use and report the result.
device_name = tf.test.gpu_device_name()
if "GPU" not in device_name:
    print("GPU device not found")
else:
    # Only announce a GPU when one was actually reported; previously this line
    # ran unconditionally and contradicted the "not found" message above.
    print('Found GPU at: {}'.format(device_name))

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
filepath_train = '/kaggle/input/sign-language-mnist/sign_mnist_train/sign_mnist_train.csv'
filepath_test = '/kaggle/input/sign-language-mnist/sign_mnist_test/sign_mnist_test.csv'

def import_data(filepath):
    """Load a labelled CSV file and split it into features and labels.

    Args:
        filepath: Path of a CSV file that contains a 'label' column; every
            other column is treated as a feature.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds all columns except
        'label' (one row per CSV row) and ``y`` holds the 'label' column.

    Raises:
        KeyError: If the file has no 'label' column.
    """
    # Parse the file a single time and split the resulting frame, instead of
    # reading the same CSV from disk once for X and once more for y.
    data = pd.read_csv(filepath)

    X = data.drop('label', axis=1)
    y = data.loc[:, 'label']

    # np.array copies the data, so callers get independent, writable arrays
    # (the labels are modified in place later in this notebook).
    return (np.array(X), np.array(y))

X_train,y_train = import_data(filepath_train)
X_test,y_test = import_data(filepath_test)


# We want the labels to form a contiguous 0..23 range: every label from 10 to 24
# is shifted down by one (presumably because the raw data has no label 9 — the
# letter table in this notebook maps 9 to 'K'; verify against the dataset).
for labels in (y_train, y_test):
  labels[(labels >= 10) & (labels <= 24)] -= 1


In [None]:
# Sanity check: report the dimensions of the feature and label arrays.
feature_shapes = (X_train.shape, X_test.shape)
label_shapes = (y_train.shape, y_test.shape)
print('Shape of X_train : {} , Shape of X_test : {}'.format(*feature_shapes))
print('Shape of y_train : {} , Shape of y_test : {}'.format(*label_shapes))

In [None]:
# Lookup table: key = numeric label, value = the letter it stands for
# (the table has no entry for 'J' or 'Z').
num_letters = {0 : 'A' , 1 : 'B', 2 : 'C' , 3 : 'D', 4: 'E' , 5 : 'F',
                     6 : 'G' , 7 : 'H' , 8 : 'I' , 9 : 'K' , 
                     10 : 'L' , 11 :'M' , 12 : 'N' , 13 : 'O' , 14 : 'P',
                     15 : 'Q' , 16 : 'R' , 17 : 'S' , 18 :'T' , 19 : 'U',
                     20 : 'V' , 21 :'W' , 22 : 'X', 23 : 'Y'}

plt.figure(figsize = (10,10))

# Show a 3x3 grid of randomly chosen training images, each titled with its letter.
for i in range(9):

    # randrange excludes the upper bound, so ind is always a valid row index.
    # random.randint(0, n) includes n and could raise IndexError on X_train[n].
    ind = random.randrange(X_train.shape[0])

    plt.subplot(3,3,i+1)
    # Each sample is displayed as a 28x28 grayscale image.
    plt.imshow(X_train[ind].reshape(28,28),cmap = 'gray')

    plt.title(num_letters[y_train[ind]],fontdict={'color' : 'Black', 'fontsize': 20})

plt.show()

In [None]:
# Give every sample an explicit 28x28x1 (height, width, channel) layout,
# which is the input shape the convolutional model below is declared with.
X_train = X_train.reshape((len(X_train), 28, 28, 1))
X_test = X_test.reshape((len(X_test), 28, 28, 1))

In [None]:
# Bar chart of how many training samples carry each label.
plt.figure(figsize = (30,15))

plt.xticks(size=30)
# Pass the labels explicitly as `x`: a positional vector is bound to `data`
# by newer seaborn releases (older ones only warned), so relying on the
# positional form does not plot per-label counts reliably across versions.
sns.countplot(x=y_train,linewidth = 3,edgecolor=sns.color_palette("dark"))
plt.title('Repartition of label in the train dataset', fontdict={'color' : 'Black' , 'fontsize' : 30})

plt.show()

In [None]:
# We use a data generator to feed the CNN: it multiplies pixel values by 1/255
# and applies random rotations, flips, zooms and shifts to the training images.
# NOTE(review): horizontal_flip mirrors the hand images — confirm that mirrored
# signs are valid training examples for this task.
train_datagen = ImageDataGenerator(rescale=1./255 ,
                                   rotation_range = 40,
                                   horizontal_flip=True,
                                   zoom_range=0.2,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   fill_mode = 'nearest')
# The test images are not augmented; they only get the same 1/255 scaling.
X_test = X_test/255


# One-hot encode the labels: each label becomes a vector that is 0 everywhere
# and 1 at the index of its class.
# num_classes is fixed to 24 (the size of the model's softmax output) so that
# y_train and y_test always have the same width; without it the width is
# inferred from the largest label present in each array.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=24)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=24)

In [None]:
# CNN classifier for 28x28x1 images.
# Feature extractor: four 5x5 'same'-padded ReLU convolutions with a shrinking
# number of filters (256 -> 128 -> 64 -> 32); a max-pooling layer follows each
# of the first three.
# Classifier head: two ReLU dense layers, dropout, and a 24-way softmax.
model = Sequential([
    Conv2D(256, (5,5), padding='same', activation='relu', input_shape=(28,28,1)),
    MaxPooling2D(),
    Conv2D(128, (5,5), padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, (5,5), padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(32, (5,5), padding='same', activation='relu'),

    Flatten(),

    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(24, activation='softmax'),
])

model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)
# Train for 50 epochs on augmented batches of 300 images; the test arrays are
# passed as the validation data.
model.fit(
    train_datagen.flow(X_train, y_train, batch_size = 300),
    epochs = 50,
    validation_data = (X_test, y_test),
)

In [None]:
# Model output for every test image (one row of class scores per image).
y_pred = model.predict(X_test)

# Turn each row (predicted scores / one-hot truth) into its class index.
# A single vectorized argmax along axis 1 replaces the per-row Python loop
# and yields the same indices.
y_pred2 = np.argmax(y_pred, axis=1)
y_test2 = np.argmax(y_test, axis=1)

In [None]:
# Annotated confusion-matrix heatmap of true class indices (y axis) against
# predicted class indices (x axis) on the test set.
plt.figure(figsize = (15,15))

sns.heatmap(
    confusion_matrix(y_test2, y_pred2),
    cmap = 'Blues',
    cbar = False,
    annot = True,
    annot_kws = dict(size = 8),
)
plt.xlabel('Predicted', fontdict = dict(size = 20))
plt.ylabel('True', fontdict = dict(size = 20))
plt.title('Confusion Matrix', fontdict = dict(size = 20, color = 'Blue'))
plt.show()

In [None]:
# Display ten randomly picked test images in a 5x2 grid; each title shows the
# true letter, the predicted letter and the score of the predicted class as a
# percentage rounded to two decimals.
plt.figure(figsize = (15,15))
for panel in range(1, 11):
  sample = np.random.randint(0,len(X_test))
  guess = y_pred2[sample]
  plt.subplot(5,2,panel)
  plt.imshow(X_test[sample].reshape(28,28),cmap = 'gray')
  caption = 'True : {} , Predicted : {} with {} % of probabilities '.format(
      num_letters[y_test2[sample]],
      num_letters[guess],
      round(y_pred[sample][guess]*100,2),
  )
  plt.title(caption)
plt.show()