# Intro
Welcome to the [Sign Language MNIST](https://www.kaggle.com/datamunge/sign-language-mnist) dataset.
![](https://storage.googleapis.com/kaggle-datasets-images/3258/5337/0dade1b95b22cceac471b309fc8a8f63/dataset-cover.png)
<span style="color: royalblue;">Please vote the notebook up if it helps you. Thank you. </span>

# Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop,Adam
from keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

# Path

In [None]:
# Folder under the Kaggle input directory that holds the dataset files.
path = '/kaggle/input/sign-language-mnist/'
# List the folder contents; as the last expression it is shown as the cell output.
os.listdir(path)

# Load Data

In [None]:
# Read the train and test CSV files from the dataset folder into DataFrames.
train_data, test_data = (
    pd.read_csv(path + csv_name)
    for csv_name in ('sign_mnist_train.csv', 'sign_mnist_test.csv')
)

# Overview

In [None]:
# Report how many rows (samples) each split contains.
for caption, frame in (('Number samples train:', train_data),
                       ('Number samples test:', test_data)):
    print(caption, len(frame))

In [None]:
# Preview the first rows of the training table.
train_data.head()

In [None]:
# Preview the first rows of the test table.
test_data.head()

# Exploratory Data Analysis

In [None]:
# Ascending list of the distinct class ids found in the training 'label' column.
labels = train_data['label'].value_counts().sort_index().index.tolist()

In [None]:
# Every column after the first one is counted as a pixel column.
pixel_columns = train_data.columns[1:]
n_pixels = len(pixel_columns)

print('Number of image pixel:', n_pixels)
# Side length of the image if the pixels form a square.
print('Quadratic image size:', int(np.sqrt(n_pixels)))
print('Number of labels:', len(labels))

Distribution of the labels in the train and test set:

In [None]:
# Compare the class balance of both splits as percentage bar charts,
# drawn side by side on a shared y-axis.
fig, axs = plt.subplots(1, 2, figsize=(12, 4), sharey=True)

panels = (
    (axs[0], train_data, 'Train labels', 'yellowgreen'),
    (axs[1], test_data, 'Test labels', 'sandybrown'),
)
for ax, frame, title, bar_color in panels:
    # Share of each label in percent, ordered by label id so the bars
    # line up with the sorted `labels` list used for the x positions.
    share = 100 * frame['label'].value_counts().sort_index().values / len(frame)
    ax.bar(labels, share, color=bar_color)
    ax.grid()
    ax.set_title(title)

axs[0].set_ylabel('%')
plt.show()

# Prepare Data

In [None]:
# Features: every column after the first one, with each row reshaped
# into a 28x28 image with a single channel.
X_train = train_data.iloc[:, 1:].values.reshape(-1, 28, 28, 1)
X_test = test_data.iloc[:, 1:].values.reshape(-1, 28, 28, 1)

# Targets: the 'label' column, one-hot encoded into vectors of length 25.
y_train = to_categorical(train_data['label'], num_classes=25)
y_test = to_categorical(test_data['label'], num_classes=25)

In [None]:
# Show one example image per class in a 4x6 grid, titled with its class id.
fig, axs = plt.subplots(4, 6, figsize=(15, 10))
fig.subplots_adjust(hspace=0.2, wspace=.1)
axs = axs.ravel()
for i, ax in enumerate(axs):
    # Index of the first training row that carries the i-th label.
    matches = train_data['label'] == labels[i]
    first_idx = train_data[matches].index[0]
    ax.imshow(X_train[first_idx, :, :, 0], cmap='gray')
    # The title is the class id recovered from the one-hot target.
    ax.set_title(y_train[first_idx].argmax())
    # Hide the tick labels on both axes.
    ax.set_xticklabels([])
    ax.set_yticklabels([])

Scale the data: convert the pixel values to float and divide them by 255.

In [None]:
# Cast both image arrays to float32 and divide by 255.
# NOTE: running this cell again divides the already-scaled arrays a second time.
X_train, X_test = (
    images.astype('float32') / 255 for images in (X_train, X_test)
)

# Define Model

In [None]:
# CNN classifier: two convolutional stages (Conv2D, Conv2D, MaxPool2D, Dropout)
# followed by a dense head that ends in a 25-way softmax.
model = Sequential([
    # Stage 1: two 5x5 convolutions with 32 filters on 28x28x1 inputs.
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
           activation='relu', input_shape=(28, 28, 1)),
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: two 3x3 convolutions with 64 filters.
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
           activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Classifier head.
    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(25, activation="softmax"),
])

In [None]:
# RMSprop with the step size passed as `learning_rate`: the older `lr` alias is
# deprecated and is rejected by newer Keras releases. `decay=0.0` was dropped
# because it only restated the no-decay default and is likewise not accepted by
# newer optimizer implementations.
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)

In [None]:
# Configure training: categorical cross-entropy against the one-hot targets,
# with accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer overview of the compiled model.
model.summary()

In [None]:
# Training hyperparameters: number of passes over the data and mini-batch size.
epochs, batch_size = 5, 64

# Define Image Data Generator

In [None]:
# Augmentation settings: small random rotations, zooms and shifts.
# All centering/normalization/whitening options and both flips are switched off.
augmentation = dict(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=False,
    vertical_flip=False,
)
datagen = ImageDataGenerator(**augmentation)
# NOTE(review): per the Keras docs, fit() is only required for featurewise
# centering/normalization and ZCA whitening, all disabled above, so this call
# is presumably a no-op here. Confirm before removing.
datagen.fit(X_train)

# Train Model

In [None]:
# Train on augmented mini-batches drawn from the training arrays.
# The test arrays are passed as validation data, so the validation metrics
# in `history` are measured on the test split.
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)
history = model.fit(
    train_batches,
    epochs=epochs,
    validation_data=(X_test, y_test),
)

# Analyse Training

In [None]:
# Plot the per-epoch training and validation loss (left panel) and
# accuracy (right panel) recorded in `history`.
fig, axs = plt.subplots(1, 2, figsize=(20, 6))
fig.subplots_adjust(hspace = .2, wspace=.2)
axs = axs.ravel()

loss = history.history['loss']
loss_val = history.history['val_loss']
# Use a dedicated name for the x-axis values. Assigning them to `epochs` would
# replace the integer epoch count with a range object, so re-running the
# training cell afterwards would pass a range as `epochs`.
epoch_axis = range(1, len(loss)+1)

# Left panel: loss curves.
axs[0].plot(epoch_axis, loss, 'bo', label='loss_train')
axs[0].plot(epoch_axis, loss_val, 'ro', label='loss_val')
axs[0].set_title('Value of the loss function')
axs[0].set_xlabel('epochs')
axs[0].set_ylabel('value of the loss function')
axs[0].legend()
axs[0].grid()

# Right panel: accuracy curves.
acc = history.history['accuracy']
acc_val = history.history['val_accuracy']
axs[1].plot(epoch_axis, acc, 'bo', label='accuracy_train')
axs[1].plot(epoch_axis, acc_val, 'ro', label='accuracy_val')
axs[1].set_title('Accuracy')
axs[1].set_xlabel('Epochs')
axs[1].set_ylabel('Value of accuracy')
axs[1].legend()
axs[1].grid()
plt.show()

# Analyse Wrong Predictions

In [None]:
# Run the trained model on the test images; each row holds the outputs of
# the final 25-unit softmax layer.
y_pred = model.predict(X_test)

In [None]:
# Confusion matrix of class ids: the one-hot targets and the predicted
# probabilities are both reduced to ids with argmax (true ids first,
# predicted ids second).
conf_mat = confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))

# Draw the matrix with absolute counts only (no normalized values).
fig, ax = plot_confusion_matrix(conf_mat=conf_mat,
                                show_normed=False,
                                show_absolute=True,
                                figsize=(12, 12))
# Use plt.show() like the other plotting cells: Figure.show() emits a warning
# on non-GUI backends such as the inline notebook backend.
plt.show()