In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the Kaggle input directory,
# then print them one per line.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing Libraries

In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix

import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout , BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

In [None]:
# Read the training CSV and the test CSV (in that order) into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/sign-language-mnist/sign_mnist_train/sign_mnist_train.csv',
        '/kaggle/input/sign-language-mnist/sign_mnist_test/sign_mnist_test.csv',
    )
)

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

## Visualizing training dataset

In [None]:
# Plot how many training samples each label has, to check class balance.
# The label column is passed by keyword: from seaborn 0.12 on, `data` is the
# only positional argument of countplot, so a positional Series is no longer
# interpreted as the x variable.
plt.figure(figsize=(10, 5))
sns.countplot(x=train['label'])

* The classes are roughly balanced.
* Now we can separate the dependent variable (the label) from the independent variables (the pixels).

In [None]:
# Split the label column off each frame: `pop` returns the column and removes
# it from the frame in a single step, leaving only the feature columns.
y_train = train.pop('label')
y_test = test.pop('label')

## Encoding
* Labels to OneHot.
* Images to values.

In [None]:
# One-hot encode the labels. The binarizer is fitted on the training labels
# only and then reused for the test labels, so both splits share the same
# class-to-column mapping. Re-fitting on the test labels could silently yield
# a different mapping if the set of classes present there differed.
lB = LabelBinarizer()
y_train = lB.fit_transform(y_train)
y_test = lB.transform(y_test)

In [None]:
# Pull the underlying NumPy arrays out of the (label-free) frames.
X_train, X_test = train.values, test.values

## Normalizing
Pixel intensities range from 0 to 255. Scaling them to the range [0, 1] keeps the inputs small and on a common scale, which helps the network train faster and more stably.


In [None]:
# Divide both splits by 255 (assumes raw values span 0-255, so the result
# falls between 0 and 1).
X_train, X_test = X_train / 255, X_test / 255

## Reshaping 
A convolutional network expects each image as a 3D array of shape (height, width, color_channels), so every flat row is reshaped to (28, 28, 1).

In [None]:
# Give every flat row an explicit (28, 28, 1) layout; the leading -1 lets
# NumPy infer the number of samples.
image_shape = (-1, 28, 28, 1)
X_train, X_test = X_train.reshape(image_shape), X_test.reshape(image_shape)

In [None]:
# Display the shapes of the features and encoded labels for both splits.
tuple(arr.shape for arr in (X_train, X_test, y_train, y_test))

## Splitting data for Validation
* It is good practice to hold out validation data so that the model is tuned without ever seeing the test data.

In [None]:
# Hold out 20% of the training data for validation. A fixed random_state makes
# the split (and therefore the reported metrics) reproducible across runs.
# NOTE: this cell overwrites X_train/y_train, so re-running it on its own
# shrinks the training set again; use "Restart & Run All" instead.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

In [None]:
# Display the shapes of every feature/label array after the validation split.
tuple(
    arr.shape
    for arr in (X_train, X_test, X_valid, y_train, y_test, y_valid)
)

## Visualizing Few images from training set


In [None]:
# Show the first ten training images in a 2x5 grid (row-major order).
fig, axes = plt.subplots(2, 5, figsize=(10, 10))
for idx, ax in enumerate(axes.flat):
    ax.imshow(X_train[idx].reshape(28, 28), cmap='gray')
# Adjust the layout once, after every panel has been drawn, rather than once
# per row of the grid.
fig.tight_layout()

## Data Augmentation
* Data augmentation generates randomly transformed copies of the training images, which helps protect the model from overfitting.

In [None]:
# Random transformations applied on the fly to training batches: rotation,
# zoom, and horizontal/vertical shifts.
augmentation = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
dataGen = ImageDataGenerator(**augmentation)

# NOTE(review): fit() presumably only matters for the feature-wise
# normalisation/whitening options, none of which are enabled here - confirm
# before removing.
dataGen.fit(X_train)

## Model Creation

In [None]:
# CNN for 28x28x1 inputs: three convolution stages (75, 50 and 25 filters),
# each followed by batch normalisation and 2x2 max-pooling, then a 512-unit
# dense layer and a 24-way softmax output.
conv_opts = dict(kernel_size=(3, 3), strides=1, padding='same', activation='relu')
pool_opts = dict(pool_size=(2, 2), strides=2, padding='same')

model = Sequential([
    # Stage 1
    Conv2D(75, input_shape=(28, 28, 1), **conv_opts),
    BatchNormalization(),
    MaxPool2D(**pool_opts),
    # Stage 2 (with dropout between the convolution and the normalisation)
    Conv2D(50, **conv_opts),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D(**pool_opts),
    # Stage 3
    Conv2D(25, **conv_opts),
    BatchNormalization(),
    MaxPool2D(**pool_opts),
    # Classifier head
    Flatten(),
    Dense(units=512, activation='relu'),
    Dropout(0.3),
    Dense(units=24, activation='softmax'),
])

model.summary()

In [None]:
# Adam optimiser with categorical cross-entropy loss, tracking accuracy during
# training.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 5 epochs on augmented batches of 128 images, validating on the
# held-out (un-augmented) validation arrays after each epoch.
train_batches = dataGen.flow(X_train, y_train, batch_size=128)
history = model.fit(
    train_batches,
    validation_data=(X_valid, y_valid),
    epochs=5,
)

In [None]:
# evaluate() returns [loss, accuracy] for the compiled metrics; report the
# accuracy as a percentage.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test accuracy of the model:- " , test_accuracy*100 , "%")

## Plotting Loss/Accuracy Graph

In [None]:
# One figure per metric, each comparing the training curve with the
# validation curve across epochs.
curves = [
    (1, 'Loss', 'loss', 'val_loss'),
    (2, 'Accuracy', 'accuracy', 'val_accuracy'),
]
for fig_num, title, train_key, valid_key in curves:
    plt.figure(fig_num)
    plt.plot(history.history[train_key])
    plt.plot(history.history[valid_key])
    plt.legend(['training','validation'])
    plt.title(title)
    plt.xlabel('epoch')
plt.show()

## Model Testing

In [None]:
# Map each class index (0-23) to its letter. The alphabet string below skips
# 'J' and stops at 'Y', so index 9 is 'K'.
className = dict(enumerate('ABCDEFGHIKLMNOPQRSTUVWXY'))

In [None]:
# Sequential.predict_classes() was removed from Keras (TensorFlow >= 2.6).
# Take the argmax over the softmax output to get each predicted class index.
predictions = np.argmax(model.predict(X_test), axis=-1)

## Visualizing a few predictions with their actual labels

In [None]:
# 3x5 grid of the first 15 test images. Each panel's title is the actual
# letter and its x-label is the predicted letter.
fig, axes = plt.subplots(3, 5, figsize=(10, 10))
for idx, ax in enumerate(axes.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(X_test[idx].reshape(28, 28), cmap='gray')
    ax.set_title(className[np.argmax(y_test[idx])])
    ax.set_xlabel(className[predictions[idx]])
plt.show()