In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.backend import clear_session
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import classification_report, confusion_matrix

### Load Data

In [None]:
# Relative locations of the train / test CSV files.
TRAIN_CSV = "../input/sign-language-mnist/sign_mnist_train/sign_mnist_train.csv"
TEST_CSV = "../input/sign-language-mnist/sign_mnist_test/sign_mnist_test.csv"

# Read both splits into DataFrames.
train = pd.read_csv(TRAIN_CSV)
test = pd.read_csv(TEST_CSV)

In [None]:
# Report how many rows each split contains.
n_train_rows, n_test_rows = len(train), len(test)
print(f"Train data have {n_train_rows} records")
print(f"Test data have {n_test_rows} records")

### Display data

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

In [None]:
# Preview the first five rows of the test frame.
test.head(n=5)

### Check the train and test datasets for missing values

In [None]:
# Number of columns containing at least one missing value, as (train, test).
tuple(frame.isna().any().sum() for frame in (train, test))

### Check the label distribution in the training data

In [None]:
# Count how often each label occurs in the training data.
# The columns are named explicitly so the frame layout does not depend on the
# pandas version: a bare `value_counts().reset_index()` produces the columns
# ('index', 'label') before pandas 2.0 but ('label', 'count') from 2.0 onwards.
df_digit_counts = (
    train['label']
    .value_counts()
    .rename_axis('label')
    .reset_index(name='count')
)

plt.figure(figsize=(20,8))
ax = sns.barplot(x='label', y='count', data=df_digit_counts)

# Annotate every bar with its share of the training set (in percent).
# The text is centred using the bar's real width instead of an assumed 0.8.
for bar in ax.patches:
    share = round((bar.get_height() / len(train)) * 100, 2)
    ax.annotate(f'{share}%',
                (bar.get_x() + bar.get_width() / 2, bar.get_height()),
                ha='center', va='bottom', color='black')

# The data are sign-language labels, not digits, so the figure is titled accordingly.
plt.title("Label Count")
plt.ylabel("Counts")
plt.xlabel("Labels")
plt.show()

### Configs

In [None]:
# Image geometry used when reshaping pixels and declaring the model input.
IMG_W, IMG_H, IMG_C = 28, 28, 1

# Training hyper-parameters.
EPOCHS, BATCH_SIZE = 20, 16

# Number of distinct labels present in the training data.
CLASSES = train['label'].unique().size
CLASSES

### Let's look at some images

In [None]:
# One grid row per label, showing up to `cols` sample images of that label.
unique_labels = train['label'].unique()
rows, cols = len(unique_labels), 10
fig, axs = plt.subplots(rows, cols, figsize=(36, 36))

for i, lbl in enumerate(unique_labels):
    # First `cols` rows carrying this label; column 0 is skipped (pixel columns only).
    imgs = train.loc[train['label'] == lbl].iloc[:cols, 1:].to_numpy()
    for j, img in enumerate(imgs):
        cell = axs[i, j]
        cell.matshow(img.reshape(IMG_W, IMG_H))
        cell.axis('off')
        cell.set_title(f'label - {str(lbl).upper()}', fontsize=24)

fig.tight_layout()

### Create features, labels

In [None]:
# Features are every column except 'label'; the target is the 'label' column.
train_X = train.drop(columns=['label'])
train_y = train["label"]

test_X = test.drop(columns=['label'])
test_y = test["label"]

### Convert to np.array

In [None]:
# Convert the pandas objects to NumPy arrays, keeping the same variable names.
train_X, train_y, test_X, test_y = (
    np.array(part) for part in (train_X, train_y, test_X, test_y)
)

### Divide features by 255 to Normalize

In [None]:
# Scale the pixel values by 255 (presumably the maximum 8-bit intensity - confirm against the data).
PIXEL_SCALE = 255
train_X, test_X = train_X / PIXEL_SCALE, test_X / PIXEL_SCALE

### Reshape features 

In [None]:
# Turn each flat pixel row into an image tensor.
# The shape comes from the shared config constants (same order as the model's
# declared `input_shape`), so a config change cannot leave the data and the
# network out of sync.
image_shape = (-1, IMG_W, IMG_H, IMG_C)
train_X = train_X.reshape(image_shape)
test_X = test_X.reshape(image_shape)

### Transform labels using LabelBinarizer

In [None]:
# One-hot encode the labels.
# The encoder is fitted on the training labels ONLY and then reused for the
# test labels. Re-fitting on the test set could assign the one-hot columns to
# different classes if the two splits do not contain the same label set.
label_binarizer = LabelBinarizer()
train_y = label_binarizer.fit_transform(train_y)
test_y = label_binarizer.transform(test_y)

In [None]:
# Display the encoded training labels produced by the LabelBinarizer cell.
train_y

### Create train, validation data

In [None]:
# Hold out 20% of the training data for validation; the fixed seed keeps the split reproducible.
split_options = dict(test_size=0.2, random_state=42, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(train_X, train_y, **split_options)

In [None]:
def create_model():
    """Build the (uncompiled) convolutional classifier.

    The Keras session is cleared first. The network consists of four
    convolutional stages; each stage stacks several Conv2D + BatchNormalization
    pairs and ends with a 2x2 max-pool and 25% dropout. The head flattens the
    features, applies a 512-unit ReLU dense layer with batch normalisation and
    30% dropout, and finishes with a softmax layer of ``CLASSES`` units.

    Returns:
        The assembled ``Sequential`` model.
    """
    clear_session()

    # (filters, kernel size, number of conv layers, pool strides) per stage.
    # `None` strides leaves MaxPool2D at its default, as in the first stage.
    conv_stages = [
        (32, (5, 5), 2, None),
        (64, (3, 3), 2, (2, 2)),
        (128, (3, 3), 3, (2, 2)),
        (256, (3, 3), 3, (2, 2)),
    ]

    net = Sequential()
    is_first_layer = True
    for n_filters, kernel, n_convs, pool_strides in conv_stages:
        for _ in range(n_convs):
            conv_kwargs = dict(filters=n_filters, kernel_size=kernel,
                               padding='Same', activation='relu')
            if is_first_layer:
                # Only the very first layer declares the input shape.
                conv_kwargs['input_shape'] = (IMG_W, IMG_H, IMG_C)
                is_first_layer = False
            net.add(Conv2D(**conv_kwargs))
            net.add(BatchNormalization())
        net.add(MaxPool2D(pool_size=(2, 2), strides=pool_strides))
        net.add(Dropout(0.25))

    # Classification head.
    for head_layer in (
        Flatten(),
        Dense(512, activation="relu"),
        BatchNormalization(),
        Dropout(0.3),
        Dense(CLASSES, activation="softmax"),
    ):
        net.add(head_layer)

    return net

In [None]:
# Build a fresh network and print its layer-by-layer summary.
model = create_model()
model.summary()

In [None]:
# Compile with the Adamax optimizer and a categorical cross-entropy loss;
# accuracy is tracked as the reporting metric.
model.compile(optimizer='adamax',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Halve the learning rate when validation accuracy has not improved for
# 3 epochs. The monitored key must be 'val_accuracy': with
# metrics=['accuracy'] that is the name recorded in the training history
# (the plotting cell reads the same key). The previous 'val_acc' matched no
# logged metric, so the schedule could never trigger.
lr_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                 patience=3,
                                 verbose=1,
                                 factor=0.5,
                                 min_lr=0.00001)

# Stop training once validation loss has not improved for 5 epochs.
es = EarlyStopping(monitor='val_loss',
                   min_delta=0,
                   patience=5,
                   verbose=0, mode='auto')

### Generate augmented images using ImageDataGenerator

In [None]:
# Augmentation settings, spelled out option by option.
augmentation_options = {
    'featurewise_center': False,             # set input mean to 0 over the dataset
    'samplewise_center': False,              # set each sample mean to 0
    'featurewise_std_normalization': False,  # divide inputs by std of the dataset
    'samplewise_std_normalization': False,   # divide each input by its std
    'zca_whitening': False,                  # apply ZCA whitening
    'rotation_range': 10,                    # random rotation range, in degrees
    'zoom_range': 0.1,                       # random zoom
    'width_shift_range': 0.12,               # horizontal shift (fraction of total width)
    'height_shift_range': 0.12,              # vertical shift (fraction of total height)
    'horizontal_flip': False,                # no random horizontal flips
    'vertical_flip': False,                  # no random vertical flips
}

datagen = ImageDataGenerator(**augmentation_options)
datagen.fit(X_train)

### Train model

In [None]:
# Train on augmented batches and validate on the untouched hold-out split.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated and
# absent from recent Keras releases. The former `shuffle=True` argument is
# dropped because Keras ignores it for generator input (the batches are drawn
# by `datagen.flow`).
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=BATCH_SIZE),
    epochs=EPOCHS,
    validation_data=(X_val, y_val),
    steps_per_epoch=X_train.shape[0] // BATCH_SIZE,
    callbacks=[lr_reduction, es],
)

### Visualize performance

In [None]:
# Plot the loss and accuracy curves for training and validation.
# Top panel: loss; bottom panel: accuracy. The x axis is the epoch index.
fig, ax = plt.subplots(2, 1)

ax[0].plot(history.history['loss'], color='b', label="Training loss")
ax[0].plot(history.history['val_loss'], color='r', label="validation loss")
ax[0].set_ylabel("Loss")
ax[0].legend(loc='best', shadow=True)

ax[1].plot(history.history['accuracy'], color='b', label="Training accuracy")
ax[1].plot(history.history['val_accuracy'], color='r', label="Validation accuracy")
ax[1].set_ylabel("Accuracy")
ax[1].set_xlabel("Epoch")
ax[1].legend(loc='best', shadow=True)

fig.tight_layout()

### Check performance on test data

In [None]:
# Collapse the predicted probabilities and the one-hot targets to class indices.
ypred = model.predict(test_X).argmax(axis=1)
ytest = test_y.argmax(axis=1)

# Confusion matrix of true vs. predicted class indices, drawn as a heatmap.
cf_matrix = confusion_matrix(ytest, ypred)

plt.figure(figsize=(20,8))
ax = sns.heatmap(cf_matrix, annot=True, fmt='g')
plt.show()

# Per-class precision / recall / F1 summary.
print("\n\n")
report_text = classification_report(ytest, ypred)
print(report_text)

In [None]:
# Name each one-hot column after the original label it encodes.
# The names come from the fitted encoder rather than a hard-coded
# `range(25) if i != 9`, so they always follow the labels actually present.
class_ids = label_binarizer.classes_
all_classes = [f"Class {class_id}" for class_id in class_ids]

# `labels` is passed explicitly so the report has exactly one row per encoded
# class, which keeps `target_names` aligned even if some class never appears
# in `ytest` / `ypred`.
print(classification_report(ytest, ypred,
                            labels=np.arange(len(class_ids)),
                            target_names=all_classes))