In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Conv2D, BatchNormalization, Activation, Dropout
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.optimizers import Adam, SGD, RMSprop
from keras.activations import relu, selu, elu
from keras.callbacks import ModelCheckpoint
# Plot appearance: ggplot theme, 12pt font, black as the default line colour
plt.style.use('ggplot')
plt.rcParams.update({'font.size': 12, 'lines.color': '#000000'})
# Seed NumPy's global RNG.
# NOTE(review): only NumPy is seeded in the cells shown; the Keras backend RNG
# (weight initialisation, dropout) may still vary between runs - confirm.
np.random.seed(1234567)

In [None]:
# Locations of the sign-language-mnist train / test CSV files (relative to the notebook)
DATA_DIR = '../input/sign-language-mnist'
train_path = f'{DATA_DIR}/sign_mnist_train/sign_mnist_train.csv'
test_path = f'{DATA_DIR}/sign_mnist_test/sign_mnist_test.csv'

In [None]:
# Load the training CSV into a DataFrame and preview its first rows.
# Later cells use the 'label' column as the target and treat every other column as pixel features.
train = pd.read_csv(train_path)
train.head()

In [None]:
# High-level summary: per-column descriptive statistics of the training frame
train.describe()

In [None]:
# Missing-value check: count NaNs per column first, then add the column counts together
missing_per_column = train.isna().sum()
print(f"total empty values: {missing_per_column.sum()}")

In [None]:
# Separate the pixel columns (features) from the class label (target)
X = train.drop(columns='label')
y = to_categorical(train['label'])  # one column per class id (one-hot)

In [None]:
# utility function to reshape square black and white image
# assumes that image has equal width and height in pixels and image is a black and white image
def reshape_bw(X):
    """Reshape flattened square single-channel images into a 4-D image array.

    Parameters
    ----------
    X : pandas.DataFrame or array-like, shape (samples, width * width)
        One flattened image per row.

    Returns
    -------
    numpy.ndarray
        Array of shape (samples, width, width, 1).

    Raises
    ------
    ValueError
        If X is not 2-D, or its number of columns is not a perfect square.
    """
    # np.asarray accepts both DataFrames and plain arrays (the old `.values` was DataFrame-only)
    pixels = np.asarray(X)
    if pixels.ndim != 2:
        raise ValueError(
            f"expected a 2-D (samples, pixels) input, got {pixels.ndim} dimension(s)"
        )
    samples, n_pixels = pixels.shape
    # Round instead of truncating so float error in the square root cannot shave one off
    # the width, then verify the result explicitly.
    width = int(round(n_pixels ** 0.5))
    if width * width != n_pixels:
        raise ValueError(
            f"cannot form square images: {n_pixels} pixels per row is not a perfect square"
        )
    return pixels.reshape(samples, width, width, 1)

In [None]:
# Build the image tensor directly from `train` instead of from the previous value of X,
# so this cell can be re-run safely: a second `X = reshape_bw(X)` would receive the
# already-reshaped array and fail.
X = reshape_bw(train.drop('label', axis=1))
# Image dimensions, used for the network's input shape
width, height, channel = X.shape[1:]

In [None]:
# helper function to help plotting image
def plot_image(X):
    """Display a single image in grayscale with the axes hidden.

    Parameters
    ----------
    X : array-like
        Image of shape (height, width) or (height, width, 1).
    """
    image = np.asarray(X)
    # imshow documents (M, N), (M, N, 3) and (M, N, 4) inputs only, so drop a trailing
    # single-channel axis here rather than relying on matplotlib to tolerate it.
    if image.ndim == 3 and image.shape[-1] == 1:
        image = image[:, :, 0]
    plt.imshow(image, cmap='gray')
    plt.axis('off')
    plt.show()

plot_image(X[0]) # plot sample image

In [None]:
# Hold out 15% of the samples for validation; stratifying on y keeps the
# target distribution the same as before splitting.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.15,
    stratify=y,
)

In [None]:
# Preprocessor that centers and scales inputs feature-wise.
# Its statistics are fitted on the training split only.
generator = ImageDataGenerator(featurewise_std_normalization=True, featurewise_center=True)
generator.fit(X_train)

In [None]:
# Build the CNN from a single layer list: three conv blocks followed by a dense classifier
model = Sequential([
    # conv block 1: 32 filters (the input shape is declared on the first layer)
    Conv2D(32, (5, 5), padding="same", input_shape=(width, height, channel)),
    BatchNormalization(),
    Activation(relu),
    MaxPooling2D((2, 2)),
    # conv block 2: 64 filters
    Conv2D(64, (5, 5), padding="same"),
    BatchNormalization(),
    Activation(relu),
    MaxPooling2D((2, 2)),
    # conv block 3: 128 filters
    Conv2D(128, (5, 5), padding="same"),
    BatchNormalization(),
    Activation(relu),
    MaxPooling2D((2, 2)),
    # classifier head
    Flatten(),
    Dropout(0.4),
    Dense(64),
    BatchNormalization(),
    Activation(relu),
    Dropout(0.25),
    # one softmax unit per one-hot column of y
    Dense(y.shape[1], activation='softmax'),
])

In [None]:
# Training setup: Adam with its default settings, categorical cross-entropy on the
# one-hot targets, and accuracy reported alongside the loss
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

In [None]:
# Checkpoint callback: write 'model.h5' only when the validation loss reaches a new minimum
cb = [
    ModelCheckpoint(
        'model.h5',
        save_best_only=True,
        monitor='val_loss',
        mode='min',
    )
]

In [None]:
# train model
# Model.fit accepts generator / iterator input directly; fit_generator is deprecated and
# has been removed from current Keras releases. Both splits go through `generator` so
# they receive the same feature-wise normalization.
history = model.fit(
    generator.flow(X_train, y_train, batch_size=32),
    epochs=25,
    validation_data=generator.flow(X_val, y_val, batch_size=32),
    callbacks=cb,  # keeps the best-so-far weights on disk
)

In [None]:
# Training curves: one line per metric recorded in history.history, indexed by epoch
training_log = pd.DataFrame(history.history)
_ = training_log.plot(figsize=(8, 8))

In [None]:
# evaluate model on unseen data
# Load the test CSV into a DataFrame and preview its first rows.
test = pd.read_csv(test_path)
test.head()

In [None]:
# Prepare the test set like the training set: pixel columns -> image tensor, label -> one-hot
X_test = reshape_bw(test.drop('label', axis=1))
# Pin the one-hot width to the training targets so y_test always matches the model's
# output layer, even if the highest class id does not occur in the test file.
y_test = to_categorical(test['label'], num_classes=y.shape[1])
X_test.shape

In [None]:
# Predict the class of the first test image.
# The network was trained on batches normalized by `generator`, so the same feature-wise
# centering / scaling has to be applied before predicting. astype() makes a float copy,
# which keeps the in-place standardize() from modifying X_test.
# X_test[:1] is already a 1-sample batch, so no extra axis is needed.
sample_batch = generator.standardize(X_test[:1].astype('float32'))
print(f"label: {np.argmax(model.predict(sample_batch, verbose=0))}")
plot_image(X_test[0])

In [None]:
# Score the model on the test set, fed through the same normalizing generator as training.
# NOTE(review): this evaluates the weights currently in `model`; the checkpoint written to
# 'model.h5' is not reloaded in the cells shown - confirm that is intended.
test_batches = generator.flow(X_test, y_test, batch_size=32)
loss, acc = model.evaluate(test_batches)
print()
print(f"model loss: {loss}")
print(f"model accuracy:{round(acc*100,2)} %")