In [1]:

import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:

# Load sign language MNIST training data
slmnist = pd.read_csv('data/sign_mnist_train.csv')
slmnist.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,3,107,118,127,134,139,143,146,150,153,...,207,207,207,207,206,206,206,204,203,202
1,6,155,157,156,156,156,157,156,158,158,...,69,149,128,87,94,163,175,103,135,149
2,2,187,188,188,187,187,186,187,188,187,...,202,201,200,199,198,199,198,195,194,195
3,2,211,211,212,212,211,210,211,210,210,...,235,234,233,231,230,226,225,222,229,163
4,13,164,167,170,172,176,179,180,184,185,...,92,105,105,108,133,163,157,163,164,179


In [3]:
slmnist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27455 entries, 0 to 27454
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 164.4 MB


In [4]:

# Get number of target classes
num_classes = slmnist['label'].unique().shape[0]

In [5]:

# Check class representation in training data
slmnist['label'].value_counts() # label=9 is missing

17    1294
16    1279
11    1241
22    1225
5     1204
18    1199
3     1196
14    1196
19    1186
23    1164
8     1162
20    1161
13    1151
2     1144
0     1126
24    1118
10    1114
6     1090
15    1088
21    1082
12    1055
7     1013
1     1010
4      957
Name: label, dtype: int64

In [6]:

y_train = slmnist['label'].to_numpy()
X_train = slmnist.drop('label', axis=1).to_numpy()

In [7]:

# Callback for early stopping monitor
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [8]:

# Define Sequential model
model = tf.keras.Sequential()

In [9]:

# Define a hidden layer and output layer
model.add(tf.keras.layers.Dense(20, activation='relu', input_shape=(X_train.shape[1],)))
model.add(tf.keras.layers.Dense(num_classes+1, activation='softmax')) # peculiarity of using SCE as loss

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                15700     
_________________________________________________________________
dense_1 (Dense)              (None, 25)                525       
Total params: 16,225
Trainable params: 16,225
Non-trainable params: 0
_________________________________________________________________


In [10]:

# Compile model with optimizer, loss function and evalution metric
model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.01), 
              loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])

In [11]:

# Train model and validate on 10% split
model.fit(X_train, y_train, 
          epochs=50,
         validation_split=0.5,
         callbacks=[early_stopping_callback])

Train on 13727 samples, validate on 13728 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50


<tensorflow.python.keras.callbacks.History at 0x7fc0f8fb5090>

In [12]:

# Load test data
slmnist = pd.read_csv('data/sign_mnist_test.csv')
slmnist.head()


y_test = slmnist['label'].to_numpy()
X_test = slmnist.drop('label', axis=1).to_numpy()

In [13]:

# Evaluate model on test data
model.evaluate(X_test, y_test)



[3.20232663604047, 0.034300055]