# Import Packages

In [1]:
import tensorflow as tf
import numpy as np
import os

In [2]:
# Load the autoreload extension and set mode 2 so edited modules are picked up live.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook.
%matplotlib inline

In [None]:
# Make sure the folders used for saved models and prediction files exist.
for directory in ('model', 'output'):
    os.makedirs(directory, exist_ok=True)

# Import Data

In [3]:
# Read the train and test CSV files, skipping the header row of each.
train_data, test_data = (
    np.genfromtxt(csv_path, delimiter=',', skip_header=1)
    for csv_path in ('data/MNIST/train.csv', 'data/MNIST/test.csv')
)

In [4]:
# Row/column counts of the raw train and test arrays.
train_data.shape, test_data.shape

((42000, 785), (28000, 784))

In [5]:
# Column 0 holds the labels; the remaining columns hold the features.
y_train = train_data[:, 0].copy()
X_train = train_data[:, 1:].copy()

In [6]:
# The whole test array is used as features; work on a copy of it.
X_test = test_data.copy(order='C')

In [7]:
# Shapes after splitting features from labels.
X_train.shape, y_train.shape, X_test.shape

((42000, 784), (42000,), (28000, 784))

# Simple Pre-Process Data

In [8]:
# Scale the train pixels by 1/255 and add a trailing channel axis.
# Everything here is derived from the raw arrays (not from the current
# X_train / X_test / y_train), so re-running this cell cannot scale, reshape
# or one-hot encode the data a second time.
X_train = (train_data[:, 1:] / 255).reshape((-1, 28, 28, 1))

# Same scaling and channel axis for the test images.
X_test = (test_data / 255).reshape((-1, 28, 28, 1))

# One-hot encode the label column (column 0 of the train file) into 10 classes.
y_train = tf.keras.utils.to_categorical(train_data[:, 0], 10)

In [9]:
# Shapes after scaling, reshaping and one-hot encoding.
(X_train.shape, y_train.shape, X_test.shape)

((42000, 28, 28, 1), (42000, 10), (28000, 28, 28, 1))

TODO: Plot images

# Build NN Model

## Model 1

In [None]:
# Model 1: two 3x3 conv + max-pool stages (32 then 64 filters), followed by
# a 64-unit dense layer and a 10-way softmax output.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        activation=tf.nn.relu,
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
    tf.keras.layers.Conv2D(
        filters=64,
        kernel_size=(3, 3),
        activation=tf.nn.relu,
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=64, activation=tf.nn.relu),
    tf.keras.layers.Dense(units=10, activation=tf.nn.softmax),
])

## Model 2

In [None]:
# Model 2: two 5x5 conv + max-pool stages (16 then 36 filters), followed by
# a 128-unit dense layer and a 10-way softmax output.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(
        filters=16,
        kernel_size=(5, 5),
        activation=tf.nn.relu,
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
    tf.keras.layers.Conv2D(
        filters=36,
        kernel_size=(5, 5),
        activation=tf.nn.relu,
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation=tf.nn.relu),
    tf.keras.layers.Dense(units=10, activation=tf.nn.softmax),
])

## Model 3

In [10]:
# Model 3: two 5x5 conv + max-pool stages (32 then 64 filters), followed by
# a 128-unit dense layer and a 10-way softmax output.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=(5, 5),
        activation=tf.nn.relu,
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
    tf.keras.layers.Conv2D(
        filters=64,
        kernel_size=(5, 5),
        activation=tf.nn.relu,
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation=tf.nn.relu),
    tf.keras.layers.Dense(units=10, activation=tf.nn.softmax),
])

In [11]:
# Use the tf.keras RMSprop optimizer (rather than tf.train.RMSPropOptimizer).
optimizer = tf.keras.optimizers.RMSprop(
    lr=0.001,
    rho=0.9,
    epsilon=None,
    decay=0.0,
)

# Labels are one-hot encoded, hence categorical cross-entropy; track accuracy.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 32)        832       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 8, 8, 64)          51264     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               131200    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total para

# Fit Model

## With validation

In [12]:
# Callbacks for the run with a validation split: stop after 10 epochs without
# val_loss improvement, and keep only the best model (by val_loss) on disk.
cb1 = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
    tf.keras.callbacks.ModelCheckpoint(
        filepath='model/best_model_val.h5',
        monitor='val_loss',
        save_best_only=True,
    ),
]

In [13]:
# Train with 25% of the training data held out for validation.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=256,
    epochs=20,
    validation_split=0.25,
    callbacks=cb1,
)

Train on 31500 samples, validate on 10500 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


<tensorflow.python.keras.callbacks.History at 0x23f024e80b8>

## Using full training data

In [None]:
# Callbacks for training on the full training set. That run has no validation
# split, so no `val_loss` is ever reported; monitor the training `loss` instead
# so early stopping and best-model checkpointing have a metric to act on.
cb2 = [
    tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10),
    tf.keras.callbacks.ModelCheckpoint(
        filepath='model/best_model_all.h5',
        monitor='loss',
        save_best_only=True,
    ),
]

In [14]:
# Fit on all training samples (no validation split, no callbacks).
# NOTE: `model` is not rebuilt in this cell, so training continues from
# whatever weights it already has.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=256,
    epochs=20,
    callbacks=None,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x23f6e632a90>

TODO: Add data augmentation and try SGD with warm restarts.

# Save Model

In [16]:
# Persist the trained network to an HDF5 file, leaving out the optimizer state.
tf.keras.models.save_model(
    model=model,
    filepath="model/digit-recognizer-TF-Keras-CNN3.h5",
    include_optimizer=False,
)

# Load Model

In [17]:
# Reload the saved network without compiling it (it was saved without its optimizer).
md = tf.keras.models.load_model(
    filepath="model/digit-recognizer-TF-Keras-CNN3.h5",
    compile=False,
)

# Make Prediction on Test Data

In [18]:
# Run the reloaded model on the preprocessed test images.
prediction = md.predict(x=X_test)

In [19]:
# Predicted digit = index of the largest value along axis 1 of the prediction.
label = tf.argmax(prediction, axis=1)
# Image ids run from 1 up to the number of test images.
image_id = np.arange(1, len(X_test) + 1)

## Convert result Tensor back to Numpy Array

In [20]:
# Evaluate the argmax tensor with the tf.keras backend -- the same Keras
# implementation used by the rest of this notebook -- instead of importing the
# separate standalone `keras` package just for this one call.
np_label = tf.keras.backend.eval(label)
# Two columns per row: image id, predicted label.
result = np.column_stack((image_id, np_label))

Using TensorFlow backend.


## Save results as csv

In [21]:
np.savetxt("output/digit-recognizer-TF-Keras-CNN3.csv", result, fmt=('%d,%d'), delimiter=",", \
           header="ImageId,Label",comments='')

!kaggle competitions submit -c digit-recognizer -f output/digit-recognizer-TF-Keras-CNN2.csv -m "TF/Keras CNN2"

Kaggle Score: 

- 0.98514 (20 epochs, 5 epochs)
- ? (30 epochs, 30 epochs)