# Import Packages

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import os

In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, save_model, load_model, model_from_json
from tensorflow.keras.layers import Dense, Dropout, Activation, Reshape, Conv2D, AveragePooling2D, Flatten
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [4]:
os.makedirs('models', exist_ok=True)
os.makedirs('submissions', exist_ok=True)

# Import Data

In [5]:
%%time
# train_data = pd.read_csv('data/MNIST/train.csv
train_data = pd.read_csv('data/MNIST/mnist_train.csv')
test_data = pd.read_csv('data/MNIST/test.csv')

CPU times: user 8.11 s, sys: 601 ms, total: 8.71 s
Wall time: 8.71 s


In [6]:
(train_data.shape, test_data.shape)

((60000, 785), (28000, 784))

In [7]:
X_train = np.array(train_data.iloc[:, 1:])
y_train = np.array(train_data.iloc[:, 0])

In [8]:
X_test = np.array(test_data)

In [9]:
(X_train.shape, y_train.shape, X_test.shape)

((60000, 784), (60000,), (28000, 784))

# Simple Pre-Process Data

In [10]:
# Scale and convert the train images and add channels
X_train = X_train / 255.0
X_train = X_train.reshape((-1, 28, 28, 1))

# Scale and convert the train images and add channels
X_test = X_test / 255.0
X_test = X_test.reshape((-1, 28, 28, 1))

# One Hot Encoding
y_train = tf.keras.utils.to_categorical(y_train, 10)

In [11]:
X_train.shape, y_train.shape, X_test.shape

((60000, 28, 28, 1), (60000, 10), (28000, 28, 28, 1))

# Build NN Model

## Model 1

In [12]:
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

set_optm(model)

## Model 2

In [13]:
model = Sequential()

model.add(Conv2D(filters=16, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Conv2D(filters=36, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

set_optm(model)

## Model 3

In [14]:
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

set_optm(model)

## Model 4

In [15]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))

model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))

model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(10, activation='softmax'))

set_optm(model)

## Set Optimizer

In [13]:
def set_optm(model):
    # optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001)
    # optimizer = tf.keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)  

    optimizer = tf.train.AdamOptimizer(learning_rate=0.0005, beta1=0.9, beta2=0.999, epsilon=1e-08)

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 28, 14, 16)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 28, 14, 32)        4640      
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 28, 14, 32)        9248      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 28, 7, 16)         0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 28, 7, 64)         9280      
__________

# Fit Model

## With validation

In [17]:
num_epoch = 100

In [18]:
callback_val = [EarlyStopping(monitor='val_loss', patience=10),
                ModelCheckpoint(filepath='models/weights.{epoch:02d}-{val_loss:.2f}_val.h5',
                                monitor='val_loss',
                                save_best_only=True)]

In [19]:
%%time
model.fit(X_train, y_train, epochs=num_epoch, batch_size=256, validation_split=0.33, callbacks=callback_val)

Train on 40199 samples, validate on 19801 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
CPU times: user 4min 24s, sys: 55.9 s, total: 5min 20s
Wall time: 5min 44s


<tensorflow.python.keras.callbacks.History at 0x7fb3e63b4e48>

## With full training data

Without validation set, have to set the num epochs manually without early stopping.

In [20]:
num_epoch = 80

In [21]:
model.fit(X_train, y_train, epochs=num_epoch, batch_size=256)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


<tensorflow.python.keras.callbacks.History at 0x7fb3e6c45828>

# Saving Model

## Save Entire Model

In [22]:
save_model(model, 'models/model_4.h5')



## Save Architecture and Weights Separately

In [23]:
with open('models/model_4_architecture.json', 'w') as f:
    f.write(model.to_json())

In [24]:
model.save_weights('models/model_4_weights.h5')

# Load Model

## Load Entire Model

In [12]:
# model = load_model("models/model_4.h5")

model = load_model("models/weights.28-0.03_val.h5")
set_optm(model)



## Load Architecture and Weights

In [26]:
with open('models/model_4_architecture.json', 'r') as f:
    model = model_from_json(f.read())

In [30]:
model.load_weights('models/model_4_weights.h5')

# Predict on Test Data

In [17]:
%%time
prediction = model.predict(X_test)
prediction = np.argmax(prediction,axis=1)

CPU times: user 4.4 s, sys: 1.04 s, total: 5.43 s
Wall time: 4.34 s


In [18]:
n_samples_test = X_test.shape[0]

In [19]:
data_predict = {"ImageId":range(1, n_samples_test+1), "Label":prediction}
data_predict = pd.DataFrame(data_predict)
data_predict.to_csv("submissions/val.csv", index=False)

Kaggle Score: 

- 0.98514 (20 epochs, 5 epochs)
- 0.99328 (80 epochs, Model 6)
- 0.99142 (14 epochs with validation, Model 6)
- 0.99914 (using full mnist data)