# Import Packages

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import os

In [14]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, save_model, load_model, model_from_json
from tensorflow.keras.layers import Dense, Dropout, Activation, Reshape, Conv2D, AveragePooling2D, Flatten
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [4]:
os.makedirs('models', exist_ok=True)
os.makedirs('submissions', exist_ok=True)

# Import Data

In [5]:
%%time
train_data = pd.read_csv('data/MNIST/train.csv')
# train_data = pd.read_csv('data/MNIST/mnist_train.csv')
test_data = pd.read_csv('data/MNIST/test.csv')

Wall time: 4.49 s


In [6]:
(train_data.shape, test_data.shape)

((42000, 785), (28000, 784))

In [7]:
X_train = np.array(train_data.iloc[:, 1:])
y_train = np.array(train_data.iloc[:, 0])

In [8]:
X_test = np.array(test_data)

In [9]:
(X_train.shape, y_train.shape, X_test.shape)

((42000, 784), (42000,), (28000, 784))

# Simple Pre-Process Data

In [10]:
# Scale and convert the train images and add channels
X_train = X_train / 255.0
X_train = X_train.reshape((-1, 28, 28, 1))

# Scale and convert the train images and add channels
X_test = X_test / 255.0
X_test = X_test.reshape((-1, 28, 28, 1))

# One Hot Encoding
y_train = keras.utils.to_categorical(y_train, 10)

In [11]:
X_train.shape, y_train.shape, X_test.shape

((42000, 28, 28, 1), (42000, 10), (28000, 28, 28, 1))

# Build NN Model

## Model 1

In [None]:
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

set_optm(model)

## Model 2

In [None]:
model = Sequential()

model.add(Conv2D(filters=16, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Conv2D(filters=36, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

set_optm(model)

## Model 3

In [None]:
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

set_optm(model)

## Model 4

In [16]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))

model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))

model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(10, activation='softmax'))

set_optm(model)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 28, 14, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 14, 32)        4640      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 14, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 28, 7, 16)         0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 28, 7, 64)         9280      
__________

## Set Optimizer

In [15]:
# optimizer = tf.keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)  
optimizer = tf.keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Fit Model

## With validation

In [17]:
num_epoch = 100

In [18]:
callback_val = [EarlyStopping(monitor='val_loss', patience=10),
                ModelCheckpoint(filepath='models/weights.{epoch:02d}-{val_loss:.2f}_val.h5',
                                monitor='val_loss',
                                save_best_only=True)]

In [19]:
%%time
model.fit(X_train, y_train, epochs=num_epoch, batch_size=256, validation_split=0.33, callbacks=callback_val)

Train on 28139 samples, validate on 13861 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Wall time: 5min 33s


<tensorflow.python.keras.callbacks.History at 0x24b6ef82128>

## With full training data

Without validation set, have to set the num epochs manually without early stopping.

In [None]:
num_epoch = 80

In [None]:
model.fit(X_train, y_train, epochs=num_epoch, batch_size=256)

# Saving Model

## Save Entire Model

In [21]:
save_model(model, 'models/model_4.h5')

## Save Architecture and Weights Separately

In [22]:
with open('models/model_4_architecture.json', 'w') as f:
    f.write(model.to_json())

In [23]:
model.save_weights('models/model_4_weights.h5')

# Load Model

## Load Entire Model

In [20]:
# model = load_model("models/model_4.h5")

model = load_model("models/weights.11-0.03_val.h5")

## Load Architecture and Weights

In [None]:
with open('models/model_4_architecture.json', 'r') as f:
    model = model_from_json(f.read())

In [None]:
model.load_weights('models/model_4_weights.h5')

# Predict on Test Data

In [24]:
%%time
prediction = model.predict(X_test)
prediction = np.argmax(prediction,axis=1)

Wall time: 5.42 s


In [25]:
n_samples_test = X_test.shape[0]

In [26]:
data_predict = {"ImageId":range(1, n_samples_test+1), "Label":prediction}
data_predict = pd.DataFrame(data_predict)
data_predict.to_csv("submissions/val.csv", index=False)

!kaggle competitions submit -c digit-recognizer -f submissions/submission_using_mnist_full_no_val.csv -m "using_mnist_full_no_val"

Kaggle Score: 

- 0.98514 (20 epochs, 5 epochs)
- 0.99328 (80 epochs, Model 6)
- 0.99142 (14 epochs with validation, Model 6)
- 0.99914 (using full mnist data)