In [1]:
from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization
from keras.layers import Dropout, Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Reshape
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

Using TensorFlow backend.


In [2]:
# Configuration: name of the label column and the seed for the train/validation split.
target = 'label'
trainSplitRandomSeed = 1

# Read the labelled training table and the table to predict on from the working directory.
training = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
# Labels are the `target` column; features are every other column.
y_training = training[target]
feature_mask = training.columns != target

# Divide by 255.0 so the values are rescaled (presumably 8-bit pixel intensities
# mapped into [0, 1] -- confirm against the data source).
# NOTE: `test` is rebound to its scaled version, so re-running this cell scales it again.
X_training = training.loc[:, feature_mask] / 255.0
test = test / 255.0

In [4]:
# Hold out 10% of the labelled rows for validation. Stratifying on the labels
# keeps the class proportions comparable in both partitions, and the fixed
# seed makes the split repeatable.
X_train, X_validation, y_train, y_validation = train_test_split(
    X_training,
    y_training,
    test_size=0.1,
    random_state=trainSplitRandomSeed,
    stratify=y_training,
)

In [5]:
# Per-sample tensor shape expected by the network (height, width, channels)
# and the number of output classes.
image_shape = (28, 28, 1)
num_classes = 10

# Turn each flat row into a 28x28 single-channel image.
X_train = np.array(X_train).reshape(X_train.shape[0], *image_shape)
X_validation = np.array(X_validation).reshape(X_validation.shape[0], *image_shape)
test = np.array(test).reshape(test.shape[0], *image_shape)

# One-hot encode the integer labels to match the categorical cross-entropy loss.
y_train = np_utils.to_categorical(y_train, num_classes)
y_validation = np_utils.to_categorical(y_validation, num_classes)

In [6]:
def CreateNNModel(dropoutRate):
    """Build and compile the convolutional classifier.

    Architecture, in order:
      * two 3x3 convolutions with 32 filters, each followed by batch
        normalisation and ReLU, then 2x2 max pooling;
      * two 3x3 convolutions with 64 filters, each followed by batch
        normalisation and ReLU, then 2x2 max pooling;
      * flatten, a 512-unit dense layer with batch normalisation, ReLU and
        dropout, and a 10-unit softmax output.

    Args:
        dropoutRate: rate passed to the Dropout layer after the dense layer.

    Returns:
        A Sequential model compiled with categorical cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    layer_stack = [
        # Convolutional stage 1 (32 filters); the first layer fixes the input shape.
        Conv2D(32, (3, 3), input_shape=(28, 28, 1)),
        BatchNormalization(axis=-1),
        Activation('relu'),
        Conv2D(32, (3, 3)),
        BatchNormalization(axis=-1),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),

        # Convolutional stage 2 (64 filters).
        Conv2D(64, (3, 3)),
        BatchNormalization(axis=-1),
        Activation('relu'),
        Conv2D(64, (3, 3)),
        BatchNormalization(axis=-1),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),

        # Classifier head.
        Flatten(),
        Dense(512),
        BatchNormalization(),
        Activation('relu'),
        Dropout(dropoutRate),
        Dense(10),
        Activation('softmax'),
    ]

    # Sequential adds the layers in list order, same as successive add() calls.
    network = Sequential(layer_stack)
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [7]:
# Wrap the Keras model factory so scikit-learn can treat it as an estimator.
# NOTE: despite its name, `nnRegressor` is a classifier (KerasClassifier).
nnRegressor = KerasClassifier(build_fn=CreateNNModel, verbose=1)

# Single-step pipeline; the step name 'nn' is the prefix of every key in param_grid.
estimators = [('nn', nnRegressor)]
pipeline = Pipeline(steps=estimators)

# Every hyper-parameter lists one candidate, so the grid holds a single combination.
# `dropoutRate` is forwarded to CreateNNModel; the remaining keys are fit arguments.
param_grid = {
    'nn__dropoutRate': [0.2],
    'nn__batch_size': [500],
    'nn__validation_split': [0.1],
    'nn__epochs': [20],
}

In [8]:
# 3-fold cross-validated search over param_grid using the pipeline defined above.
reg = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=3)

In [9]:
# Run the cross-validated search on the training partition; left as the cell's
# last expression so the fitted GridSearchCV repr is displayed.
reg.fit(X=X_train, y=y_train)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Train on 22680 samples, validate on 2520 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 22680 samples, validate on 2520 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 22680 samples, validate on 2520 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 34020 samples, validate on 3780 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=Pipeline(memory=None,
     steps=[('nn', <keras.wrappers.scikit_learn.KerasClassifier object at 0x000002006F50C278>)]),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'nn__dropoutRate': [0.2], 'nn__batch_size': [500], 'nn__validation_split': [0.1], 'nn__epochs': [20]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [10]:
# Score the fitted search object on the held-out validation partition.
reg.score(X=X_validation, y=y_validation)



0.9914285739262899

In [14]:
# Predict a class for every row of the prepared `test` array.
result = reg.predict(test)

# Build the output table once. The original built the frame, renamed its
# column, then rebuilt it and lost the rename; here the column is named
# 'Label' at construction and the index starts at 1 (written as 'ImageId').
resultDf = pd.DataFrame(
    {'Label': result.flatten()},
    index=np.arange(1, result.size + 1),
)

# Output format: a header row "ImageId,Label" followed by one row per prediction.
resultDf.to_csv('result.txt', header=['Label'], index_label='ImageId')

