In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
#import seaborn as sns
%matplotlib inline

# Seed NumPy's global random number generator so NumPy-driven randomness is repeatable.
# NOTE(review): this presumably does not seed the Keras/TensorFlow backend itself — confirm
# whether fully reproducible training is required.
np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
from keras.callbacks import EarlyStopping
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

In [3]:
%config IPCompleter.greedy=True

In [4]:
# Read the training and test sets from CSV files in the working directory
# (train.csv first, then test.csv).
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [5]:
# Features: every column except "label".
X_train = train.drop("label", axis="columns")

# Target: the "label" column on its own.
Y_train = train["label"]

# The combined frame is no longer needed once both pieces are extracted.
del train

# Display how many rows carry each label value.
Y_train.value_counts()

1    4684
7    4401
3    4351
9    4188
2    4177
6    4137
0    4132
4    4072
8    4063
5    3795
Name: label, dtype: int64

In [6]:
# Divide both feature sets by 255.0 in one tuple assignment.
# Re-running this cell divides the already-scaled values again.
X_train, test = X_train / 255.0, test / 255.0

In [7]:
# Turn each flat row into a 28 x 28 x 1 array (height, width, 1 channel);
# the leading -1 lets NumPy infer the number of images.
X_train = X_train.values.reshape((-1, 28, 28, 1))
test = test.values.reshape((-1, 28, 28, 1))

In [8]:
# One-hot encode the labels over 10 classes (e.g. 2 -> [0,0,1,0,0,0,0,0,0,0]).
Y_train = to_categorical(Y_train, 10)

In [9]:
# Seed handed to train_test_split (as random_state) so the train/validation split is repeatable.
random_seed = 1404

In [10]:
# Hold out 10% of the samples as a validation set; the fixed random_state
# keeps the split repeatable.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train,
    Y_train,
    test_size=0.1,
    random_state=random_seed,
)

In [11]:
# Sanity check: dimensions of the training feature array after the train/validation split.
X_train.shape

(37800, 28, 28, 1)

In [15]:
# Candidate values explored by the hyperparameter grid search.
dropout_rate = [
    0.05, 0.1, 0.15,
    0.2, 0.25, 0.3,
]
batch_size = [100, 200, 250]
epochs = [5]
optimizer = [
    'SGD',
    'RMSprop',
    'Adagrad',
    'Adadelta',
    'Adam',
    'Adamax',
    'Nadam',
]

# Not part of the search at present; kept for reference.
#learn_rate = [0.001, 0.005, 0.01, 0.1, 0.2, 0.3]
#momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]

In [24]:
# Short aliases for the training features and one-hot labels that are passed to grid.fit().
X = X_train
Y = Y_train

#Create the model we will use on the grid search
def create_model(dropout_rate=0.25, weight_constraint=0, optimizer = 'RMSProp'):
    """Build and compile the CNN that the grid search evaluates.

    Architecture, in order: two 5x5 convolutions (32 filters) -> max-pool ->
    dropout; two 3x3 convolutions (64 filters) -> max-pool -> dropout;
    flatten -> 256-unit dense (relu) -> dropout -> 10-unit softmax.

    Args:
        dropout_rate: Rate given to each of the three Dropout layers.
        weight_constraint: Accepted for interface compatibility; it is not
            used anywhere in the body.
        optimizer: Optimizer passed straight through to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    # Layers are listed in exactly the order they are stacked.
    stack = [
        # Convolutional block 1 (expects 28x28x1 inputs).
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
               activation='relu', input_shape=(28, 28, 1)),
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(dropout_rate),

        # Convolutional block 2.
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
               activation='relu'),
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(dropout_rate),

        # Classifier head.
        Flatten(),
        Dense(256, activation="relu"),
        Dropout(dropout_rate),
        Dense(10, activation="softmax"),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

In [None]:
# Wrap the Keras model builder so scikit-learn can drive it as an estimator.
model = KerasClassifier(build_fn=create_model, verbose=2)

# Every combination of these candidate lists is evaluated.
param_grid = {
    "batch_size": batch_size,
    "epochs": epochs,
    "dropout_rate": dropout_rate,
    "optimizer": optimizer,
}
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X, Y)

# Report the winning combination first, then one line per combination tried.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ("mean_test_score", "std_test_score", "params")
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Epoch 1/5
 - 177s - loss: 1.8108 - acc: 0.4717
Epoch 2/5
 - 175s - loss: 0.4207 - acc: 0.8684
Epoch 3/5
 - 164s - loss: 0.2448 - acc: 0.9252
Epoch 4/5
 - 167s - loss: 0.1787 - acc: 0.9438
Epoch 5/5
 - 165s - loss: 0.1442 - acc: 0.9566
Epoch 1/5
 - 165s - loss: 1.5872 - acc: 0.4892
Epoch 2/5
 - 165s - loss: 0.3805 - acc: 0.8831
Epoch 3/5
 - 165s - loss: 0.2416 - acc: 0.9267
Epoch 4/5
 - 165s - loss: 0.1746 - acc: 0.9467
Epoch 5/5
 - 166s - loss: 0.1385 - acc: 0.9575
Epoch 1/5
 - 165s - loss: 1.7556 - acc: 0.4328
Epoch 2/5
 - 165s - loss: 0.3683 - acc: 0.8862
Epoch 3/5
 - 164s - loss: 0.2175 - acc: 0.9358
Epoch 4/5
 - 165s - loss: 0.1565 - acc: 0.9527
Epoch 5/5
 - 167s - loss: 0.1307 - acc: 0.9606
Epoch 1/5
 - 166s - loss: 0.2569 - acc: 0.9166
Epoch 2/5
 - 167s - loss: 0.0596 - acc: 0.9817
Epoch 3/5
 - 165s - loss: 0.0404 - acc: 0.9873
Epoch 4/5
 - 165s - loss: 0.0301 - acc: 0.9906
Epoch 5/5
 - 165s - loss: 0.0221 - acc: 0.9935
Epoch 1/5
 - 166s - loss: 0.2543 - acc: 0.9189
Epoch 2/5
 - 

Epoch 1/5
 - 173s - loss: 0.2545 - acc: 0.9193
Epoch 2/5
 - 171s - loss: 0.0642 - acc: 0.9795
Epoch 3/5
 - 171s - loss: 0.0439 - acc: 0.9865
Epoch 4/5
 - 171s - loss: 0.0341 - acc: 0.9894
Epoch 5/5
 - 171s - loss: 0.0262 - acc: 0.9910
Epoch 1/5
 - 181s - loss: 0.2424 - acc: 0.9250
Epoch 2/5
 - 173s - loss: 0.0726 - acc: 0.9780
Epoch 3/5
 - 170s - loss: 0.0492 - acc: 0.9845
Epoch 4/5
 - 171s - loss: 0.0378 - acc: 0.9876
Epoch 5/5
 - 170s - loss: 0.0278 - acc: 0.9909
Epoch 1/5
 - 172s - loss: 0.2636 - acc: 0.9162
Epoch 2/5
 - 171s - loss: 0.0697 - acc: 0.9789
Epoch 3/5
 - 170s - loss: 0.0480 - acc: 0.9849
Epoch 4/5
 - 170s - loss: 0.0367 - acc: 0.9884
Epoch 5/5
 - 172s - loss: 0.0281 - acc: 0.9910
Epoch 1/5
 - 172s - loss: 0.2684 - acc: 0.9143
Epoch 2/5
 - 171s - loss: 0.0768 - acc: 0.9765
Epoch 3/5
 - 171s - loss: 0.0514 - acc: 0.9841
Epoch 4/5
 - 170s - loss: 0.0392 - acc: 0.9885
Epoch 5/5
 - 172s - loss: 0.0300 - acc: 0.9904
Epoch 1/5
 - 175s - loss: 0.2227 - acc: 0.9292
Epoch 2/5
 - 

Epoch 1/5
 - 181s - loss: 0.4533 - acc: 0.8559
Epoch 2/5
 - 177s - loss: 0.1023 - acc: 0.9677
Epoch 3/5
 - 176s - loss: 0.0743 - acc: 0.9769
Epoch 4/5
 - 178s - loss: 0.0618 - acc: 0.9812
Epoch 5/5
 - 177s - loss: 0.0530 - acc: 0.9828
Epoch 1/5
 - 181s - loss: 0.3938 - acc: 0.8724
Epoch 2/5
 - 178s - loss: 0.0888 - acc: 0.9720
Epoch 3/5
 - 177s - loss: 0.0633 - acc: 0.9801
Epoch 4/5
 - 177s - loss: 0.0500 - acc: 0.9842
Epoch 5/5
 - 177s - loss: 0.0421 - acc: 0.9866
Epoch 1/5
 - 184s - loss: 0.3301 - acc: 0.8910
Epoch 2/5
 - 179s - loss: 0.0796 - acc: 0.9750
Epoch 3/5
 - 180s - loss: 0.0570 - acc: 0.9833
Epoch 4/5
 - 181s - loss: 0.0431 - acc: 0.9865
Epoch 5/5
 - 180s - loss: 0.0341 - acc: 0.9894
Epoch 1/5
 - 185s - loss: 0.3624 - acc: 0.8850
Epoch 2/5
 - 181s - loss: 0.0766 - acc: 0.9756
Epoch 3/5
 - 181s - loss: 0.0551 - acc: 0.9824
Epoch 4/5
 - 196s - loss: 0.0405 - acc: 0.9868
Epoch 5/5
 - 183s - loss: 0.0303 - acc: 0.9912
Epoch 1/5
 - 185s - loss: 0.3651 - acc: 0.8860
Epoch 2/5
 - 

Epoch 1/5
 - 195s - loss: 2.0524 - acc: 0.2673
Epoch 2/5
 - 195s - loss: 0.7365 - acc: 0.7610
Epoch 3/5
 - 191s - loss: 0.4224 - acc: 0.8669
Epoch 4/5
 - 189s - loss: 0.3125 - acc: 0.9014
Epoch 5/5
 - 189s - loss: 0.2478 - acc: 0.9219
Epoch 1/5
 - 197s - loss: 1.9716 - acc: 0.3019
Epoch 2/5
 - 190s - loss: 0.6863 - acc: 0.7794
Epoch 3/5
 - 190s - loss: 0.4071 - acc: 0.8724
Epoch 4/5
 - 189s - loss: 0.3002 - acc: 0.9063
Epoch 5/5
 - 190s - loss: 0.2456 - acc: 0.9238
Epoch 1/5
 - 196s - loss: 2.0745 - acc: 0.2696
Epoch 2/5
 - 190s - loss: 0.7529 - acc: 0.7498
Epoch 3/5
 - 189s - loss: 0.4377 - acc: 0.8629
Epoch 4/5
 - 189s - loss: 0.3204 - acc: 0.9015
Epoch 5/5
 - 190s - loss: 0.2522 - acc: 0.9208
Epoch 1/5
 - 199s - loss: 0.2793 - acc: 0.9104
Epoch 2/5
 - 191s - loss: 0.0825 - acc: 0.9748
Epoch 3/5
 - 191s - loss: 0.0628 - acc: 0.9819
Epoch 4/5
 - 191s - loss: 0.0526 - acc: 0.9835
Epoch 5/5
 - 191s - loss: 0.0452 - acc: 0.9866
Epoch 1/5
 - 200s - loss: 0.2994 - acc: 0.9017
Epoch 2/5
 - 

Epoch 1/5
 - 206s - loss: 0.3466 - acc: 0.8891
Epoch 2/5
 - 198s - loss: 0.0747 - acc: 0.9767
Epoch 3/5
 - 197s - loss: 0.0497 - acc: 0.9845
Epoch 4/5
 - 196s - loss: 0.0354 - acc: 0.9884
Epoch 5/5
 - 197s - loss: 0.0284 - acc: 0.9904
Epoch 1/5
 - 208s - loss: 0.3548 - acc: 0.8900
Epoch 2/5
 - 198s - loss: 0.0850 - acc: 0.9749
Epoch 3/5
 - 197s - loss: 0.0583 - acc: 0.9820
Epoch 4/5
 - 200s - loss: 0.0450 - acc: 0.9855
Epoch 5/5
 - 199s - loss: 0.0356 - acc: 0.9880
Epoch 1/5
 - 208s - loss: 0.3563 - acc: 0.8865
Epoch 2/5
 - 198s - loss: 0.0792 - acc: 0.9751
Epoch 3/5
 - 199s - loss: 0.0556 - acc: 0.9822
Epoch 4/5
 - 198s - loss: 0.0410 - acc: 0.9866
Epoch 5/5
 - 198s - loss: 0.0338 - acc: 0.9892
Epoch 1/5
 - 207s - loss: 0.3576 - acc: 0.8848
Epoch 2/5
 - 197s - loss: 0.0859 - acc: 0.9734
Epoch 3/5
 - 198s - loss: 0.0585 - acc: 0.9820
Epoch 4/5
 - 198s - loss: 0.0434 - acc: 0.9865
Epoch 5/5
 - 200s - loss: 0.0369 - acc: 0.9888
Epoch 1/5
 - 209s - loss: 0.3123 - acc: 0.9035
Epoch 2/5
 - 

In [81]:
# Fixed settings for the single training run below: plain scalars here,
# not lists of candidates.
batch_size, epochs = 100, 1

# Search spaces kept for reference only.
#dropout_rate = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3]
#optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
#learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
#momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]

In [77]:
# Hyperparameters for the RMSprop optimizer used when compiling the model.
lr = 0.001
# rho must be a single float: writing `0.8, 0.9` binds a tuple, not a number.
# 0.9 is used because it is the value Keras applies by default for RMSprop.
rho = 0.9
epsilon = 1e-08
decay = 0.0

# Build the optimizer from the values above so they actually take effect;
# passing the bare string 'RMSprop' to compile() would ignore all of them.
optimizer = RMSprop(lr=lr, rho=rho, epsilon=epsilon, decay=decay)

In [14]:
# Learning-rate annealer: monitors 'val_acc' with a patience of 3 epochs,
# scales the learning rate by 0.5 when triggered, and never goes below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

In [32]:
# Early stopping: monitors 'val_acc' with a patience of 5 epochs.
earlystop = EarlyStopping(
    monitor='val_acc',
    patience=5,
    verbose=2,
)

In [68]:
# Data augmentation to reduce overfitting: small random rotations, zooms and
# shifts are enabled; all centering, normalization, whitening and flipping
# options are explicitly switched off.
datagen = ImageDataGenerator(
    # --- enabled geometric perturbations ---
    rotation_range=10,        # random rotation, in degrees
    zoom_range=0.1,           # random zoom
    width_shift_range=0.1,    # horizontal shift, fraction of total width
    height_shift_range=0.1,   # vertical shift, fraction of total height
    # --- disabled options ---
    horizontal_flip=False,
    vertical_flip=False,
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

# NOTE(review): fit() is presumably only needed by the featurewise/ZCA options,
# which are all disabled here — confirm whether this call can be dropped.
datagen.fit(X_train)

In [78]:
    # Final network, built directly (not via create_model): the two
    # convolutional blocks use dropout 0.25 and the dense head uses 0.5.
    model = Sequential([
        # Convolutional block 1 (expects 28x28x1 inputs).
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
               activation='relu', input_shape=(28, 28, 1)),
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Convolutional block 2.
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
               activation='relu'),
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.25),

        # Classifier head.
        Flatten(),
        Dense(256, activation="relu"),
        Dropout(0.5),
        Dense(10, activation="softmax"),
    ])
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

In [61]:
# Re-check the dimensions of the training feature array.
X_train.shape

(37800, 28, 28, 1)

In [73]:
# Print the shapes of the training and validation arrays, one per line.
print("\n".join(str(arr.shape) for arr in (X_train, Y_train, X_val, Y_val)))

(37800, 28, 28, 1)
(37800, 10)
(4200, 28, 28, 1)
(4200, 10)


In [82]:
# Train on augmented batches drawn from datagen, validating on the held-out
# split; steps_per_epoch is the number of whole batches in the training set.
history = model.fit_generator(
    datagen.flow(X_train, Y_train, batch_size=batch_size),
    steps_per_epoch=X_train.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(X_val, Y_val),
    callbacks=[learning_rate_reduction, earlystop],
    verbose=2,
)

Epoch 1/1
 - 262s - loss: 0.4378 - acc: 0.8584 - val_loss: 0.0475 - val_acc: 0.9845


In [None]:
# Wrap the model builder so scikit-learn can drive it as an estimator.
# NOTE: this rebinds `model`, replacing the Sequential network built earlier
# in the notebook.
model = KerasClassifier(build_fn=create_model, verbose=2)

# define the grid search parameters
# GridSearchCV needs a sequence of candidates for every parameter, but
# batch_size and epochs may be plain ints at this point. np.atleast_1d(...)
# followed by .tolist() turns a scalar into a one-element list and leaves an
# existing list of candidates unchanged.
param_grid = dict(
    batch_size=np.atleast_1d(batch_size).tolist(),
    epochs=np.atleast_1d(epochs).tolist(),
)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X, Y)

# summarize results: best combination first, then one line per combination
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
fit(self, x, y, batch_size=32, epochs=10, verbose=1, callbacks=None, validation_split=0.0, 
    validation_data=None, shuffle=True, class_weight=None, sample_weight=None, 
    initial_epoch=0)
which has a default batch_size=32 and epochs=10, whereas fit_generator() looks like:

fit_generator(self, generator, steps_per_epoch, epochs=1, verbose=1,
              callbacks=None, validation_data=None, validation_steps=None, 
              class_weight=None, max_queue_size=10, workers=1,
              use_multiprocessing=False, initial_epoch=0)