In [1]:
#Importing Generic Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import time
import datetime as dt

#Importing Keras Libraries
from keras.models import Sequential
from keras import backend as K
from keras.datasets import mnist
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.advanced_activations import LeakyReLU 
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import History 
history = History()
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import RMSprop

#Importing sklearn Libraries
from sklearn import grid_search
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# There are 10 digits : 0..9
number_of_classes = 10
batch_size = 128

Using TensorFlow backend.


In [2]:
start = dt.datetime.now()

# Load the data
train = pd.read_csv("../input/train.csv")
Y_test = pd.read_csv("../input/test.csv")

X_train = train.drop(labels = ["label"],axis = 1) 
Y_train = train["label"]

#Printing shape of the data set
print("X_train original shape", X_train.shape)
print("Y_train original shape", Y_train.shape)
print("Y_test original shape", Y_test.shape)

('X_train original shape', (42000, 784))
('Y_train original shape', (42000,))
('Y_test original shape', (28000, 784))


In [3]:
#Rescaling so that each pixel lies in the interval [0, 1] instead of [0, 255]
X_train/=255.0
Y_test/=255.0

#Normalization and reshaping of input
#As images are in grayscale, the number of channels is 1. For color images, it's be 3 (R, G, B).
X_train = X_train.values.reshape(X_train.shape[0], 28, 28, 1)
Y_test  = Y_test.values.reshape(Y_test.shape[0], 28, 28, 1)

X_train = X_train.astype('float32')
Y_test  = Y_test.astype('float32')

#After normalizing the shape of data\n,
print("X_train original shape", X_train.shape)
print("Y_test original shape", Y_test.shape)

('X_train original shape', (42000, 28, 28, 1))
('Y_test original shape', (28000, 28, 28, 1))


In [4]:
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(Y_train, number_of_classes)

# Split the train and the validation set for the fitting
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size = 0.1, random_state=np.random.seed(42))

In [5]:
def create_model ():
    
    model = Sequential()

    model.add(Conv2D(32, (3, 3), input_shape=(28,28,1)))
    BatchNormalization(axis=-1)
    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3)))
    BatchNormalization(axis=-1)
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))
  
    model.add(Conv2D(64, (3, 3)))
    BatchNormalization(axis=-1)
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    BatchNormalization(axis=-1)
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))
        
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    BatchNormalization()
    model.add(Dense(512))
    model.add(Activation('relu'))
    BatchNormalization()
    model.add(Dropout(0.5))
    model.add(Dense(10))
    
    model.add(Activation('softmax'))

    optimizer = RMSprop(lr=0.0001)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    model.summary()
    return model    

In [6]:
#Data Augmentation : Creating batches of images and train on them.
gen = ImageDataGenerator(featurewise_center=False, 
                         samplewise_center=False, 
                         featurewise_std_normalization=False, 
                         samplewise_std_normalization=False, 
                         zca_whitening=False, 
                         rotation_range=10, 
                         width_shift_range=0.10, 
                         height_shift_range=0.10, 
                         zoom_range=0.10,
                         horizontal_flip=False,
                         vertical_flip=False
                        )
gen.fit(X_train)

test_gen = ImageDataGenerator()
test_gen.fit(X_val)

annealer = ReduceLROnPlateau(monitor='val_acc', factor=0.1, epsilon=0.0001, patience=1, verbose=1)

#Creating model and fitting
model = create_model()
model.fit_generator(gen.flow(X_train, Y_train, batch_size=64), 
                    steps_per_epoch=X_train.shape[0]//batch_size, 
                    epochs=16,
                    validation_data=test_gen.flow(X_val, Y_val, batch_size=64), 
                    validation_steps = 64, 
                    callbacks=[annealer]
                   )

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
activation_1 (Activation)    (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 10, 10, 64)        18496     
__________

<keras.callbacks.History at 0x10f8d8fd0>

In [7]:
score = model.evaluate(X_val, Y_val, verbose=1)
print('valid loss:', score[0])
print('valid accuracy:', score[1])

('valid accuracy:', 0.99047619047619051)


In [10]:
#Making Predictions and writing to a file
results = model.predict(Y_test)
results = np.argmax(results,axis = 1)
results = pd.Series(results,name="Label")

submission = pd.concat([pd.Series(range(1,28001),name = "ImageId"),results],axis = 1)
submission.to_csv("dr_cnn_mnist_results.csv",index=False)

In [11]:
end = dt.datetime.now()
print('Total time {} s.'.format((end - start).seconds))

Total time 1274 s.
