In [1]:
#imports
import os
import numpy as np
import pandas as pd
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import *
from keras.layers.convolutional import *
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
# Load the MNIST digits through the Keras dataset helper; the result is
# unpacked as two (images, labels) pairs: the train and the test split.
# NOTE(review): x_train / y_train from this call are never preprocessed or used;
# they are reassigned from train.csv in the training cell, so only
# x_test / y_test from MNIST actually feed the rest of the notebook.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
# Prepare the MNIST test split for the network.
# The training split is intentionally not prepared here: the training data is
# read from train.csv and preprocessed in the training cell.
# NOTE: this cell overwrites x_test / y_test, so running it a second time
# without re-running the load cell would divide the pixels by 255 again and
# re-encode labels that are already one-hot.

# Reshape to [samples, rows, columns, channels], cast to float32 and divide by 255.
x_test = x_test.reshape(len(x_test), 28, 28, 1).astype('float32') / 255

# One-hot encode the integer class labels (one column per digit class).
num_classes = 10
y_test = np_utils.to_categorical(y_test, num_classes)

In [4]:
# Hyperparameters consumed by create_model(), grouped by network stage.

# Input: square images of img_dim x img_dim pixels with num_channels channels.
img_dim, num_channels = 28, 1

# First and second convolutional layers (number of filters, square kernel side).
num_filters1, kernel_size1 = 32, 3
num_filters2, kernel_size2 = 32, 3

# First max-pooling layer (square window side).
pool_size1 = 2

# Third convolutional layer.
num_filters3, kernel_size3 = 64, 3

# Second max-pooling layer.
pool_size2 = 2

# Fully connected head: hidden layer width, then the output layer.
# The output layer is the final layer, so its width equals the number of classes.
dense_units1 = 1024
dense_units2 = 10

In [5]:
def create_model():
    """Build and compile the convolutional digit classifier.

    The architecture is driven by the module-level hyperparameters
    (img_dim, num_channels, num_filters*, kernel_size*, pool_size*,
    dense_units*) and stacks, in order:

        Conv -> BatchNorm -> Conv -> BatchNorm -> MaxPool
        -> Conv -> BatchNorm -> MaxPool -> Flatten
        -> Dense(relu) -> Dropout(0.2) -> Dense(softmax)

    Returns:
        A Sequential model compiled with categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    model = Sequential()

    layer_stack = [
        # Only the first layer needs input_shape = (rows, cols, channels).
        Conv2D(num_filters1, (kernel_size1, kernel_size1),
               input_shape=(img_dim, img_dim, num_channels),
               activation='relu'),
        # axis=-1 is the channels-last convention: the previous layer's
        # activations are normalized per batch so their mean stays close to 0
        # and their standard deviation close to 1.
        BatchNormalization(axis=-1),
        # Conv2D strides default to (1, 1) when not given.
        Conv2D(num_filters2, (kernel_size2, kernel_size2), activation='relu'),
        BatchNormalization(axis=-1),
        # MaxPooling2D strides default to pool_size when not given.
        MaxPooling2D(pool_size=(pool_size1, pool_size1)),
        Conv2D(num_filters3, (kernel_size3, kernel_size3), activation='relu'),
        BatchNormalization(axis=-1),
        MaxPooling2D(pool_size=(pool_size2, pool_size2)),
        Flatten(),
        # Dense layers use a bias by default, initialised to 0.
        Dense(dense_units1, activation='relu'),
        # Dropout is added to reduce overfitting.
        Dropout(0.2),
        # Softmax turns the outputs into a probability distribution over classes.
        Dense(dense_units2, activation='softmax'),
    ]
    for layer in layer_stack:
        model.add(layer)

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [7]:
# Instantiate a freshly compiled network.
model = create_model()

# Training data comes from a CSV file: the first column holds the digit label,
# the remaining columns hold the flattened pixel values of each image.
train = pd.read_csv("/home/mayur/Deep_learning_and_Project/tensorflow/handwritten_digits/train.csv")
y_train = train.iloc[:, 0:1].values
x_train = train.iloc[:, 1:].values

# Show the shape before and after restoring the image layout
# [samples, rows, columns, channels].
print(x_train.shape)
x_train = x_train.reshape(len(x_train), img_dim, img_dim, num_channels).astype('float32')
print(x_train.shape)

# Same preprocessing as the test split: divide pixels by 255, one-hot encode labels.
x_train = x_train / 255
y_train = np_utils.to_categorical(y_train, num_classes)

# verbose=2 prints one summary line per epoch.
model.fit(x_train, y_train, batch_size=200, epochs=20, verbose=2)

# Final evaluation on the MNIST test split prepared earlier (verbose=0 is silent).
# scores[1] is the accuracy metric requested at compile time.
# NOTE(review): if train.csv was derived from MNIST, some of its images may also
# be in this test split, which would make the reported error optimistic - confirm.
scores = model.evaluate(x_test, y_test, verbose=0)

print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))

(42000, 784)
(42000, 28, 28, 1)
Epoch 1/20
 - 236s - loss: 0.2005 - acc: 0.9516
Epoch 2/20
 - 255s - loss: 0.0358 - acc: 0.9886
Epoch 3/20
 - 257s - loss: 0.0267 - acc: 0.9916
Epoch 4/20
 - 259s - loss: 0.0186 - acc: 0.9939
Epoch 5/20
 - 255s - loss: 0.0176 - acc: 0.9945
Epoch 6/20
 - 254s - loss: 0.0148 - acc: 0.9954
Epoch 7/20
 - 256s - loss: 0.0151 - acc: 0.9947
Epoch 8/20
 - 256s - loss: 0.0119 - acc: 0.9962
Epoch 9/20
 - 257s - loss: 0.0143 - acc: 0.9955
Epoch 10/20
 - 256s - loss: 0.0156 - acc: 0.9956
Epoch 11/20
 - 256s - loss: 0.0107 - acc: 0.9966
Epoch 12/20
 - 258s - loss: 0.0100 - acc: 0.9973
Epoch 13/20
 - 257s - loss: 0.0150 - acc: 0.9955
Epoch 14/20
 - 259s - loss: 0.0144 - acc: 0.9961
Epoch 15/20
 - 260s - loss: 0.0111 - acc: 0.9970
Epoch 16/20
 - 258s - loss: 0.0109 - acc: 0.9971
Epoch 17/20
 - 259s - loss: 0.0086 - acc: 0.9978
Epoch 18/20
 - 261s - loss: 0.0050 - acc: 0.9985
Epoch 19/20
 - 261s - loss: 0.0066 - acc: 0.9981
Epoch 20/20
 - 260s - loss: 0.0114 - acc: 0.99

In [8]:
# Predict the digit class of every image in test.csv and export the result as
# a CSV file with the columns "ImageId,Label" (ImageId is 1-based).
test = pd.read_csv("/home/mayur/Deep_learning_and_Project/tensorflow/handwritten_digits/test.csv").values
# Same preprocessing as the training images: restore the
# [samples, rows, columns, channels] layout, cast to float32, divide by 255.
test = test.reshape(test.shape[0], 28, 28, 1).astype('float32')
test = test/255.0

# One row of class probabilities per image.
test_pred_prob = model.predict(test)
print(test_pred_prob)

# The predicted class is the index of the highest probability in each row.
test_pred = np.argmax(test_pred_prob, axis = 1)
print(test_pred)

results_dir = '/home/mayur/Deep_learning_and_Project/tensorflow/handwritten_digits/results'
# Only the directory creation is conditional. The save itself must run on every
# execution; previously it was nested under this check, so predictions were
# written only on the first run and later runs silently kept a stale file.
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

image_ids = np.arange(1, len(test_pred) + 1)
np.savetxt(os.path.join(results_dir, 'predictions.csv'), np.c_[image_ids, test_pred], delimiter = ',', header = 'ImageId,Label', comments = '', fmt = '%d')
print("saved predictions to a CSV file")

[[  4.07343888e-30   6.01892165e-27   1.00000000e+00 ...,   5.62195972e-31
    4.12873601e-25   7.36956905e-32]
 [  1.00000000e+00   2.68017627e-36   2.72572069e-27 ...,   6.51895031e-31
    2.33977885e-36   8.00238096e-29]
 [  1.37983586e-22   1.42332091e-23   5.53744590e-21 ...,   3.08784901e-17
    1.50148702e-15   1.00000000e+00]
 ..., 
 [  4.38490945e-29   1.03504554e-25   1.07896807e-28 ...,   7.30065366e-24
    5.03217401e-23   3.08675592e-23]
 [  1.24491418e-15   7.59870858e-20   7.00199406e-20 ...,   1.49559564e-12
    6.63356711e-16   1.00000000e+00]
 [  2.07238681e-33   1.24711329e-25   1.00000000e+00 ...,   5.49375631e-32
    1.38053459e-24   6.86329642e-33]]
[2 0 9 ..., 3 9 2]
saved predictions to a CSV file
