In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.python import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, Dropout
from keras.utils.np_utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import MaxPool2D

Using TensorFlow backend.


In [None]:
# Load the labelled training set and the unlabelled test set from the
# local digit-recognizer directory (paths are relative to the notebook).
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('./digit-recognizer/train.csv',
                     './digit-recognizer/test.csv')
)

In [None]:
# Each sample is reshaped to a square img_rows x img_cols single-channel image.
img_rows = img_cols = 28
# Number of target classes used for one-hot encoding and the softmax layer.
num_classes = 10

In [None]:
def data_prep(raw):
    """Turn a labelled pixel table into model-ready inputs and targets.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame with a ``label`` column; every column after the first one is
        treated as pixel data (the first column is skipped positionally).

    Returns
    -------
    out_x : numpy.ndarray
        Pixel values reshaped to ``(num_images, img_rows, img_cols, 1)`` and
        divided by 255.
    out_y : numpy.ndarray
        ``raw.label`` one-hot encoded over ``num_classes`` classes.
    """
    sample_count = raw.shape[0]

    # Drop the first column (positional) and keep the remaining pixel columns.
    pixel_matrix = raw.values[:, 1:]
    # Add a trailing channel axis so the array matches the Conv2D input shape.
    image_stack = pixel_matrix.reshape(sample_count, img_rows, img_cols, 1)
    out_x = image_stack / 255

    out_y = keras.utils.np_utils.to_categorical(raw.label, num_classes)
    return out_x, out_y

In [None]:
# Scaled image tensor (x) and one-hot labels (y) for the full training set.
x, y = data_prep(raw=train_data)

In [None]:
# Data augmentation
#
# `x` has already been divided by 255 inside data_prep, so the generator must
# not rescale again: a second 1/255 factor would shrink every augmented pixel
# by another factor of 255 and the augmented images would no longer be on the
# same scale as the un-augmented inputs.
data_generator = ImageDataGenerator(
    rotation_range=10,       # random rotation range, in degrees
    zoom_range=0.15,         # random zoom range
    width_shift_range=0.1,   # random horizontal shift (fraction of width)
    height_shift_range=0.1,  # random vertical shift (fraction of height)
)
# No data_generator.fit(x) call: fit() only computes the statistics needed by
# featurewise_center / featurewise_std_normalization / zca_whitening, and none
# of those options are enabled here.
#
# NOTE(review): the training cell calls model.fit(x, y, ...) directly, so this
# generator currently has no effect on training. To train on augmented data,
# feed data_generator.flow(x, y, batch_size=...) to model.fit instead.

In [None]:
# CNN classifier: two 5x5 conv layers -> max-pool -> 3x3 conv -> max-pool,
# then a dense head with dropout and a softmax over num_classes.
model = Sequential([
    # Convolution layers
    Conv2D(32, kernel_size=(5, 5), activation='relu',
           input_shape=(img_rows, img_cols, 1)),
    Conv2D(32, kernel_size=(5, 5), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    # Dense classification head
    Flatten(),
    Dense(8192, activation='relu'),
    Dropout(0.5),
    Dense(2048, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(
    optimizer='rmsprop',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs with mini-batches of 128 samples; validation_split=0.1
# asks Keras to hold out 10% of (x, y) for validation metrics.
model.fit(
    x=x,
    y=y,
    epochs=10,
    batch_size=128,
    validation_split=0.1,
)

In [None]:
# model = Sequential()
# model.add(Conv2D(25, kernel_size=(3, 3),
#                  activation='relu',
#                  input_shape=(img_rows, img_cols, 1)))
# model.add(Conv2D(25, kernel_size=(3, 3),
#                  activation='relu',))
# model.add(Conv2D(25, kernel_size=(3, 3), activation='relu'))
# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# model.add(Dense(num_classes, activation='softmax'))

# model.compile(loss=keras.losses.categorical_crossentropy,
#               optimizer='rmsprop',
#               metrics=['accuracy'])
# model.fit(x, y,
#           batch_size=128,
#           epochs=4,
#           validation_split = 0.2)

In [None]:
# # Model with stride and dropout - a faster model obtained by adding strides and dropout

# model = Sequential()
# model.add(Conv2D(30, kernel_size=(3, 3),
#                  strides=2,
#                  activation='relu',
#                  input_shape=(img_rows, img_cols, 1)))
# model.add(Dropout(0.5))
# model.add(Conv2D(30, kernel_size=(3, 3), strides=2, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Conv2D(30, kernel_size=(3, 3), strides=2, activation='relu'))
# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# model.add(Dense(num_classes, activation='softmax'))

# model.compile(loss=keras.losses.categorical_crossentropy,
#               optimizer='adam',
#               metrics=['accuracy'])
# model.fit(x, y,
#           batch_size=128,
#           epochs=3,
#           validation_split = 0.2)

In [None]:
# Prepare the unlabelled test images the same way data_prep prepares the
# training images: reshape to (n, img_rows, img_cols, 1) and divide by 255.
# Without the division the model, which is trained on pixels divided by 255,
# would be asked to predict on raw 0..255 values.
X_test = (
    test_data.values.astype('float32')
    .reshape(-1, img_rows, img_cols, 1)
    / 255
)
X_test.shape

In [None]:
# Sequential.predict_classes is deprecated and has been removed from newer
# TensorFlow/Keras releases. For a softmax output layer it is equivalent to
# taking the argmax over the predicted class probabilities, which works on
# both old and new versions.
pred = np.argmax(model.predict(X_test, verbose=0), axis=-1)

In [None]:
def write_preds(preds, fname):
    """Write predictions to a CSV file with ``ImageId`` and ``Label`` columns.

    Parameters
    ----------
    preds : sequence
        Predicted labels, one per image, in image order.
    fname : str
        Destination path of the CSV file.

    ``ImageId`` is a 1-based running index over ``preds``. The file is written
    with a header row and without the DataFrame index.
    """
    image_ids = list(range(1, len(preds) + 1))
    submission = pd.DataFrame({"ImageId": image_ids, "Label": preds})
    submission.to_csv(fname, index=False, header=True)

In [None]:
# Save the predicted labels as the submission file in the working directory.
write_preds(preds=pred, fname="digit_recognition_submission.csv")