In [1]:
import keras
import numpy as np

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
# Seed NumPy's global random generator so NumPy-driven randomness repeats across runs.
np.random.seed(123)

## Preprocessing Data

In [3]:
def get_mnist_data():
    """Load MNIST through Keras and reshape it for the CNN in this notebook.

    Returns:
        (X_train, y_train, X_test, y_test): images reshaped from
        (n, height, width) to (n, 1, height, width), and labels passed
        through ``np_utils.to_categorical`` (one-hot encoding).
    """
    from keras.datasets import mnist
    from keras.utils import np_utils

    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    def _add_channel_axis(images):
        # Insert a singleton axis at position 1: (n, h, w) -> (n, 1, h, w).
        n_samples, height, width = images.shape[0], images.shape[1], images.shape[2]
        return images.reshape(n_samples, 1, height, width)

    def _as_column(labels):
        # Labels are turned into an (n, 1) column before being encoded.
        return labels.reshape(labels.shape[0], 1)

    train_images = _add_channel_axis(train_images)
    test_images = _add_channel_axis(test_images)

    train_targets = np_utils.to_categorical(_as_column(train_labels))
    test_targets = np_utils.to_categorical(_as_column(test_labels))

    return train_images, train_targets, test_images, test_targets

In [4]:
def get_data_from_csv(path, num_classes=10):
    """Load the digit CSV files found in ``path`` and shape them for the CNN.

    Reads ``train.csv`` (a ``label`` column plus 784 pixel columns) and
    ``test.csv`` (784 pixel columns only).

    Args:
        path: Directory containing both files. A trailing separator is
            optional because the file names are joined with ``os.path.join``.
        num_classes: Width of the one-hot label matrix. It is fixed
            explicitly (default 10) so the width never depends on which
            labels happen to be present in the file.

    Returns:
        (x_train, y_train, x_test): ``x_train`` and ``x_test`` have shape
        (n, 1, 28, 28); ``y_train`` is a float one-hot matrix of shape
        (n, num_classes).

    Raises:
        IndexError: if a label is outside ``[-num_classes, num_classes)``.
    """
    import os
    import pandas as pd

    train = pd.read_csv(os.path.join(path, 'train.csv'))
    test = pd.read_csv(os.path.join(path, 'test.csv'))

    labels = train['label'].values
    x_train = train.drop('label', axis=1).values
    x_test = test.values

    # Each 784-value row becomes one single-channel 28x28 image.
    x_train = x_train.reshape((x_train.shape[0], 1, 28, 28))
    x_test = x_test.reshape((x_test.shape[0], 1, 28, 28))

    # One-hot encode by picking rows of an identity matrix.
    y_train = np.eye(num_classes)[labels]
    return x_train, y_train, x_test

In [6]:
# Load train.csv / test.csv from the parent directory; the test set has no labels.
X_train, y_train, X_test = get_data_from_csv('../')

In [7]:
# Display the shapes of the loaded arrays as a quick sanity check.
X_train.shape, y_train.shape, X_test.shape

((42000, 1, 28, 28), (42000, 10), (28000, 1, 28, 28))

In [8]:
# Single scalar mean over every pixel of every training image, cast to float32.
X_mean = X_train.mean().astype(np.float32)

In [9]:
# Single scalar standard deviation over all training pixels, cast to float32.
X_std = X_train.std().astype(np.float32)

In [10]:
def normalizer(x):
    """Standardise ``x`` with the training-set statistics.

    Subtracts the global ``X_mean`` and divides by the global ``X_std``;
    used as the function of the models' first ``Lambda`` layer.
    """
    centered = x - X_mean
    return centered / X_std

## Building the model

In [11]:
from keras.layers import Convolution2D, Dense, Flatten, Lambda, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers.pooling import MaxPooling2D

### CNN Model

In [12]:
# NOTE(review): none of the fit / fit_generator calls visible in this notebook read
# this variable; each of them passes a literal nb_epoch instead.
nb_epoch = 5

In [15]:
def build_cnn_model():
    """Build and compile the digit-classification CNN.

    Architecture: input standardisation (``Lambda(normalizer)``) on
    (1, 28, 28) inputs, two 3x3 convolutions with 32 filters, max pooling,
    two 3x3 convolutions with 64 filters, max pooling, then a 512-unit ReLU
    dense layer and a 10-way softmax output.

    Returns:
        A freshly initialised ``Sequential`` model compiled with a default
        ``Adam`` optimizer, categorical cross-entropy loss and the accuracy
        metric.
    """
    model = Sequential()
    model.add(Lambda(normalizer, input_shape=(1, 28, 28)))
    # Two conv-conv-pool stages; the filter count doubles in the second stage.
    for n_filters in (32, 64):
        model.add(Convolution2D(n_filters, 3, 3, activation='relu'))
        model.add(Convolution2D(n_filters, 3, 3, activation='relu'))
        model.add(MaxPooling2D())
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


model_1 = build_cnn_model()

In [14]:
# Exploratory run with a 20% validation split, used to judge how many epochs are worthwhile.
model_1.fit(X_train, y_train,batch_size=64, validation_split=0.2, nb_epoch=10)

Train on 33600 samples, validate on 8400 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9b0cad2350>

5–8 epochs seem to be a good starting point.

## First Submission Model

In [16]:
# Rebuild model_1 from scratch so the submission model starts from fresh weights:
# normalisation -> 2x conv(32) -> pool -> 2x conv(64) -> pool -> dense(512) -> softmax(10).
model_1 = Sequential([
    Lambda(normalizer, input_shape=(1, 28, 28)),
    Convolution2D(32, 3, 3, activation='relu'),
    Convolution2D(32, 3, 3, activation='relu'),
    MaxPooling2D(),
    Convolution2D(64, 3, 3, activation='relu'),
    Convolution2D(64, 3, 3, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax'),
])
model_1.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [17]:
# Train on all training rows (no validation split) for 8 epochs.
model_1.fit(X_train, y_train,batch_size=64, nb_epoch=8)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f9b0470b390>

In [18]:
# Run the test images through model_1; each output row holds the 10 softmax outputs.
preds = model_1.predict(X_test, batch_size=64)

In [55]:
def preds_to_df(preds):
    """Turn per-class scores into a submission-style DataFrame.

    Args:
        preds: 2-D array-like with one row of class scores per image.

    Returns:
        pandas.DataFrame with a single ``Label`` column holding the index of
        the largest score in each row, indexed 1..n under the index name
        ``ImageId``. Empty input yields an empty frame.
    """
    import pandas as pd

    scores = np.asarray(preds)
    if scores.size == 0:
        # argmax over axis 1 is undefined for a 1-D empty input such as [].
        labels = np.empty(0, dtype=np.int64)
    else:
        # One vectorised argmax instead of a Python-level loop over rows.
        labels = scores.argmax(axis=1)

    # Image ids are 1-based.
    preds_df = pd.DataFrame({'Label': labels}, index=np.arange(1, len(labels) + 1))
    preds_df.index.name = 'ImageId'
    return preds_df

In [56]:
# Convert the model_1 prediction rows into the ImageId/Label table.
preds_df = preds_to_df(preds)

In [52]:
# Write the submission file; the ImageId index is written as the first CSV column.
preds_df.to_csv('mnist_pred.csv')

In [53]:
# NOTE(review): this prints the whole submission file into the notebook output;
# showing only the first few rows would be enough to check the format.
%cat mnist_pred.csv

ImageId,Label
1,2
2,0
3,9
4,0
5,3
6,7
7,0
8,3
9,0
10,3
11,5
12,7
13,4
14,0
15,4
16,3
17,3
18,1
19,9
20,0
21,9
22,1
23,1
24,5
25,7
26,4
27,2
28,7
29,4
30,7
31,7
32,5
33,4
34,2
35,6
36,2
37,5
38,5
39,1
40,6
41,7
42,7
43,4
44,9
45,8
46,7
47,8
48,2
49,6
50,7
51,6
52,8
53,8
54,3
55,8
56,2
57,1
58,2
59,2
60,0
61,4
62,1
63,7
64,0
65,0
66,0
67,1
68,9
69,0
70,1
71,6
72,5
73,8
74,8
75,2
76,8
77,9
78,9
79,2
80,3
81,5
82,4
83,1
84,8
85,9
86,2
87,4
88,3
89,6
90,7
91,2
92,0
93,6
94,6
95,1
96,4
97,3
98,9
99,7
100,4
101,0
102,9
103,2
104,0
105,7
106,3
107,0
108,5
109,0
110,8
111,0
112,0
113,4
114,7
115,1
116,7
117,1
118,1
119,3
120,3
121,3
122,7
123,2
124,8
125,6
126,3
127,8
128,7
129,7
130,4
131,3
132,5
133,6
134,0
135,0
136,0
137,3
138,1
139,3
140,6
141,4
142,3
143,4
144,5
145,5
146,8
147,7
148,7
149,2
150,8
151,4
152,3
153,5
154,6
155,5
156,3
1

In [54]:
from IPython.display import FileLink
# Show a link to the generated CSV in the notebook output.
FileLink('mnist_pred.csv')

# With Image Augmentation

In [59]:
# Second model, trained on augmented images:
# normalisation -> 2x conv(32) -> pool -> 2x conv(64) -> pool -> dense(512) -> softmax(10).
model_2 = Sequential([
    Lambda(normalizer, input_shape=(1, 28, 28)),
    Convolution2D(32, 3, 3, activation='relu'),
    Convolution2D(32, 3, 3, activation='relu'),
    MaxPooling2D(),
    Convolution2D(64, 3, 3, activation='relu'),
    Convolution2D(64, 3, 3, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax'),
])
model_2.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [60]:
from keras.preprocessing import image

In [62]:
# Augmentation settings: random rotation, shear, zoom and horizontal/vertical shifts.
gen = image.ImageDataGenerator(
    rotation_range=10,
    shear_range=0.3,
    zoom_range=0.08,
    width_shift_range=0.08,
    height_shift_range=0.08,
)

In [63]:
# Iterator yielding augmented (images, labels) batches of 64 from the training arrays.
train_flow = gen.flow(X_train, y_train, batch_size=64)

In [64]:
# Number of samples behind the iterator; reused as the per-epoch sample count when fitting.
train_flow.N

42000

In [65]:
# First 5 epochs on augmented data. The second positional argument is presumably Keras 1.x's
# samples_per_epoch, set to one pass over the training set — TODO confirm against the installed version.
model_2.fit_generator(train_flow, train_flow.N, nb_epoch=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f996b418390>

In [66]:
# Continue training the same model for 5 more epochs (10 in total so far).
model_2.fit_generator(train_flow, train_flow.N, nb_epoch=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f996b418d90>

In [67]:
# Another 3 epochs at the initial learning rate (13 in total so far).
model_2.fit_generator(train_flow, train_flow.N, nb_epoch=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f996b418810>

In [68]:
# Lower the learning rate to 1e-4 for fine-tuning. The value is written into the
# optimizer's existing backend variable: rebinding `optimizer.lr` to a Python float
# only replaces the attribute and leaves the variable used by an already-built
# training function untouched, so later fit calls would keep the old rate.
from keras import backend as K
K.set_value(model_2.optimizer.lr, 0.0001)

In [69]:
# 5 epochs after the learning-rate change (18 in total so far).
model_2.fit_generator(train_flow, train_flow.N, nb_epoch=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f996b418bd0>

In [70]:
# Final 5 epochs (23 in total).
model_2.fit_generator(train_flow, train_flow.N, nb_epoch=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f996b418b50>

In [71]:
# Predict on the test images with the augmentation-trained model,
# convert the score rows to labels, and write the second submission file.
preds_2 = model_2.predict(X_test, batch_size=64)
preds_2_df = preds_to_df(preds_2)
preds_2_df.to_csv('mnist_preds_model_2.csv')

In [72]:
# Show a link to the second submission file in the notebook output.
FileLink('mnist_preds_model_2.csv')

In [73]:
# NOTE(review): this prints the whole submission file into the notebook output;
# showing only the first few rows would be enough to check the format.
%cat mnist_preds_model_2.csv

ImageId,Label
1,2
2,0
3,9
4,0
5,3
6,7
7,0
8,3
9,0
10,3
11,5
12,7
13,4
14,0
15,4
16,3
17,3
18,1
19,9
20,0
21,9
22,1
23,1
24,5
25,7
26,4
27,2
28,7
29,4
30,7
31,7
32,5
33,4
34,2
35,6
36,2
37,5
38,5
39,1
40,6
41,7
42,7
43,4
44,9
45,8
46,7
47,8
48,2
49,6
50,7
51,6
52,8
53,8
54,3
55,8
56,2
57,1
58,2
59,2
60,0
61,4
62,1
63,7
64,0
65,0
66,0
67,1
68,9
69,0
70,1
71,6
72,5
73,8
74,8
75,2
76,8
77,9
78,9
79,2
80,3
81,5
82,4
83,1
84,0
85,9
86,2
87,4
88,3
89,6
90,7
91,2
92,0
93,6
94,6
95,1
96,4
97,3
98,9
99,7
100,4
101,0
102,9
103,2
104,0
105,7
106,3
107,0
108,5
109,0
110,8
111,0
112,0
113,4
114,7
115,1
116,7
117,1
118,1
119,3
120,3
121,3
122,7
123,2
124,8
125,6
126,3
127,8
128,7
129,7
130,4
131,3
132,5
133,6
134,0
135,0
136,0
137,3
138,1
139,3
140,6
141,4
142,3
143,4
144,5
145,5
146,8
147,7
148,7
149,2
150,8
151,4
152,3
153,5
154,6
155,5
156,3
1