In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

from sklearn.model_selection import StratifiedKFold,StratifiedShuffleSplit,train_test_split

from keras.utils import to_categorical
from keras.optimizers import Adam, RMSprop, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, Dense, Conv2D, Flatten, Activation, MaxPooling2D, BatchNormalization, Dropout
from keras.models import Model,Sequential
from keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler, MinMaxScaler

Using TensorFlow backend.


In [2]:
# Read the training CSV and split its "label" column off as the target `y`;
# every remaining column stays in `x` as a feature.
x = pd.read_csv("../input/train.csv")
y = x.pop("label")

# The test CSV is read as-is; no label column is extracted from it.
test = pd.read_csv("../input/test.csv")

In [3]:
# Standardise the features using statistics learned from the training frame,
# then apply the same fitted transform to the test frame.
# NOTE(review): the scaler is fitted on all of `x` before the hold-out split
# is made, so validation rows contribute to the scaling statistics — confirm
# this is acceptable.
# NOTE: this cell rebinds `x` and `test`; re-running it without reloading the
# data first would scale already-scaled values.
scaler = StandardScaler().fit(x)
x = scaler.transform(x)
test = scaler.transform(test)

In [4]:
# Stratified hold-out split: 40% of the rows are set aside for validation,
# keeping the label proportions of `y`; seeded so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=45,
    stratify=y,
)

In [5]:
# Reshape each flat feature matrix to (rows, 28, 28, 1), the layout expected
# by the Input layer of cnn().
x_train, x_test, test = (
    flat.reshape(-1, 28, 28, 1) for flat in (x_train, x_test, test)
)

In [6]:
def cnn():
    """Build the (uncompiled) convolutional classifier.

    Architecture, in order:
      * four stages of Conv2D -> BatchNormalization -> ReLU -> MaxPooling2D
        with 32, 64, 128 and 256 filters (3x3 kernels for the first two
        stages, 2x2 for the last two; stride 1, 'same' padding);
      * Flatten;
      * Dense(128) and Dense(64), each ReLU-activated and followed by
        Dropout(0.2);
      * Dense(10) followed by a softmax activation.

    Returns:
        A keras ``Model`` taking inputs of shape (28, 28, 1) and producing
        10 softmax outputs. The caller is responsible for compiling it.
    """
    # Re-seeds NumPy's global RNG on every call.
    # NOTE(review): presumably meant to make weight initialisation
    # repeatable — confirm it actually affects the backend in use.
    np.random.seed(0)

    inp = Input(shape=(28, 28, 1))

    # (filters, kernel_size) for each convolutional stage, in build order.
    conv_stages = (
        (32, (3, 3)),
        (64, (3, 3)),
        (128, (2, 2)),
        (256, (2, 2)),
    )
    features = inp
    for n_filters, kernel in conv_stages:
        features = Conv2D(filters=n_filters, kernel_size=kernel,
                          strides=(1, 1), padding='same')(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        features = MaxPooling2D()(features)

    # Fully connected head.
    hidden = Flatten()(features)
    for n_units in (128, 64):
        hidden = Dense(n_units, activation="relu")(hidden)
        hidden = Dropout(.2)(hidden)

    # Final 10-way layer; softmax is applied as a separate Activation layer.
    logits = Dense(10)(hidden)
    outp = Activation('softmax')(logits)

    return Model(inp, outp)

In [7]:
# Build a throwaway model instance just to print its layer-by-layer summary;
# the instance is not kept.
cnn().summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 28, 28, 32)        128       
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
batch_normalization_2 (Batch (None, 14, 14, 64)        256       
__________

In [8]:
# Instantiate the network and compile it for one-hot multi-class training.
# The metric is registered under the name 'acc'; the callbacks defined for
# training monitor 'val_acc', which relies on that name.
model = cnn()
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [9]:
# Augmentation pipeline for the training images: small random rotations,
# horizontal/vertical shifts and a random zoom factor drawn from [0.9, 1].
im_generator = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=[0.9, 1],
    # Mirroring is disabled: a left-right flipped digit is generally not a
    # valid instance of the same class, so flipping while keeping the label
    # feeds the network wrongly-labelled samples.
    horizontal_flip=False,
    vertical_flip=False,
    data_format="channels_last",
    # `dtype` must be an actual array dtype. The previous value, np.ndarray,
    # is the array class rather than a dtype and resolves to `object`.
    dtype="float32",
)

# With the options above this call computes nothing: fit() only gathers
# statistics for featurewise_center, featurewise_std_normalization or
# zca_whitening. It is kept so that enabling one of those later still works.
im_generator.fit(x_train)

In [10]:
# Both callbacks track validation accuracy (higher is better):
#   * model_check_pt writes ./best_model.h5 whenever 'val_acc' improves;
#   * reduce_lr multiplies the learning rate by 0.6 after 3 epochs without an
#     improvement of at least 0.001, waits 5 epochs before reacting again,
#     and never goes below 1e-4.
model_check_pt = ModelCheckpoint(
    './best_model.h5',
    monitor='val_acc',
    mode='max',
    save_best_only=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor="val_acc",
    mode="max",
    factor=0.6,
    patience=3,
    cooldown=5,
    min_delta=0.001,
    min_lr=0.0001,
    verbose=1,
)

# Train on augmented batches and validate on the untouched hold-out arrays.
# Each epoch draws 5 batches of 2000 augmented samples (10,000 samples).
# NOTE(review): that is a fixed number of samples per epoch, independent of
# the size of the training split — confirm this is intended.
hist = model.fit_generator(
    generator=im_generator.flow(x_train, to_categorical(y_train), batch_size=2000),
    steps_per_epoch=5,
    epochs=100,
    validation_data=(x_test, to_categorical(y_test)),
    validation_steps=1,
    callbacks=[model_check_pt, reduce_lr],
    use_multiprocessing=True,
    verbose=2,
)

Epoch 1/100
 - 6s - loss: 2.3709 - acc: 0.2221 - val_loss: 1.9648 - val_acc: 0.2936
Epoch 2/100
 - 1s - loss: 1.8535 - acc: 0.3823 - val_loss: 1.6809 - val_acc: 0.4398
Epoch 3/100
 - 3s - loss: 1.5512 - acc: 0.4765 - val_loss: 1.4569 - val_acc: 0.5126
Epoch 4/100
 - 3s - loss: 1.3018 - acc: 0.5667 - val_loss: 1.2777 - val_acc: 0.5473
Epoch 5/100
 - 3s - loss: 1.0917 - acc: 0.6425 - val_loss: 0.9278 - val_acc: 0.6832
Epoch 6/100
 - 2s - loss: 0.8993 - acc: 0.7030 - val_loss: 1.0180 - val_acc: 0.6503
Epoch 7/100
 - 3s - loss: 0.7551 - acc: 0.7463 - val_loss: 0.5407 - val_acc: 0.8361
Epoch 8/100
 - 2s - loss: 0.6386 - acc: 0.7854 - val_loss: 0.7714 - val_acc: 0.7513
Epoch 9/100
 - 3s - loss: 0.5730 - acc: 0.8162 - val_loss: 0.4035 - val_acc: 0.8750
Epoch 10/100
 - 3s - loss: 0.5153 - acc: 0.8301 - val_loss: 0.3963 - val_acc: 0.8684
Epoch 11/100
 - 2s - loss: 0.4713 - acc: 0.8469 - val_loss: 0.3430 - val_acc: 0.8973
Epoch 12/100
 - 3s - loss: 0.4190 - acc: 0.8685 - val_loss: 0.3309 - val_a

In [11]:
# Continue training from the checkpointed weights on each of 4 stratified,
# shuffled folds of the full (scaled) training data.
#
# NOTE(review): every fold starts from ./best_model.h5, and the same
# model_check_pt / reduce_lr instances are reused for every fit() call. The
# weights loaded for a fold may therefore already have been trained on rows
# that are in that fold's validation part, so the reported val_acc should not
# be read as an unbiased cross-validation estimate. Also verify whether the
# checkpoint callback carries its best score over between fit() calls.
kfolder = StratifiedKFold(n_splits=4, random_state=45, shuffle=True)
for train_index, test_index in kfolder.split(x, y):
    # Fold-local names are used so the hold-out split variables
    # (x_train, x_test, y_train, y_test) are not silently overwritten.
    fold_x_train = x[train_index].reshape(-1, 28, 28, 1)
    fold_x_val = x[test_index].reshape(-1, 28, 28, 1)

    # split() yields positional indices; `y` is a pandas Series, so select
    # with .iloc rather than label-based y[...], which only happens to work
    # while the Series index is 0..n-1.
    fold_y_train = y.iloc[train_index]
    fold_y_val = y.iloc[test_index]

    # Fresh model and fresh optimizer per fold, warm-started from the best
    # weights written so far.
    saved_model = cnn()
    saved_model.load_weights("./best_model.h5")
    saved_model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['acc'])
    saved_model.fit(
        fold_x_train,
        to_categorical(fold_y_train),
        validation_data=(fold_x_val, to_categorical(fold_y_val)),
        epochs=20,
        batch_size=1024,
        callbacks=[model_check_pt, reduce_lr],
    )

Train on 31497 samples, validate on 10503 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0006000000284984708.
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0003600000170990825.
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Epoch 00020: ReduceLROnPlateau reducing learning rate to 0.00021600000327453016.
Train on 31500 samples, validate on 10500 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0006000000284984708.
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0003600000170990825.
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Epoch 00020: ReduceLROnPlateau 

In [12]:
# Rebuild the architecture and restore the weights stored in ./best_model.h5.
# The model is not compiled here; only its weights are loaded.
saved_model = cnn()
saved_model.load_weights("./best_model.h5")

In [13]:
# Run the restored model on the scaled, reshaped test array.
predictions = saved_model.predict(test)

In [14]:
# Build the submission table: 1-based image ids paired with the arg-max class
# of each prediction row. The id range is derived from the number of
# predictions instead of a hard-coded 28000, so the cell keeps working if the
# test set size changes.
submission = pd.DataFrame({
    "ImageId": range(1, len(predictions) + 1),
    "Label": np.argmax(predictions, axis=1),
})
submission.head(100)

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3
5,6,7
6,7,0
7,8,3
8,9,0
9,10,3


In [15]:
# Write the submission table to cnn_digit.csv without the DataFrame index column.
submission.to_csv("cnn_digit.csv",index=False)