In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
#import tensorflow as tf
import keras.backend as K

np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau



# Load the data
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

Y_train = train["label"]
# Drop 'label' column
X_train = train.drop(labels = ["label"],axis = 1) 

# free some space
del train 

X_train = X_train / 255.0
test = test / 255.0

X_train = X_train.values.reshape(-1,28,28,1)
test = test.values.reshape(-1,28,28,1)
Y_train = to_categorical(Y_train, num_classes = 10)

X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size = 0.1)

#print(np.shape(Y_train))
#print(np.shape(X_train))
#print(np.shape(Y_val))
#print(np.shape(X_val))

In [8]:
def build_model():
    model = Sequential()
    model.add(Conv2D(filters = 32, kernel_size = (3,3),padding = 'Same', 
                     activation ='relu', input_shape = (28,28,1)))
    model.add(BatchNormalization())
    model.add(Conv2D(filters = 32, kernel_size = (3,3),padding = 'Same', 
                     activation ='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(filters = 32, kernel_size = (3,3),padding = 'Same', 
                     activation ='relu'))
    model.add(BatchNormalization())
    model.add(MaxPool2D(pool_size=(2,2)))
    model.add(Dropout(0.25))
    model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                     activation ='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                     activation ='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                     activation ='relu'))
    model.add(BatchNormalization())
    model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(256, activation = "relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(10, activation = "softmax"))
    return model

def build_dense_model():
    inputs = Input(shape=(28,28,1,))

    # First dense block
    cnn1_1 = Conv2D(filters = 32, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(inputs)
    cnn1_1 = Dropout(0.2)(cnn1_1)
    cnn1_1 = BatchNormalization()(cnn1_1)
    cnn1_2 = Conv2D(filters = 32, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(cnn1_1)
    cnn1_2 = Dropout(0.2)(cnn1_2)
    cnn1_2 = BatchNormalization()(cnn1_2)
    inp_cnn1_3 = concatenate([cnn1_1,cnn1_2])
    cnn1_3 = Conv2D(filters = 32, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(inp_cnn1_3)
    cnn1_3 = Dropout(0.2)(cnn1_3)
    cnn1_3 = BatchNormalization()(cnn1_3)
    inp_cnn1_4 = concatenate([cnn1_1,cnn1_2,cnn1_3])
    cnn1_4 = Conv2D(filters = 32, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(inp_cnn1_4)
    cnn1_4 = Dropout(0.2)(cnn1_4)
    cnn1_4 = BatchNormalization()(cnn1_4)
    cnn1_5 = Conv2D(filters = 32, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(cnn1_4)
    cnn1_5 = Dropout(0.2)(cnn1_5)
    cnn1_5 = BatchNormalization()(cnn1_5)
    
    ##Second dense block
    inp2_1 = MaxPool2D(pool_size=(2,2))(cnn1_5)
    cnn2_1 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(inp2_1)
    cnn2_1 = Dropout(0.2)(cnn2_1)
    cnn2_1 = BatchNormalization()(cnn2_1)
    cnn2_2 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(cnn2_1)
    cnn2_2 = Dropout(0.2)(cnn2_2)
    cnn2_2 = BatchNormalization()(cnn2_2)
    inp_cnn2_3 = concatenate([cnn2_1,cnn2_2])
    cnn2_3 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(inp_cnn2_3)
    cnn2_3 = Dropout(0.2)(cnn2_3)
    cnn2_3 = BatchNormalization()(cnn2_3)
    inp_cnn2_4 = concatenate([cnn2_1,cnn2_2,cnn2_3])
    cnn2_4 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(inp_cnn2_4)
    cnn2_4 = Dropout(0.2)(cnn2_4)
    cnn2_4 = BatchNormalization()(cnn2_4)
    cnn2_5 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(cnn2_4)
    cnn2_5 = BatchNormalization()(cnn2_5)
    
    ##Third dense block
    inp3_1 = MaxPool2D(pool_size=(2,2))(cnn2_5)
    cnn3_1 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(inp3_1)
    cnn3_2 = Dropout(0.2)(cnn3_1)
    cnn3_1 = BatchNormalization()(cnn3_1)
    cnn3_2 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(cnn3_1)
    cnn3_2 = Dropout(0.2)(cnn3_2)
    cnn3_2 = BatchNormalization()(cnn3_2)
    inp_cnn3_3 = concatenate([cnn3_1,cnn3_2])
    cnn3_3 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(inp_cnn3_3)
    cnn3_3 = Dropout(0.2)(cnn3_3)
    cnn3_3 = BatchNormalization()(cnn3_3)
    inp_cnn3_4 = concatenate([cnn3_1,cnn3_2,cnn3_3])
    cnn3_4 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(inp_cnn3_4)
    cnn3_4 = Dropout(0.2)(cnn3_4)
    cnn3_4 = BatchNormalization()(cnn3_4)
    cnn3_5 = Conv2D(filters = 64, kernel_size = (3, 3),padding = 'Same', 
                     activation ='relu')(cnn3_4)
    cnn3_5 = Dropout(0.2)(cnn3_5)
    cnn3_5 = BatchNormalization()(cnn3_5)
    
    vector = Flatten()(cnn3_5)
    vector = Dense(256, activation = "relu")(vector)
    vector = BatchNormalization()(vector)
    vector = Dropout(0.5)(vector)
    predictions = Dense(10, activation = "softmax")(vector)

    # This creates a model that includes
    # the Input layer and three Dense layers
    model = Model(inputs=inputs, outputs=predictions)
    return model
    
    
size_ensemble = 4
models = []
for i in range(0,size_ensemble):
    model = build_model()
    model.compile(optimizer = 'Nadam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    models.append(model)

In [31]:
epochs = 5
batch_size = 128
for model in models:
    model.fit(X_train,Y_train, batch_size,epochs=epochs,validation_data=(X_val,Y_val))

In [None]:
# predict results
results = None
for model in models:
    if results is None:
        results = model.predict(test)
    else:
        results += model.predict(test)

# select the indix with the maximum probability
results = np.argmax(results,axis = 1)

results = pd.Series(results,name="Label")

submission = pd.concat([pd.Series(range(1,28001),name = "ImageId"),results],axis = 1)
submission.to_csv("cnn_mnist_datagen.csv",index=False)

In [43]:
###split between 1, 7 and rest in order to train an expert model###

samples_per_class = len(Y_train)/10 #in order to avoid imbalance between 1, 7 and the rest of the classes

X_train_17 = []
Y_train_17 = []

done = False

for x, y in zip(X_train, Y_train):
    if done:
        break
    done = True
    if y[0] or y[6]:
        X_train_17.append(x)
        Y_train_17.append([y[0], y[6], 0])
        done = False
    elif samples_per_class > 0:
        X_train_17.append(x)
        Y_train_17.append([0, 0, 1])
        samples_per_class -= 1
        done = False
        
X_train_17 = np.asarray(X_train_17)
Y_train_17 = np.asarray(Y_train_17)

X_train_17, X_val_17, Y_train_17, Y_val_17 = train_test_split(X_train_17, Y_train_17, test_size = 0.1)

(4708, 28, 28, 1)
(4708, 3)


In [44]:
def build_model_17():
    model = Sequential()

    model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                     activation ='relu', input_shape = (28,28,1)))
    model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                     activation ='relu'))
    model.add(MaxPool2D(pool_size=(2,2)))
    model.add(Dropout(0.25))


    model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                     activation ='relu'))
    model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                     activation ='relu'))
    model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
    model.add(Dropout(0.25))


    model.add(Flatten())
    model.add(Dense(256, activation = "relu"))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation = "softmax"))
    return model
    
epochs = 5
batch_size = 64
model_17 = build_model_17()
model_17.compile(optimizer = 'Nadam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

model_17.fit(X_train_17,Y_train_17, batch_size,epochs=epochs,validation_data=(X_val_17,Y_val_17))

Train on 4237 samples, validate on 471 samples
Epoch 1/1


<keras.callbacks.History at 0x1a39e41668>