In [67]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
#import tensorflow as tf
import keras.backend as K

np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D,\
                         BatchNormalization, Input, concatenate, \
                         GlobalAveragePooling2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

from keras.applications import ResNet50

In [72]:
# Read the training and test tables from the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Separate the target column from the feature columns, then drop the
# reference to the combined frame to free some space.
Y_train = train["label"]
X_train = train.drop(["label"], axis=1)
del train

# Scale every value by 1/255 and lay each row out as a 28x28 image with a
# single channel.
X_train = (X_train / 255.0).values.reshape(-1, 28, 28, 1)
test = (test / 255.0).values.reshape(-1, 28, 28, 1)

# One-hot encode the targets over 10 classes.
Y_train = to_categorical(Y_train, num_classes=10)

print(Y_train.shape)
print(X_train.shape)

# Hold out 10% of the samples for validation.
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1)

#print(np.shape(Y_train))
#print(np.shape(X_train))
#print(np.shape(Y_val))
#print(np.shape(X_val))

(42000, 10)
(42000, 28, 28, 1)


In [61]:
def build_model():
    """Assemble the plain sequential CNN classifier for 28x28x1 inputs.

    Layout:
      * two convolutional stages (32 filters, then 64), each made of three
        3x3 'Same'-padded ReLU convolutions individually followed by batch
        normalization, and closed by 2x2 max pooling plus Dropout(0.25);
      * a head of Flatten -> Dense(256, relu) -> BatchNormalization ->
        Dropout(0.5) -> Dense(10, softmax).

    Returns:
        The uncompiled ``Sequential`` model.
    """
    model = Sequential()

    # (number of filters, extra keyword arguments for the stage's MaxPool2D)
    stages = ((32, {}), (64, {"strides": (2, 2)}))

    is_first_conv = True
    for n_filters, pool_extra in stages:
        for _ in range(3):
            conv_args = dict(filters=n_filters, kernel_size=(3, 3),
                             padding='Same', activation='relu')
            if is_first_conv:
                # Only the very first layer declares the input shape.
                conv_args["input_shape"] = (28, 28, 1)
                is_first_conv = False
            model.add(Conv2D(**conv_args))
            model.add(BatchNormalization())
        model.add(MaxPool2D(pool_size=(2, 2), **pool_extra))
        model.add(Dropout(0.25))

    # Classification head.
    head_layers = (
        Flatten(),
        Dense(256, activation="relu"),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation="softmax"),
    )
    for head_layer in head_layers:
        model.add(head_layer)

    return model

def dense_block(filters, inputs):
    """Apply five Conv -> Dropout -> BatchNormalization units with skip concatenations.

    Every unit is a 3x3 'Same'-padded ReLU convolution with ``filters``
    output channels, followed by Dropout(0.2) and batch normalization.

    Wiring of the units:
      * unit 1 reads ``inputs``;
      * unit 2 reads unit 1;
      * unit 3 reads the concatenation of units 1 and 2;
      * unit 4 reads the concatenation of units 1, 2 and 3;
      * unit 5 reads unit 4 only.

    Args:
        filters: number of output channels of every convolution in the block.
        inputs: tensor fed to the first unit.

    Returns:
        The output tensor of the fifth unit.
    """
    def conv_unit(tensor):
        # One Conv -> Dropout -> BatchNormalization unit.
        tensor = Conv2D(filters=filters, kernel_size=(3, 3), padding='Same',
                        activation='relu')(tensor)
        tensor = Dropout(0.2)(tensor)
        return BatchNormalization()(tensor)

    unit_1 = conv_unit(inputs)
    unit_2 = conv_unit(unit_1)
    unit_3 = conv_unit(concatenate([unit_1, unit_2]))
    unit_4 = conv_unit(concatenate([unit_1, unit_2, unit_3]))
    return conv_unit(unit_4)

def build_dense_model():
    """Build the functional-API classifier made of three ``dense_block`` stages.

    Layout: dense_block(32) -> 2x2 max pooling -> dense_block(64) -> 2x2 max
    pooling -> dense_block(64) -> Flatten -> Dense(256, relu) ->
    BatchNormalization -> Dropout(0.5) -> Dense(10, softmax).

    Returns:
        The uncompiled ``Model`` mapping a (28, 28, 1) input to 10 softmax
        outputs.
    """
    inputs = Input(shape=(28, 28, 1,))

    # First dense block
    cnn1_5 = dense_block(32, inputs)
    inp2_1 = MaxPool2D(pool_size=(2, 2))(cnn1_5)

    # Second dense block
    cnn2_5 = dense_block(64, inp2_1)
    inp3_1 = MaxPool2D(pool_size=(2, 2))(cnn2_5)

    # Third dense block (no pooling afterwards)
    cnn3_5 = dense_block(64, inp3_1)

    # Classification head
    vector = Flatten()(cnn3_5)
    vector = Dense(256, activation="relu")(vector)
    vector = BatchNormalization()(vector)
    vector = Dropout(0.5)(vector)
    predictions = Dense(10, activation="softmax")(vector)

    # Tie the input tensor to the softmax output to create the model.
    model = Model(inputs=inputs, outputs=predictions)
    return model
    
    
# Ensemble setup: create `size_ensemble` separately built copies of the
# dense-block network, each compiled with the same optimizer, loss and metric.
size_ensemble = 4
models = []
for i in range(size_ensemble):
    model = build_dense_model()
    model.compile(
        loss='categorical_crossentropy',
        optimizer='Nadam',
        metrics=['accuracy'],
    )
    models.append(model)

In [None]:
# Training hyper-parameters used for every ensemble member.
epochs = 5
batch_size = 128

# Fit the members one after another on the same train/validation split.
for model in models:
    model.fit(
        X_train,
        Y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_val, Y_val),
    )

In [None]:
# Predict results: add up the class probabilities predicted by every
# ensemble member (the argmax of the sum equals the argmax of the mean).
results = None
for model in models:
    member_probs = model.predict(test)
    results = member_probs if results is None else results + member_probs

# select the index with the maximum summed probability
results = np.argmax(results, axis=1)

results = pd.Series(results, name="Label")

# ImageId is 1-based. Its length is taken from the predictions instead of a
# hard-coded 28000 so that the two columns always line up, whatever the size
# of the test set.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")

submission = pd.concat([image_ids, results], axis=1)
submission.to_csv("cnn_mnist_datagen.csv", index=False)

In [75]:
###ResNet50 fine-tuning###
from keras.optimizers import SGD

# Number of leading layers of `model` that stay frozen during the second
# (fine-tuning) phase; every layer from this index onwards is trained.
n_frozen_layers = 140

# Convolutional base with ImageNet weights and without its classifier head.
base_model = ResNet50(weights='imagenet', include_top=False)

# New head: global average pooling, one fully-connected layer and a
# 10-way softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(10, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Replicate the single grayscale channel three times so the data matches the
# 3-channel input of ResNet. np.repeat only touches the channel axis, whereas
# squeezing a stacked array would also drop the batch axis of a 1-sample split.
# NOTE(review): the images are fed as-is (scaled by 1/255 in the loading
# cell); keras.applications.resnet50.preprocess_input is not applied --
# confirm this is intended for the ImageNet weights.
X_train_3_channels = np.repeat(X_train, 3, axis=-1)
X_val_3_channels = np.repeat(X_val, 3, axis=-1)

# Phase 1: train only the new head (which was randomly initialized) while
# the whole pre-trained base is frozen.
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])

# train the model on the new data for a few epochs
model.fit(X_train_3_channels, Y_train, batch_size=batch_size, epochs=epochs,
          validation_data=(X_val_3_channels, Y_val))

# At this point the head has been trained and the upper part of the base can
# be fine-tuned: the bottom `n_frozen_layers` layers stay frozen and the
# remaining ones are trained.

# Print layer indices and names to help choose how many layers to freeze.
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# Freeze the first `n_frozen_layers` layers and unfreeze the rest.
for layer in model.layers[:n_frozen_layers]:
    layer.trainable = False
for layer in model.layers[n_frozen_layers:]:
    layer.trainable = True

# The model has to be recompiled for the trainable flags to take effect.
# SGD with a low learning rate is used for this phase.
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
              loss='categorical_crossentropy', metrics=['accuracy'])

# Phase 2: train again, this time fine-tuning the unfrozen base layers
# alongside the new head.
model.fit(X_train_3_channels, Y_train, batch_size=batch_size, epochs=epochs,
          validation_data=(X_val_3_channels, Y_val))



Train on 37800 samples, validate on 4200 samples
Epoch 1/5

KeyboardInterrupt: 

In [None]:
###split between 1, 7 and rest in order to train an expert model###

# Y_train is one-hot encoded with 10 classes, so column k is the indicator
# of digit k: digits 1 and 7 live in columns 1 and 7.
digit_labels = np.argmax(Y_train, axis=1)
is_one = digit_labels == 1
is_seven = digit_labels == 7
is_rest = ~(is_one | is_seven)

# Cap the "rest" class at one tenth of the training set in order to avoid
# imbalance between 1, 7 and the rest of the classes.
samples_per_class = len(Y_train) // 10

# Keep every 1 and every 7, plus the first `samples_per_class` other digits.
# The whole training set is scanned: 1s and 7s located after the point where
# the "rest" quota fills up are kept as well. Indices are sorted so the
# selected samples stay in their original order.
keep_idx = np.sort(np.concatenate([
    np.flatnonzero(is_one | is_seven),
    np.flatnonzero(is_rest)[:samples_per_class],
]))

X_train_17 = X_train[keep_idx]
# Three-way one-hot targets with columns [is 1, is 7, is anything else].
Y_train_17 = np.column_stack(
    [is_one[keep_idx], is_seven[keep_idx], is_rest[keep_idx]]
).astype(Y_train.dtype)

X_train_17, X_val_17, Y_train_17, Y_val_17 = train_test_split(X_train_17, Y_train_17, test_size = 0.1)

In [None]:
def build_model_17():
    """Assemble the sequential CNN with a 3-way softmax used as the expert model.

    Layout: two 5x5 convolutions (32 filters) -> 2x2 max pooling ->
    Dropout(0.25) -> two 3x3 convolutions (64 filters) -> 2x2 max pooling
    with stride 2 -> Dropout(0.25) -> Flatten -> Dense(256, relu) ->
    Dropout(0.5) -> Dense(3, softmax). All convolutions use 'Same' padding
    and ReLU activation; the first one declares a (28, 28, 1) input.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    layer_stack = [
        # Stage 1: 5x5 convolutions, 32 filters
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
               activation='relu', input_shape=(28, 28, 1)),
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Stage 2: 3x3 convolutions, 64 filters
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
               activation='relu'),
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.25),
        # Classification head
        Flatten(),
        Dense(256, activation="relu"),
        Dropout(0.5),
        Dense(3, activation="softmax"),
    ]
    return Sequential(layer_stack)
    
# Hyper-parameters for the expert model (these rebind the notebook-level
# `epochs` and `batch_size`).
epochs = 5
batch_size = 64

# Build and compile the 3-class expert network.
model_17 = build_model_17()
model_17.compile(
    loss='categorical_crossentropy',
    optimizer='Nadam',
    metrics=['accuracy'],
)

# Train on the expert subset, validating on its held-out part.
model_17.fit(
    X_train_17,
    Y_train_17,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val_17, Y_val_17),
)