# Model and Training

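This notebook defines the CNN models, trains one of them, and uses a majority-voting ensemble of saved models to produce the final test-set submission.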

## Imports

In [2]:
import cv2
from PIL import Image
import numpy as np
from skimage import morphology
import pandas as pd

from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import Adam,RMSprop,SGD
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.advanced_activations import LeakyReLU 
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

Using TensorFlow backend.
  return f(*args, **kwds)


In [3]:
# Load the table stored in solution.csv into a DataFrame
# (presumably the labels of the original training images — TODO confirm).
train = pd.read_csv(filepath_or_buffer='solution.csv')

## Model

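Two candidate architectures are defined below; each contains 4 convolutional blocks. In Model 1, each block contains 3 convolution → batch normalization → ReLU groups followed by a max-pooling layer. In Model 2, each block contains 3 convolutional-batch normalization pairs followed by a dropout layer. In both models, the final output block of the network has a dense layer, followed by a batch normalization layer, followed by a dropout layer. Finally, the last dense layer with softmax activation is used for output.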


### Model 1

In [42]:
# Model 1: four convolutional stages of increasing width (32 -> 256 filters).
# Each stage stacks three Conv(3x3) -> BatchNorm -> ReLU groups and ends with
# 2x2 max-pooling. A dense head then produces a 6-way softmax output.
model = Sequential()

# Only the very first layer declares the input shape (128x128, one channel);
# the kwargs dict is emptied after its first use.
first_layer_kwargs = {'input_shape': (128, 128, 1)}

for n_filters in (32, 64, 128, 256):
    for _ in range(3):
        model.add(Conv2D(n_filters, (3, 3), **first_layer_kwargs))
        first_layer_kwargs = {}
        model.add(BatchNormalization(axis=-1))
        model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Disabled: an extra 512-filter stage (two Conv-BN-ReLU groups + max-pool).

model.add(Flatten())

# Fully connected head: Dense(512) -> BatchNorm -> ReLU -> Dropout -> Dense(6) -> softmax
for head_layer in (
    Dense(512),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),
    Dense(6),
    Activation('softmax'),
):
    model.add(head_layer)

### Model 2

In [4]:

# Model 2: four convolutional stages of increasing width (32 -> 256 filters).
# Each stage is two 3x3 ReLU convolutions followed by a 5x5 stride-2
# convolution (used in place of pooling to downsample), with batch
# normalization after every convolution and dropout at the end of the stage.
model = Sequential()

# Only the very first layer declares the input shape (128x128, one channel);
# the kwargs dict is emptied after its first use.
input_kwargs = {'input_shape': (128, 128, 1)}

for width in (32, 64, 128, 256):
    model.add(Conv2D(width, kernel_size=3, activation='relu', **input_kwargs))
    input_kwargs = {}
    model.add(BatchNormalization())
    model.add(Conv2D(width, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(width, kernel_size=5, strides=2, padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))

# Disabled: an extra 512-filter stage with Dropout(0.3).

# Classifier head: Dense(512, relu) -> BatchNorm -> Dropout -> 6-way softmax
for head_layer in (
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(6, activation='softmax'),
):
    model.add(head_layer)


## Loading the preprocessed training data

In [5]:
# Read the preprocessed training images v9/1.png ... v9/14890.png as
# single-channel (grayscale, flag 0) arrays.
data = []

for i in range(1,14891):
    image_path = 'v9/'+str(i)+'.png'
    image = cv2.imread(image_path, 0)
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    # Fail here with the offending path instead of producing an object array
    # that only breaks later in astype/reshape.
    if image is None:
        raise FileNotFoundError('Could not read training image: ' + image_path)
    data.append(image)

data = np.array(data)
# Convert to float16 and scale pixel values by 1/255
# (assumes 8-bit images, i.e. values end up in [0, 1] — TODO confirm).
data = data.astype(np.float16)
data /= 255

In [6]:
# Add the trailing channel axis so the array matches the networks'
# input_shape of (128, 128, 1).
data = data.reshape((14890, 128, 128, 1))

## Loading the labels for all the original as well as augmented images used for training

In [7]:
number_of_classes = 6

# Read the 'category' column of v9.csv and shift every label down by one
# (presumably the CSV stores categories 1..6, giving class indices 0..5 —
# TODO confirm).
lab = pd.read_csv('v9.csv')
y = [category - 1 for category in lab['category']]

# One-hot encode the class indices for the categorical cross-entropy loss.
Y = np_utils.to_categorical(y, number_of_classes)



In [9]:
# NOTE(review): X_val is first assigned by the train_test_split call in the
# "Preparing the training and validation dataset" section further down, so on
# a fresh kernel this cell only works if it is run after that one.
X_val.shape

(2978, 128, 128, 1)

## Preparing the training and validation dataset

In [8]:
# Hold out 20% of the samples for validation; the fixed random_state makes
# the shuffle (and therefore the split) reproducible.
# NOTE(review): if `data` contains augmented variants of the same source
# image, a random split can place variants in both sets — confirm whether
# that is acceptable for the reported validation accuracy.
X_train, X_val, y_train, y_val = train_test_split(
    data,
    Y,
    random_state=42,
    test_size=0.2,
)

## Defining the optimizer for training

In [23]:
# RMSprop with a base learning rate of 1e-3 and no built-in decay; the
# learning rate is lowered by the plateau callback below instead.
optimizer = RMSprop(
    lr=1e-3,
    rho=0.9,
    epsilon=1e-08,
    decay=0.0,
)

# Halve the learning rate whenever the monitored validation accuracy has not
# improved for one epoch, down to a floor of 1e-5.
# NOTE(review): the metric key 'val_acc' is Keras-version dependent (newer
# releases report 'val_accuracy') — confirm against the installed version.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=1,
    min_lr=1e-5,
    verbose=1,
)

In [24]:
#learning_rate = 0.1
#epochs = 30
#decay_rate = learning_rate / epochs
#momentum = 0.8
#sgd = SGD(lr=learning_rate, momentum=momentum, decay=decay_rate, nesterov=True)

## Compiling the model

In [25]:
# Multi-class setup: one-hot targets with categorical cross-entropy,
# tracking accuracy during training.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


## Making batches for training

In [26]:
# Both generators are created without any augmentation arguments, so they
# only serve to batch the arrays (rotation/shift/zoom options are disabled).
gen, test_gen = ImageDataGenerator(), ImageDataGenerator()

# flow() is left at its defaults apart from the batch size of 32.
train_generator = gen.flow(X_train, y_train, batch_size=32)
val_generator = test_gen.flow(X_val, y_val, batch_size=32)

## Training


The results shown below were obtained by continuing to train a Model 2 network that had already been trained for 30 epochs.

In [27]:
# Train for 30 epochs. The number of steps per epoch is the number of full
# 32-sample batches in each split, derived from the arrays themselves rather
# than hard-coded sample counts, so it stays correct if the dataset or the
# split ratio changes (32 must match the batch_size given to flow()).
# The call is left as the cell's last expression so the History object is
# still displayed.
model.fit_generator(
    train_generator,
    steps_per_epoch=len(X_train) // 32,
    epochs=30,
    validation_data=val_generator,
    validation_steps=len(X_val) // 32,
    callbacks=[learning_rate_reduction],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fe73d5acfd0>

In [18]:
#model.save('aall_m100_99_56_3v9_2.h5')

In [82]:
#new_model = load_model('aall_m100_99_49_3v9_1.h5')

# Testing

## Loading preprocessed test data

In [19]:
# Read the preprocessed test images test_v9/1.png ... test_v9/40000.png as
# single-channel (grayscale, flag 0) arrays.
test = []

for i in range(1,40001):
    image_path = 'test_v9/'+str(i)+'.png'
    image = cv2.imread(image_path, 0)
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    # Fail here with the offending path instead of producing an object array
    # that only breaks later in astype/reshape.
    if image is None:
        raise FileNotFoundError('Could not read test image: ' + image_path)
    test.append(image)

test = np.array(test)
# Same scaling as the training data: float16, pixel values divided by 255.
test = test.astype(np.float16)
test /= 255

## Making predictions

We take the 7 best models and perform a majority-voting ensemble for our final submission.

In [28]:
# Load the seven saved checkpoints that make up the ensemble, in the same
# order as the names model1 ... model7.
(model1, model2, model3, model4,
 model5, model6, model7) = [
    load_model(checkpoint_path)
    for checkpoint_path in (
        'aall_m100_99_56_3v9_2.h5',
        'aall_m100_99_52_3v9_1.h5',
        'aall_m100_99_49_3v9_1.h5',
        'aall_m100_99_42_3v9_1.h5',
        'aall_m100_99_29_3v9_1.h5',
        'aall_m100_99_39_3v9_2.h5',
        'aall_m100_99_25_3v9_2.h5',
    )
]

In [29]:
# Add the trailing channel axis expected by the networks.
test = test.reshape((40000, 128, 128, 1))

# Collect one array of predicted labels per ensemble member, in model order.
a = []
for member in (model1, model2, model3, model4, model5, model6, model7):
    member_labels = member.predict_classes(test)
    # Undo the "-1" shift that was applied to the training labels, so the
    # predictions are expressed as category ids again.
    member_labels += 1
    a.append(member_labels)

# Keep the individual per-model names bound to the same arrays stored in `a`.
(prediction1, prediction2, prediction3, prediction4,
 prediction5, prediction6, prediction7) = a



## Majority Voting Ensemble for 7 models

In [39]:
# `a` holds one array of predicted labels per model. Stacking and transposing
# gives one row per test image containing every model's vote for it
# (assumes each model produced a 1-D array of the same length).
# The result is stored under a new name instead of overwriting `a`, so
# re-running this cell does not transpose the votes a second time.
votes_per_sample = np.array(a).T

final = []
for sample_votes in votes_per_sample:
    # Tally how many models voted for each label.
    counts = {}
    for label in sample_votes:
        counts[label] = counts.get(label, 0) + 1
    # Select the label with the highest vote count. (Plain max(counts) would
    # return the numerically largest label that received any vote, which is
    # not a majority vote.) On a tie, the label that was encountered first —
    # i.e. voted for by the earliest-listed model — wins on Python 3.7+,
    # where dicts preserve insertion order.
    final.append(max(counts, key=counts.get))


## Preparing submission

In [34]:
# Submission ids are the consecutive integers 1 through 40000.
ID = [*range(1, 40001)]

In [41]:
# Sanity check: number of ensembled labels in `final`; it must equal the
# number of ids (40000) for the submission DataFrame to be built.
len(final)

40000

In [42]:
# Pair every id with its ensembled category and write the submission file
# with the columns in the order id, category and no index column.
sub = pd.DataFrame({'id': ID, 'category': final})
sub.to_csv('sub_ensemble.csv', columns=['id', 'category'], index=False)