# NOTE  
### This notebook builds 7 models, starting with a basic model and progressing to deeper ones.
### The models that failed to give the best result have deliberately not been filtered out, as a depiction of the learning process, this being my first CNN project.
### The first 2 models are rather shallow and don't capture the intricacies of the images well. Next is a deeper model with Batch Normalisation. Then a LeNet-5 model is implemented, followed by two transfer-learning models built on ResNet50 (with ImageNet weights) that take on-the-fly augmented data, one of which trains only selected layers at the end of the network.
### Lastly, a deep CNN model that takes augmented data and works with a decaying learning rate gave the best result.
### The epochs for the first 6 models are **very low, as they were not the best-performing models**. Hence their graphs do not really do justice to their actual performance, because there are few points to plot.

## To skip to the best-performing model (Model 7), jump to **Cell 44**.

#### If this helps you in learning, an upvote would be huge! 

In [None]:
import pandas as pd
import numpy as np
import cv2
from PIL import Image
import glob
import keras as k
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.optimizers import SGD
from keras.layers import Flatten
from keras.layers import Dense 
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import AveragePooling2D
from keras.optimizers import Adam
from keras.layers import BatchNormalization
from keras.layers import Dropout 
from keras.layers import Activation
import keras
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet50 import ResNet50
from numpy import loadtxt
from keras.models import Model
from keras.callbacks import ReduceLROnPlateau
import tensorflow as tf

In [None]:
# Load the competition tables: `train` carries a 'label' column plus the pixel
# columns, `test` carries the pixel columns to predict on.
train = pd.read_csv("../input/digit-recognizer/train.csv")
test = pd.read_csv("../input/digit-recognizer/test.csv")

In [None]:
# Separate the pixel columns (features) from the digit label (target).
xtrain = train.drop(columns=["label"])
ytrain = train["label"]

In [None]:
# Scale pixel intensities from [0, 255] down to [0, 1].
xtrain, test = xtrain / 255.0, test / 255.0

# Reshape the flat pixel rows into (examples, height, width, channel) tensors.
image_shape = (-1, 28, 28, 1)
xtrain = xtrain.values.reshape(image_shape)
xtest = test.values.reshape(image_shape)

In [None]:
# Turn the digit labels into 10-way one-hot rows and materialise them as a NumPy array.
ytrain = np.asarray(tf.one_hot(ytrain, 10, axis = -1))       #One-Hot encoding

In [None]:
# Sanity-check the array shapes: train images, train labels, test images.
for array in (xtrain, ytrain, xtest):
    print(array.shape)

In [None]:
# Hold out 10% of the training examples as a validation set (fixed seed for reproducibility).
x_train, x_val, y_train, y_val = train_test_split(
    xtrain, ytrain, test_size=0.1, random_state=42
)
for split in (x_train, x_val, y_train, y_val):
    print(split.shape)

### After making predictions with a trained model, we will use the function defined below to create the submission file and to visualise the model's performance.

In [None]:
def _plot_training_curve(history, metric, axis_label):
    """Plot the train and validation series of one metric against the epoch index."""
    plt.plot(history[metric], label=f'Train {metric}')
    plt.plot(history[f'val_{metric}'], label=f'Validation {metric}')
    plt.xlabel("Epoch")
    plt.ylabel(axis_label)
    plt.title(f"{axis_label} Graph")
    plt.legend()
    plt.show()


def submission_and_visualization(final_predictions, model_number, model_history):
    """Write a predictions CSV for one model and plot its training curves.

    Parameters
    ----------
    final_predictions : array-like of shape (n_samples, n_classes)
        Per-class scores; the predicted label of each row is the index of its
        largest score.
    model_number : int or None
        Suffix of the output file name, ``Solution<model_number>.csv``.
    model_history : object with a ``history`` dict
        Must contain the keys ``'loss'``, ``'val_loss'``, ``'accuracy'`` and
        ``'val_accuracy'`` (one value per epoch), as returned by ``fit``.

    Returns
    -------
    None
        The CSV is written to the working directory and two figures are shown.
    """
    labels = np.argmax(final_predictions, axis=1)

    # Ids start at 42000 and run one per prediction row. Deriving the range
    # from the number of rows (instead of a fixed 42000..70000) keeps the
    # function usable for any number of predictions.
    first_id = 42000
    ids = np.arange(first_id, first_id + len(labels))

    finaldf = pd.DataFrame({
        'filename': [f"{sample_id}.png" for sample_id in ids],
        'label': labels,
    })

    submission_file = f"Solution{model_number}.csv"
    finaldf.to_csv(submission_file, index=False)

    _plot_training_curve(model_history.history, 'loss', "Loss")
    _plot_training_curve(model_history.history, 'accuracy', "Accuracy")

    return

# CNN Models :

## Model 1 
### Basic model without data augmentation, dropout, batch normalisation etc. 

In [None]:
# Baseline CNN: two conv/max-pool stages followed by two hidden dense layers
# and a 10-way softmax classifier.
model1 = Sequential([
    Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_uniform',
           input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(100, activation='relu', kernel_initializer='he_uniform'),
    Dense(64, activation='relu', kernel_initializer='he_uniform'),
    Dense(10, activation='softmax'),
])
model1.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

In [None]:
# fit() itself returns a History object holding the per-epoch losses and
# accuracies, so no extra callback is needed to record them.
history1 = model1.fit(x_train, y_train, batch_size=32, epochs=10,
                      validation_data=(x_val, y_val))

In [None]:
# Per-class scores of model1 for every test image.
ytest1 = model1.predict(xtest)

## Visualisation of model1's performance on the train and validation sets for each epoch.

In [None]:
# Writes Solution1.csv and plots model1's loss and accuracy curves.
submission_and_visualization(ytest1, 1, history1)

## Model 2
### Deeper model with Batch Normalisation. 

In [None]:
# Three conv layers with batch normalisation after the first pooling stage
# and after the last conv layer, then the same dense head as model1.
model2 = Sequential([
    Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_uniform',
           input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'),
    Conv2D(64, (3, 3), activation='relu', padding='same',
           kernel_initializer='he_uniform'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(100, activation='relu', kernel_initializer='he_uniform'),
    Dense(64, activation='relu', kernel_initializer='he_uniform'),
    Dense(10, activation='softmax'),
])

opt = Adam(learning_rate=0.001)
model2.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
# The History object returned by fit() already records the per-epoch metrics;
# no callback has to be passed for that.
history2 = model2.fit(x_train, y_train, batch_size=32, epochs=10,
                      validation_data=(x_val, y_val))

In [None]:
# Per-class scores of model2 for every test image.
ytest2 = model2.predict(xtest)

## Visualisation of model2's performance on the train and validation sets for each epoch.

In [None]:
# Writes Solution2.csv and plots model2's loss and accuracy curves.
submission_and_visualization(ytest2, 2, history2)

## Model 3 :
### Deeper model with Batch Normalisation and the Adam optimizer.
### Since this was the most promising, it was trained for 50 epochs (the cell below is set to 5 epochs; see the note on low epoch counts at the top).

In [None]:
# Three conv layers where ReLU is applied after pooling in the first and last
# stage; batch normalisation precedes the flatten and the output layer.
model3 = Sequential([
    Conv2D(32, (3, 3), kernel_initializer='he_uniform', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Activation('relu'),
    Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'),
    Conv2D(64, (3, 3), kernel_initializer='he_uniform'),
    MaxPooling2D((2, 2)),
    Activation('relu'),
    BatchNormalization(),
    Flatten(),
    Dense(100, activation='relu', kernel_initializer='he_uniform'),
    BatchNormalization(),
    Dense(10, activation='softmax'),
])
model3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# The History object returned by fit() already records the per-epoch metrics;
# no callback has to be passed for that.
history3 = model3.fit(x_train, y_train, batch_size=32, epochs=5,
                      validation_data=(x_val, y_val))

In [None]:
# Per-class scores of model3 for every test image.
ytest3 = model3.predict(xtest)

## Visualisation of model3's performance on the train and validation sets for each epoch.

In [None]:
# Writes Solution3.csv and plots model3's loss and accuracy curves.
submission_and_visualization(ytest3, 3, history3)

## LeNet-5
### Model using the classic LeNet-5 architecture (tanh activations, average pooling), applied to the 28x28 inputs.

In [None]:
# LeNet-5 style network: two tanh conv + average-pooling stages followed by
# 120- and 84-unit tanh dense layers.
model4 = Sequential()
model4.add(Conv2D(6, (5,5), activation = 'tanh', input_shape = (28,28,1), padding = 'same'))
model4.add(AveragePooling2D())
model4.add(Conv2D(16, (5,5), activation = 'tanh', padding = 'valid'))
model4.add(AveragePooling2D())
model4.add(Flatten())
model4.add(Dense(120, activation = 'tanh'))
model4.add(Dense(84, activation = 'tanh'))
# Softmax rather than sigmoid: the ten classes are mutually exclusive and
# categorical_crossentropy expects the outputs to form one probability
# distribution per image.
model4.add(Dense(10, activation = 'softmax'))
model4.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'] )

In [None]:
# Do not pass another model's History object as a callback: Keras can reuse it
# as this run's history, mixing model3's epochs into model4's curves. The
# History returned by fit() is all that is needed.
history4 = model4.fit(x_train, y_train, batch_size=32, epochs=10,
                      validation_data=(x_val, y_val))

In [None]:
# Per-class scores of model4 for every test image.
ytest4 = model4.predict(xtest)

## Visualisation of model4's performance on the train and validation sets for each epoch.

In [None]:
# Writes Solution4.csv and plots model4's loss and accuracy curves.
submission_and_visualization(ytest4, 4, history4)

## Transfer learning using ResNet50

### To use ResNet50 we need to preprocess the datasets again, because:
### 1. The network requires input images whose height and width are at least 32 pixels.
### 2. The network expects 3 input channels.

In [None]:
train_examples = xtrain.shape[0]        #Number of training examples
test_examples = xtest.shape[0]          #Number of test examples

# Zero-filled 32x32 three-channel canvases, one per image.
ResTrain = np.zeros((train_examples, 32, 32, 3))
ResTest = np.zeros((test_examples, 32, 32, 3))

# Place each 28x28 image in the top-left corner of its canvas. Broadcasting
# copies the single grey channel into all three channels in one vectorised
# assignment, replacing the per-example, per-channel Python loops.
ResTrain[:, :28, :28, :] = xtrain
ResTest[:, :28, :28, :] = xtest

In [None]:
# Resize every canvas to 32x32.
# NOTE(review): ResTrain/ResTest are already allocated as 32x32 images, so this
# pass presumably leaves the pixel values unchanged — confirm before removing.
target_size = (32, 32)

for example, image in enumerate(ResTrain):
    ResTrain[example] = cv2.resize(image, target_size)

for example, image in enumerate(ResTest):
    ResTest[example] = cv2.resize(image, target_size)

In [None]:
# Report the shapes of the arrays prepared for ResNet50.
print("Shape of train input images : ", ResTrain.shape)
print("Shape of test input images : ", ResTest.shape)
print("Shape of train labels : ", ytrain.shape)
print("We have processed the input data to be fed into ResNet50.")

### Data Augmentation :

In [None]:
# Random geometric augmentation (shifts, shear, rotation, zoom) applied
# on the fly to the training batches.
augmentation_options = dict(
    rotation_range=25,
    width_shift_range=.3,
    height_shift_range=.2,
    shear_range=.3,
    zoom_range=.2,
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation batches are passed through without augmentation.
validation_datagen = ImageDataGenerator()

### Loading ResNet50 model with weights trained on ImageNet data, as a base model.

In [None]:
# ResNet50 with ImageNet weights and without its classification head
# (include_top=False), configured for 32x32 three-channel inputs.
base_model = ResNet50(weights='imagenet', input_shape=(32,32,3),include_top=False)

## Model 5 is a model built on top of ResNet50, **where all the layers are trained.** 

In [None]:
# Appending a small dense classification head to the base model (ResNet50).
model5 = Sequential([
    base_model,
    Flatten(),
    Dense(units=60, activation='relu'),
    Dense(units=10, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of model5.
model5.summary()

In [None]:
# Configure model5 for training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
model5.compile(optimizer='adam', loss= 'categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Split the ResNet-sized images 90/10 into train and validation sets, using the
# same test_size and random_state as the earlier split of the 28x28 images.
train_images, val_images, train_labels, val_labels = train_test_split(ResTrain, ytrain, test_size=0.1, shuffle=True, random_state=42)
# The test images are used as prepared; no split is needed.
test_images = ResTest

In [None]:
class myCallback(keras.callbacks.Callback):
    """Stop training once the training accuracy of an epoch exceeds a threshold.

    Parameters
    ----------
    threshold : float, optional
        Training stops after the first epoch whose ``'accuracy'`` log value is
        strictly greater than this. Defaults to 0.999999.
    """

    def __init__(self, threshold=0.999999):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop when the epoch's accuracy is above the threshold."""
        # `logs` may be None, or may lack 'accuracy' if the model was compiled
        # without that metric; in both cases there is nothing to compare.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > self.threshold:
            print("Stop training!")
            self.model.stop_training = True
callbacks = myCallback()

In [None]:
batch_size = 212

# Step counts must be whole numbers; round up so that the final, smaller batch
# of each pass over the data is still consumed.
train_steps = int(np.ceil(train_images.shape[0] / batch_size))
val_steps = int(np.ceil(val_images.shape[0] / batch_size))

# Train on augmented batches; validate on un-augmented batches.
history5 = model5.fit(train_datagen.flow(train_images, train_labels, batch_size=batch_size),
                      steps_per_epoch=train_steps,
                      epochs=10,
                      validation_data=validation_datagen.flow(val_images, val_labels,
                                                              batch_size=batch_size),
                      validation_steps=val_steps,
                      callbacks=[callbacks])

In [None]:
# Per-class scores of model5 for every (ResNet-sized) test image.
ytest5 = model5.predict(ResTest)

## Visualisation of model5's performance on the train and validation sets for each epoch.

In [None]:
# Writes Solution5.csv and plots model5's loss and accuracy curves.
submission_and_visualization(ytest5, 5, history5)

## Model 6 is a model built on top of ResNet50, where selected layers, at the end of the network architecture are trained. 

In [None]:
# Fresh ResNet50 base (ImageNet weights, no classification head).
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(32,32,3))

# Freeze the whole base BEFORE compiling: compile() captures the `trainable`
# state, so freezing afterwards is not guaranteed to take effect.
for layer in base_model.layers:
    layer.trainable = False

# New classification head on top of the frozen base.
x = base_model.output
x = Flatten()(x)
x = Dense(units=60, activation='relu')(x)
pred = Dense(units=10, activation='softmax')(x)

model6 = Model(inputs=base_model.input, outputs=pred)

model6.compile(optimizer='rmsprop', loss= 'categorical_crossentropy', metrics=['accuracy'])

In [None]:
# First phase: train with the base layers frozen.
# fit() returns the History object itself, so no callback is passed for it;
# step counts are rounded up to whole numbers.
history6 = model6.fit(train_datagen.flow(train_images, train_labels, batch_size=batch_size),
                      steps_per_epoch=int(np.ceil(train_images.shape[0] / batch_size)),
                      epochs=10,
                      validation_data=validation_datagen.flow(val_images, val_labels,
                                                              batch_size=batch_size),
                      validation_steps=int(np.ceil(val_images.shape[0] / batch_size)))

In [None]:
# Keep the first 143 base layers frozen and make the remaining ones trainable.
for layer in base_model.layers[:143]:
    layer.trainable = False

for layer in base_model.layers[143:]:
    layer.trainable = True

# Changes to `trainable` only take effect after the model is compiled again.
model6.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Second phase: fine-tune with the last base layers unfrozen.
# The previous History object is no longer passed as a callback, so `history6`
# now holds exactly the epochs of this fine-tuning run; step counts are rounded
# up to whole numbers.
history6 = model6.fit(train_datagen.flow(train_images, train_labels, batch_size=batch_size),
                      steps_per_epoch=int(np.ceil(train_images.shape[0] / batch_size)),
                      epochs=5,
                      validation_data=validation_datagen.flow(val_images, val_labels,
                                                              batch_size=batch_size),
                      validation_steps=int(np.ceil(val_images.shape[0] / batch_size)))

In [None]:
# Per-class scores of model6 for every (ResNet-sized) test image.
ytest6 = model6.predict(ResTest)

## Visualisation of model6's performance on the train and validation sets for each epoch.

In [None]:
# Writes Solution6.csv and plots model6's loss and accuracy curves.
submission_and_visualization(ytest6, 6, history6)

## Model 7 :

In [None]:
# Deep CNN: three conv stages (each ending in max-pool -> ReLU -> batch norm,
# the last two followed by dropout) and a dense head with dropout.
model = Sequential([
    # Stage 1
    Conv2D(32, (4, 4), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3)),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Activation('relu'),
    BatchNormalization(axis=-1),

    # Stage 2
    Conv2D(128, (3, 3)),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Activation('relu'),
    BatchNormalization(axis=-1),
    Dropout(0.2),

    # Stage 3
    Conv2D(128, (2, 2), activation='relu'),
    Conv2D(256, (2, 2)),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Activation('relu'),
    BatchNormalization(axis=-1),
    Dropout(0.2),

    # Classifier head
    Flatten(),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax'),
])

# `learning_rate` is the supported keyword (as used for model2); the old `lr`
# alias is deprecated.
opt = Adam(learning_rate=0.002)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Mild augmentation for the training batches; flips stay disabled.
# NOTE(review): rotation_range is expressed in degrees, so .15 is a very small
# rotation — confirm whether 15 was intended.
train_augmentation = dict(
    rotation_range=.15,
    shear_range=.1,
    zoom_range=.1,
    width_shift_range=.1,
    height_shift_range=.1,
    horizontal_flip=False,
    vertical_flip=False,
)
train_gen = ImageDataGenerator(**train_augmentation)
val_gen = ImageDataGenerator()

# Batch iterators over the 28x28 train/validation splits.
train_img_gen = train_gen.flow(x_train, y_train, batch_size=32)
val_img_gen = val_gen.flow(x_val, y_val, batch_size=32)

In [None]:
# Decay schedule: when val_accuracy has not improved for 3 epochs, multiply the
# learning rate by 0.4, never going below 0.00002.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='auto',
    factor=0.4,
    patience=3,
    cooldown=0,
    min_lr=0.00002,
    verbose=1,
)

In [None]:
# Train the final model for 80 epochs on augmented batches, with the learning-rate decay callback.
history = model.fit(train_img_gen, epochs = 80, validation_data=val_img_gen, verbose=1, callbacks=[reduce_lr])

In [None]:
# Per-class scores of the final model for every test image.
ytest = model.predict(xtest)

In [None]:
# Use the model's number so the file is written as Solution7.csv rather than
# "SolutionNone.csv", then plot the loss and accuracy curves.
submission_and_visualization(ytest, 7, history)

In [None]:
# Predicted digit per image = index of the highest class score, stored as a
# Series named "Label".
results = pd.Series(np.argmax(ytest, axis=1), name="Label")

In [None]:
# ImageId is 1-based and has one entry per prediction; deriving its length from
# `results` keeps the two columns aligned for any test-set size.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)
submission.to_csv("submission.csv", index=False)