In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the labelled training set from the competition input directory.
pd_train = pd.read_csv('../input/digit-recognizer/train.csv')

# Separate the target column from the remaining feature columns.
y_train = pd_train['label']
x_train = pd_train.drop(columns=['label'])



In [None]:
import matplotlib.pyplot as plt

# View every flattened feature row as a 28x28 image (used for display only).
x_train_vis = x_train.to_numpy().reshape(len(x_train), 28, 28)

# Show the first four samples side by side, each titled with its label.
fig, axis = plt.subplots(nrows=1, ncols=4, figsize=(20, 10))
for i, ax in enumerate(axis.flat):
    digit = y_train[i]
    ax.imshow(x_train_vis[i], cmap='binary')
    ax.set_title(f"Real Number is {digit}")

In [None]:
# Scale the features by 1/255 (assumes 8-bit pixel intensities, so values end up in [0, 1]).
# The scaled frame is derived from `pd_train` instead of from `x_train` itself so the cell is
# idempotent: re-running it can no longer divide already-normalised data by 255 a second time.
x_train = pd_train.drop(columns=['label']) / 255.0


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. The split is shuffled, stratified on the
# label so both parts keep the same class proportions, and seeded for reproducibility.
X_train, X_val, Y_train, Y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=26,
    shuffle=True,
    stratify=y_train,
)

In [None]:
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.optimizers import Adam, SGD, Adagrad, Adadelta, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dropout, Flatten,Activation , Dense

In [None]:
# Turn each flat feature row into a 28x28 single-channel image tensor: (N, 28, 28, 1).
X_train, X_val = (
    np.array(features).reshape(-1, 28, 28, 1) for features in (X_train, X_val)
)
# One-hot encode the integer labels to match the categorical cross-entropy loss.
Y_train, Y_val = (
    np_utils.to_categorical(np.array(labels)) for labels in (Y_train, Y_val)
)

In [None]:
# Data augmentation: to reduce overfitting, the training images are randomly perturbed
# (small rotations, zooms and shifts) so the model rarely sees the exact same image twice.
# All normalisation / whitening options are left at their default (disabled).
datagen = ImageDataGenerator(
        rotation_range=15,  # randomly rotate images by up to 15 degrees
        zoom_range = 0.01, # randomly zoom images by up to 1%
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # no horizontal mirroring
        vertical_flip=False)  # no vertical mirroring

# Validation images must NOT be augmented: `val_accuracy` drives the model checkpoint, so
# it has to be measured on the untouched hold-out images. A plain generator with
# shuffle=False makes the validation metrics deterministic from epoch to epoch.
val_datagen = ImageDataGenerator()

train_gen = datagen.flow(X_train, Y_train, batch_size=32)
test_gen = val_datagen.flow(X_val, Y_val, batch_size=32, shuffle=False)

In [None]:
# Plain CNN: three convolutional stages (64 -> 128 -> 256 filters), each followed by
# 2x2 max-pooling and batch normalisation, then a 512-unit dense layer and a 10-way softmax.
model = Sequential([
    # Stage 1: two 3x3 convolutions with 64 filters
    Conv2D(64, (3, 3), activation="relu", input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    # Stage 2: two 3x3 convolutions with 128 filters
    Conv2D(128, (3, 3), activation="relu"),
    Conv2D(128, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    # Stage 3: one 3x3 convolution with 256 filters
    Conv2D(256, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    # Classifier head
    Flatten(),
    Dense(512, activation="relu"),
    Dense(10, activation="softmax"),
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
model_path = './model_out.h5'

# Write the model to `model_path` only when validation accuracy reaches a new maximum.
checkpointer = ModelCheckpoint(
    model_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)


In [None]:
# (Re)compile with an explicit Adam optimizer so the learning rate is visible and tunable.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
# Train for 100 epochs on the augmented batches, validating after every epoch and
# checkpointing the best model so far.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated and has been
# removed from recent Keras releases.
history = model.fit(train_gen,
                    epochs = 100,
                    validation_data = test_gen,
                    callbacks = [checkpointer])

# Testing resuming training from the saved model

In [None]:
from tensorflow.keras.models import load_model
# Replace the in-memory model with the one stored in the checkpoint file.
model = load_model(filepath='model_out.h5')

In [None]:
# Print the layer-by-layer summary of the model currently bound to `model`.
model.summary()

In [None]:
# Continue training the reloaded model for epochs 100..199.
# `initial_epoch` only offsets the epoch counter; the weights come from the checkpoint
# file, which was written with save_best_only=True and is therefore the best epoch's
# model, not necessarily the last one.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated and has been
# removed from recent Keras releases.
history_2 = model.fit(train_gen,
                      initial_epoch = 100,
                      epochs = 200,
                      validation_data = test_gen,
                      callbacks = [checkpointer])

# Generating the submission file

In [None]:
# Read the unlabelled test set and apply the same 1/255 scaling used for training.
pd_test = pd.read_csv('../input/digit-recognizer/test.csv')
x_test = pd_test / 255.0

# Reshape the flat rows into (N, 28, 28, 1) image tensors for the network.
X_test = np.reshape(np.array(x_test), (-1, 28, 28, 1))

In [None]:
# Predict with the checkpointed model and keep the most probable class per image.
model = load_model('./model_out.h5')
y_pred = np.argmax(model.predict(X_test), axis=1)

# Fill the sample submission's 'Label' column with the predictions and write it out.
pd_submit = pd.read_csv('../input/digit-recognizer/sample_submission.csv').assign(Label=y_pred)
pd_submit.to_csv('submit.csv', index=False, header=True)

# Trying out a different CNN model (Modified LeNet-5)

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout, Activation, BatchNormalization
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import math

In [None]:
# Read the labelled training set from the competition input directory.
pd_train = pd.read_csv('../input/digit-recognizer/train.csv')

# Separate the target column from the remaining feature columns.
y_train = pd_train['label']
x_train = pd_train.drop(columns=['label'])



In [None]:
import matplotlib.pyplot as plt

# View every flattened feature row as a 28x28 image (used for display only).
x_train_vis = x_train.to_numpy().reshape(len(x_train), 28, 28)

# Show the first four samples side by side, each titled with its label.
fig, axis = plt.subplots(nrows=1, ncols=4, figsize=(20, 10))
for i, ax in enumerate(axis.flat):
    digit = y_train[i]
    ax.imshow(x_train_vis[i], cmap='binary')
    ax.set_title(f"Real Number is {digit}")

In [None]:
# Scale the features by 1/255 (assumes 8-bit pixel intensities, so values end up in [0, 1]).
# The scaled frame is derived from `pd_train` instead of from `x_train` itself so the cell is
# idempotent: re-running it can no longer divide already-normalised data by 255 a second time.
x_train = pd_train.drop(columns=['label']) / 255.0


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. The split is shuffled, stratified on the
# label so both parts keep the same class proportions, and seeded for reproducibility.
X_train, X_val, Y_train, Y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=26,
    shuffle=True,
    stratify=y_train,
)

In [None]:
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.optimizers import Adam, SGD, Adagrad, Adadelta, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dropout, Flatten,Activation , Dense

In [None]:
# Turn each flat feature row into a 28x28 single-channel image tensor: (N, 28, 28, 1).
X_train, X_val = (
    np.array(features).reshape(-1, 28, 28, 1) for features in (X_train, X_val)
)
# One-hot encode the integer labels to match the categorical cross-entropy loss.
Y_train, Y_val = (
    np_utils.to_categorical(np.array(labels)) for labels in (Y_train, Y_val)
)

In [None]:
# Data augmentation: to reduce overfitting, the training images are randomly perturbed
# (small rotations, zooms and shifts) so the model rarely sees the exact same image twice.
# All normalisation / whitening options are left at their default (disabled).
datagen = ImageDataGenerator(
        rotation_range=15,  # randomly rotate images by up to 15 degrees
        zoom_range = 0.01, # randomly zoom images by up to 1%
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # no horizontal mirroring
        vertical_flip=False)  # no vertical mirroring

# Validation images must NOT be augmented: `val_loss` / `val_accuracy` drive the
# learning-rate schedule and the model checkpoint, so they have to be measured on the
# untouched hold-out images. A plain generator with shuffle=False makes the validation
# metrics deterministic from epoch to epoch.
val_datagen = ImageDataGenerator()

train_gen = datagen.flow(X_train, Y_train, batch_size=32)
test_gen = val_datagen.flow(X_val, Y_val, batch_size=32, shuffle=False)

In [None]:
def LeNet5v2(input_shape=(28,28,1), classes = 10):
    """
    Build a modified LeNet-5 convolutional classifier (uncompiled).

    The network has two convolutional stages (two 5x5 convolutions with 32 filters,
    then two 3x3 convolutions with 64 filters), each finished by batch normalisation,
    ReLU, 2x2 max-pooling and dropout, followed by three bias-free dense layers
    (256, 128, 84 units) with batch normalisation + ReLU, and a softmax output.
    Only layers with learnable parameters are counted in the layer numbering below.

    Arguments:
    input_shape -- shape of a single input image, e.g. (height, width, channels);
                   forwarded to the first convolution
    classes -- integer, number of output classes (units of the softmax layer)

    Returns:
    model -- a Keras Sequential model named 'LeNet5v2'; the caller must compile it
    """

    # L2 weight-decay strength applied to the first convolution of each stage.
    l2_strength = 0.0005

    model = Sequential([

    # Layer 1
    Conv2D(filters = 32, kernel_size = 5, strides = 1, activation = 'relu', input_shape = input_shape, kernel_regularizer = l2(l2_strength), name = 'convolution_1'),

    # Layer 2 (no bias: the following batch norm supplies its own offset)
    Conv2D(filters = 32, kernel_size = 5, strides = 1, name = 'convolution_2', use_bias = False),

    # Layer 3
    BatchNormalization(name = 'batchnorm_1'),

    # -------------------------------- #
    Activation("relu"),
    MaxPooling2D(pool_size = 2, strides = 2, name = 'max_pool_1'),
    Dropout(0.25, name = 'dropout_1'),
    # -------------------------------- #

    # Layer 4
    Conv2D(filters = 64, kernel_size = 3, strides = 1, activation = 'relu', kernel_regularizer = l2(l2_strength), name = 'convolution_3'),

    # Layer 5
    Conv2D(filters = 64, kernel_size = 3, strides = 1, name = 'convolution_4', use_bias = False),

    # Layer 6
    BatchNormalization(name = 'batchnorm_2'),

    # -------------------------------- #
    Activation("relu"),
    MaxPooling2D(pool_size = 2, strides = 2, name = 'max_pool_2'),
    Dropout(0.25, name = 'dropout_2'),
    Flatten(name = 'flatten'),
    # -------------------------------- #

    # Layer 7
    Dense(units = 256, name = 'fully_connected_1', use_bias = False),

    # Layer 8
    BatchNormalization(name = 'batchnorm_3'),

    # -------------------------------- #
    Activation("relu"),
    # -------------------------------- #

    # Layer 9
    Dense(units = 128, name = 'fully_connected_2', use_bias = False),

    # Layer 10
    BatchNormalization(name = 'batchnorm_4'),

    # -------------------------------- #
    Activation("relu"),
    # -------------------------------- #

    # Layer 11
    Dense(units = 84, name = 'fully_connected_3', use_bias = False),

    # Layer 12
    BatchNormalization(name = 'batchnorm_5'),

    # -------------------------------- #
    Activation("relu"),
    Dropout(0.25, name = 'dropout_3'),
    # -------------------------------- #

    # Output: one softmax unit per class
    Dense(units = classes, activation = 'softmax', name = 'output')

    ], name = 'LeNet5v2')  # public constructor argument instead of poking the private `_name`

    return model

In [None]:
# Instantiate the modified LeNet-5 for 28x28 single-channel images and 10 classes.
LeNet5Model = LeNet5v2((28, 28, 1), 10)

In [None]:
# Print the layer-by-layer summary of the LeNet5v2 network bound to `LeNet5Model`.
LeNet5Model.summary()

In [None]:
# Multiply the learning rate by 0.2 whenever `val_loss` has not improved for 5 epochs,
# never going below 1e-4.
variable_learning_rate = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    factor=0.2,
    min_lr=0.0001,
    verbose=1,
)

# Write the model to `model_path` only when validation accuracy reaches a new maximum.
model_path = './model_out.h5'
checkpointer = ModelCheckpoint(
    model_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)



In [None]:
# Compile with Adam (default settings) and categorical cross-entropy on one-hot labels.
LeNet5Model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 100 epochs on the augmented batches with learning-rate reduction on plateau
# and best-model checkpointing.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated and has been
# removed from recent Keras releases.
history = LeNet5Model.fit(
    train_gen,
    epochs = 100,
    validation_data = test_gen,
    callbacks = [variable_learning_rate, checkpointer],
)


In [None]:
# Alternative callback set: learning-rate reduction keyed on validation accuracy, plus
# two checkpoints (best validation loss and best validation accuracy).
# NOTE(review): none of these callbacks is passed to a fit call in the visible part of
# the notebook after this cell -- confirm whether a training cell is missing.
variable_learning_rate = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=5,
    factor=0.2,
    min_lr=0.00001,
    verbose=1,
)

# Checkpoint written whenever validation loss reaches a new minimum.
model_path = './model_out_loss.h5'
checkpointer = ModelCheckpoint(
    model_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Checkpoint written whenever validation accuracy reaches a new maximum.
model_path_2 = './model_out.h5'
checkpointer_2 = ModelCheckpoint(
    model_path_2,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)


In [None]:
# Read the unlabelled test set and apply the same 1/255 scaling used for training.
pd_test = pd.read_csv('../input/digit-recognizer/test.csv')
x_test = pd_test / 255.0

# Reshape the flat rows into (N, 28, 28, 1) image tensors for the network.
X_test = np.reshape(np.array(x_test), (-1, 28, 28, 1))

In [None]:
from tensorflow.keras.models import load_model
# Predict with the checkpointed model and keep the most probable class per image.
model = load_model('./model_out.h5')
y_pred = np.argmax(model.predict(X_test), axis=1)

# Fill the sample submission's 'Label' column with the predictions and write it out.
pd_submit = pd.read_csv('../input/digit-recognizer/sample_submission.csv').assign(Label=y_pred)
pd_submit.to_csv('submit.csv', index=False, header=True)