# Part 1 - Baseline

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the labelled training set; its 'label' column is split off as the target further down.
train = pd.read_csv('../input/digit-recognizer/train.csv')
train.tail()  # show the last rows as a quick sanity check of the load

In [None]:
# Load the test set; it is later passed to the models unchanged (no column is dropped from it).
test = pd.read_csv('../input/digit-recognizer/test.csv')
test.tail()  # show the last rows as a quick sanity check of the load

> How to make a dataset?

In [None]:
# Target: the 'label' column. Features: every other column of the training frame.
Y = train['label']
X = train.drop(columns='label')
X_test = test  # the test frame is used as model input as-is

> How to choose an algorithm? (no way)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import model_selection

In [None]:
%%time
# Pin the forest's seed: the notebook's global seeds are only set in a later
# cell, so without random_state every run would produce different scores and
# a different submission file.
random_forest = RandomForestClassifier(random_state=0)

# 5-fold cross-validated accuracy as the estimate of out-of-sample performance
scores = model_selection.cross_val_score(random_forest, X, Y, cv=5, scoring='accuracy')
print(scores)
print("Kfold on RandomForestClassifier: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std()))

# Refit on the full training set for the submission. The score displayed below
# is computed on the same data the model was fitted on, so it is optimistic by
# construction; compare it with the cross-validated accuracy printed above.
random_forest.fit(X, Y)
random_forest.score(X, Y)

> Can we trust the validation score?

In [None]:
Y_pred = random_forest.predict(X_test)

# ImageId is a 1-based running index with one entry per prediction; its length
# is derived from the predictions instead of hard-coding the test-set size.
submission = pd.DataFrame({
    "ImageId": np.arange(1, len(Y_pred) + 1),
    "Label": Y_pred,
})
submission.to_csv("rf.csv", index=False) # 0.96517

> Is it good enough?

# Part 2 - Neural networks

In [None]:
# Rebuild features/target from the loaded frames so the steps below
# (which rebind X, Y and X_test) always start from the raw data.
X = train.drop('label', axis=1)
Y = train['label']
X_test = test
X.head()  # each row is one sample, shown here as a flat list of numeric columns

> How to interpret an array of numbers as an image?

## Step 1 - Reshape

In [None]:
# Reshape into (n_images, 28, 28, 1); -1 lets NumPy infer the number of images.
# np.asarray accepts both the original DataFrame and an already-reshaped array,
# so re-running this cell no longer fails with "ndarray has no attribute values".
X = np.asarray(X).reshape(-1,28,28,1)
X_test = np.asarray(X_test).reshape(-1,28,28,1)
X.shape

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Show the first 40 training images in a 5x8 grid, each titled with its label.
plt.figure(figsize=(20,12))
for position, (image, label) in enumerate(zip(X[:40], Y[:40]), start=1):
    plt.subplot(5, 8, position)
    plt.title(label)
    plt.imshow(image, 'gray')

> What do neural networks like?

## Step 2 - Rescale

In [None]:
# Scale pixel values by 1/255 and keep them as float32. Plain division yields
# float64, which doubles the memory of every later copy of the data (most
# notably the 7x augmented set built in Part 3) without benefiting training.
# NOTE: this cell rebinds X / X_test to their scaled versions; re-run it only
# after re-running Part 2 from its first cell, otherwise the data is scaled twice.
X = (X / 255).astype('float32')
X_test = (X_test / 255).astype('float32')

## Step 3 - One-hot encoding

In [None]:
# One indicator column per distinct label, in sorted label order.
# The dtype is requested explicitly because the default dummy dtype depends on
# the installed pandas version (uint8 before 2.0, bool from 2.0 on).
Y = pd.get_dummies(Y, dtype='float32').to_numpy()

> How to control randomness?

In [None]:
import numpy as np
import tensorflow as tf
import random as python_random

# Seed the random number generators of all three libraries so that the
# stochastic steps executed after this cell are repeatable across runs.
seed=0
np.random.seed(seed)  # NumPy's global generator
python_random.seed(seed)  # Python's built-in `random` module
tf.random.set_seed(seed)  # TensorFlow's global seed

## LeNet-5
[Gradient-based learning applied to document recognition](https://ieeexplore.ieee.org/document/726791), Yann LeCun et al., 1998

In [None]:
from tensorflow import keras

In [None]:
def LeNet5(input_shape=(28,28,1)):
    """Build and compile a LeNet-5 style convolutional classifier.

    Two convolution + average-pooling stages are followed by a flatten step and
    three dense layers. All hidden layers use a sigmoid activation; the
    10-unit output layer uses softmax.

    Args:
        input_shape: Shape of a single input image, (height, width, channels).

    Returns:
        A `keras.Sequential` model compiled with the SGD optimizer, the
        categorical cross-entropy loss and the categorical-accuracy metric.
    """
    layers = keras.layers
    pooling = dict(pool_size=(2,2), strides=(2,2), padding='valid')

    # Per-sample output shapes in the comments assume the default 28x28x1 input.
    feature_extractor = [
        layers.Conv2D(filters=6, kernel_size=(5,5), padding='same', activation='sigmoid'),    # (28,28,6)
        layers.AveragePooling2D(**pooling),                                                   # (14,14,6)
        layers.Conv2D(filters=16, kernel_size=(5,5), padding='valid', activation='sigmoid'),  # (10,10,16)
        layers.AveragePooling2D(**pooling),                                                   # (5,5,16)
        layers.Flatten(),                                                                     # (400,)
    ]
    classifier = [
        layers.Dense(units=120, activation='sigmoid'),  # (120,)
        layers.Dense(units=84, activation='sigmoid'),   # (84,)
        layers.Dense(units=10, activation='softmax'),   # (10,) class probabilities
    ]

    network = keras.Sequential([keras.Input(shape=input_shape), *feature_extractor, *classifier])
    network.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    return network

model = LeNet5()

> How to train it?

In [None]:
# Hold out 20% of the samples for validation; the remaining 80% are used for fitting.
X_train, X_valid, Y_train, Y_valid = model_selection.train_test_split(X, Y, train_size=0.8)
X_train.shape, X_valid.shape  # display both shapes to confirm the split sizes

> early stopping

> batch size

> epoch

In [None]:
%%time
# Stop training once the monitored quantity has failed to improve by at least
# `min_delta` for `patience` consecutive epochs, then restore the best weights.
# No `monitor` argument is given, so the Keras default applies
# (the validation loss, per the Keras EarlyStopping documentation).
early_stopping = keras.callbacks.EarlyStopping(
    patience=10,
    min_delta=0.0001,
    restore_best_weights=True,
)

# epochs=200 is only an upper bound: the callback above may end training earlier.
# The returned History object is used afterwards to plot the learning curves.
history = model.fit(
    X_train, Y_train,
    validation_data=(X_valid, Y_valid),
    batch_size=32,
    epochs=200,
    callbacks=[early_stopping],
    verbose=True
)

> How many epochs have passed?

In [None]:
# history.history maps each logged metric to its per-epoch values,
# so the frame has one row per completed epoch.
history_df = pd.DataFrame(history.history)

# One figure per metric, training and validation curves side by side.
curves = {
    "Cross-entropy": ['loss', 'val_loss'],
    "Accuracy": ['categorical_accuracy', 'val_categorical_accuracy'],
}
for title, columns in curves.items():
    history_df[columns].plot(title=title)

In [None]:
Y_pred = model.predict(X_test)
Y_pred_cat = np.argmax(Y_pred, axis=1)  # index of the highest softmax output per row

# ImageId is a 1-based running index with one entry per prediction; its length
# is derived from the predictions instead of hard-coding the test-set size.
submission = pd.DataFrame({
    "ImageId": np.arange(1, len(Y_pred_cat) + 1),
    "Label": Y_pred_cat,
})
submission.to_csv("lenet5.csv", index=False) # 0.97460

> How to improve the result?

# Part 3  - Data augmentation

In [None]:
def augment_images(data, labels, copies = 1, rotation=True, shear=True, shift=True, zoom=True):
    """Expand a set of images with randomly transformed variants.

    For every index of `labels`, the untouched image is emitted first, followed
    by `copies - 1` variants. Each variant starts from the original image and
    passes through the enabled transforms in the fixed order
    rotation -> shear -> shift -> zoom. The label is repeated for every variant.

    Args:
        data: Indexable collection of images laid out as (rows, cols, channels).
        labels: Array of labels; `labels.shape[0]` sets how many items are processed.
        copies: Total number of rows produced per input image, including the
            original. With 1 (the default) or less, only the originals are returned.
        rotation: Apply a random rotation (argument 15).
        shear: Apply a random shear (intensity 0.1).
        shift: Apply a random shift (fractions 0.15 and 0.15).
        zoom: Apply a random zoom drawn from the range (0.9, 1.1).

    Returns:
        Tuple `(images, labels)` of NumPy arrays in which the rows belonging to
        one source image are consecutive.

    NOTE(review): recent Keras versions appear to interpret the shear intensity
    in degrees, which would make 0.1 close to a no-op — confirm against the
    installed version.
    """
    axes = dict(row_axis=0, col_axis=1, channel_axis=2)
    out_images, out_labels = [], []

    for idx in range(labels.shape[0]):
        source, target = data[idx], labels[idx]

        # The original always comes first in its group.
        out_images.append(source)
        out_labels.append(target)

        for _ in range(copies - 1):
            variant = source
            if rotation:
                variant = keras.preprocessing.image.random_rotation(variant, 15, **axes)
            if shear:
                variant = keras.preprocessing.image.random_shear(variant, 0.1, **axes)
            if shift:
                variant = keras.preprocessing.image.random_shift(variant, 0.15, 0.15, **axes)
            if zoom:
                variant = keras.preprocessing.image.random_zoom(variant, (0.9, 1.1), **axes)

            out_images.append(variant)
            out_labels.append(target)

    return np.array(out_images), np.array(out_labels)

In [None]:
# 10 source images x 10 rows each (the original plus 9 variants) = 100 images,
# which fills a 10x10 grid with one source image per grid row.
imgs, lbls = augment_images(X[:10], Y[:10], copies=10)

plt.figure(figsize=(20,20))
for slot, picture in enumerate(imgs, start=1):
    plt.subplot(10, 10, slot)
    plt.imshow(picture, cmap='gray')

In [None]:
%%time
# copies=7 yields 7 rows per source image: the original followed by 6 randomly
# transformed variants, so X2/Y2 are 7 times as long as X/Y.
X2, Y2 = augment_images(X, Y, copies=7)

In [None]:
# augment_images emits every original image directly followed by its augmented
# variants, so consecutive rows of X2 form one group per source image.
# The split is done by group: a plain row-wise split would place variants of
# the same source image in both the training and the validation set, which
# inflates the validation metrics and misleads early stopping.
rows_per_image = len(X2) // len(X)
groups = np.arange(len(X2)) // rows_per_image

splitter = model_selection.GroupShuffleSplit(n_splits=1, train_size=0.8)
train_idx, valid_idx = next(splitter.split(X2, Y2, groups=groups))

X_train, X_valid = X2[train_idx], X2[valid_idx]
Y_train, Y_valid = Y2[train_idx], Y2[valid_idx]
X_train.shape, X_valid.shape

In [None]:
%%time
# Fresh callback instance with the same settings as in the first training run:
# stop after `patience` epochs without an improvement of at least `min_delta`
# and restore the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    patience=10,
    min_delta=0.0001,
    restore_best_weights=True,
)

# NOTE(review): no new model is created in the cells shown before this call, so
# `model` is the network already trained in Part 2 and training continues from
# those weights rather than from a fresh initialisation — confirm this is intended.
history = model.fit(
    X_train, Y_train,
    validation_data=(X_valid, Y_valid),
    batch_size=32,
    epochs=200,
    callbacks=[early_stopping],
    verbose=True
)

In [None]:
# Learning curves of the run on the augmented data: one row per completed epoch.
history_df = pd.DataFrame(history.history)

loss_columns = ['loss', 'val_loss']
accuracy_columns = ['categorical_accuracy', 'val_categorical_accuracy']

history_df[loss_columns].plot(title="Cross-entropy")
history_df[accuracy_columns].plot(title="Accuracy");

> What would happen without early stopping?

In [None]:
Y_pred = model.predict(X_test)
Y_pred_cat = np.argmax(Y_pred, axis=1)  # index of the highest softmax output per row

# ImageId is a 1-based running index with one entry per prediction; its length
# is derived from the predictions instead of hard-coding the test-set size.
submission = pd.DataFrame({
    "ImageId": np.arange(1, len(Y_pred_cat) + 1),
    "Label": Y_pred_cat,
})
submission.to_csv("lenet5_aug.csv", index=False) # 0.99046

# Part 4 - Confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
Y_valid_pred = model.predict(X_valid)

A = np.argmax(Y_valid_pred,axis = 1)  # predicted class per validation sample
B = np.argmax(Y_valid,axis = 1)       # true class, recovered from the one-hot targets

# confusion_matrix takes (y_true, y_pred): rows are the true classes and
# columns the predicted ones. The arguments were previously passed the other
# way round, which silently transposed the matrix.
confusion_mtx = confusion_matrix(B, A)
f,ax = plt.subplots(figsize=(10, 10))
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01,cmap="Blues", fmt= '.0f',ax=ax)
# Label the axes so the orientation of the matrix is unambiguous.
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion matrix (validation set)")
plt.show()