**<center><font size=6>Digit Recognizer with CNN</font></center>**
***

**Date**: 19.01.2021

**Table of Contents**
- <a href='#intro'>1. The data</a> 
- <a href='#split'>2. Splitting the data</a>
- <a href='#fit'>3. Fitting and testing the models</a>
    - <a href='#relu_sigm'>3.1. NN with RELU and Sigmoid</a>
    - <a href='#relu_soft'>3.2. NN with RELU and Softmax</a>
    - <a href='#cnn'>3.3. CNN with RELU and Softmax</a>
    - <a href='#sum'>3.4. Summary</a>
- <a href='#pred'>4. Predicting with CNN</a>
- <a href='#sub'>5. Submitting the file</a>

# <a id='intro'>1. The data</a>

In [None]:
# Matplotlib config
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']
%config InlineBackend.rc = {'figure.figsize': (5.0, 3.0)}

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read both competition CSVs: the labelled training images (df) and the
# images that predictions are produced for later (df_pred).
df, df_pred = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/digit-recognizer/train.csv',
        '../input/digit-recognizer/test.csv',
    )
)

In [None]:
# Preview the first rows of the training frame.
df.head()

In [None]:
# Inspect the dtype of every column in the training frame.
df.dtypes

In [None]:
# Report the (rows, columns) shape of the two raw frames.
print(f'All data set: {df.shape}')
print(f'Prediction set: {df_pred.shape}')

In [None]:
# The label of the first photo: row 0, column 0 of the training frame.
df.iloc[0,0]

In [None]:
# Plot the first photo: every column after the first is taken as pixel data
# and reshaped into a 28x28 grid for display.
first_image = df.iloc[0, 1:].to_numpy().reshape(28, 28)
plt.imshow(first_image)

In [None]:
# Preview the first 25 training images in a 5x5 grid, each captioned with
# its label.
plt.figure(figsize=(5,5))
for i in range(25):
    plt.subplot(5,5,i+1)
    # Hide ticks and grid lines: only the image and its caption matter here.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(df.iloc[i,1:].to_numpy().reshape(28,28))
    # Pass the label itself rather than a one-element list, so the caption
    # reads "1" instead of the list repr "[1]".
    plt.xlabel(str(df.iloc[i,0]))
plt.show()

In [None]:
# Define the X and y variables; the label column is removed from the features.
X = df.drop('label', axis=1)

# Build X_pred as a separate frame instead of aliasing df_pred: a later cell
# adds 'label' and 'ImageId' columns to df_pred, and dropping them here (when
# present) keeps this cell safe to re-run at any point in the notebook.
X_pred = df_pred.drop(columns=['label', 'ImageId'], errors='ignore')

y = df['label']

print(X.head())
print(y.head())

In [None]:
# Report the shapes of the feature frames after removing the label column.
print(f'All data set: {X.shape}')
print(f'Prediction set: {X_pred.shape}')

# <a id='split'>2. Splitting the data</a>

In [None]:
# Split the labelled data: 20% is held out as a test set, with a fixed
# random_state so the split is the same on every run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=32,
)

print(f'Train set: {X_train.shape}')
print(f'Test set: {X_test.shape}')

print(f'Prediction set: {X_pred.shape}')

In [None]:
# Cast the three feature frames to float32.
X_train, X_test, X_pred = (
    features.astype(np.float32) for features in (X_train, X_test, X_pred)
)

# <a id='fit'>3. Fitting and testing the models</a>

## <a id='relu_sigm'>3.1. NN with RELU and Sigmoid</a>

In [None]:
# Dataframes to numpy arrays with 784 columns, divided by 255.
# The CNN section applies the same /255 scaling to its inputs; scaling here
# as well means the models in the summary table are compared on identically
# preprocessed data.
X_train_relu_sigm = X_train.to_numpy().reshape(-1, 784) / 255
X_test_relu_sigm = X_test.to_numpy().reshape(-1, 784) / 255
X_pred_relu_sigm = X_pred.to_numpy().reshape(-1, 784) / 255

from tensorflow.keras.utils import to_categorical
# One-hot encode the training labels. num_classes is fixed to 10 (the size of
# the model's output layer) instead of being inferred from the largest label
# present in this particular split.
y_train_relu_sigm = to_categorical(y_train, num_classes=10)
# Test labels stay as integer digits; they are compared against argmax
# predictions when the accuracy is computed.
y_test_relu_sigm = y_test

In [None]:
#Loading Tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Fully connected network: 784 inputs -> 1030 ReLU units -> 10 sigmoid outputs.
model_relu_sigm = keras.Sequential([
    keras.Input(shape = (784,)),
    layers.Dense(1030, activation = "relu"),
    layers.Dense(10, activation = "sigmoid")
])

# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the cell output.
model_relu_sigm.summary()

# Sigmoid outputs are trained with binary cross-entropy against the one-hot
# labels; categorical accuracy is tracked as the metric.
model_relu_sigm.compile(
    optimizer = keras.optimizers.RMSprop(),
    loss = keras.losses.BinaryCrossentropy(),
    metrics = [
        keras.metrics.CategoricalAccuracy()
    ]
)

# First training phase: 10 epochs, batches of 64, validation_split of 0.1.
model_relu_sigm.fit(X_train_relu_sigm, y_train_relu_sigm,
          batch_size = 64,
          epochs = 10,
          validation_split = 0.1
         )

In [None]:
# Train further with a smaller learning rate (0.0001): the same model is
# re-compiled with a new RMSprop optimizer and fitted for 5 more epochs.
model_relu_sigm.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=0.0001),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

model_relu_sigm.fit(
    X_train_relu_sigm,
    y_train_relu_sigm,
    batch_size=64,
    epochs=5,
    validation_split=0.1,
)

In [None]:
# Score the sigmoid network on the held-out test set: the predicted digit is
# the index of the largest output, and accuracy is the share of predictions
# equal to the test labels.
y_test_pred_relu_sigm = model_relu_sigm.predict(
    X_test_relu_sigm.astype(np.float32)
)
predicted_digits_relu_sigm = np.argmax(y_test_pred_relu_sigm, axis=1)
A_relu_sigm = np.mean(predicted_digits_relu_sigm == y_test_relu_sigm)
print(f'Accuracy of RELU Sigmoid NN: {A_relu_sigm!s}')

## <a id='relu_soft'>3.2. NN with RELU and Softmax</a>

In [None]:
# Dataframes to numpy arrays with 784 columns, divided by 255.
# The CNN section applies the same /255 scaling to its inputs; scaling here
# as well means the models in the summary table are compared on identically
# preprocessed data.
X_train_relu_soft = X_train.to_numpy().reshape(-1, 784) / 255
X_test_relu_soft = X_test.to_numpy().reshape(-1, 784) / 255
X_pred_relu_soft = X_pred.to_numpy().reshape(-1, 784) / 255

from tensorflow.keras.utils import to_categorical
# One-hot encode the training labels. num_classes is fixed to 10 (the size of
# the model's output layer) instead of being inferred from the largest label
# present in this particular split.
y_train_relu_soft = to_categorical(y_train, num_classes=10)
# Test labels stay as integer digits; they are compared against argmax
# predictions when the accuracy is computed.
y_test_relu_soft = y_test

In [None]:
#Loading Tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Fully connected network: 784 inputs -> 1024 ReLU units -> 10 softmax outputs.
model_relu_soft = keras.Sequential([
    keras.Input(shape = (784,)),
    layers.Dense(1024, activation = "relu"),
    layers.Dense(10, activation = "softmax")
])

# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the cell output.
model_relu_soft.summary()

# Softmax outputs are trained with categorical cross-entropy against the
# one-hot labels; categorical accuracy is tracked as the metric.
model_relu_soft.compile(
    optimizer = keras.optimizers.RMSprop(),
    loss = keras.losses.CategoricalCrossentropy(),
    metrics = [
        keras.metrics.CategoricalAccuracy()
    ]
)

# First training phase: 10 epochs, batches of 64, validation_split of 0.1.
model_relu_soft.fit(X_train_relu_soft, y_train_relu_soft,
                    batch_size = 64,
                    epochs = 10,
                    validation_split = 0.1
                   )

In [None]:
# Train further with a smaller learning rate (0.0001): the same model is
# re-compiled with a new RMSprop optimizer and fitted for 7 more epochs.
model_relu_soft.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=0.0001),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

model_relu_soft.fit(
    X_train_relu_soft,
    y_train_relu_soft,
    batch_size=64,
    epochs=7,
    validation_split=0.1,
)

In [None]:
# Score the softmax network on the held-out test set: the predicted digit is
# the index of the largest output, and accuracy is the share of predictions
# equal to the test labels.
y_test_pred_relu_soft = model_relu_soft.predict(X_test_relu_soft)
predicted_digits_relu_soft = np.argmax(y_test_pred_relu_soft, axis=1)
A_relu_soft = np.mean(predicted_digits_relu_soft == y_test_relu_soft)
print(f'Accuracy of RELU Softmax NN: {A_relu_soft!s}')

## <a id='cnn'>3.3. CNN with RELU and Softmax</a>

In [None]:
# Dataframes to numpy arrays, reshaped to (n_images, 28, 28, 1) so each image
# is a 28x28 grid with one trailing channel axis.
cnn_array_shape = (-1, 28, 28, 1)
X_train_cnn = np.reshape(X_train.to_numpy(), cnn_array_shape)
X_test_cnn = np.reshape(X_test.to_numpy(), cnn_array_shape)
X_pred_cnn = np.reshape(X_pred.to_numpy(), cnn_array_shape)

In [None]:
# Check the shape of the reshaped CNN training array.
print(X_train_cnn.shape)

In [None]:
# Normalize: the scaled arrays are rebuilt from the source frames instead of
# dividing X_*_cnn by 255 in place, so re-running this cell cannot scale the
# data a second time.
X_train_cnn = X_train.to_numpy().reshape(-1, 28, 28, 1) / 255
X_test_cnn = X_test.to_numpy().reshape(-1, 28, 28, 1) / 255
X_pred_cnn = X_pred.to_numpy().reshape(-1, 28, 28, 1) / 255

from tensorflow.keras.utils import to_categorical
# One-hot encode the training labels. num_classes is fixed to 10 (the size of
# the model's output layer) instead of being inferred from the largest label
# present in this particular split.
y_train_cnn = to_categorical(y_train, num_classes=10)
# Test labels stay as integer digits; they are compared against argmax
# predictions when the accuracy is computed.
y_test_cnn = y_test

In [None]:
#Loading Tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Two Conv2D(50, 3x3, same padding, ReLU) + MaxPooling2D stages, then
# Flatten -> Dropout(0.5) -> Dense(1000, ReLU) -> Dense(10, softmax).
model_cnn = keras.Sequential([
    keras.Input(shape = (28, 28, 1)),
    layers.Conv2D(50, kernel_size = (3, 3), padding = "same", activation = "relu"),
    layers.MaxPooling2D(),
    layers.Conv2D(50, kernel_size = (3, 3), padding = "same", activation = "relu"),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(1000, activation = "relu"),
    layers.Dense(10, activation = "softmax")
])

# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the cell output.
model_cnn.summary()

In [None]:
# First training phase for the CNN: RMSprop with its default settings,
# categorical cross-entropy loss, 15 epochs in batches of 64.
model_cnn.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

model_cnn.fit(
    X_train_cnn,
    y_train_cnn,
    batch_size=64,
    epochs=15,
    validation_split=0.1,
)

In [None]:
# Train further with a smaller learning rate (0.0001): the same model is
# re-compiled with a new RMSprop optimizer and fitted for 10 more epochs.
model_cnn.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=0.0001),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

model_cnn.fit(
    X_train_cnn,
    y_train_cnn,
    batch_size=64,
    epochs=10,
    validation_split=0.1,
)

In [None]:
# Score the CNN on the held-out test set: the predicted digit is the index of
# the largest output, and accuracy is the share of predictions equal to the
# test labels.
y_test_pred_cnn = model_cnn.predict(X_test_cnn)
predicted_digits_cnn = np.argmax(y_test_pred_cnn, axis=1)
A_cnn = np.mean(predicted_digits_cnn == y_test_cnn)
print(f'Accuracy of CNN: {A_cnn!s}')

## <a id='sum'>3.4. Summary</a>

In [None]:
# Collect the test accuracy of each model in one table, then rank the models
# from highest to lowest accuracy.
model_names = ['RELU Sigmoid NN', 'RELU Softmax NN', 'CNN']
test_accuracies = [A_relu_sigm, A_relu_soft, A_cnn]
relevant_metrics_test = pd.DataFrame(
    {'Model': model_names, 'Accuracy, A': test_accuracies}
)
best_model_test = relevant_metrics_test.sort_values(
    'Accuracy, A', ascending=False
)
best_model_test

# <a id='pred'>4. Predicting with CNN</a>

In [None]:
# Predict class scores for the competition images and keep the index of the
# largest score as the predicted digit.
y_pred_cnn = model_cnn.predict(X_pred_cnn)
predicted_labels = y_pred_cnn.argmax(axis=1)

# Both columns are written onto df_pred itself; the cells below read them
# from there. ImageId numbers the rows starting at 1.
df_pred['label'] = predicted_labels
df_pred['ImageId'] = range(1, len(y_pred_cnn) + 1)

In [None]:
# Preview the prediction frame, which now carries the added 'label' and
# 'ImageId' columns.
df_pred.head()

# <a id='sub'>5. Submitting the file</a>

In [None]:
# Keep only the two submission columns, ImageId first, and write them to CSV
# with a header row and without the index.
# NOTE(review): confirm that the competition accepts a lowercase 'label'
# header for the prediction column.
df_sub = df_pred[['ImageId', 'label']].copy()

df_sub.to_csv('submission_cezara.csv', index=False, header=True)