Here is my first attempt to solve the [MNIST](https://www.kaggle.com/c/digit-recognizer) problem using convolutional neural networks. Please PM me if you have any questions or suggestions, thanks!

## Importing and visualising

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

`x_sub` is the unlabelled test data: we will predict its labels and submit those predictions.

In [None]:
# Read the competition CSVs: `train` contains a 'label' column plus the pixel columns,
# `x_sub` is the test file whose predictions get submitted.
train = pd.read_csv('../input/digit-recognizer/train.csv')
x_sub = pd.read_csv('../input/digit-recognizer/test.csv')

In [None]:
# (rows, columns) of the training frame
train.shape

In [None]:
# (rows, columns) of the submission frame
x_sub.shape

In [None]:
# Convert the submission DataFrame into a plain NumPy array
x_sub = x_sub.to_numpy()

In [None]:
# Target column: the digit label of every training row
y_train = train['label']

In [None]:
# Features: every column except the label, as a NumPy array
x_train = train.drop(columns='label').to_numpy()

In [None]:
# Optional sanity check: view the first training row as a 28x28 image
single_image = x_train[0].reshape(28, 28)
# The gray colormap is purely cosmetic
plt.imshow(single_image, cmap='gray')

## PreProcessing data

### Normalizing

In [None]:
# Largest pixel value in the first image (before scaling)
x_train[0].max()

In [None]:
# Smallest pixel value in the first image (before scaling)
x_train[0].min()

In [None]:
# Scale pixel intensities by dividing by 255, for both the training and submission images.
# The labels are deliberately left untouched.
x_train, x_sub = x_train / 255, x_sub / 255

In [None]:
# Largest pixel value in the second image after dividing by 255
x_train[1].max()

### One-hot encoding labels

* 0 = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0] 
* 1 = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0] 
* 4 = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0] 
* https://machinelearningmastery.com/why-one-hot-encode-data-in-machine-learning/

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# One-hot encode the digit labels. num_classes is pinned to 10 (digits 0-9) so the
# encoded width always matches the 10-unit output layer, rather than being inferred
# from the largest label that happens to be present in the data.
y_train = to_categorical(y_train, num_classes=10)

In [None]:
# One-hot vector of the first training label
y_train[0]

In [None]:
# Shape of the encoded labels
y_train.shape

### Reshaping

Right now our training data is 42,000 images, each stored as a flat row of 784 pixel values (a 28 by 28 image unrolled).

We need to reshape each row back to 28 by 28 and add one more dimension for the channel. These images are grayscale, so there is a single channel (originally holding values from 0-255, now scaled to 0-1); a color (RGB) image would have 3 channels.

In [None]:
# Shape of the training array before reshaping
x_train.shape

In [None]:
# Reshape to (samples, height, width, channels). -1 lets NumPy infer the number of
# samples, so the cell does not depend on a hard-coded row count.
x_train = x_train.reshape(-1, 28, 28, 1)

In [None]:
# Same layout for the submission images: (samples, height, width, channels).
# -1 lets NumPy infer the number of samples instead of hard-coding it.
x_sub = x_sub.reshape(-1, 28, 28, 1)

In [None]:
# Shape of the training array after reshaping
x_train.shape

## Creating the model

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the labelled data as a validation set; random_state fixes the split.
# NOTE(review): y_train is overwritten with the training portion here, so re-running
# this cell on its own passes x_train and y_train of different lengths. Restart from
# the cell that defines y_train instead.
X_train, X_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.20, random_state=101)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten

In [None]:
# Small CNN: one convolution + pooling stage feeding a dense classifier.
model = Sequential([
    # 32 filters of size 4x4 over the (28,28,1) input -> (25,25,32)
    Conv2D(filters=32, kernel_size=(4,4), input_shape=(28,28,1), activation='relu'),
    # Max pooling with the default (2,2) window -> (12,12,32)
    MaxPool2D(),
    # Flatten the pooled feature maps (12,12,32) into a vector of 4608 values
    Flatten(),
    # Single dense hidden layer of 128 neurons
    Dense(units=128, activation='relu'),
    # Output layer for 10 mutually exclusive classes
    Dense(units=10, activation='softmax'),
])

# Metrics reference: https://keras.io/metrics/
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer overview of the model
model.summary()

### Adding Early stopping and TensorBoard

In [None]:
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

In [None]:
# Stop training once the validation loss has failed to decrease for 2 epochs
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=2,
)

In [None]:
from datetime import datetime

In [None]:
# Timestamp (to the minute) used to name this run's TensorBoard log folder
date = f"{datetime.now():%Y-%m-%d--%H%M}"

In [None]:
# One log folder per run under logs/fit. Forward slashes are used because they work
# on both Windows and POSIX systems; backslashes would create a single oddly named
# directory on Linux.
log_directory = 'logs/fit/' + date

# TensorBoard callback: logs histograms, graph, weight images and embeddings every
# epoch, and profiles batch 2.
board = TensorBoard(log_dir=log_directory,histogram_freq=1,
    write_graph=True,
    write_images=True,
    update_freq='epoch',
    profile_batch=2,
    embeddings_freq=1)

In [None]:
# Train for at most 15 epochs, validating on the held-out split.
# The callbacks write TensorBoard logs and stop early when val_loss stalls.
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=15,
    callbacks=[board, early_stop],
)

## Evaluating the model

In [None]:
# Names of the values the model reports (the loss and the compiled metrics)
model.metrics_names

In [None]:
# Collect the training history from the fit call into a DataFrame
# (presumably one row per epoch, one column per metric; verify with losses.head())
losses = pd.DataFrame(model.history.history)

In [None]:
# Peek at the first rows of the training history
losses.head()

In [None]:
# Training vs. validation accuracy
losses[['accuracy','val_accuracy']].plot()

In [None]:
# Training vs. validation loss
losses[['loss','val_loss']].plot()

## Making predictions to submit

This part is a bit messy, I could not come up with a better way to output the predictions

In [None]:
# Predict a digit for every submission image. model.predict returns the 10 softmax
# probabilities per image; argmax over the last axis picks the most likely class.
# (Sequential.predict_classes was removed from recent TensorFlow releases.)
predictions = pd.DataFrame(np.argmax(model.predict(x_sub), axis=-1))

In [None]:
# Turn the 0-based row index into a regular column (renamed to ImageId in the next cell)
predictions = predictions.reset_index()

In [None]:
# Name the two columns ImageId and Label for the submission file
predictions.columns = ['ImageId','Label']

In [None]:
# Shift the ids by one so they start at 1 instead of 0
predictions['ImageId'] = predictions['ImageId'] + 1

In [None]:
# Quick look at the submission frame before writing it out
predictions.head()

In [None]:
# Write the submission file without the DataFrame index column
predictions.to_csv('results.csv', index=False)

Use `tensorboard --logdir logs\fit`
to run TensorBoard,
then check http://localhost:6006/