# Introduction

This notebook contains **my first attempt at building a neural network!** I may update the presentation from time to time but I will leave the original architecture in place so I can use it to compare my progress on computer vision in the future.

The objective is to classify handwritten digits from the famous MNIST dataset.

# Libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks

# Data

In [None]:
# Training frame; it carries the 'label' column that later cells use as the target
train_data = pd.read_csv('../input/digit-recognizer/train.csv')

# Test frame, with every value divided by 255 at load time
# (presumably 0-255 pixel intensities, which puts them in [0, 1])
test_data = pd.read_csv('../input/digit-recognizer/test.csv').div(255)

# Report the training frame's dimensions, then display its first rows
print(train_data.shape)
train_data.head()

**Check for null values**

In [None]:
# Missing-value totals: per-column counts summed into one number per frame
n_missing_train = train_data.isna().sum().sum()
n_missing_test = test_data.isna().sum().sum()

print('Number of null values in training set:', n_missing_train)
print('')
print('Number of null values in test set:', n_missing_test)

**Preview a few sample images**

In [None]:
# 3x3 grid of training digits, each titled with the value stored in column 0
start_row = 18  # the grid shows rows 18..26 of the training frame
fig, axes = plt.subplots(3, 3, figsize=(8, 8))

for row, ax in enumerate(axes.flat, start=start_row):
    # Every column after the first is reshaped into a 28x28 image
    pixels = train_data.iloc[row, 1:].to_numpy().reshape(28, 28) / 255

    # Hide the grid lines and both axes so that only the digit is drawn
    ax.grid(False)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    ax.set_title(f'{train_data.iloc[row, 0]}')
    ax.imshow(pixels, cmap='gray')

plt.show()

**Explore label distribution**

In [None]:
# Bar chart counting how many training rows carry each label value
ax = sns.countplot(data=train_data, x='label')
ax.set_title('Distribution of labels in training set')

**Labels and features**

In [None]:
# Target: the 'label' column of the training frame
labels = train_data['label']

# One-hot encode the target (one column per distinct label value).
# The dtype is pinned because pandas >= 2.0 returns boolean dummy columns by
# default; float32 keeps the targets numeric regardless of the pandas version.
y = pd.get_dummies(labels, dtype='float32')

# Features: every column except 'label', divided by 255 -- the same scaling
# that is applied to test_data when it is loaded
X = train_data.drop(columns='label') / 255

**Break off validation set**

In [None]:
# Hold out 10% of the rows for validation; random_state fixes the shuffle so
# the same split is produced on every run
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.1,
    train_size=0.9,
    random_state=0,
)

# Model

In [None]:
# Fully connected classifier assembled layer by layer:
# two 256-unit ReLU blocks, each followed by 40% dropout, then a 10-unit softmax
model = keras.Sequential()

# hidden block 1 (declares the 784-value input)
model.add(layers.Dense(256, activation='relu', input_shape=[784]))
model.add(layers.Dropout(0.4))

# hidden block 2
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.4))

# output layer: softmax turns the 10 outputs into a probability distribution
model.add(layers.Dense(10, activation='softmax'))

# Targets are one-hot encoded, hence the categorical loss and metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['categorical_accuracy'],
)

# Stop once validation loss has failed to improve by at least 0.0001 for
# 10 consecutive epochs, and restore the weights of the best epoch.
# 'val_loss' is the callback's default monitor, spelled out here for clarity.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0001,
    patience=10,
    restore_best_weights=True,
)

# Train model

In [None]:
# Fit on the training split and score the validation split after every epoch.
# 100 is only an upper bound: the early-stopping callback may end training sooner.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    batch_size=256,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
    verbose=1,
)

**Learning curves**

In [None]:
# One row per completed epoch, one column per tracked loss/metric
history_df = pd.DataFrame(history.history)

# Training vs. validation curves
loss_ax = history_df[['loss', 'val_loss']].plot(title="Cross-entropy")
loss_ax.set_xlabel('epoch')
acc_ax = history_df[['categorical_accuracy', 'val_categorical_accuracy']].plot(title="Accuracy")
acc_ax.set_xlabel('epoch')

# Score the model as it stands now rather than reading the last history row.
# The last row describes the last epoch that ran, but the early-stopping
# callback was created with restore_best_weights=True, so the weights the
# model holds may come from an earlier epoch.
# evaluate() returns the loss followed by the single compiled metric.
_, final_val_accuracy = model.evaluate(X_valid, y_valid, verbose=0)
print('Final accuracy on validation set:', final_val_accuracy)

# Make predictions

In [None]:
# Softmax outputs for the test frame: one row per test row, one column per class
preds = model.predict(test_data)

# Predicted class = column index of the largest output in each row
pred_classes = preds.argmax(axis=1)

# Confidence = the value of that largest output
conf = preds.max(axis=1)

# Submission table; ImageId is the test_data index shifted up by one
output = pd.DataFrame({'ImageId': test_data.index + 1, 'Label': pred_classes})

# Check format
output.head()

**Plot predictions**

In [None]:
# Show the first 20 test rows as images in a 2x10 grid, each titled with the
# predicted class and the model's confidence (largest softmax output, x100)
fig, axes = plt.subplots(2, 10, figsize=(15, 4))
fig.suptitle('Model predictions', fontsize=20, y=1.05)

for i, ax in enumerate(axes.flat):
    # test_data was already divided by 255 when it was loaded, so the row is
    # only reshaped here -- it is not rescaled a second time
    img = test_data.iloc[i, :].to_numpy().reshape(28, 28)

    # Hide the grid lines and both axes so that only the digit is drawn
    ax.grid(False)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    ax.set_title(f'Pred:{pred_classes[i]} \n Conf:{np.round(100*conf[i],1)}', fontdict={'fontsize': 14})
    ax.imshow(img, cmap='gray')

plt.show()

**Submit predictions**

In [None]:
# Write the submission table to disk, leaving out the DataFrame index
output.to_csv(path_or_buf='submission.csv', index=False)