# Digit Recognizer

Hello everyone,

I'm going to implement a **neural network** using **PyTorch** to predict handwritten digits.

In this notebook, I will **NOT** implement a **Convolutional NN**. For now, my plan is to use a simple model with an input layer, 2 hidden layers and an output layer.

After finishing this project, I am also planning to implement a CNN in another notebook.

# 1. Preparing Data

In this section, I will
* import packages
* import csv files
* check missing values
* plot some images
* create tensors and dataloaders

In [None]:
# importing packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Read the competition's training and test tables into DataFrames,
# then preview the first rows of the training table.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/digit-recognizer/train.csv",
        "../input/digit-recognizer/test.csv",
    )
)
train.head()

In [None]:
# Count the null cells over the whole table (sum per column, then sum of sums).
print("Checking if there are any missing values:")
for split_name, frame in (("Train", train), ("Test", test)):
    print("{}: {}".format(split_name, frame.isnull().sum().sum()))

In [None]:
# Splitting the labelled data into train and validation sets (validation = 20%).
# random_state pins the shuffle so the split -- and therefore every loss value
# reported further down -- is the same on each run; stratify keeps the share of
# every digit equal in the two sets.
pixel_columns = train.drop("label", axis=1)
digit_labels = train["label"]
train_X, val_X, train_y, val_y = train_test_split(
    pixel_columns,
    digit_labels,
    test_size=0.20,
    random_state=42,
    stratify=digit_labels,
)

print("Shape of training set: {}".format(train_X.shape))
print("Shape of validation set: {}".format(val_X.shape))
print("Shape of test set: {}".format(test.shape))

In [None]:
# Sanity check: draw a few training images next to each other, each titled
# with its label. The numbers are positional rows (iloc) of the training split.
indices = [42, 314, 2022, 33333]

f, ax = plt.subplots(1, len(indices))
for axis, row in zip(ax, indices):
    # every row holds 784 pixel values -> reshape into a 28x28 picture
    axis.imshow(train_X.iloc[row].values.reshape(28, 28))
    axis.set_title("Label: {}".format(train_y.iloc[row]))

In [None]:
def _pixels_to_tensor(frame):
    """Return the frame's values as a tensor divided by 255.0 (normalization)."""
    return torch.tensor(frame.values) / 255.0


# Image data of the train, validation and test sets as normalized tensors
train_X_tensor = _pixels_to_tensor(train_X)
val_X_tensor = _pixels_to_tensor(val_X)
test_tensor = _pixels_to_tensor(test)

# Labels of the train and validation sets as tensors
train_y_tensor, val_y_tensor = (
    torch.tensor(label_series.values) for label_series in (train_y, val_y)
)

# Pair every image with its label so both can be batched together
train_tensor = TensorDataset(train_X_tensor, train_y_tensor)
val_tensor = TensorDataset(val_X_tensor, val_y_tensor)

In [None]:
# One DataLoader per split, looked up by name.
# The test loader wraps the bare image tensor (no labels) and is not shuffled,
# so its batches follow the row order of the test table.
dataloaders = {
    "train": DataLoader(train_tensor, batch_size=32, shuffle=True),
    "val": DataLoader(val_tensor, batch_size=16, shuffle=True),
    "test": DataLoader(test_tensor, batch_size=16),
}

# 2. Model Initiation

In this section, I will define
* nn model
* validation function
* training function
* criterion and optimizer

In [None]:
# Fully connected network:
#   input (784) -> hidden (512) -> hidden (128) -> output (10)
# Every hidden layer is followed by ReLU and dropout; the output layer is
# followed by LogSoftmax, so the model emits log-probabilities.
input_size = 784
output_size = 10
hidden_layers = [512, 128]
p_drop = 0.25

layer_widths = [input_size] + hidden_layers
stack = []
for n_in, n_out in zip(layer_widths[:-1], layer_widths[1:]):
    stack.extend([nn.Linear(n_in, n_out), nn.ReLU(), nn.Dropout(p=p_drop)])
stack.extend([nn.Linear(hidden_layers[-1], output_size), nn.LogSoftmax(dim=1)])

model_recognizer = nn.Sequential(*stack)
print(model_recognizer)

In [None]:
### VALIDATION FUNCTION
def validation(model, loader, criterion, device="cpu"):
    """Evaluate ``model`` on every batch yielded by ``loader``.

    The model is switched to eval mode (and left in it) and no gradients
    are tracked.

    Args:
        model: network whose output has one score per class along dim 1.
        loader: iterable yielding ``(images, labels)`` batches.
        criterion: callable ``criterion(output, labels)`` returning a scalar
            tensor for the batch.
        device: device the batches are moved to; the model must already be
            on that device.

    Returns:
        ``(loss, accuracy)`` as Python floats. ``loss`` is the mean of the
        per-batch criterion values; ``accuracy`` is the fraction of all
        samples whose highest-scoring class equals the label.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    n_batches = 0
    n_correct = 0
    n_samples = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            output = model(images)
            loss_sum += criterion(output, labels).item()
            n_batches += 1

            # exp() is monotonic, so the arg-max of the log-probabilities is
            # already the predicted class.
            predicted = output.argmax(dim=1)
            # Count hits per sample rather than averaging per-batch means, so
            # a smaller final batch does not get extra weight.
            n_correct += (predicted == labels).sum().item()
            n_samples += labels.size(0)

    if n_samples == 0:
        raise ValueError("validation() needs a loader that yields at least one sample")

    return loss_sum / n_batches, n_correct / n_samples

In [None]:
### TRAINING FUNCTION
def train_model(model, trainloader, validloader, criterion, optimizer, epochs=3, print_every=40, device="cpu"):
    """Train ``model`` and record the loss on both loaders after every epoch.

    Args:
        model: network to train; moved to ``device`` first.
        trainloader: loader yielding ``(images, labels)`` training batches.
        validloader: loader yielding ``(images, labels)`` validation batches.
        criterion: loss callable ``criterion(output, labels)``.
        optimizer: optimizer already bound to the model's parameters.
        epochs: number of passes over ``trainloader``.
        print_every: a progress line is printed each time this many batches
            have been processed within an epoch.
        device: device the model and the batches are moved to.

    Returns:
        ``(train_loss_per_epoch, valid_loss_per_epoch)``: two lists with one
        loss value per epoch, both measured with ``validation``.
    """
    model.to(device)

    epoch_train_losses, epoch_valid_losses = [], []
    progress_line = (
        "Epoch: {} of {}"
        "... Train Loss: {:.3f}"
        "... Valid Loss: {:.3f}"
        "... Valid Accuracy: {:.3f}"
    )

    for epoch_number in range(1, epochs + 1):
        model.train()
        window_loss = 0

        for step, (images, labels) in enumerate(trainloader, start=1):
            images = images.to(device)
            labels = labels.to(device)

            # One optimisation step on this batch
            optimizer.zero_grad()
            batch_loss = criterion(model.forward(images), labels)
            batch_loss.backward()
            optimizer.step()
            window_loss += batch_loss.item()

            if step % print_every != 0:
                continue

            # Progress report: mean training loss since the previous report
            # plus the current validation metrics.
            model.eval()
            valid_loss, valid_accuracy = validation(model, validloader, criterion, device)
            print(progress_line.format(epoch_number, epochs, window_loss / print_every,
                                       valid_loss, valid_accuracy))
            window_loss = 0
            model.train()  # validation switched to eval mode; resume training mode

        # End of epoch: measure the loss on the complete train and validation sets
        train_loss, _ = validation(model, trainloader, criterion, device)
        valid_loss, _ = validation(model, validloader, criterion, device)
        epoch_train_losses.append(train_loss)
        epoch_valid_losses.append(valid_loss)

    return epoch_train_losses, epoch_valid_losses

In [None]:
# Loss and optimizer used for training:
#   - NLLLoss is applied to the model's LogSoftmax output
#   - Adam updates all parameters of model_recognizer
learning_rate = 0.001
criterion = nn.NLLLoss()
optimizer = optim.Adam(model_recognizer.parameters(), lr=learning_rate)

# 3. Training the Model

In this section, I will
* TRAIN THE MODEL !!!

In [None]:
# epochs      : how many times the model goes through the entire train dataset
# print_every : there are a lot of batches, so loss and accuracy are only
#               printed once every "print_every" steps instead of per batch
# device      : cpu is selected because the model is not too complicated;
#               gpu is an option for faster training (device="cuda")
training_options = dict(epochs=8, print_every=300, device="cpu")
train_loss, valid_loss = train_model(
    model_recognizer,
    dataloaders["train"],
    dataloaders["val"],
    criterion,
    optimizer,
    **training_options
)

In [None]:
# Show the per-epoch losses returned by train_model, with a blank line
# between the two lists.
print(
    "Training loss in each epoch:",
    train_loss,
    "",
    "Validation loss in each epoch:",
    valid_loss,
    sep="\n",
)

In [None]:
# Losses recorded from an earlier run in which the model was trained for 16 epochs.
# They are hard-coded here (one value per epoch, epoch 1 first) so that later
# plots can use them without training for 16 epochs again.
train_loss_16_epochs = [0.13787669561581598, 0.07582466917600306, 0.05383466555792395, 0.034825331529801974, 0.028129039008843913, 0.020988315712311304, 0.02332191020201814, 0.017009653160293098, 0.016772397063941753, 0.021548685492307415, 0.009186462120274947, 0.011247794017871716, 0.010653088813236254, 0.008269465251884347, 0.009493477538097289, 0.005730943549323689]
valid_loss_16_epochs = [0.17423813392363843, 0.12283760847795444, 0.11133580147956743, 0.09436281601324611, 0.10040175634666368, 0.09744789005078063, 0.1031095781708843, 0.09291011415699922, 0.10281138533085445, 0.12173442720597276, 0.10909194191648783, 0.10568566672965449, 0.1245951993337975, 0.11474877005043205, 0.12414950424937972, 0.11632858605553643]

In [None]:
# Training vs. validation loss per epoch, using the recorded 16-epoch results
train_loss = train_loss_16_epochs
valid_loss = valid_loss_16_epochs
x = np.arange(1, len(train_loss) + 1)  # epoch numbers, starting at 1

# Locate the (first) smallest validation loss; it gets an annotation in the plot
valid_ind, valid_min = min(enumerate(valid_loss), key=lambda pair: pair[1])
x_min = x[valid_ind]
annot_text = "Min Valid Loss\nLoss: {:.4f}\nEpoch: {}".format(valid_min, x_min)

# Plot both curves on one figure
plt.subplots(figsize=(10, 6))
curves = (
    (train_loss, "blue", 'h', "Train Loss"),
    (valid_loss, "red", 'd', "Valid Loss"),
)
for losses, colour, marker_shape, legend_label in curves:
    plt.plot(x, losses, color=colour, lw=2, ls='-', marker=marker_shape, label=legend_label)
# Arrow points at the minimum; the text sits up and to the left of it
plt.annotate(
    annot_text,
    xy=(x_min, valid_min),
    xytext=(x_min - 2, valid_min + 0.05),
    arrowprops=dict(arrowstyle="fancy"),
)
plt.legend()
plt.show()

In the training section, I first trained my model for 16 epochs. Plotting the loss values of each epoch shows that:
* Training loss keeps decreasing. This is expected, since the model learns from the training dataset.
* Validation loss, however, stops decreasing and at some point starts increasing.
* That's because of overfitting: the model learns too much about the training data.
* The minimum validation loss is reached after 8 epochs.
* As a result, I will train my model for 8 epochs and make a submission.

# 4. Prediction

In this section, I will
* define function for prediction
* predict the test dataset
* submit my results

In [None]:
### PREDICTION FUNCTION
def prediction(model, loader, device="cpu"):
    """Predict a class index for every sample yielded by ``loader``.

    The model is moved to ``device``, switched to eval mode and run without
    gradient tracking.

    Args:
        model: network whose output has one score per class along dim 1.
        loader: iterable of batches. A batch is either an image tensor or a
            list/tuple whose first element is the image tensor (e.g. the
            ``(images, labels)`` pairs of a labelled loader).
        device: device used for the forward passes.

    Returns:
        1-D ``torch.long`` tensor on the CPU holding the predicted class of
        each sample in loader order; empty if the loader yields nothing.
    """
    model.to(device)
    model.eval()
    batch_preds = []

    with torch.no_grad():
        for batch in loader:
            # Labelled loaders yield [images, labels]; only the images are needed.
            images = batch[0] if isinstance(batch, (list, tuple)) else batch
            images = images.to(device)

            output = model(images)
            # exp() is monotonic, so the arg-max of the log-probabilities is
            # already the predicted class.
            batch_preds.append(output.argmax(dim=1).to("cpu"))

    if not batch_preds:
        return torch.empty(0, dtype=torch.long)
    # Concatenate once at the end instead of re-copying the result per batch.
    return torch.cat(batch_preds, dim=0)

In [None]:
# Predicted digit for every batch of the test loader, in loader order
y_pred = prediction(model=model_recognizer, loader=dataloaders["test"])
y_pred

In [None]:
# Creating a dataframe for results
# The predictions are converted to a NumPy array explicitly, so the 'Label'
# column holds plain integers no matter how the installed pandas version
# treats torch tensors.
predicted_labels = y_pred.cpu().numpy()
# ImageId is the row position of the test table shifted to start at 1
result = pd.DataFrame({'ImageId': test.index + 1, 'Label': predicted_labels})
result.head()

In [None]:
# Write the submission file without the DataFrame index column
result.to_csv('submission.csv', index=False)
print("Results are saved to submission.csv")