# MNIST Neural Network

Working from this [kernel in Kaggle](https://www.kaggle.com/sdelecourt/cnn-with-pytorch-for-mnist)

In [6]:
import numpy as np # to handle matrix and data operation
import pandas as pd # to read csv and handle dataframe

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable

from sklearn.model_selection import train_test_split


In [11]:
# Trying to keep training parameters in the same place
BATCH_SIZE = 32
EPOCHS = 5

In [12]:
df = pd.read_csv('digit-recognizer/train.csv')
print(df.shape)

(42000, 785)


In [13]:
y = df['label'].values
X = df.drop(['label'],1).values # Drop the labels so you don't get data pollution

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15) # Split into test and training data

In [9]:
torch_X_train = torch.from_numpy(X_train).type(torch.LongTensor)
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor) # data type is long; But why?

# create feature and targets tensor for test set
torch_X_test = torch.from_numpy(X_test).type(torch.LongTensor)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor) # data type is long

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

It looks like the data flow in torch in this example goes follows:
1. Read the data in from csv file
2. Convert the data to numpy array (can be combined with step 1)
3. Split the dataset into test and train data, using scikit learn functionality
4. Convert the test and train datasets into *Torch Tensor*
5. Convert the *Torch Tensors* into *Tensor Datasets* for both train and test data
6. Input the Datasets into a data loader, considering a specified batch size

Seems like an unnecessary number of steps; Chance to optimize, at the very least for shorter and clearer code.

In [10]:
# Same as other example; Create an inherited class for the neural network
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(784,250)
        self.linear2 = nn.Linear(250,100)
        self.linear3 = nn.Linear(100,10)
    
    def forward(self,X):
        X = F.relu(self.linear1(X))
        X = F.relu(self.linear2(X))
        X = self.linear3(X)
        return F.log_softmax(X, dim=1)
 
mlp = MLP()
print(mlp)

MLP(
  (linear1): Linear(in_features=784, out_features=250, bias=True)
  (linear2): Linear(in_features=250, out_features=100, bias=True)
  (linear3): Linear(in_features=100, out_features=10, bias=True)
)


'*' Copied from the example code

We have 784*(250+1) + 250*(100+1) + 100*(10+1) = 222 360 parameters to train

In [48]:
def fit(model, train_loader):
    # Adam is a method for stochastic gradient descent; avail. below
    # https://pytorch.org/docs/stable/_modules/torch/optim/adam.html
    optimizer = torch.optim.Adam(model.parameters())#,lr=0.001, betas=(0.9,0.999))
    error = nn.CrossEntropyLoss() # Cross Entropy loss is a loss fxn, AKA log loss
    # Read more aboout cross entropy, or log loss, here: https://ml-cheatsheet.readthedocs.io/en/latest/loss_functions.html
    model.train() # Calling .train() method on nn.Module object
    for epoch in range(EPOCHS):
        correct = 0
        for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
            var_X_batch = Variable(X_batch).float()
            var_y_batch = Variable(y_batch)
            optimizer.zero_grad()
            output = model(var_X_batch)
            loss = error(output, var_y_batch)
            loss.backward()
            optimizer.step()

            # Total correct predictions
            predicted = torch.max(output.data, 1)[1] 
            correct += (predicted == var_y_batch).sum()
            #print(correct)
            if batch_idx % 50 == 0:
                print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch, batch_idx*len(X_batch), len(train_loader.dataset), 100.*batch_idx / len(train_loader), loss.item(), float(correct*100) / float(BATCH_SIZE*(batch_idx+1))))
                

Run the model

In [49]:
fit(mlp, train_loader)



In [58]:
def evaluate(model):
#model = mlp
    correct = 0 
    for test_imgs, test_labels in test_loader:
#         print(test_imgs.shape)
        test_imgs = Variable(test_imgs).float()
#         print(test_imgs.shape)
        output = model(test_imgs)
        predicted = torch.max(output,1)[1]
        correct += (predicted == test_labels).sum()
    print("Test accuracy:{:.3f}% ".format( float(correct) / (len(test_loader)*BATCH_SIZE)))
evaluate(mlp)

Test accuracy:0.962% 


In [80]:
#### Sam Exploration
# I want to run this model on an image of a digit and see, in near realtime, what it predicts
from PIL import Image
import os
from matplotlib import pyplot as plt
from IPython.display import display
from ipywidgets import interact # Interactive IPython is badass; Highly recommend. More info on setup here: https://ipywidgets.readthedocs.io/en/stable/examples/Using%20Interact.html


SAMPLE_JPEGS = 'digit-recognizer/testSample/'
SAMPLE_JPEGS = [SAMPLE_JPEGS +'/' +i for i in os.listdir(SAMPLE_JPEGS)]
# im = Image.open(SAMPLE_JPEGS[0])
# display(im)
@interact
def test_model(im_index = 100):
    """
    Tests the Computer vision model in namespace as 'mlp', using an image read from the im_file filepath
    """
    im_file = SAMPLE_JPEGS[im_index]
    # Gets and displays the image 
    im = Image.open(im_file)
    im_arr = np.array(im)
    plt.imshow(im_arr, cmap ='binary')
#     plt.show()
    
    # Ravel the array to get something to feed to the classifier neural net
    im_arr = im_arr.ravel()
    im_arr_test = torch.Tensor(im_arr).float()
    im_arr_test = im_arr_test.unsqueeze(0) # Adds a dimension to the structure with size 1 at index 0; semantics
#     print(im_arr_test.shape)
    out = mlp(im_arr_test)
    pred = torch.max(out,1)[1][0]
    print('\n\n\nThe model predicted {} for the below image'.format(pred))
    

interactive(children=(IntSlider(value=100, description='im_index', max=300, min=-100), Output()), _dom_classes…

In [39]:
# help(mlp.eval())
mlp.eval()

MLP(
  (linear1): Linear(in_features=784, out_features=250, bias=True)
  (linear2): Linear(in_features=250, out_features=100, bias=True)
  (linear3): Linear(in_features=100, out_features=10, bias=True)
)