# Handwritten Digit Recognition
- Author = Amitrajit Bose
- Dataset = MNIST
- [Medium Article Link](https://medium.com/@amitrajit_bose/handwritten-digit-mnist-pytorch-977b5338e627)
- Frameworks = PyTorch


### Necessary Imports

In [14]:
# Import necessary packages
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
import numpy as np
import pandas as pd
import copy
from dbclass import TrainDB
from torchvision import datasets, transforms

# Preprocessing applied to every image: convert to a tensor, then
# normalise the single channel with the (0.5,), (0.5,) parameters.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5,), (0.5,))
transform = transforms.Compose([to_tensor, normalize])

# Fetch MNIST (download=True) into a shared root directory; `train`
# selects between the training split and the held-out split.
mnist_root = 'drive/My Drive/mnist/MNIST_data/'
trainset = datasets.MNIST(mnist_root, download=True, train=True, transform=transform)
valset = datasets.MNIST(mnist_root, download=True, train=False, transform=transform)

# Both loaders use the same settings: shuffled mini-batches of 64 samples.
loader_options = {'batch_size': 64, 'shuffle': True}
trainloader = torch.utils.data.DataLoader(trainset, **loader_options)
valloader = torch.utils.data.DataLoader(valset, **loader_options)

### Defining The Neural Network

In [15]:
from torch import nn

# Network dimensions: 784 input features, two hidden layers, 10 outputs
input_size = 784
hidden_sizes = [128, 64]
output_size = 10

# Feed-forward classifier: Linear -> ReLU -> Linear -> ReLU -> Linear,
# finished with a log-softmax over dim 1.  The layers are passed
# positionally so the state_dict keys stay '0.*', '2.*' and '4.*'.
first_hidden, second_hidden = hidden_sizes
layers = [
    nn.Linear(input_size, first_hidden),
    nn.ReLU(),
    nn.Linear(first_hidden, second_hidden),
    nn.ReLU(),
    nn.Linear(second_hidden, output_size),
    nn.LogSoftmax(dim=1),
]
model = nn.Sequential(*layers)

# Negative log-likelihood loss, applied to the log-softmax output above
criterion = nn.NLLLoss()

from torch import optim

# Optimizers require the parameters to optimize and a learning rate
# NOTE(review): `optimizer` is rebound in the training cell further down
# (lr=0.03, momentum=0.9) before any optimizer.step() in the visible cells,
# so this instance appears to be unused — confirm before removing.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def f(X):
    """Return the trace of X when it has more than one dimension, else 0."""
    return np.trace(X) if np.ndim(X) > 1 else 0
def g(X):
    """Return trace(X) + 10 when X has more than one dimension, else 10."""
    return np.trace(X) + 10 if np.ndim(X) > 1 else 10
# Name -> callable mapping; passed to TrainDB below as both `dictf` and `dictg`.
d = {'trace':f, 'trace2':g}

In [16]:
# Build the TrainDB instance from the model, the training loader and the
# loss; the same function dictionary is supplied for `dictf` and `dictg`.
# NOTE(review): `batchfreq=100` presumably controls how often (in batches)
# something is recorded — confirm against dbclass.TrainDB.
db = TrainDB(
    model,
    trainloader,
    criterion,
    dictf=d,
    dictg=d,
    batchfreq=100,
)

### Core Training Of Neural Network

In [17]:
# SGD with momentum for the training run
optimizer = optim.SGD(model.parameters(), lr=0.03, momentum=0.9)
time0 = time()
epochs = 2
for e in range(epochs):
    running_loss = 0
    for i, (images, labels) in enumerate(trainloader):
        # Snapshot the parameters before this update; db.step receives
        # both this pre-step state and the updated model.
        prev_state = copy.deepcopy(model.state_dict())

        # One row per sample: flatten each image into a 784 long vector
        images = images.view(images.shape[0], -1)

        # Forward pass on a clean set of gradients
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)

        # Backward pass, then weight update.
        # NOTE(review): create_graph/retain_graph keep the autograd graph
        # alive after backward — presumably needed by TrainDB; confirm.
        loss.backward(create_graph=True,retain_graph=True)
        optimizer.step()

        # Read the scalar loss once and use it for both the DB record and
        # the epoch average.
        batch_loss = loss.item()
        db.step(e,i,prev_state,model,batch_loss)
        running_loss += batch_loss

    # The inner loop has no `break`, so this runs once at the end of
    # every epoch.
    print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
    # print((model[0].weight))
print("\nTraining Time (in minutes) =",(time()-time0)/60)



Epoch 0 - Training loss: 0.35990693663229056
Epoch 1 - Training loss: 0.16799336516939756

Training Time (in minutes) = 0.4565428455670675


In [20]:
# Pull the four recorded tables off the TrainDB instance in one go.
table1, table2, table3, table4 = (
    db.tweight,
    db.tnorm,
    db.tdiffnorm,
    db.tdictf,
)


#table3[['0.weight', '2.weight', '4.weight']][:].plot()
#table2[['0.weight', '2.weight', '4.weight']].plot()
#table2.loc[1,'0.weight']
#table1.loc[1]
# Last expression of the cell: displays table4
table4

Unnamed: 0,0.weight,0.bias,2.weight,2.bias,4.weight,4.bias
count,39.0,39.0,39.0,39.0,39.0,39.0
mean,0.191355,0.0,-0.830966,0.0,-0.024677,0.0
std,0.261565,0.0,0.342625,0.0,0.050115,0.0
min,-0.40107,0.0,-1.364218,0.0,-0.102732,0.0
25%,0.022081,0.0,-1.137994,0.0,-0.064942,0.0
50%,0.294332,0.0,-0.838224,0.0,-0.031669,0.0
75%,0.398796,0.0,-0.592095,0.0,0.013392,0.0
max,0.497412,0.0,-0.153049,0.0,0.070299,0.0


In [None]:
# Take the first stored '0.weight' array at index (0, 10), wrap it in a
# tensor and show whether that tensor tracks gradients.
# NOTE(review): (0, 10) is presumably (epoch, batch) — confirm.
stored_weight = table1.loc[(0,10),'0.weight'].values[0]
torch.from_numpy(stored_weight).requires_grad

In [None]:
# Total memory_usage of each recorded table, index included and with
# deep=True.  The divisors differ on purpose of the original cell:
# table1 is scaled by 1,000,000, table2 and table3 by 1,000.
print(table1.memory_usage(index=True, deep=True).sum() / 1000000)
print(table2.memory_usage(index=True, deep=True).sum() / 1000)
print(table3.memory_usage(index=True, deep=True).sum() / 1000)

### Model Evaluation

In [None]:
# Measure classification accuracy over the validation loader.
# Each batch goes through the model in a single forward pass instead of
# one call per image, and the predicted class is taken with argmax rather
# than via an exp -> numpy -> list -> index(max) round trip.
correct_count, all_count = 0, 0
# Gradients are not needed for evaluation
with torch.no_grad():
    for images, labels in valloader:
        # Flatten every image in the batch to one row, as in training
        logps = model(images.view(images.shape[0], -1))

        # The network outputs log-probabilities; exp is monotonic, so the
        # arg-max of the log-probabilities is the most probable class.
        pred_labels = logps.argmax(dim=1)

        correct_count += (pred_labels == labels).sum().item()
        all_count += len(labels)

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

In [None]:
# Print the '0.weight' tensor of the DB's current network, followed by
# the same entry from the network returned by reconstructnet(None, None).
layer_key = '0.weight'
print(db.currnetwork.state_dict()[layer_key])
print(db.reconstructnet(None,None).state_dict()[layer_key])

In [6]:
#db.criterion = torch.nn.functional.nll_loss
# Ask the TrainDB instance for Hessian eigen-information with k=4.
# NOTE(review): judging by the captured output this runs a deflated power
# iteration for 4 eigenvalue/vector pairs — confirm what `k` means and
# what the method returns against dbclass.TrainDB.
db.ithhess_eigenval(k=4)

[hessian_eigenthings] beginning deflated power iteration
[hessian_eigenthings] computing eigenvalue/vector 1 of 4
Step 0 - Time taken = 1 seconds
Step 1 - Time taken = 1 seconds....................................]  Step: 55s274ms | Tot: 1ms | power iter error: 1.0000                                                           1/20 
Step 2 - Time taken = 1 seconds....................................]  Step: 1s487ms | Tot: 1s488ms | power iter error: 856.2453                                                      2/20 
Step 3 - Time taken = 1 seconds....................................]  Step: 1s488ms | Tot: 2s977ms | power iter error: 0.4090                                                        3/20 
Step 4 - Time taken = 1 seconds....................................]  Step: 1s476ms | Tot: 4s454ms | power iter error: 0.0808                                                        4/20 
Step 5 - Time taken = 1 seconds....................................]  Step: 1s483ms | Tot: 5s937ms | power

KeyboardInterrupt: 

In [7]:
from hessian_eigenthings2.hvp_operator import compute_hessian_eigenthings

# Work on a deep copy so the network held by the DB is not touched.
network = copy.deepcopy(db.currnetwork)
# Top-4 Hessian eigenpairs on CPU, 20 power-iteration steps each.
# NOTE(review): the upstream pytorch-hessian-eigenthings API returns
# (eigenvals, eigenvecs) in that order; the names here are unpacked the
# other way round.  Verify the return order of the local
# hessian_eigenthings2 fork before relying on `eigvec` / `eigenvalues`.
eigvec,eigenvalues = compute_hessian_eigenthings(network,trainloader, criterion,num_eigenthings=4, use_gpu=False,power_iter_steps=20)

[hessian_eigenthings] beginning deflated power iteration
[hessian_eigenthings] computing eigenvalue/vector 1 of 4
Step 0 - Time taken = 13 seconds
Step 1 - Time taken = 12 seconds...................................]  Step: 20s188ms | Tot: 0ms | power iter error: 1.0000                                                           1/20 
Step 2 - Time taken = 25 seconds...................................]  Step: 12s448ms | Tot: 12s449ms | power iter error: 806.2057                                                    2/20 
Step 3 - Time taken = 18 seconds...................................]  Step: 25s57ms | Tot: 37s506ms | power iter error: 0.3925                                                       3/20 
Step 4 - Time taken = 13 seconds...................................]  Step: 18s669ms | Tot: 56s176ms | power iter error: 0.0679                                                      4/20 
Step 5 - Time taken = 15 seconds...................................]  Step: 13s574ms | Tot: 1m9s | power 

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-86f84fa42465>", line 4, in <module>
    eigvec,eigenvalues = compute_hessian_eigenthings(network,trainloader, criterion,num_eigenthings=4, use_gpu=False,power_iter_steps=20)
  File "/Users/tushant/Documents/Uchicago/Database/Project/deepquery/TrainDB/hessian_eigenthings2/hvp_operator.py", line 174, in compute_hessian_eigenthings
    hvp_operator, num_eigenthings, use_gpu=use_gpu, **kwargs
  File "/Users/tushant/Documents/Uchicago/Database/Project/deepquery/TrainDB/hessian_eigenthings/power_iter.py", line 74, in deflated_power_iteration
    init_vec=prev_vec,
  File "/Users/tushant/Documents/Uchicago/Database/Project/deepquery/TrainDB/hessian_eigenthings/power_iter.py", line 124, in power_iteration
    new_vec = operator.apply(vec) - momentum * prev_vec
  File "/Users/tus

KeyboardInterrupt: 