# 2. Neural Networks with Numpy

In this notebook we will build our first neural network using only `numpy` as library.

We will work on the same dataset as last week and try to predict which digit is shown on the given pixel values.

In [1]:
from sklearn.datasets import fetch_openml
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, data_home="./data", cache=True)

We know already from last time how the data looks:

In [2]:
X.head(3)

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


The label is a number between 0-9 represnting the digit shown on the pixels.

In [3]:
y.head(3)

0    5
1    0
2    4
Name: class, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

Before we start, we scale the data and divide it into train and test data:

In [4]:
y = y.astype("long").values

In [7]:
from sklearn.model_selection import train_test_split

X_scaled = (X/255).astype('float32').values
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.15, random_state=42)

## Task 1: Implement the Forward pass

In [10]:
import torch
torch.manual_seed(0)

<torch._C.Generator at 0x7fbfe3b1af70>

In [12]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [13]:
train_x = torch.Tensor(X_train).float().to(device)
test_x = torch.Tensor(X_test).float().to(device)
train_y = torch.Tensor(y_train).long().to(device)
test_y = torch.Tensor(y_test).long().to(device)

In [21]:
import torch.nn as nn

class DeepNeuralNetwork(nn.Module):
    
    def __init__(self):
    
        super(DeepNeuralNetwork, self).__init__()
        
        self.hidden1 = nn.Linear(784, 128)
        self.hidden2 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 10)

    def forward(self, x_train):
        
        z1 = self.hidden1(x_train)
        a1 = torch.sigmoid(z1)
        z2 = self.hidden2(a1)
        a2 = torch.sigmoid(z2)
        z3 = self.output(a2) # no softmax needed

        return z3

In [23]:
dnn = DeepNeuralNetwork()
for param in dnn.parameters():
      print(param.numel())

100352
128
8192
64
640
10


Hint: To calculate the ouput of a layer you can use numpys matrix operations. For instance:

In [29]:
dnn = DeepNeuralNetwork()
dnn.to(device)
dnn.train()

no_epochs = 1000
learning_rate = 0.3

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(dnn.parameters(), lr=learning_rate)

losses = []
for iteration in range(no_epochs):
    
    optimizer.zero_grad()
    
    y_hat = dnn(train_x) # forward pass
    
    loss = loss_func(y_hat, train_y)
    loss.backward()
    
    optimizer.step()
    
    losses.append(loss.item())
    if iteration % 100 == 0:
        print(f"Loss in epoch {iteration} is {loss.item()}")
    

Loss in epoch 0 is 2.3446292877197266
Loss in epoch 100 is 2.276169538497925
Loss in epoch 200 is 2.0426132678985596
Loss in epoch 300 is 1.3611892461776733
Loss in epoch 400 is 0.9327110648155212
Loss in epoch 500 is 0.74612957239151
Loss in epoch 600 is 0.6184443235397339
Loss in epoch 700 is 0.5308632254600525
Loss in epoch 800 is 0.47102147340774536
Loss in epoch 900 is 0.428315669298172


### Task 3: Predict on the test data

In [43]:
dnn.eval()

y_pred = dnn(test_x)
predicted = torch.argmax(y_pred.data, 1)
correct = (predicted == test_y).sum().item()
correct / len(test_x)

0.8862857142857142

In [53]:
y_hat = dnn(test_x[0:5])

In [54]:
y_hat.shape

torch.Size([5, 10])

In [56]:
import torch.nn.functional as F

F.softmax(y_hat, dim=0)

tensor([[1.2199e-02, 4.8797e-01, 7.2078e-01, 4.1075e-01, 1.5836e-04, 1.2111e-01,
         1.4841e-02, 2.9002e-06, 9.8258e-01, 4.4454e-03],
        [4.2000e-03, 6.3725e-02, 1.1681e-02, 4.6590e-04, 9.9654e-01, 1.6244e-02,
         2.2679e-01, 7.5130e-04, 1.1585e-03, 6.4510e-01],
        [2.9275e-01, 2.5047e-02, 6.8526e-02, 2.4453e-02, 2.7114e-03, 8.1092e-01,
         7.5807e-01, 4.3921e-06, 1.5873e-02, 4.3619e-03],
        [6.0623e-01, 1.5934e-01, 1.8182e-01, 5.0752e-01, 7.3958e-05, 2.4970e-02,
         2.1108e-04, 1.2435e-01, 1.7257e-04, 3.1157e-02],
        [8.4623e-02, 2.6391e-01, 1.7192e-02, 5.6812e-02, 5.1473e-04, 2.6759e-02,
         8.3709e-05, 8.7489e-01, 2.1642e-04, 3.1494e-01]],
       grad_fn=<SoftmaxBackward0>)

In [58]:
x1 = dnn(test_x[0])
x1

tensor([-3.6472, -1.4993,  1.2703,  2.8877, -1.8768,  2.5556, -2.7160, -3.9458,
         7.2896,  0.9629], grad_fn=<AddBackward0>)

In [63]:
x2 = F.softmax(dnn(test_x[0]), dim=0)

In [61]:
torch.argmax(x1)

tensor(8)

In [64]:
torch.argmax(x2)

tensor(8)