# Building a neural network

In [70]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

In [71]:
# select the compute device: the GPU when CUDA is usable, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Device being used is {device}")

Device being used is cpu


In [72]:
# define the neural network class
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> Linear/ReLU -> Linear/ReLU -> Linear.

    Args:
        in_features: number of values per sample once flattened
            (default 28*28, i.e. one 28x28 single-channel image).
        hidden_features: width of both hidden layers (default 512).
        num_classes: number of logits produced per sample (default 10).

    The defaults give a 784 -> 512 -> 512 -> 10 network.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # nn.Flatten() keeps dim 0 (the batch) and collapses all remaining dims
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            # no activation after the last layer: the network outputs raw logits
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten each sample of ``x`` and return its unnormalised logits.

        ``x`` needs a leading batch dimension; the dimensions after it must
        multiply out to ``in_features``. The result has one row per sample
        and ``num_classes`` columns.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [73]:
# build the network, then move its parameters to the selected device
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [74]:
# simulate one 28x28 grayscale image; the leading dim of size 1 is the batch
X = torch.rand((1, 28, 28), device=device)
# forward pass: the model returns one raw score (logit) per class
logits = model(X)
# logits are unnormalised (they may be negative and do not sum to 1), so apply
# softmax along the class dimension to turn them into probabilities
pred_probab = torch.softmax(logits, dim=1)
# index of the most probable class for each input
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([6])


In [75]:
# random tensor of shape (3, 28, 28), used below as a batch of three 28x28 images
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [76]:
# nn.Flatten keeps dim 0 (the batch) and merges every remaining dim into one,
# so each 28x28 image becomes a row of 784 values: (3, 28, 28) -> (3, 784)
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [77]:
# linear layer mapping each 784-value row to 20 outputs (weights and bias are
# randomly initialised when the layer is constructed)
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [78]:
# show the effect of the ReLU: negative activations become 0, the rest are unchanged
print(f"Before ReLU:\n {hidden1}\n")
# NOTE: hidden1 is rebound here, so re-running only this cell prints values
# that were already rectified under "Before ReLU"
hidden1 = torch.relu(hidden1)
print(f"After ReLU: \n {hidden1}\n")

Before ReLU:
 tensor([[-1.2345e+00, -3.7790e-01,  2.2664e-01,  6.3286e-01, -5.0647e-02,
          1.0906e-01,  3.8134e-01, -1.9736e-01,  2.0640e-01, -7.4909e-01,
         -3.6762e-01, -1.4567e-01, -4.5153e-02, -1.5251e-01, -1.6423e-02,
          6.7859e-02,  4.1846e-01,  4.4700e-02,  5.2248e-01, -2.7520e-01],
        [-5.4924e-01, -4.2060e-01,  4.1992e-01,  5.4909e-01, -2.0014e-01,
          2.8171e-01,  9.7015e-02, -1.2606e-01,  2.6286e-03, -9.4798e-01,
         -4.9811e-01, -4.1241e-01, -5.0770e-02,  5.0267e-01, -3.8323e-01,
          6.7063e-02,  3.0656e-01, -2.7928e-01,  1.9145e-01, -1.8580e-01],
        [-6.7371e-01, -3.1151e-01,  3.4633e-01,  3.7358e-01, -1.7281e-01,
         -2.4576e-03, -1.1054e-02,  1.6607e-01,  3.3654e-01, -8.3080e-01,
         -3.3401e-01, -2.5195e-01, -2.5135e-01, -1.1085e-01,  1.6782e-01,
         -1.7132e-01,  4.2023e-01,  3.2289e-02,  2.2301e-01, -4.7621e-04]],
       grad_fn=<AddmmBackward0>)

After ReLU: 
 tensor([[0.0000, 0.0000, 0.2266, 0.6329, 0.000

In [79]:
# chain the modules in call order: flatten -> layer1 -> ReLU -> 20-to-10 linear layer.
# `flatten` and `layer1` are the objects created in earlier cells, so their
# parameters are shared with this container rather than copied.
seq_modules = nn.Sequential(*[
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10),
])
# fresh random batch of three 28x28 images, pushed through the whole pipeline
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [81]:
# softmax along dim=1 rescales every row of `logits` to non-negative values
# that sum to 1, i.e. a probability for each output column
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

In [82]:
# print the module tree, then each learnable parameter with its shape
print(f"Model structure: {model}")
for name, param in model.named_parameters():
    param_size = param.size()
    # slicing [:2] shows only the first two rows (or elements) to keep output short
    first_rows = param[:2]
    print(f"Layer: {name} | Size: {param_size} | Values: {first_rows} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[ 0.0045, -0.0055,  0.0247,  ...,  0.0254,  0.0356,  0.0051],
        [ 0.0069,  0.0282,  0.0252,  ..., -0.0237,  0.0086,  0.0068]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([-0.0084, -0.0111], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[-0.0021,  0.0397,  0.0178,  ...,  0.0427, -0.0148, -0.0300],
        [ 0.0302,  0.0362,  0.0339,  ...,  0.0066,  0.0349,  0.0416]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Size: