# Build the Neural Network

Reference tutorial: https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS,
# and finally the CPU when no accelerator is reported as available.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f'Using {device}')

Using cpu


In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then three linear layers with ReLU.

    The defaults reproduce the original fixed architecture
    (784 -> 512 -> 512 -> 10), so ``NeuralNetwork()`` is unchanged; the sizes
    can now be overridden to reuse the model with other input/output shapes.
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden_features: int = 512,
        num_classes: int = 10,
    ) -> None:
        """Build the layer stack.

        Args:
            in_features: Number of values per sample after flattening.
            hidden_features: Width of both hidden layers.
            num_classes: Number of output logits per sample.
        """
        super().__init__()
        # Collapses every dimension after the first into a single one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            # No activation on the last layer: the outputs are raw logits.
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw (un-normalised) class logits for the batch ``x``.

        Args:
            x: Input batch whose non-batch dimensions flatten to
                ``in_features`` values per sample.

        Returns:
            Tensor with one row of ``num_classes`` logits per sample.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [5]:
# Build the network, place it on the selected device, and show its layers.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [9]:
# Push one random 28x28 input through the model and report the top class.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
# Softmax over the class dimension turns the logits into probabilities.
proba_pred = logits.softmax(dim=1)
# Index of the highest-probability class for each row of the batch.
y_pred = torch.argmax(proba_pred, dim=1)
print(f'Predicted: {y_pred}, probability: {proba_pred[0,y_pred.item()]}')

Predicted: tensor([0]), probability: 0.11264409124851227


In [10]:
# Sample minibatch: three random 28x28 tensors used to trace the layers below.
input_image = torch.rand(3, 28, 28)
print(input_image.shape)

torch.Size([3, 28, 28])


In [11]:
# Flatten each 28x28 sample into a 784-value row; the batch dimension is kept.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [12]:
# Linear layer mapping each flattened 784-value sample to 20 features.
layer1 = nn.Linear(28 * 28, 20)
print(layer1(flat_image))

tensor([[ 0.2901, -0.0800,  0.1265, -0.1465,  0.2098, -0.9463,  0.1978,  0.4985,
         -0.2774,  0.0060, -0.0026,  0.0096,  0.0203, -0.0253,  0.3044,  0.2478,
          0.5302, -0.2506, -0.7631, -0.4910],
        [ 0.2431, -0.0250,  0.2025,  0.0042,  0.1721, -1.0342, -0.1111,  0.2811,
         -0.1498, -0.1898, -0.3414, -0.1040,  0.1245, -0.1053,  0.3549,  0.1550,
          0.3413,  0.2220, -0.3641, -0.5157],
        [ 0.3079, -0.0592,  0.3225, -0.0345, -0.1979, -0.8669,  0.1035, -0.1252,
         -0.4510, -0.5338, -0.2398,  0.1201,  0.2339, -0.0157,  0.3319,  0.2493,
          0.4681,  0.0134, -0.6208, -0.5347]], grad_fn=<AddmmBackward0>)


In [13]:
# Compare the linear layer's output before and after the ReLU non-linearity,
# which replaces every negative entry with zero.
hidden1 = layer1(flat_image)
print(f'Before ReLU: {hidden1}')
relu1 = torch.relu(hidden1)
print(f'After ReLU: {relu1}')

Before ReLU: tensor([[ 0.2901, -0.0800,  0.1265, -0.1465,  0.2098, -0.9463,  0.1978,  0.4985,
         -0.2774,  0.0060, -0.0026,  0.0096,  0.0203, -0.0253,  0.3044,  0.2478,
          0.5302, -0.2506, -0.7631, -0.4910],
        [ 0.2431, -0.0250,  0.2025,  0.0042,  0.1721, -1.0342, -0.1111,  0.2811,
         -0.1498, -0.1898, -0.3414, -0.1040,  0.1245, -0.1053,  0.3549,  0.1550,
          0.3413,  0.2220, -0.3641, -0.5157],
        [ 0.3079, -0.0592,  0.3225, -0.0345, -0.1979, -0.8669,  0.1035, -0.1252,
         -0.4510, -0.5338, -0.2398,  0.1201,  0.2339, -0.0157,  0.3319,  0.2493,
          0.4681,  0.0134, -0.6208, -0.5347]], grad_fn=<AddmmBackward0>)
After ReLU: tensor([[0.2901, 0.0000, 0.1265, 0.0000, 0.2098, 0.0000, 0.1978, 0.4985, 0.0000,
         0.0060, 0.0000, 0.0096, 0.0203, 0.0000, 0.3044, 0.2478, 0.5302, 0.0000,
         0.0000, 0.0000],
        [0.2431, 0.0000, 0.2025, 0.0042, 0.1721, 0.0000, 0.0000, 0.2811, 0.0000,
         0.0000, 0.0000, 0.0000, 0.1245, 0.0000, 0.3549

In [14]:
# Show the model's layer structure again before inspecting its parameters.
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [16]:
# Walk every parameter of the model and print its qualified name, its size,
# and its current values.
for name, param in model.named_parameters():
    print(f'Layer: {name} | Size: {param.size()} | Values: {param} \n')

Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: Parameter containing:
tensor([[ 0.0030, -0.0128,  0.0171,  ..., -0.0244, -0.0250,  0.0111],
        [ 0.0168, -0.0273,  0.0021,  ..., -0.0242, -0.0008, -0.0232],
        [-0.0289, -0.0264,  0.0070,  ...,  0.0243, -0.0162,  0.0340],
        ...,
        [ 0.0071, -0.0162,  0.0051,  ...,  0.0230,  0.0290, -0.0158],
        [-0.0230, -0.0045,  0.0023,  ..., -0.0020,  0.0207,  0.0271],
        [-0.0125,  0.0240,  0.0029,  ...,  0.0022,  0.0004, -0.0006]],
       requires_grad=True) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: Parameter containing:
tensor([ 2.4006e-02, -6.7358e-03, -4.0426e-03,  3.0495e-02, -3.3796e-02,
        -2.8599e-02, -2.5223e-02,  2.2750e-02,  3.1241e-02, -3.5317e-03,
         1.2756e-02, -2.9143e-02, -2.9807e-02, -8.2488e-04,  2.7288e-02,
         6.2555e-05,  3.4708e-02,  3.4400e-04,  9.7375e-03,  2.7271e-02,
         2.8348e-02, -3.5094e-02,  1.3489e-02, -2.8377e-02