In [3]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [7]:
# Choose the compute backend in order of preference: CUDA first, then MPS,
# falling back to the CPU when neither accelerator reports as available.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

print(f"Using {device} device")

Using cuda device


### Define the class

In [8]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two hidden ReLU layers, linear output.

    The defaults reproduce the original fixed architecture (784 -> 512 -> 512 -> 10).

    Args:
        in_features: Number of values per sample once the input is flattened
            (default ``28*28``).
        hidden_features: Width of each of the two hidden layers (default 512).
        num_classes: Number of logits produced per sample (default 10).
    """

    def __init__(self, in_features=28*28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapses every dimension after the first (batch) one into a single axis.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes)
        )

    def forward(self, x):
        """Return the logits for a batch ``x``.

        Args:
            x: Batched input whose non-batch dimensions flatten to
                ``in_features`` values per sample.

        Returns:
            Tensor with one row of ``num_classes`` unnormalized scores per sample.
        """
        x = self.flatten(x)
        # Logits are the unnormalized output values of the network's final layer;
        # no softmax is applied here.
        logits = self.linear_relu_stack(x)
        return logits

In [9]:
# Build the network with its default layer sizes, then move its parameters
# onto the device selected earlier in the notebook.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [23]:
# Push one random 28x28 input through the model and report the class with
# the highest predicted probability.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)

# Softmax over dim 1 rescales each row of logits into probabilities.
softmax_over_classes = nn.Softmax(dim=1)
pred_probab = softmax_over_classes(logits)

y_pred = torch.argmax(pred_probab, dim=1)
predicted_class = y_pred.item()
print(f"Predicted class: {predicted_class}")

Predicted class: 6


### Model Layers

In [24]:
# A mini-batch of three random 28x28 inputs used to walk through the layers.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [25]:
# nn.Flatten -> turns each 28x28 image into a single row of 784 pixel values;
# the leading (batch) dimension is kept as is.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [26]:
# nn.Linear -> applies a linear transformation to its input,
# using the weights and biases stored inside the layer.
layer1 = nn.Linear(28 * 28, 200)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 200])


In [29]:
# nn.ReLU -> non-linear activation function (negative entries become zero).
# The pre-activation is recomputed from layer1 here rather than read from
# `hidden1`, so re-running this cell never feeds already-rectified values
# back in as the "Before ReLU" tensor.
hidden1_pre = layer1(flat_image)
print(f"Before ReLU: {hidden1_pre}\n\n")
hidden1 = nn.ReLU()(hidden1_pre)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-4.5566e-01, -1.0408e-01,  1.5582e-02, -1.1191e-01, -4.4729e-02,
         -5.3997e-01,  3.6226e-01, -5.0750e-01, -4.1135e-01,  4.5544e-02,
          1.0232e-01,  3.1147e-01,  1.4578e-01, -5.9325e-01,  2.5307e-01,
          6.4199e-02,  7.0377e-01, -6.0155e-02, -3.2041e-02, -7.1822e-02,
          6.7404e-02,  1.6131e-01,  5.9427e-01,  4.1027e-01, -2.0111e-01,
          2.0341e-02,  2.5911e-01, -2.5476e-01, -3.8917e-01, -6.5688e-01,
          2.0638e-01, -1.3138e-01,  3.1536e-01,  4.8270e-01,  4.5204e-01,
         -4.4188e-01,  3.0505e-01, -3.5397e-01,  2.9765e-01,  4.5023e-01,
         -9.9323e-02,  1.8277e-02, -3.2945e-01,  3.8849e-01,  9.4976e-02,
         -6.3139e-02,  2.0086e-01, -5.0283e-01,  2.5494e-01, -1.4506e-01,
          3.7121e-01,  1.1820e-01, -2.2860e-03,  1.3887e-01, -1.8320e-01,
          1.3583e-01, -1.3554e-01,  1.6044e-01, -8.5494e-02,  5.8181e-02,
          1.0796e-01,  2.7751e-01, -7.0210e-02, -2.6001e-01, -3.7628e-02,
          2.4802e-01, -3.

In [33]:
# nn.Sequential -> ordered container of modules: the input is passed through
# them one after another, in the order listed. `flatten` and `layer1` are the
# instances created in the earlier cells.
pipeline_stages = [
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(200, 10),
]
seq_modules = nn.Sequential(*pipeline_stages)

input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)
print(seq_modules)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=200, bias=True)
  (2): ReLU()
  (3): Linear(in_features=200, out_features=10, bias=True)
)


In [35]:
# nn.Softmax -> rescales the logits along dim 1 into per-class probabilities,
# so every row of the result sums to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
# The label names what is actually printed: the probability tensor itself,
# not its shape.
print("pred_probab:\n", pred_probab)

pred_probab.shape:
 tensor([[0.0923, 0.0962, 0.0942, 0.0859, 0.1076, 0.1117, 0.0965, 0.1038, 0.1033,
         0.1085],
        [0.0920, 0.0962, 0.0824, 0.0885, 0.1101, 0.1032, 0.0887, 0.0961, 0.1230,
         0.1197],
        [0.1048, 0.0892, 0.0836, 0.0916, 0.1111, 0.1072, 0.0942, 0.0936, 0.1087,
         0.1160]], grad_fn=<SoftmaxBackward0>)


### Model Parameters

In [36]:
# Show the architecture, then list every named parameter with its size and a
# preview of its first two entries along dim 0.
print(f"Model structure: \n{model} \n\n")

for name, param in model.named_parameters():
    preview = param[:2]
    print(f"Layer: {name} | Size: {param.shape} | Values : {preview} \n")

Model structure: 
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0208, -0.0183,  0.0098,  ..., -0.0051,  0.0027, -0.0294],
        [ 0.0014, -0.0090,  0.0018,  ...,  0.0155, -0.0238,  0.0081]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0284,  0.0080], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0113, -0.0406,  0.0092,  ..., -0.0183,  0.0337,  0.0027],
        [ 0.0384,  0.0147, -0.0105,  ..., -0.0151, -0.0119,  0.0204]],
       device='cuda:0', grad_fn=<