In [3]:
# Build the neural network
# torch.nn is a collection of ready-to-use layers and functions.
# nn.Module is the base class from which all models and layers should inherit.

import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [5]:
# Pick the compute device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using {device} device")

Using cuda device


In [6]:
# Define the model by subclassing nn.Module
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> (Linear, ReLU) x 2 -> Linear.

    The defaults reproduce the original fixed architecture
    (784 -> 512 -> 512 -> 10), so ``NeuralNetwork()`` is unchanged.

    Args:
        in_features: Number of values per sample once every dimension after
            the first has been flattened. Defaults to ``28 * 28``.
        hidden_features: Width of both hidden layers. Defaults to 512.
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        self.flatten = nn.Flatten()
        # Attribute name and layer order are kept so parameter names
        # (linear_relu_stack.0.weight, ...) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` and return the raw logits from the linear stack.

        No softmax is applied here; callers normalize the logits themselves.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


In [7]:
# Instantiate the network, then move it to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [10]:
# Run one random 28x28 input through the model and read off its prediction.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)

# Softmax over dim=1 turns the logits of each row into probabilities.
to_probabilities = nn.Softmax(dim=1)
pred_probab = to_probabilities(logits)
print(f"print the probability: {pred_probab}")

# The predicted class is the index of the largest probability in each row.
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")


print the probability: tensor([[0.1014, 0.0984, 0.1054, 0.1049, 0.0944, 0.1094, 0.0961, 0.1021, 0.0906,
         0.0974]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
Predicted class: tensor([5], device='cuda:0')


In [11]:
# Model layers: start from a random tensor of shape (3, 28, 28).
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [12]:
# Flatten the input: every dimension after the first is collapsed into one,
# so (3, 28, 28) becomes (3, 784).
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [13]:
# nn.Linear applies a linear transformation to its input;
# here it maps 784 input features to 20 output features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [14]:
# nn.ReLU: print the activations before and after applying the non-linearity.
print(f"Before ReLU: {hidden1}\n\n")
relu = nn.ReLU()
hidden1 = relu(hidden1)
print(f"After ReLU: {hidden1}")


Before ReLU: tensor([[-0.0280, -0.0038,  0.1186, -0.5023,  0.2913, -0.3041,  0.0208, -0.0931,
          0.1807, -0.0122, -0.3528,  0.2375,  0.2734,  0.4775, -0.0282,  0.5482,
          0.1966,  0.2648,  0.2586,  0.2519],
        [ 0.3166, -0.3558,  0.1468, -0.3569,  0.4263,  0.0181, -0.3887,  0.2836,
         -0.1035,  0.1007,  0.1543,  0.3694,  0.4860,  0.3858, -0.3161,  0.6157,
          0.1633,  0.4891, -0.3131,  0.3256],
        [ 0.1533, -0.3250,  0.2351, -0.4263,  0.5245, -0.1095, -0.2299,  0.2762,
          0.3671,  0.4177, -0.1249,  0.4050,  0.1945,  0.1594, -0.3431,  0.7766,
          0.4111,  0.4846,  0.3386,  0.1362]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.1186, 0.0000, 0.2913, 0.0000, 0.0208, 0.0000, 0.1807,
         0.0000, 0.0000, 0.2375, 0.2734, 0.4775, 0.0000, 0.5482, 0.1966, 0.2648,
         0.2586, 0.2519],
        [0.3166, 0.0000, 0.1468, 0.0000, 0.4263, 0.0181, 0.0000, 0.2836, 0.0000,
         0.1007, 0.1543, 0.3694, 0.4860, 0.3858, 0.00

In [15]:
# nn.Sequential chains modules in order, reusing the flatten and layer1
# objects created in earlier cells.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)
input_image = torch.rand(3, 28, 28)
softmax = nn.Softmax(dim=1)  # normalize across dim=1, i.e. within each row
logits = seq_modules(input_image)
pred_probab = softmax(logits)

In [16]:
# Model parameters: print the structure, then each named parameter with its
# size and its first two entries.
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    size = param.size()
    preview = param[:2]
    print(f"Layer: {name} | Size: {size} | Values : {preview} \n")


Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0202,  0.0090, -0.0012,  ..., -0.0333,  0.0124,  0.0062],
        [-0.0213,  0.0183, -0.0008,  ...,  0.0296,  0.0241,  0.0034]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([ 0.0051, -0.0281], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0131, -0.0436,  0.0300,  ..., -0.0347,  0.0115,  0.0376],
        [ 0.0308,  0.0250, -0.0384,  ...,  0.0120,  0.0106, -0.0416]],
       device='cuda:0', grad_fn=<Sl