# Make sure to run this in the `my-torch` environment!

Link to tutorial: https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html

> Every module in PyTorch subclasses the `nn.Module`. A neural network is a module itself that consists of other modules (layers). 

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# ask PyTorch whether CUDA is usable here; the result decides the device chosen below
torch.cuda.is_available()

False

In [3]:
# choose the compute device: the GPU when CUDA is available, otherwise the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# seed PyTorch's random number generator so the randomly initialised model
# weights and the random example tensors are the same on every
# Restart Kernel -> Run All
SEED = 0
torch.manual_seed(SEED)

device

'cpu'

In [4]:
# create a neural network by sub-classing nn.Module 
# define initialization and `forward` method
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> Linear/ReLU -> Linear/ReLU -> Linear.

    Args:
        in_features: number of values per sample once the input is flattened
            (default ``28 * 28``, i.e. one 28 x 28 image).
        hidden_features: width of each of the two hidden layers (default 512).
        num_classes: number of logits produced per sample (default 10).

    With the defaults, ``NeuralNetwork()`` builds the same 784 -> 512 -> 512 -> 10
    architecture as before.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        # nn.Flatten() keeps dim 0 (the batch) and collapses every remaining
        # dim into one, so a (N, 28, 28) batch becomes (N, 784)
        self.flatten = nn.Flatten()
        # three linear layers with a ReLU non-linearity after the first two;
        # the last layer is left without an activation so it emits raw logits
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=hidden_features),
            nn.ReLU(),
            nn.Linear(in_features=hidden_features, out_features=hidden_features),
            nn.ReLU(),
            nn.Linear(in_features=hidden_features, out_features=num_classes),
        )

    def forward(self, x):
        """Run the feed-forward pass.

        Args:
            x: batch whose non-batch dims multiply to ``in_features``.

        Returns:
            Unnormalised class scores (logits) of shape ``(batch, num_classes)``.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    
    
    
    
    

In [5]:
# instantiate the network, then move its parameters onto the selected device;
# the bare name at the end makes the cell display the layer structure
model = NeuralNetwork()
model = model.to(device)
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [6]:
# feed a single random 28 x 28 input (batch size 1) through the model;
# calling the model directly runs its forward pass
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
logits.size()

torch.Size([1, 10])

In [7]:
# softmax over dim 1 (one entry per class) turns the logits into probabilities
probs = nn.Softmax(dim=1)(logits)
# index of the most probable class for the first (and only) sample ...
y_pred = probs.argmax(dim=1)[0]
# ... and that class's probability as a plain Python float
pr_pred = probs[0, y_pred].item()
probs

tensor([[0.0922, 0.0944, 0.1081, 0.1037, 0.1067, 0.0955, 0.1010, 0.0944, 0.1050,
         0.0991]], grad_fn=<SoftmaxBackward0>)

In [8]:
# report the winning class and its probability (3 significant digits)
print(f'Predicted class: {y_pred} with probability: {pr_pred:.3}')


Predicted class: 2 with probability: 0.108


In [10]:
# a minibatch of 3 random "images", each 28 x 28 pixels, stacked along dim 0
img_minibtch = torch.rand((3, 28, 28))
img_minibtch.shape

torch.Size([3, 28, 28])

In [14]:
# nn.Flatten keeps dim 0 (one row per sample) and merges the two pixel
# dims, so each 28 x 28 image becomes a vector of 784 values
flt_minibtch = nn.Flatten()(img_minibtch)
flt_minibtch.shape

torch.Size([3, 784])

In [17]:
# first hidden layer: a linear map from the 784 flattened pixels to 512 features,
# applied to every sample of the minibatch
layer1 = nn.Linear(28 * 28, 512)
hidden1 = layer1(flt_minibtch)
hidden1.shape

torch.Size([3, 512])

In [18]:
# apply activation layer: ReLU replaces every negative entry with 0 and
# leaves the non-negative entries unchanged
# NOTE: `hidden1` is rebound to the activated tensor below, so re-running this
# cell on its own prints already-activated values under "Before ReLU"
print('Before ReLU:', hidden1)
hidden1 = nn.ReLU()(hidden1)
print('After ReLU:', hidden1)

Before ReLU: tensor([[ 0.1254,  0.0970, -0.0260,  ...,  0.4807, -0.4991,  0.1651],
        [ 0.0687,  0.0592,  0.2136,  ...,  0.0374, -0.3441,  0.1987],
        [ 0.3345,  0.1573,  0.2000,  ...,  0.3032, -0.3836,  0.5333]],
       grad_fn=<AddmmBackward0>)
After ReLU: tensor([[0.1254, 0.0970, 0.0000,  ..., 0.4807, 0.0000, 0.1651],
        [0.0687, 0.0592, 0.2136,  ..., 0.0374, 0.0000, 0.1987],
        [0.3345, 0.1573, 0.2000,  ..., 0.3032, 0.0000, 0.5333]],
       grad_fn=<ReluBackward0>)


In [19]:
# list every learnable tensor registered on the model: its qualified name,
# its full size, and a preview limited to its first two rows / entries
for name, param in model.named_parameters():
    print(f'Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n')
          
          

Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0294,  0.0215,  0.0137,  ...,  0.0002,  0.0307,  0.0027],
        [ 0.0347,  0.0249, -0.0086,  ..., -0.0062, -0.0187,  0.0274]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0162,  0.0111], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0018, -0.0204,  0.0108,  ..., -0.0215,  0.0072,  0.0230],
        [-0.0115, -0.0085,  0.0037,  ..., -0.0107,  0.0220, -0.0070]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Size: torch.Size([512]) | Values : tensor([-0.0151, -0.0304], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.4.weight | Size: torch.Size([10, 512]) | Values : tensor([[-0.0024,  0.0107,  0.0299,  ..., -0.0331, -0.0329, -0.0025],
        [-0.0048, -0.0065, -0.0404,  ..., -0.0031, -0.0062, -0.0370]],
       grad_fn=<SliceBackward0>)

In [20]:
# the weight matrix of nn.Linear is stored as (out_features, in_features)
layer1.weight.shape

torch.Size([512, 784])