In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Seed PyTorch's global RNG so the random inputs and the randomly initialised
# layer weights created further down are the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cpu device


In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then Linear-ReLU-Linear-ReLU-Linear.

    Args:
        in_features: Number of values per sample after flattening
            (default ``28 * 28``).
        hidden_features: Width of the two hidden layers (default ``512``).
        num_classes: Number of output logits per sample (default ``10``).

    With the defaults the architecture, and therefore the ``state_dict``
    keys and shapes, is identical to the original hard-coded version.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        # nn.Flatten() with default arguments keeps dim 0 and merges the rest.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` and return the raw logits (no softmax is applied here)."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
# Instantiate the network with its default arguments, then move it to the
# selected device (Module.to returns the module itself).
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# A single random 1x28x28 input created directly on the target device.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
# Softmax along dim 1 converts the logits into values that sum to 1 per row;
# argmax along the same dim then selects the index of the largest one.
pred_probab = torch.softmax(logits, dim=1)
y_pred = pred_probab.argmax(dim=1)
print(f'Predicted class: {y_pred}')

Predicted class: tensor([7])


In [8]:
# Random tensor of shape (3, 28, 28) with values drawn uniformly from [0, 1).
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [9]:
# nn.Flatten() with default arguments keeps dim 0 and merges the remaining
# dimensions into one.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [10]:
# Linear layer taking 28*28 input features per row and producing 20.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [11]:
# Recompute the pre-activation values from layer1 rather than reading them
# back from hidden1: hidden1 is rebound to the ReLU output below, so running
# this cell a second time would otherwise print post-ReLU values under the
# "Before ReLU" label.
hidden1_pre = layer1(flat_image)
print(f'Before ReLU: {hidden1_pre}\n\n')
# ReLU replaces negative entries with 0 and leaves the others unchanged.
hidden1 = nn.ReLU()(hidden1_pre)
print(f'After ReLU: {hidden1}')

Before ReLU: tensor([[ 0.3103,  0.3340,  0.0736,  0.1424, -0.0147,  0.0018,  0.3025, -0.1622,
          0.1588, -0.4691, -0.4013, -0.1869, -0.5920,  0.0264,  0.0655, -0.0314,
          0.5965, -0.2675, -0.2701, -0.3550],
        [ 0.3079,  0.5438, -0.2633,  0.3580,  0.0823, -0.1835,  0.5709, -0.4919,
         -0.0292,  0.0421, -0.4338, -0.4897, -0.3668,  0.3607,  0.1024,  0.1954,
          0.2621, -0.2691, -0.1272, -0.2995],
        [ 0.1322,  0.4142, -0.1131,  0.1995,  0.0159,  0.0361,  0.5339, -0.3126,
         -0.0331, -0.2850, -0.2678, -0.1225, -0.3638,  0.0785,  0.0749,  0.0757,
          0.4471, -0.0069, -0.4477, -0.1817]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.3103, 0.3340, 0.0736, 0.1424, 0.0000, 0.0018, 0.3025, 0.0000, 0.1588,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0264, 0.0655, 0.0000, 0.5965, 0.0000,
         0.0000, 0.0000],
        [0.3079, 0.5438, 0.0000, 0.3580, 0.0823, 0.0000, 0.5709, 0.0000, 0.0000,
         0.0421, 0.0000, 0.0000, 0.0000, 0.3607, 0.10

In [13]:
# nn.Sequential runs its modules in the order given: the existing flatten and
# layer1 objects are reused, followed by a new ReLU and a new 20 -> 10 Linear.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))

# Fresh random (3, 28, 28) input pushed through the whole chain.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)
logits

tensor([[-0.1957,  0.0235, -0.3058, -0.0695, -0.1530, -0.0427, -0.1088,  0.0915,
         -0.0486,  0.1623],
        [-0.1611,  0.0274, -0.3930,  0.0435, -0.1392, -0.0333, -0.1728,  0.1376,
         -0.2231,  0.0764],
        [-0.0021, -0.1067, -0.3723, -0.0373, -0.0487,  0.1615, -0.2233,  0.0303,
         -0.0961,  0.1215]], grad_fn=<AddmmBackward0>)

In [18]:
# Softmax along dim 1 rescales each row of logits so that it sums to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
# Display the probabilities next to their per-row sums.
(pred_probab, pred_probab.sum(dim=1))

(tensor([[0.0870, 0.1083, 0.0779, 0.0987, 0.0908, 0.1014, 0.0949, 0.1159, 0.1008,
          0.1244],
         [0.0915, 0.1105, 0.0726, 0.1123, 0.0935, 0.1040, 0.0904, 0.1233, 0.0860,
          0.1160],
         [0.1045, 0.0942, 0.0722, 0.1009, 0.0998, 0.1231, 0.0838, 0.1080, 0.0952,
          0.1183]], grad_fn=<SoftmaxBackward0>),
 tensor([1.0000, 1.0000, 1.0000], grad_fn=<SumBackward1>))

In [20]:
# Print the module tree first.
print(f'Model structure: {model}\n\n')

# Then walk every registered parameter, showing its dotted name, its full
# shape, and only the first two entries along dim 0 to keep the output short.
for name, param in model.named_parameters():
    print(f'Layer: {name} | Size: {param.shape} | Values: {param[:2]} \n')

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[-0.0051, -0.0355,  0.0137,  ...,  0.0124,  0.0131,  0.0230],
        [ 0.0144, -0.0114,  0.0276,  ...,  0.0133, -0.0298,  0.0220]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([ 0.0251, -0.0256], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[ 0.0266, -0.0199,  0.0019,  ...,  0.0219,  0.0250, -0.0283],
        [ 0.0423, -0.0272, -0.0351,  ..., -0.0239, -0.0033, -0.0236]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Siz