In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Choose the compute backend in order of preference: CUDA, then MPS,
# falling back to the CPU when neither reports as available.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print('Using {} device'.format(device))



Using cpu device


  warn(f"Failed to load image Python extension: {e}")


In [2]:
class NeuralNetwork(nn.Module):
    """Fully connected network mapping 784 features per sample to 10 logits.

    The input is first flattened (``nn.Flatten`` with its default arguments),
    then passed through two 512-unit linear layers, each followed by ReLU,
    and a final 10-unit linear layer. No softmax is applied here, so
    ``forward`` returns raw, unnormalised scores.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        # Layer widths: 28 * 28 inputs, 512 hidden units, 10 outputs.
        n_inputs, n_hidden, n_outputs = 28 * 28, 512, 10
        layers = [
            nn.Linear(n_inputs, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_outputs),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the logits produced by the layer stack."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network, move it to the selected device, and show its layout.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [8]:
# Feed a random batch of three 28x28 inputs through the model.
X = torch.rand(3, 28, 28, device=device)
logits = model(X)

# Softmax over dim=1 normalises each row of logits; argmax over the same
# dimension then selects one class index per sample.
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)

# Interpolate the tensor into the message. Passing it as a second print()
# argument would leave the literal "{}" placeholder in the output.
print('Predicted class: {}'.format(y_pred))

tensor([[ 0.0683,  0.0156, -0.1464,  0.0348,  0.0071,  0.1196,  0.0025, -0.0195,
         -0.0385,  0.1217],
        [ 0.0545,  0.0078, -0.1202,  0.0647, -0.0310,  0.1147,  0.0306, -0.0578,
         -0.0368,  0.1383],
        [ 0.0867, -0.0185, -0.1276,  0.0237,  0.0549,  0.1445,  0.0007, -0.0479,
         -0.0314,  0.1466]], grad_fn=<AddmmBackward0>)
Predicted class: {} tensor([9, 9, 9])


In [7]:
# Random stand-in batch of shape (3, 28, 28).
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

# nn.Flatten keeps dim 0 and merges the remaining dims: (3, 28, 28) -> (3, 784).
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 28, 28])
torch.Size([3, 784])


In [9]:
# Linear layer taking 28*28 input features per sample down to 20 outputs.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [10]:
# Print the activations before and after ReLU so the zeroed negative
# entries can be compared side by side.
print("Before ReLU: {}\n\n".format(hidden1))
hidden1 = nn.functional.relu(hidden1)
print("After ReLU: {}\n\n".format(hidden1))

Before ReLU: tensor([[-0.4119, -0.0854,  0.1147, -0.5203, -0.0438,  0.5005, -0.0232, -0.3435,
         -0.1391,  0.0225,  0.0134, -0.1655, -0.1851, -0.1393, -0.0832,  0.6489,
          0.5483,  0.0768, -0.0664,  0.4829],
        [ 0.1259, -0.3167,  0.0401, -0.8039,  0.0100,  0.1957,  0.1441, -0.0725,
         -0.1131,  0.1927, -0.2487, -0.1218, -0.2589,  0.1218,  0.0374,  0.2957,
          0.4169,  0.1265,  0.0019,  0.0675],
        [-0.3395, -0.1361,  0.3152, -0.6311, -0.1833,  0.4475, -0.0677,  0.2858,
         -0.3582, -0.1340, -0.0187, -0.3711, -0.1147, -0.3212, -0.3000,  0.6141,
         -0.0178,  0.0134,  0.0786,  0.0444]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.1147, 0.0000, 0.0000, 0.5005, 0.0000, 0.0000, 0.0000,
         0.0225, 0.0134, 0.0000, 0.0000, 0.0000, 0.0000, 0.6489, 0.5483, 0.0768,
         0.0000, 0.4829],
        [0.1259, 0.0000, 0.0401, 0.0000, 0.0100, 0.1957, 0.1441, 0.0000, 0.0000,
         0.1927, 0.0000, 0.0000, 0.0000, 0.1218, 0.03

In [13]:
# Chain the existing flatten and layer1 modules with a fresh ReLU and a
# new 20 -> 10 linear layer; nn.Sequential applies them in the order given.
seq_modules = nn.Sequential(*[flatten, layer1, nn.ReLU(), nn.Linear(20, 10)])

# Push a new random (3, 28, 28) batch through the whole pipeline.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)
print(logits)

tensor([[-0.1267,  0.1495, -0.1998, -0.0884, -0.2506, -0.1846,  0.1773, -0.0722,
         -0.3234,  0.0428],
        [-0.1354,  0.2015, -0.2015, -0.0862, -0.2298, -0.2521,  0.1786, -0.0048,
         -0.2966,  0.0995],
        [-0.0911,  0.2023, -0.1531, -0.0904, -0.2539, -0.1713,  0.1388, -0.0078,
         -0.3110,  0.0441]], grad_fn=<AddmmBackward0>)


In [15]:
# Softmax along dim 1 rescales each row of logits to values that sum to 1.
softmax = nn.Softmax(1)
pred_probab = softmax(logits)
print(pred_probab)

tensor([[0.0950, 0.1252, 0.0883, 0.0987, 0.0839, 0.0896, 0.1287, 0.1003, 0.0780,
         0.1125],
        [0.0925, 0.1295, 0.0866, 0.0972, 0.0842, 0.0823, 0.1266, 0.1054, 0.0787,
         0.1170],
        [0.0966, 0.1296, 0.0908, 0.0967, 0.0821, 0.0892, 0.1216, 0.1050, 0.0776,
         0.1106]], grad_fn=<SoftmaxBackward0>)


In [17]:
# Show the module tree, then for every registered parameter its name,
# its shape, and a slice of its first two entries along dim 0.
print("Model structure: {}\n".format(model))
for name, param in model.named_parameters():
    print("Layer: {} | Size: {} | Value: {}\n".format(name, param.shape, param[:2]))

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Value: tensor([[-1.6362e-03, -4.1012e-05,  2.1428e-02,  ..., -9.8728e-04,
         -1.9958e-02, -2.3179e-02],
        [ 1.7746e-02,  5.5712e-03, -2.3535e-02,  ...,  8.2951e-03,
         -3.2924e-02, -2.1465e-02]], grad_fn=<SliceBackward0>)

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Value: tensor([-0.0226,  0.0166], grad_fn=<SliceBackward0>)

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Value: tensor([[-0.0422,  0.0209, -0.0186,  ...,  0.0111, -0.0156,  0.0255],
        [ 0.0121, -0.0437, -0.0382,  ..., -0.0171, -0.0172, -0.0288]],
       grad_fn=<Sli