In [14]:
import torch
import os
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


In [15]:
# Run on the CUDA GPU when PyTorch reports one is available; otherwise use the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print(f"Using {device} device")

Using cpu device


In [16]:
class NeuralNetwork(nn.Module):
  """Fully connected classifier: flatten, two hidden ReLU layers, linear output.

  With the default arguments the layer sizes are 28*28 -> 512 -> 512 -> 10.
  The submodule names (``flatten`` and ``linear_relu_stack``) do not depend on
  the arguments, so ``state_dict()`` keys are the same for every configuration.

  Args:
    in_features: Number of values per sample once the input is flattened.
    hidden_features: Width of each of the two hidden layers.
    num_classes: Number of logits produced per sample.
  """

  def __init__(
    self,
    in_features: int = 28*28,
    hidden_features: int = 512,
    num_classes: int = 10,
  ) -> None:
    super().__init__()
    # nn.Flatten() with its defaults keeps dim 0 and merges all remaining dims,
    # so each sample becomes one vector of `in_features` values.
    self.flatten = nn.Flatten()
    # Layers are created in a fixed order so that seeded initialization is
    # independent of how the sizes were supplied.
    self.linear_relu_stack = nn.Sequential(
      nn.Linear(in_features, hidden_features),
      nn.ReLU(),
      nn.Linear(hidden_features, hidden_features),
      nn.ReLU(),
      nn.Linear(hidden_features, num_classes),
    )

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Return the raw logits for a batch of inputs.

    Args:
      x: Batch whose non-leading dims flatten to ``in_features`` values
        per sample.

    Returns:
      Tensor with one row of ``num_classes`` logits per sample. No softmax is
      applied here.
    """
    x = self.flatten(x)
    logits = self.linear_relu_stack(x)
    return logits

In [17]:
# Instantiate the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [18]:
# A batch holding one random 28x28 input, created directly on the selected device.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
# Normalise the logits along dim 1, then take the index of the largest probability.
pred_prob = torch.softmax(logits, dim=1)
y_pred = pred_prob.argmax(dim=1)
print(f"Predicted class: {y_pred}")
print(logits.shape)

Predicted class: tensor([1])
torch.Size([1, 10])


In [19]:
# Random stand-in for a mini-batch: 3 samples of 28x28 values each.
input_image = torch.rand((3, 28, 28))
print(input_image.size())


torch.Size([3, 28, 28])


In [20]:
# nn.Flatten() with its defaults keeps dim 0 and merges the remaining dims,
# so the (3, 28, 28) batch becomes (3, 784).
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [24]:
# Linear layer mapping each flattened 784-value sample to 20 features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [25]:
print(f"Before ReLU: {hidden1}\n\n")
# ReLU replaces negative entries with zero and leaves the others unchanged.
# NOTE: this rebinds `hidden1`, so re-running the cell prints already-rectified
# values under "Before ReLU".
hidden1 = torch.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.2582,  0.1952,  0.2498,  0.0676, -0.0754,  0.1412,  0.0410, -0.0386,
          0.2242, -0.5943, -0.3985, -0.0013,  0.0726,  0.1370, -0.2599,  0.3591,
         -0.3895, -0.5784, -0.1149, -0.4168],
        [ 0.0999, -0.1654,  0.0788, -0.0606, -0.0663, -0.0291,  0.4174,  0.1671,
          0.2376, -0.5802, -0.4034, -0.2789,  0.0521,  0.0054,  0.1459,  0.2525,
         -0.0730, -0.4854, -0.0340, -0.5142],
        [ 0.0969,  0.1237, -0.1845,  0.0340,  0.0920,  0.2034,  0.0574, -0.0320,
          0.3760, -0.8300, -0.5136, -0.4085,  0.0011,  0.2264,  0.0270,  0.3254,
         -0.2567, -0.1340,  0.4027, -0.1493]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.1952, 0.2498, 0.0676, 0.0000, 0.1412, 0.0410, 0.0000, 0.2242,
         0.0000, 0.0000, 0.0000, 0.0726, 0.1370, 0.0000, 0.3591, 0.0000, 0.0000,
         0.0000, 0.0000],
        [0.0999, 0.0000, 0.0788, 0.0000, 0.0000, 0.0000, 0.4174, 0.1671, 0.2376,
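         0.0000, 0.0000, 0.0000, 0.0521, 0.0054, 0.1459, 0.2525, 0.0000, 0.0000,
         0.0000, 0.0000],
        [0.0969, 0.1237, 0.0000, 0.0340, 0.0920, 0.2034, 0.0574, 0.0000, 0.3760,
         0.0000, 0.0000, 0.0000, 0.0011, 0.2264, 0.0270, 0.3254, 0.0000, 0.0000,
         0.4027, 0.0000]], grad_fn=<ReluBackward0>)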

In [27]:
# Chain the existing `flatten` and `layer1` objects with a ReLU and a fresh
# 20 -> 10 output layer; `layer1` is shared with the earlier cell, not copied.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))
# The input is drawn after the modules are built, so random-number consumption
# follows the same order as statement execution.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)
print(logits.shape)

torch.Size([3, 10])


In [28]:
# Softmax along dim 1 rescales each row of `logits` into non-negative values
# that sum to 1, i.e. one probability per output column.
softmax = nn.Softmax(dim=1)
pred_prob = softmax(logits)
print(pred_prob)

tensor([[0.0932, 0.0915, 0.1248, 0.1226, 0.1027, 0.0780, 0.0783, 0.1172, 0.0789,
         0.1128],
        [0.0826, 0.0937, 0.1289, 0.1264, 0.1155, 0.0801, 0.0724, 0.1125, 0.0874,
         0.1005],
        [0.0903, 0.0783, 0.1252, 0.1183, 0.1097, 0.0911, 0.0783, 0.1111, 0.0898,
         0.1079]], grad_fn=<SoftmaxBackward0>)
