In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor, Lambda, v2


In [2]:
# Choose where tensors and the model will live: prefer CUDA, then the MPS
# backend, and fall back to the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


In [3]:
class NeuralNetwork(nn.Module): # class NeuralNetwork inherits from nn.Module
    """Fully connected classifier: flatten -> Linear/ReLU -> Linear/ReLU -> Linear.

    Args:
        in_features: Number of values per sample once flattened (default 28*28).
        hidden_features: Width of both hidden layers (default 512).
        num_classes: Number of logits produced per sample (default 10).

    With the default arguments the network is identical to the original
    hard-coded 784 -> 512 -> 512 -> 10 architecture.
    """

    def __init__(self, in_features: int = 28*28, hidden_features: int = 512, num_classes: int = 10): # The constructor method of the NeuralNetwork class.
        super().__init__() # calls the constructor of the parent class nn.Module
        # nn.Flatten() keeps the first (batch) dimension and collapses the rest,
        # so a batch of shape (N, 28, 28) becomes (N, 784) -- a 2-D tensor, not 1-D.
        self.flatten = nn.Flatten()
        # nn.Sequential applies the contained modules one after another, in order.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features), # flattened input -> first hidden representation
            nn.ReLU(), # non-linearity between the linear layers
            nn.Linear(hidden_features, hidden_features), # first hidden -> second hidden representation
            nn.ReLU(), # non-linearity again
            nn.Linear(hidden_features, num_classes), # second hidden -> one raw score (logit) per class
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor: # Defines the computation performed each time the model is called.
        """Return the logits for a batch ``x``; the result has shape (batch, num_classes)."""
        x = self.flatten(x) # collapse every non-batch dimension into one
        logits = self.linear_relu_stack(x) # run the flattened batch through the linear/ReLU stack
        return logits # raw, un-normalised scores from the last linear layer

We create an instance of NeuralNetwork, move it to the device, and print its structure.


In [4]:
# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# Sanity-check the model by running it on one random 28x28 input.
X = torch.rand((1, 28, 28), device=device)  # leading dimension of 1 = a batch holding a single sample
logits = model(X)  # calling the module runs its forward() and yields one row of raw scores per sample
pred_probab = torch.softmax(logits, dim=1)  # normalise along the class dimension so each row sums to 1
y_pred = pred_probab.argmax(dim=1)  # index of the highest-probability class for each sample
print(f"Predicted class: {y_pred}")

Predicted class: tensor([7], device='mps:0')
