In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import v2

In [2]:
torch.cuda.is_available()
device = torch.device("cuda")

# Data Pipeline and dataloader



## Creating an image transform pipeline

In [3]:
image_transform = v2.Compose([
    v2.PILToTensor(),
    v2.ToDtype(torch.float32),
    v2.Lambda(lambda x: torch.flatten(x)) # Can also use x.view(-1) tp flatten the tensor
])

## Downloading the data

In [4]:
train_data = datasets.MNIST(root = "train_data",
                            train = True,
                            download = True,
                            transform = image_transform)


test_data = datasets.MNIST(root = "test_data",
                           train = False,
                           download = True,
                           transform = image_transform)


## Using Dataloader to create batched data

In [5]:
train_dataloader = DataLoader(train_data, batch_size=1000,  shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=1000, shuffle=True)

In [6]:
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

Feature batch shape: torch.Size([1000, 784])
Labels batch shape: torch.Size([1000])


# Creating a Neural Network

In [7]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.feed_forward_network = nn.Sequential(
            nn.Linear(784, 100),
            nn.Sigmoid(),
            nn.Linear(100, 100),
            nn.Sigmoid(),
            nn.Linear(100, 10),
        )
    
    def forward(self, X):
        y_hat = self.feed_forward_network(X)
        return y_hat

model = NeuralNetwork().to(device= device)
print(model)

NeuralNetwork(
  (feed_forward_network): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=100, out_features=10, bias=True)
  )
)


In [8]:
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (feed_forward_network): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=100, out_features=10, bias=True)
  )
)


Layer: feed_forward_network.0.weight | Size: torch.Size([100, 784]) | Values : tensor([[ 0.0330,  0.0290, -0.0036,  ..., -0.0196, -0.0143, -0.0068],
        [-0.0155,  0.0064, -0.0143,  ...,  0.0185,  0.0013,  0.0106]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: feed_forward_network.0.bias | Size: torch.Size([100]) | Values : tensor([0.0204, 0.0289], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: feed_forward_network.2.weight | Size: torch.Size([100, 100]) | Values : tensor([[ 0.0940, -0.0844, -0.0394,  0.0300,  0.0991, -0.0177, -0.0741, -0.0685,
         -0.0540, -0.0691, -0.0669, -0.0996, -0.0411, -0.0494,  0.0208,  0.0152,
         -0.0460,  0.0720, -0.0130, -0.0175,

## Defining a training loop

In [9]:
# Loss function
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
epochs = 100

for epoch in range(epochs):
    model.train() # Training Mode
    datasize = len(train_dataloader.dataset)
    train_loss, num_batches = 0, len(train_dataloader)
    print(f'X------------------Epoch : {epoch+1}/{epochs}------------------X')
    # Training Loop for each batch
    for batch, (X, y) in enumerate(train_dataloader):
        # Clearing the gradients
        optimizer.zero_grad()
        # Feedforward
        X = X.to(device)
        y = y.to(device)
        y_hat = model(X)
        loss = loss_fn(y_hat, y)
        train_loss += loss.item()
        # Backpropagation
        loss.backward()
        optimizer.step()
    train_loss /= num_batches
    print(f'Training Avg loss: {train_loss:>8f}')
    
    # Running inference for each epoch
    model.eval() # Evaluation mode
    datasize = len(test_dataloader.dataset)
    num_batches = len(test_dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad(): # Ensures no gradients are computed during evaluation mode.
        for X, y in test_dataloader:
            X = X.to(device)
            y = y.to(device)
            y_hat = model(X)
            test_loss += loss_fn(y_hat, y).item()
            correct += (y_hat.argmax(1) == y).type(torch.float).sum().item()
    
    test_loss /= num_batches
    correct /= datasize
    print(f"Testing Avg loss: {test_loss:>8f} \nAccuracy: {(100*correct):>0.1f}% \n")

X------------------Epoch : 1/100------------------X
Training Avg loss: 2.298409
Testing Avg loss: 2.268742 
Accuracy: 25.3% 

X------------------Epoch : 2/100------------------X
Training Avg loss: 2.254421
Testing Avg loss: 2.235836 
Accuracy: 37.2% 

X------------------Epoch : 3/100------------------X
Training Avg loss: 2.224627
Testing Avg loss: 2.206972 
Accuracy: 43.5% 

X------------------Epoch : 4/100------------------X
Training Avg loss: 2.196158
Testing Avg loss: 2.177841 
Accuracy: 48.0% 

X------------------Epoch : 5/100------------------X
Training Avg loss: 2.166557
Testing Avg loss: 2.146546 
Accuracy: 52.2% 

X------------------Epoch : 6/100------------------X
Training Avg loss: 2.134478
Testing Avg loss: 2.113151 
Accuracy: 56.5% 

X------------------Epoch : 7/100------------------X
Training Avg loss: 2.099983
Testing Avg loss: 2.077372 
Accuracy: 56.6% 

X------------------Epoch : 8/100------------------X
Training Avg loss: 2.062876
Testing Avg loss: 2.039095 
Accuracy: 

# Saving and Loading the model

In [10]:
# Save
torch.save(model, "torch_model.pt")

# Load
new_model = torch.load("torch_model.pt")
new_model.eval()

NeuralNetwork(
  (feed_forward_network): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=100, out_features=10, bias=True)
  )
)

## Saving the parameters

The above code saves the entire model

Instead we can save the model parameters and the optimizer parameters seperately in the form of dictionary using `state_dict()`. This allows us to view and update them easily

Source: https://pytorch.org/tutorials/beginner/saving_loading_models.html

In [11]:
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Model's state_dict:
feed_forward_network.0.weight 	 torch.Size([100, 784])
feed_forward_network.0.bias 	 torch.Size([100])
feed_forward_network.2.weight 	 torch.Size([100, 100])
feed_forward_network.2.bias 	 torch.Size([100])
feed_forward_network.4.weight 	 torch.Size([10, 100])
feed_forward_network.4.bias 	 torch.Size([10])
Optimizer's state_dict:
state 	 {0: {'momentum_buffer': None}, 1: {'momentum_buffer': None}, 2: {'momentum_buffer': None}, 3: {'momentum_buffer': None}, 4: {'momentum_buffer': None}, 5: {'momentum_buffer': None}}
param_groups 	 [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1, 2, 3, 4, 5]}]


## Saving and loading model paramaters

In [12]:
torch.save(model.state_dict(), "torch_model_params.pt")
torch.save(optimizer.state_dict(), "optimizer_params.pt")

new_model = NeuralNetwork()
new_model.load_state_dict(torch.load("torch_model_params.pt"))
new_model.eval()

NeuralNetwork(
  (feed_forward_network): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=100, out_features=10, bias=True)
  )
)

Since we can save model and optimizer parameters, we can use them as checkpoints during the training loop like adding this piece of code.

```python
if (epoch % 5 == 0):
    torch.save({
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "loss": loss,
        "accuracy": accuracy,
    }, "checkpoint.pt.tar") # Tar extension allows us to save multiple components in a single file.
```
This code allows us to save the parameters every 5 epochs. However do keep in mind that using same filename everytime we save the checkpoint overrides the previous checkpoint.

## Using Onnx to save the model

In [None]:
# Dummy input
dummy_input = torch.randn(100, 28*28, dtype=torch.float32).to(device)
onnx_program = torch.onnx.dynamo_export(model, dummy_input)
onnx_program.save("torch_model.onnx")