# 1. Imports

In [1]:
# importing other dependencies
import numpy as np
# importing PyTorch
import torch
# import torch.nn Module
import torch.nn as nn

In [25]:
# checks whether the MPS backend can actually be used on this machine
print(torch.backends.mps.is_available())

# this ensures that the current PyTorch installation was built with MPS activated.
print(torch.backends.mps.is_built())

# select "mps" when it is available, otherwise fall back to the default "cpu".
# NOTE: is_available has to be *called* -- the bare function object is always
# truthy, which would pick "mps" even on machines without MPS support.
# NOTE(review): the cells below never call .to(device) on the model or the
# tensors, so as written everything still runs on the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

True
True


To demonstrate all the steps in sequence, we look at a Linear Regression model, trained and optimized via SGD.

The training data consists of 4 points `(1,2) (2,4) (3,6) (4,8)`.

We try to find the model: $y = f(x) = 2x$.

In [5]:
# Training set: the four points (1,2), (2,4), (3,6), (4,8),
# stored as float32 column tensors (one row per sample)
X_train = torch.tensor([1, 2, 3, 4], dtype=torch.float32).unsqueeze(1)
y_train = torch.tensor([2, 4, 6, 8], dtype=torch.float32).unsqueeze(1)

# rows are samples, columns are features
n_samples = X_train.size(0)
n_features = X_train.size(1)
print(f'#samples: {n_samples}, #features: {n_features}')

#samples: 4, #features: 1


In [6]:
# Held-out points (5,10) and (6,12), used to inspect predictions
# before and after training; same column layout as the training tensors
X_test = torch.tensor([5, 6], dtype=torch.float32).reshape(-1, 1)
y_test = torch.tensor([10, 12], dtype=torch.float32).reshape(-1, 1)

In [11]:
# model dimensions: one input per feature of the training data,
# and a single number as the output of each prediction
input_dim, output_dim = n_features, 1

# 2. Training Pipeline

Given data and everything else, a full training pipeline consists of 3 steps:

1. **Model Designing**: 
    - Input, Output
    - Forward Pass (with different layers)
2. **Loss and Optimizer**
3. **Training Loop**:
    - Forward: Compute Prediction and Loss
    - Backward: Compute Gradients
    - Update Weights (Parameters)

## 2.1 Model Designing

In [9]:
# we create custom wrappers to add multiple linear/non-linear layers in our model
# this is enabled by torch.nn module

class LinearRegression(nn.Module):
    """Linear regression model made of a single ``nn.Linear`` layer.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of values predicted per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # the model's only layer; further layers would be registered here
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, X):
        """Return the output of the linear layer applied to ``X``."""
        prediction = self.lin(X)
        return prediction

In [12]:
# seed PyTorch's global RNG so that the random weight initialisation below
# (and hence the printed predictions, weights and losses) is the same on
# every "Restart & Run All"
torch.manual_seed(42)

# initialise a model instance
model = LinearRegression(input_dim, output_dim)

In [16]:
# the weights start out randomly initialised, so the untrained model's
# predictions on the test set serve as a baseline to compare against later
pred_before = model(X_test)
print("Prediction before training: [f(5), f(6)] :\n", pred_before)

Prediction before training: [f(5), f(6)] :
 tensor([[2.8919],
        [3.3072]], grad_fn=<AddmmBackward0>)


## 2.2 Loss and Optimizer

In [22]:
# MSE loss, minimised with SGD

# training hyper-parameters
num_epochs = 300  # no. of epochs (iterations of the training loop)
lr = 0.01         # learning rate

# loss function: mean squared error
loss = nn.MSELoss()

# optimizer; general form: torch.optim.SGD(weights, lr, ...)
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

## 2.3 Training Loop

In [23]:
for epoch in range(num_epochs):
    # forward: predictions for the training set, then their loss
    y_train_hat = model(X_train)
    train_loss = loss(y_train_hat, y_train)

    # backward: compute the gradients of the loss w.r.t. the parameters
    train_loss.backward()

    # apply the update, then reset the gradients so they do not
    # accumulate into the next iteration
    optimizer.step()
    optimizer.zero_grad()

    # debugging output is only emitted on every 20th epoch
    if epoch % 20 != 0:
        continue

    # unpack the model's two parameters; W is already updated here, while
    # train_loss is the value computed before this epoch's update
    W, b = model.parameters()
    print("-----------------------")
    print("Epoch#:", epoch)
    print("Weights:", W)
    print("Loss:", train_loss)

-----------------------
Epoch#: 0
Weights: Parameter containing:
tensor([[1.7740]], requires_grad=True)
Loss: tensor(0.0742, grad_fn=<MseLossBackward0>)
-----------------------
Epoch#: 20
Weights: Parameter containing:
tensor([[1.7871]], requires_grad=True)
Loss: tensor(0.0658, grad_fn=<MseLossBackward0>)
-----------------------
Epoch#: 40
Weights: Parameter containing:
tensor([[1.7995]], requires_grad=True)
Loss: tensor(0.0584, grad_fn=<MseLossBackward0>)
-----------------------
Epoch#: 60
Weights: Parameter containing:
tensor([[1.8112]], requires_grad=True)
Loss: tensor(0.0518, grad_fn=<MseLossBackward0>)
-----------------------
Epoch#: 80
Weights: Parameter containing:
tensor([[1.8222]], requires_grad=True)
Loss: tensor(0.0459, grad_fn=<MseLossBackward0>)
-----------------------
Epoch#: 100
Weights: Parameter containing:
tensor([[1.8325]], requires_grad=True)
Loss: tensor(0.0407, grad_fn=<MseLossBackward0>)
-----------------------
Epoch#: 120
Weights: Parameter containing:
tensor([[

We can see the weights converging to `2`, which means our model is slowly converging towards $f(x) = 2x = w \cdot x$

In [24]:
# predictions of the trained model on the test set, for comparison with
# the values printed before training
pred_after = model(X_test)
print("Prediction after training: [f(5), f(6)] :\n", pred_after)

Prediction after training: [f(5), f(6)] :
 tensor([[ 9.8100],
        [11.7178]], grad_fn=<AddmmBackward0>)
