<a href="https://colab.research.google.com/github/pj2111/Assignments/blob/master/assignment_data/warmup_pytorch_modeldesign_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers torch datasets > /dev/null

# 1) Design the model (input, output size and forward pass)
    # Input and target data must both have shape m x n (samples x features), i.e. both need a feature dimension
    # The model needs to be designed with the feature length in mind, since those are the dimensions it will process
# 2) construct loss and optimizer
    # torch.nn module contains the variety of Loss Criterions
    # torch.optim module contains various optimizers
# 3) training loop

###  - forward pass: compute prediction
    # loss criterion is called with the target and predicted values

###  - backward pass: compute gradient & update weights
    # optimizers will take care of gradient, weight updates of the model
    # remember to call optimizer.zero_grad() each iteration so the gradients are
    # reset instead of accumulating

### Start by working on bare-bones matrix model LinReg Training

In [3]:
import torch

In [4]:
# Toy dataset for y = 2 * x: inputs 1..6 and their doubled targets, as float32 vectors.
X = torch.tensor(list(range(1, 7)), dtype=torch.float32)
y = torch.tensor([2 * value for value in range(1, 7)], dtype=torch.float32)

In [14]:
# The model's only weight: a scalar (0-dimensional) float32 tensor, created with
# requires_grad=True so that autograd can compute gradients for it.

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [5]:
def forward(x):
  """Predict with the bias-free linear model by scaling ``x`` with the global weight ``w``."""
  prediction = x * w
  return prediction

In [6]:
def loss(y, y_pred):
  """Return the mean squared error between targets ``y`` and predictions ``y_pred``."""
  squared_error = (y_pred - y) ** 2
  return squared_error.mean()

In [7]:
import numpy as np
# Gradient calculation is done using Numpy
def gradient(x, y, y_pred):
  """Return dL/dw of the mean-squared-error loss for the model y_pred = w * x.

  With L = mean((y_pred - y) ** 2), the derivative with respect to w is
  mean(2 * x * (y_pred - y)).

  Args:
    x: inputs (array-like, or a tensor that does not require grad).
    y: targets, same shape as ``x``.
    y_pred: predictions, same shape as ``x``.

  Returns:
    The gradient as a NumPy scalar.
  """
  x, y, y_pred = np.asarray(x), np.asarray(y), np.asarray(y_pred)
  # Average the per-sample terms. np.dot(...) only sums them, and calling
  # .mean() on that scalar is a no-op, which inflates the gradient by len(x).
  return np.mean(2 * x * (y_pred - y))

In [8]:
# Baseline: what the model predicts for x = 5 before any training has happened

untrained_prediction = forward(5)
print(f"Predict with simple forward fn: {untrained_prediction}")

Predict with simple forward fn: 0.0


In [11]:
# Training hyperparameters shared by both training loops below

learning_rate = 1e-2  # step size applied to each gradient update
n_iters = 30          # number of passes over the full dataset

In [None]:
# Manual gradient descent: the gradient comes from the hand-written `gradient` helper
w = 0.0  # restart from an untrained weight held as a plain number (no autograd here)

for epoch in range(n_iters):
  # forward pass, then the loss for the current weight
  y_pred = forward(X)
  l = loss(y, y_pred)

  # step against the gradient
  dw = gradient(X, y, y_pred)
  w = w - learning_rate * dw

  # `epoch % 1` is always 0, so every epoch is reported
  if epoch % 1 != 0:
    continue
  print(f"epoch {epoch + 1}: w= {w: 3f} loss: {l: 8f}")

In [13]:
# Same probe input as before training, now using the fitted weight
trained_prediction = forward(5)
print(f"Prediction after training f(5) = {trained_prediction}")

Prediction after training f(5) = 9.999804496765137


In [None]:
# gradients calculated using tensor backward() method

# Start from a fresh autograd-tracked weight. The NumPy loop above rebinds `w`
# to a plain number, which has no `.grad` and would make `l.backward()` fail.
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)

for epoch in range(n_iters):
  y_pred = forward(X)
  l = loss(y , y_pred)
  l.backward()  # autograd fills w.grad with dl/dw
  with torch.no_grad():
    # update outside the graph so the step itself is not tracked
    w -= learning_rate * w.grad
  w.grad.zero_()  # backward() accumulates, so clear before the next epoch
  if epoch % 1 == 0:
    print(f"epoch {epoch + 1}: w= {w: 3f} loss: {l: 8f}")

#### Going full torch mode

In [18]:
import torch
import torch.nn as nn

In [17]:
# torch layers such as nn.Linear read the last dimension as the feature axis,
# so each sample is wrapped in its own row: shape (n_samples, n_features)
X = torch.tensor([[value] for value in range(1, 6)], dtype=torch.float32)
Y = torch.tensor([[2 * value] for value in range(1, 6)], dtype=torch.float32)

# rows are samples; n_features is the number of columns present in each
# datapoint (inputs and targets have the same count here)
n_samples = X.shape[0]
n_features = X.shape[1]
x_test = torch.tensor([5], dtype=torch.float32)

In [19]:
# Models are plain Python classes that follow the nn.Module blueprint.

class LinearRegression(nn.Module):
    """Linear regression model backed by a single ``nn.Linear`` layer.

    Args:
        input_dim: number of features in each input sample.
        output_dim: number of values predicted per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # the model's only layer
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the predictions."""
        prediction = self.lin(x)
        return prediction

# inputs and targets both have n_features columns, so one value sizes both ends
model = LinearRegression(input_dim=n_features, output_dim=n_features)