In [1]:
import torch
from torch import nn

## Preparing Training Data

In [2]:
# Known ("ground truth") parameters of the line the model should learn
weight = 0.7
bias = 0.3

# Sampling range for the single input feature
start = 0
end = 1
step = 0.02

# Inputs as a column tensor: one row per sample, one feature per row
X = torch.arange(start, end, step).reshape(-1, 1)
# Targets follow the exact linear relation y = weight * X + bias
y = X * weight + bias

# Show the first ten inputs and their targets
X[:10], y[:10]

(tensor([[0.0000],
         [0.0200],
         [0.0400],
         [0.0600],
         [0.0800],
         [0.1000],
         [0.1200],
         [0.1400],
         [0.1600],
         [0.1800]]),
 tensor([[0.3000],
         [0.3140],
         [0.3280],
         [0.3420],
         [0.3560],
         [0.3700],
         [0.3840],
         [0.3980],
         [0.4120],
         [0.4260]]))

In [3]:
## Train / test split (no shuffling: the first 80% of rows train, the rest test)

train_split = int(len(X) * 0.8)

# Slice inputs and targets at the same index so the pairs stay aligned
X_train, X_test = X[:train_split], X[train_split:]
y_train, y_test = y[:train_split], y[train_split:]

len(X_train), len(y_train), len(X_test), len(y_test)


(40, 40, 10, 10)

## Creating Linear Regression

In [4]:
class LinearRegressionModel(nn.Module):
  """Single-feature linear regression: ``y = weight * x + bias``.

  ``weight`` and ``bias`` are one-element float tensors drawn from
  ``torch.rand`` at construction time and registered as ``nn.Parameter``,
  so they appear in ``parameters()`` / ``state_dict()`` and receive gradients.
  """

  def __init__(self):
    super().__init__()

    # weight is drawn (and registered) before bias; this fixes both the
    # sequence of random draws and the order of the entries in state_dict().
    initial_weight = torch.rand(1, dtype=torch.float)
    self.weight = nn.Parameter(initial_weight, requires_grad=True)

    initial_bias = torch.rand(1, dtype=torch.float)
    self.bias = nn.Parameter(initial_bias, requires_grad=True)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Compute ``weight * x + bias`` for the input tensor ``x``."""
    scaled = self.weight * x
    return scaled + self.bias

In [5]:
# Seed the global RNG first so the randomly initialised parameters are reproducible
torch.manual_seed(42)
model_0 = LinearRegressionModel()
model_0

LinearRegressionModel()

In [6]:
# Inspect the model's parameters (weight, then bias) right after initialisation
list(model_0.parameters())

[Parameter containing:
 tensor([0.8823], requires_grad=True),
 Parameter containing:
 tensor([0.9150], requires_grad=True)]

In [7]:
# Same parameters, keyed by name
model_0.state_dict()

OrderedDict([('weight', tensor([0.8823])), ('bias', tensor([0.9150]))])

In [8]:
y_pred = model_0(X_test)
y_pred
# Calling model_0 directly runs the forward pass with gradient tracking enabled,
# so the result carries a grad_fn (the graph that backpropagation would use).
# Making predictions does not need backpropagation, so we use
# torch.inference_mode() or torch.no_grad() while predicting, as shown below.

tensor([[1.6208],
        [1.6385],
        [1.6561],
        [1.6738],
        [1.6914],
        [1.7090],
        [1.7267],
        [1.7443],
        [1.7620],
        [1.7796]], grad_fn=<AddBackward0>)

In [9]:
# Held-out inputs and their true targets, to compare against the predictions
X_test, y_test

(tensor([[0.8000],
         [0.8200],
         [0.8400],
         [0.8600],
         [0.8800],
         [0.9000],
         [0.9200],
         [0.9400],
         [0.9600],
         [0.9800]]),
 tensor([[0.8600],
         [0.8740],
         [0.8880],
         [0.9020],
         [0.9160],
         [0.9300],
         [0.9440],
         [0.9580],
         [0.9720],
         [0.9860]]))

In [10]:
# Let's make predictions in inference mode.
# Inference mode turns off gradient tracking during the forward pass: gradients
# are only needed while training, not while predicting.
# torch.no_grad() is the closely related context manager and would work equally well here.

with torch.inference_mode():
  y_pred = model_0(X_test)

y_pred

# If we compare the predicted outputs with the actual y_test values, they differ a lot. Why?
# Because the model has not been trained yet, so gradient descent and backpropagation
# have not adjusted the weight and bias.
# The prediction here comes directly from the random weight and bias that are
# initialised when LinearRegressionModel is instantiated.

tensor([[1.6208],
        [1.6385],
        [1.6561],
        [1.6738],
        [1.6914],
        [1.7090],
        [1.7267],
        [1.7443],
        [1.7620],
        [1.7796]])


*Let's add a loss function and an optimizer to perform gradient descent*

In [11]:
# Loss function: L1 loss (mean absolute error) between predictions and targets
loss_fn = nn.L1Loss()

# Optimizer: stochastic gradient descent over the model's parameters
optimizer = torch.optim.SGD(
    params=model_0.parameters(),  # the parameters of our model that should be optimized
    lr=0.01,                      # lr = learning rate
)

**Building a Training Loop and a Testing Loop in Pytorch**

A few pointers -
* 1. Loop through the data and, for each epoch, do the following:
* 2. Forward pass (also called forward propagation) to make predictions on the data
* 3. Calculate the loss
* 4. Optimizer zero grad - reset the gradients left over from the previous iteration
* 5. Backward propagation - call backward on the loss to calculate the gradient of each of the parameters of the model
* 6. Optimizer step - here we adjust the parameters using the calculated gradients in order to reduce the loss.

In [12]:
# Parameters before training (still the random initial values)
model_0.state_dict()

OrderedDict([('weight', tensor([0.8823])), ('bias', tensor([0.9150]))])

In [13]:
# Fix the RNG seed so the run is reproducible
torch.manual_seed(42)

# Number of passes over the full training set
epochs = 110

## Training
for epoch in range(epochs):
  # 1. Put the model in training mode. train() only sets the mode; it does not
  #    train anything by itself (see the notes after this cell).
  model_0.train()

  # 2. Forward pass on the training inputs
  y_pred = model_0(X_train)

  # 3. Calculate the loss between the predictions and the training targets
  loss = loss_fn(y_pred, y_train)


  # 4. Zero the gradients. PyTorch accumulates gradients across backward()
  #    calls, so zero_grad() must run before loss.backward() and never
  #    between loss.backward() and optimizer.step().
  optimizer.zero_grad()

  # 5. Backpropagation: compute the gradient of the loss w.r.t. the model's parameters
  loss.backward()

  # 6. Step the optimizer: update the parameters using the gradients just computed
  optimizer.step()



  ## Testing
  # Switch to evaluation mode (turns off training-only behaviour of layers that have it)
  model_0.eval()
  # Inference mode disables gradient tracking: gradients are only needed while
  # training, not while predicting. torch.no_grad() would work equally well here.
  with torch.inference_mode():
    # Forward pass on the test inputs
    test_pred = model_0(X_test)

    # Calculate the test loss
    test_loss = loss_fn(test_pred, y_test)

  # Report the losses and current parameters every 10th epoch
  if epoch % 10 == 0:
    print(f"Epoch : {epoch} , Train Loss : {loss}, Test Loss : {test_loss}")
    print(f"Parameters : {model_0.state_dict()} \n")


Epoch : 0 , Train Loss : 0.6860889196395874, Test Loss : 0.7637526988983154
Parameters : OrderedDict([('weight', tensor([0.8784])), ('bias', tensor([0.9050]))]) 

Epoch : 10 , Train Loss : 0.5708791017532349, Test Loss : 0.6290428042411804
Parameters : OrderedDict([('weight', tensor([0.8394])), ('bias', tensor([0.8050]))]) 

Epoch : 20 , Train Loss : 0.45566922426223755, Test Loss : 0.4943329691886902
Parameters : OrderedDict([('weight', tensor([0.8004])), ('bias', tensor([0.7050]))]) 

Epoch : 30 , Train Loss : 0.34045934677124023, Test Loss : 0.35962313413619995
Parameters : OrderedDict([('weight', tensor([0.7614])), ('bias', tensor([0.6050]))]) 

Epoch : 40 , Train Loss : 0.2252494841814041, Test Loss : 0.2249133139848709
Parameters : OrderedDict([('weight', tensor([0.7224])), ('bias', tensor([0.5050]))]) 

Epoch : 50 , Train Loss : 0.1100396141409874, Test Loss : 0.09020347893238068
Parameters : OrderedDict([('weight', tensor([0.6834])), ('bias', tensor([0.4050]))]) 

Epoch : 60 , 

In [14]:
# Parameters after training; compare with the true values weight = 0.7 and bias = 0.3
model_0.state_dict()

OrderedDict([('weight', tensor([0.6977])), ('bias', tensor([0.3080]))])

In [15]:
# Predict on the test inputs with the trained model, without gradient tracking
with torch.inference_mode():
  y_pred_new = model_0(X_test)

y_pred_new

tensor([[0.8661],
        [0.8801],
        [0.8940],
        [0.9080],
        [0.9220],
        [0.9359],
        [0.9499],
        [0.9638],
        [0.9778],
        [0.9917]])

In [16]:
# Compare the predictions above with the original y_test targets
y_test

tensor([[0.8600],
        [0.8740],
        [0.8880],
        [0.9020],
        [0.9160],
        [0.9300],
        [0.9440],
        [0.9580],
        [0.9720],
        [0.9860]])


**1. Why we use model.train()**

--> *model.train() tells your model that you are training the model. This helps inform layers such as Dropout and BatchNorm, which are designed to behave differently during training and evaluation. For instance, in training mode, BatchNorm updates a moving average on each new batch; whereas, for evaluation mode, these updates are frozen.*

*More details: model.train() sets the mode to train. You can call either model.eval() or model.train(mode=False) to tell that you are testing. It is somewhat intuitive to expect train function to train model but it does not do that. It just sets the mode.*

Source - https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch/51433411#51433411?newreg=d75231dcb3114a89bad77134a7d8c2f1

**2. About optimizer.zero_grad()**

--> *optimizer.zero_grad() should always be invoked before loss.backward(). This ensures that the gradients are properly zeroed out and then computed and stored in the appropriate tensors' grad field*

https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch

https://datascience.stackexchange.com/questions/124487/what-do-we-mean-by-optimizer-zero-grad

https://github.com/yunjey/pytorch-tutorial/issues/238


**3. About model.eval()**

--> *model.eval() is a kind of switch for some specific layers/parts of the model that behave differently during training and inference (evaluation) time, for example Dropout layers, BatchNorm layers, etc. You need to switch them to their evaluation behaviour during model evaluation, and .eval() will do it for you. In addition, the common practice for evaluation/validation is to use torch.no_grad() together with model.eval() to turn off gradient computation.*


### Saving The Model

There are 3 main methods used for saving and loading a model -
1. torch.save() - saves a PyTorch object in Python pickle format
2. torch.load() - loads a saved PyTorch object
3. torch.nn.Module.load_state_dict() - loads a model's saved state dictionary into a model instance.

Pytorch Save and load model : - https://pytorch.org/tutorials/beginner/saving_loading_models.html

In [17]:
# Saving our PyTorch model

from pathlib import Path

# 1. Create the models directory (no error if it already exists)
MODEL_PATH = Path("models")
# exist_ok expects a bool: pass True, not the string "True"
# (any non-empty string, even "False", would be treated as truthy).
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Create the model save path
MODEL_NAME = "01_pytorch_workflow.pth"  # we can use .pth or .pt to save PyTorch objects
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
print(MODEL_SAVE_PATH)

models/01_pytorch_workflow.pth


In [18]:
# 3. Save the model: persist only its state_dict (the learned parameters)
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)

In [19]:
# The parameters that were just written to disk
model_0.state_dict()

OrderedDict([('weight', tensor([0.6977])), ('bias', tensor([0.3080]))])

In [20]:
# Confirm the checkpoint file exists (shell command; requires a Unix-like environment)
!ls -l models

total 4
-rw-r--r-- 1 root root 1568 Feb  7 16:37 01_pytorch_workflow.pth



 **Loading the Model to the instance**

In [21]:
# Loading the saved parameters into a model instance.
# Only the state_dict was saved, so first create a new instance of the same
# model class to receive it (it starts out with fresh random parameters).

loaded_model_0 = LinearRegressionModel()

# Load the saved state_dict of model_0 into the instance created above (loaded_model_0).
# torch.load unpickles the file; weights_only=True restricts unpickling to
# tensors and plain containers, so a tampered checkpoint cannot run arbitrary code.

loaded_model_0.load_state_dict(torch.load(f=MODEL_SAVE_PATH, weights_only=True))

<All keys matched successfully>

In [22]:
# The loaded instance now holds the saved parameters instead of its random initial ones
loaded_model_0.state_dict()

OrderedDict([('weight', tensor([0.6977])), ('bias', tensor([0.3080]))])

In [23]:
# Let's make some predictions.
# The earlier predictions (from model_0, before saving) were -
print(f"Earlier Predictions on X_test are : \n {y_pred_new}")

Earlier Predictions on X_test are : 
 tensor([[0.8661],
        [0.8801],
        [0.8940],
        [0.9080],
        [0.9220],
        [0.9359],
        [0.9499],
        [0.9638],
        [0.9778],
        [0.9917]])


In [24]:
# Make predictions with the model that was saved and then loaded back
loaded_model_0.eval()  # evaluation mode before predicting
with torch.inference_mode():  # no gradient tracking needed for prediction
  loaded_model_pred = loaded_model_0(X_test)
loaded_model_pred

tensor([[0.8661],
        [0.8801],
        [0.8940],
        [0.9080],
        [0.9220],
        [0.9359],
        [0.9499],
        [0.9638],
        [0.9778],
        [0.9917]])

In [25]:
# Element-wise check that the loaded model reproduces the original model's predictions
y_pred_new == loaded_model_pred

tensor([[True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True]])