<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Defining-the-network" data-toc-modified-id="Defining-the-network-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Defining the network</a></span></li><li><span><a href="#Creating-an-object-of-MLP-class" data-toc-modified-id="Creating-an-object-of-MLP-class-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Creating an object of MLP class</a></span></li><li><span><a href="#Inputs-and-outputs" data-toc-modified-id="Inputs-and-outputs-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Inputs and outputs</a></span></li><li><span><a href="#Implementing-SGD-manually" data-toc-modified-id="Implementing-SGD-manually-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Implementing SGD manually</a></span><ul class="toc-item"><li><span><a href="#Method-1" data-toc-modified-id="Method-1-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Method 1</a></span></li><li><span><a href="#Method-2" data-toc-modified-id="Method-2-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Method 2</a></span></li></ul></li></ul></div>

# Implementing SGD manually

## Defining the network

In [7]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Small multi-layer perceptron with a single hidden layer.

    The forward computation is ``Linear -> ReLU -> Linear -> sigmoid``.

    Args:
        n_in: Number of input features per sample.
        n_hid: Number of units in the hidden layer.
        n_out: Number of output units per sample.
    """

    def __init__(self, n_in, n_hid, n_out):
        super().__init__()
        # Layers are created in this order (hidden first, then output) so that
        # parameter registration and random initialisation order stay fixed.
        self.hid = nn.Linear(in_features=n_in, out_features=n_hid)
        self.out = nn.Linear(in_features=n_hid, out_features=n_out)

    def forward(self, x):
        """Return the sigmoid-activated network output for the input ``x``."""
        hidden = torch.relu(self.hid(x))
        # The final sigmoid squashes every output into the range [0, 1].
        return torch.sigmoid(self.out(hidden))

## Creating an object of MLP class

In [8]:
# Seed PyTorch's global RNG before anything random happens, so that the
# randomly initialised layer weights (and random tensors drawn in later cells)
# are identical on every Restart Kernel -> Run All.
torch.manual_seed(0)

# 10 input features -> 5 hidden units -> 1 output
model = MLP(n_in=10, n_hid=5, n_out=1)

## Inputs and outputs

In [9]:
# Inputs: 15 samples with 10 features each, drawn from a standard normal.
x = torch.randn((15, 10))
# Targets: one value per sample, also drawn from a standard normal.
# NOTE: these targets are unbounded, while MLP.forward ends in a sigmoid, so
# the network output can only ever lie in [0, 1].
y = torch.randn((15, 1))

## Implementing SGD manually

### Method 1

In [10]:
loss_fn = nn.MSELoss()
lr = 0.01    # learning rate (step size of each update)
epochs = 10  # number of update steps; every step uses the whole (x, y) batch

for epoch in range(epochs):
    # Forward pass. nn.MSELoss takes (input, target), i.e. the prediction first.
    y_pred1 = model(x)
    loss1 = loss_fn(y_pred1, y)

    # Backward pass: populates .grad on every parameter that produced loss1.
    loss1.backward()

    # Manual gradient-descent step, one layer at a time. This relies on every
    # direct child of the model exposing .weight and .bias (as nn.Linear does).
    for child in model.children():
        with torch.no_grad():  # the in-place updates must not be tracked by autograd
            child.weight -= lr * child.weight.grad  # update weights
            child.bias -= lr * child.bias.grad      # update biases
        # backward() accumulates into .grad, so after the weights and biases
        # are updated, reset their gradients to zero for the next epoch.
        child.weight.grad.zero_()
        child.bias.grad.zero_()

    print("Epoch: ", "{:4d}/{:4d} ==>".format(epoch+1,epochs), "Loss:", loss1.item())

Epoch:     1/  10 ==> Loss: 1.0676789283752441
Epoch:     2/  10 ==> Loss: 1.066733717918396
Epoch:     3/  10 ==> Loss: 1.0657926797866821
Epoch:     4/  10 ==> Loss: 1.064855694770813
Epoch:     5/  10 ==> Loss: 1.0639225244522095
Epoch:     6/  10 ==> Loss: 1.0629934072494507
Epoch:     7/  10 ==> Loss: 1.062068223953247
Epoch:     8/  10 ==> Loss: 1.061146855354309
Epoch:     9/  10 ==> Loss: 1.0602294206619263
Epoch:    10/  10 ==> Loss: 1.0593159198760986


### Method 2

In [12]:
loss_fn = nn.MSELoss()
lr = 0.01    # learning rate (step size of each update)
epochs = 10  # number of update steps; every step uses the whole (x, y) batch

# NOTE: this loop updates the same `model` object in place, so it continues
# from whatever weights the model currently holds rather than starting fresh.
for epoch in range(epochs):
    # Forward pass. nn.MSELoss takes (input, target), i.e. the prediction first.
    y_pred2 = model(x)
    loss2 = loss_fn(y_pred2, y)

    # Backward pass: populates .grad on every parameter that produced loss2.
    loss2.backward()

    # Manual gradient-descent step over all parameters at once. Unlike
    # iterating over layers, this does not care how the parameters are named.
    with torch.no_grad():  # the in-place updates must not be tracked by autograd
        for param in model.parameters():
            param -= lr * param.grad  # update the parameter
            # backward() accumulates into .grad, so after the parameter is
            # updated, reset its gradient to zero for the next epoch.
            param.grad.zero_()

    print("Epoch: ", "{:4d}/{:4d} ==>".format(epoch+1,epochs), "Loss:", loss2.item())

Epoch:     1/  10 ==> Loss: 1.0495771169662476
Epoch:     2/  10 ==> Loss: 1.0487186908721924
Epoch:     3/  10 ==> Loss: 1.0478636026382446
Epoch:     4/  10 ==> Loss: 1.0470119714736938
Epoch:     5/  10 ==> Loss: 1.0461636781692505
Epoch:     6/  10 ==> Loss: 1.045318841934204
Epoch:     7/  10 ==> Loss: 1.0444772243499756
Epoch:     8/  10 ==> Loss: 1.043639063835144
Epoch:     9/  10 ==> Loss: 1.04280424118042
Epoch:    10/  10 ==> Loss: 1.0419726371765137
