In [1]:
# pytorch
# Same as numpy
# The main diff is pytorch works on a gpu
import numpy as np
import torch
import pandas as pd

In [2]:
inp = np.arange(58,387,step = 5.5, dtype=np.float32).reshape(10,6)
inp.dtype

dtype('float32')

In [3]:
actual = np.arange(6000,9000,step = 320.7,dtype=np.float32)
actual.dtype
actual.shape

(10,)

In [4]:
#Convert everything to torch
inp= torch.from_numpy(inp)
actual = torch.from_numpy(actual)

In [5]:
# Initialise the wt's and bias
# requires grad will make sure that gradients will be automatically calculated for the tensor
# Define the datatypes to match the input and output tesnsor
wt = torch.randn(6, requires_grad=True)
bias = torch.randn(1, requires_grad=True)

In [6]:
wt.dtype,bias.dtype

(torch.float32, torch.float32)

In [7]:
# Model formula
def model(inp):
    return inp @ wt.t() + bias

In [8]:
# Example of model calculation
pred = model(inp)
pred, '+'*100 , inp

(tensor([12.7866, 13.0640, 13.3413, 13.6186, 13.8960, 14.1733, 14.4507, 14.7280,
         15.0053, 15.2827], grad_fn=<AddBackward0>),
 '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++',
 tensor([[ 58.0000,  63.5000,  69.0000,  74.5000,  80.0000,  85.5000],
         [ 91.0000,  96.5000, 102.0000, 107.5000, 113.0000, 118.5000],
         [124.0000, 129.5000, 135.0000, 140.5000, 146.0000, 151.5000],
         [157.0000, 162.5000, 168.0000, 173.5000, 179.0000, 184.5000],
         [190.0000, 195.5000, 201.0000, 206.5000, 212.0000, 217.5000],
         [223.0000, 228.5000, 234.0000, 239.5000, 245.0000, 250.5000],
         [256.0000, 261.5000, 267.0000, 272.5000, 278.0000, 283.5000],
         [289.0000, 294.5000, 300.0000, 305.5000, 311.0000, 316.5000],
         [322.0000, 327.5000, 333.0000, 338.5000, 344.0000, 349.5000],
         [355.0000, 360.5000, 366.0000, 371.5000, 377.0000, 382.5000]]))

In [9]:
# Calculation of the loss function - mse
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

# calculation of loss considering random wts and bias is initialized
loss = mse(pred,actual)
loss

tensor(56038804., grad_fn=<DivBackward0>)

In [10]:
loss.backward()

In [11]:
print(wt)
print(wt.grad)
print(bias.grad)

tensor([-0.8699,  0.5496, -0.3244,  0.2490,  0.4581, -0.0539],
       requires_grad=True)
tensor([-3242695.5000, -3324415.5000, -3406136.0000, -3487856.2500,
        -3569576.5000, -3651296.7500])
tensor([-14858.2334])


In [12]:
# -ve gradient - increase wt - less loss
# -ve gradient - decrease wt - more loss

# +ve gradient - increase wt - more loss
# +ve gradient - decrease wt - less loss

# Increase and decrease of loss is proportional to the gradient of the loss

# torch.no_grad that the gradient is not calculated while updating the wt's and the bias

# 1e-5 is the learning rate to make sure we move slowly towards the minimum of cost function

with torch.no_grad():
    wt -= wt.grad * 1e-5
    bias -= bias.grad * 1e-5

tensor(56038804., grad_fn=<DivBackward0>)

In [13]:
# grad_zero_() makes sure that the gradients are not added to .grad and are made equal to 0
wt.grad.zero_()
bias.grad.zero_()

tensor([0.])

In [14]:
# Loss calcuation to check if the loss improves after gradient descent

preds = model(inp)
print(preds)

loss = mse(preds, actual)
print(loss)

tensor([14930.9082, 21756.2363, 28581.5684, 35406.8984, 42232.2305, 49057.5586,
        55882.8867, 62708.2148, 69533.5469, 76358.8672],
       grad_fn=<AddBackward0>)
tensor(1.8084e+09, grad_fn=<DivBackward0>)


In [15]:
with torch.no_grad():
    wt -= wt.grad * 1e-5
    bias -= bias.grad * 1e-5
    wt.grad.zero_()
    bias.grad.zero_()

In [16]:
print(wt)
print(bias)

tensor([31.5571, 33.7937, 33.7370, 35.1276, 36.1538, 36.4590],
       requires_grad=True)
tensor([0.2911], requires_grad=True)


In [17]:
preds = model(inp)
loss = mse(preds, actual)
print(loss)

tensor(1.8084e+09, grad_fn=<DivBackward0>)


In [26]:
for i in range(100):
    preds = model(inp)
    loss = mse(preds, actual)
    loss.backward()
    with torch.no_grad():
        wt -= wt.grad * 1e-8
        bias -= bias.grad * 1e-8
        wt.grad.zero_()
        bias.grad.zero_()
    print(loss)

tensor(1.1706e+08, grad_fn=<DivBackward0>)
tensor(1.1551e+08, grad_fn=<DivBackward0>)
tensor(1.1398e+08, grad_fn=<DivBackward0>)
tensor(1.1247e+08, grad_fn=<DivBackward0>)
tensor(1.1098e+08, grad_fn=<DivBackward0>)
tensor(1.0951e+08, grad_fn=<DivBackward0>)
tensor(1.0806e+08, grad_fn=<DivBackward0>)
tensor(1.0663e+08, grad_fn=<DivBackward0>)
tensor(1.0523e+08, grad_fn=<DivBackward0>)
tensor(1.0384e+08, grad_fn=<DivBackward0>)
tensor(1.0247e+08, grad_fn=<DivBackward0>)
tensor(1.0112e+08, grad_fn=<DivBackward0>)
tensor(99783600., grad_fn=<DivBackward0>)
tensor(98469440., grad_fn=<DivBackward0>)
tensor(97173368., grad_fn=<DivBackward0>)
tensor(95895144., grad_fn=<DivBackward0>)
tensor(94634504., grad_fn=<DivBackward0>)
tensor(93391240., grad_fn=<DivBackward0>)
tensor(92165096., grad_fn=<DivBackward0>)
tensor(90955816., grad_fn=<DivBackward0>)
tensor(89763208., grad_fn=<DivBackward0>)
tensor(88587008., grad_fn=<DivBackward0>)
tensor(87426992., grad_fn=<DivBackward0>)
tensor(86282960., grad

In [27]:
preds = model(inp)
loss = mse(preds, actual)
print(loss)

tensor(32514160., grad_fn=<DivBackward0>)


In [28]:
preds

tensor([ 3773.3501,  5489.8921,  7206.4336,  8922.9756, 10639.5166, 12356.0586,
        14072.6006, 15789.1426, 17505.6836, 19222.2246],
       grad_fn=<AddBackward0>)

In [29]:
actual

tensor([6000.0000, 6320.7002, 6641.4004, 6962.1006, 7282.8008, 7603.5010,
        7924.2012, 8244.9014, 8565.6016, 8886.3018])