Linear Regression and Gradient Descent using basic pytorch operations

In [2]:
import numpy as np
import torch

In [3]:
inputs=np.array([[73, 67, 43],
                 [91, 88, 64],
                 [87, 134, 58],
                 [102, 43, 37],
                 [69, 96, 70.]], dtype='float32')

In [4]:
targets=np.array([[56, 70],
                  [81, 101],
                  [119, 133],
                  [22, 37],
                  [103, 119]], dtype='float32')

convert array into pytorch tensors

In [5]:
inputs=torch.from_numpy(inputs)
targets=torch.from_numpy(targets)
print(targets)
print(inputs)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])
tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])


Linear Regression Model

In [6]:
w=torch.randn(2,3, requires_grad=True)
b=torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 0.7042, -1.5674, -0.3579],
        [ 1.7298, -1.3018,  1.2664]], requires_grad=True)
tensor([-0.9878,  0.1373], requires_grad=True)


randon weights

In [7]:
def model(x):
  return x @ w.t()+b

@ means matrix multiplication and t() defines the transpose

In [8]:
preds=model(inputs)
print(preds)

tensor([[ -69.9865,   93.6475],
        [ -97.7417,  124.0402],
        [-170.5096,   49.6385],
        [  -9.8022,  167.4584],
        [-127.9193,   83.1674]], grad_fn=<AddBackward0>)


these are the predicted values

In [9]:
 print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [10]:
diff=preds-targets


In [11]:
diff
diff.numel()

10

In [12]:
torch.sum(diff*diff)

tensor(212314.6250, grad_fn=<SumBackward0>)

Loss function

In [13]:
def mse(t1, t2):
  diff=t1-t2
  return torch.sum(diff*diff)/diff.numel()


In [14]:
loss=mse(preds, targets)
print(loss)

tensor(21231.4629, grad_fn=<DivBackward0>)


52.561 is error between actual and predicted

compute Gradients

In [15]:
loss.backward()

In [16]:
print(w)
print(w.grad)

tensor([[ 0.7042, -1.5674, -0.3579],
        [ 1.7298, -1.3018,  1.2664]], requires_grad=True)
tensor([[-1.3965e+04, -1.7300e+04, -1.0198e+04],
        [ 1.4810e+03, -1.0777e+03, -4.9734e+00]])


In [17]:
print(b)
print(b.grad)

tensor([-0.9878,  0.1373], requires_grad=True)
tensor([-171.3918,   11.5904])


if gradient element is positive
increasing the weight element's value slightly will increase the loss
decreasing the weight element's value slightly will decrease the loss

If a gradient element is negative
increasing the weight element's value slightly will decrease the loss
decreasing the weight element's value slightly will increase the loss

Adjusting weights and bias

In [19]:
with torch.no_grad():
  w-=w.grad*1e-5
  b-=b.grad*1e-5
##multiplying ensures that weights do not differ by large value which is learning rate
##torch.no_grad to indicate to PyTorch that we shouldn't track, calculate, or modify gradients while updating the weights and biases.

In [21]:
##we reset the gradients to zero by invoking the .zero_() method. We need to do this because PyTorch accumulates gradients. Otherwise, the next time we invoke .backward on the loss, the new gradient values are added to the existing gradients, which may lead to unexpected results.
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [25]:
print(w, b)

tensor([[ 0.9835, -1.2214, -0.1539],
        [ 1.7002, -1.2803,  1.2665]], requires_grad=True) tensor([-0.9844,  0.1371], requires_grad=True)


Train the model using Gradient Descent

In [22]:
preds=model(inputs)
print(preds)

tensor([[-17.6413,  92.9335],
        [-28.8198, 123.2478],
        [-88.0126,  49.9555],
        [ 41.1152, 165.3676],
        [-61.1503,  83.1997]], grad_fn=<AddBackward0>)


In [23]:
loss=mse(preds, targets)
print(loss)

tensor(11332.5527, grad_fn=<DivBackward0>)


Train multiple multiple epochs

the process of adjusting the weights and bias using gradients multiple time and each iteration is called epochs

In [31]:
for i in range(500):
  preds=model(inputs)
  loss=mse(preds, targets)
  loss.backward()
  with torch.no_grad():
    w-=w.grad*1e-5
    b-=b.grad*1e-5
    w.grad.zero_()
    b.grad.zero_()

In [32]:
preds=model(inputs)
loss=mse(preds, targets)
print(loss)

tensor(5.9635, grad_fn=<DivBackward0>)


In [33]:
preds

tensor([[ 57.2448,  70.2242],
        [ 81.3412, 102.8011],
        [120.5308, 128.2489],
        [ 22.4152,  36.6258],
        [ 99.4407, 123.0959]], grad_fn=<AddBackward0>)

In [34]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])