In [2]:
import torch
from torch.autograd import Variable

In [2]:
x = torch.randn(4, requires_grad=True)

y = x + 10
z = y*y+2

z = z.mean()

z.backward() # dz/dx
print(x.grad)
print(z)

tensor([4.8736, 4.9543, 5.7929, 4.2864])
tensor(102.2277, grad_fn=<MeanBackward0>)


In [3]:
# this is how gradient computation is done using autograd  
w = Variable(torch.tensor([2.0]), requires_grad=True)
x = [10 ,17, 29]
y = [21, 32, 43]

L = 0.01

def forward(x):
  return x*w

def loss(x, y):
  y_pred=forward(x)
  return (y_pred-y)*(y_pred-y)

for epoch in range(10):
  for x_val, y_val in zip(x, y):
    l = loss(x_val, y_val)
    l.backward() # after calling backward, dl/dw is calculated and stored in w.grad

    w.data = w.data - L*w.grad.data # weight updation formula w' = w - L*(dl/dw)

    w.grad.data.zero_()#manually reset gradients to zero after updating
  print(' progress: ', epoch, l.data[0])

 progress:  0 tensor(1052.6139)
 progress:  1 tensor(5519017.)
 progress:  2 tensor(3.1597e+10)
 progress:  3 tensor(1.8068e+14)
 progress:  4 tensor(1.0332e+18)
 progress:  5 tensor(5.9080e+21)
 progress:  6 tensor(3.3784e+25)
 progress:  7 tensor(1.9319e+29)
 progress:  8 tensor(1.1047e+33)
 progress:  9 tensor(6.3170e+36)


In [4]:
t_x = torch.randint(17, 42, (10,))
t_y = torch.randint(8, 43, (10,))

t_y, t_x

(tensor([10, 21, 17, 35, 37, 26, 24, 14, 27, 21]),
 tensor([27, 22, 34, 28, 31, 24, 23, 27, 20, 25]))

In [5]:
def model(t_x, w, b):
  return t_x*w + b

def loss_fn(y_p, y):
  diff = (y_p-y)*(y_p-y)
  return diff.mean()

In [6]:
w = torch.ones(1, requires_grad=True)
b = torch.ones(1, requires_grad=True)
y_p = model(t_x, w, b)

y_p

tensor([28., 23., 35., 29., 32., 25., 24., 28., 21., 26.],
       grad_fn=<AddBackward0>)

In [7]:
loss = loss_fn(y_p, t_y)
loss

tensor(97.1000, grad_fn=<MeanBackward0>)

In [8]:
loss.backward()
w.grad, b.grad
# similarly, we can use a loop to train and update the weights

(tensor([235.6000]), tensor([7.8000]))

In [9]:
# MSEloss function of torch module
from torch.nn import MSELoss

loss = MSELoss()
input = torch.randn(10, 5, requires_grad=True)
targets = torch.randn(10, 5)
output = loss(input, targets)
output.backward()

In [13]:
output

tensor(1.9959, grad_fn=<MseLossBackward>)

In [12]:
output.grad_fn

<MseLossBackward at 0x7f273e173810>

In [20]:
# in the above block grad_fn is the derivative of loss function with respective to the parameters of the model, now we will display how is grad_fn computed 
def dloss_fn(t_y, y_p):   # this function is for after derivative of first part of derivative of loss function wrt 'w' 
  d_diff = 2*(t_y-y_p)
  return d_diff

def model(t_x, w, b):
  return t_x*w + b

def dmodel_w(t_x, w, b):
  return t_x

def dmodel_b(t_x, w, b):
  return 1.0

def grad_fn(t_x, t_y, y_p, w, b):
  dloss_dw = dloss_fn(t_y, y_p)*dmodel_w(t_x, w, b)
  dloss_db = dloss_fn(t_y, y_p)*dmodel_b(t_x, w, b)

  return torch.stack([dloss_dw.mean(), dloss_db.mean()])

In [None]:
params = torch.tensor([1.0, 0.0])

learning_rate = 1e-4

n_epochs = 100

for epoch in range(n_epochs):
  w, b = params

  y_pred = model(t_x, w, b)

  loss = loss_fn(y_pred, t_y)

  print('loss: ', loss)

  grad = grad_fn(t_x, t_y, y_pred, w, b)

  params = params - learning_rate*grad

  print('params: ', params)
params


In [35]:
model(10, 0.76, 0.99)

8.59

In [3]:
import numpy as np
#linear regression
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [39]:
torch.manual_seed(12) # to get the same random values every time we run this cell
weights = torch.randn(2,3, requires_grad=True)
bias = torch.randn(2, requires_grad=True)

In [40]:
def model(x, w, b):
  return x@w.t()+b  # @ represents matrix multiplication in pytorch

In [41]:
def mse(x, y, w, b):
  y_pred = model(x, w, b)
  l = (y-y_pred)**2
  return l.mean()  # mean squared error loss function

In [42]:
loss = mse(inputs, targets, weights, bias)

In [44]:
loss.backward()

In [45]:
print(weights)
print(weights.grad)

tensor([[-0.2138, -1.3780, -0.0546],
        [ 0.4515,  0.7858, -1.0884]], requires_grad=True)
tensor([[-17883.0391, -20738.8398, -12449.6934],
        [ -3700.6440,  -4621.4058,  -2911.5078]])


In [47]:
with torch.no_grad():
  weights -= weights.grad*1e-5
  bias -= bias.grad*1e-5
  weights.grad.zero_()
  bias.grad.zero_()

In [48]:
print(weights)

tensor([[-0.0350, -1.1706,  0.0699],
        [ 0.4886,  0.8320, -1.0593]], requires_grad=True)


In [50]:
#calculating loss again
predictions = model(inputs, weights, bias)
loss = mse(inputs, targets, weights, bias)
print(loss)
# the loss value decreased, so we will repeat this process in a loop for a number of times to optimize our weights

tensor(19015.7852, grad_fn=<MeanBackward0>)


In [55]:
n_epochs = 1000
for epoch in range(n_epochs):
  predictions = model(inputs, weights, bias)
  loss = mse(inputs, targets, weights, bias)
  loss.backward()
  with torch.no_grad():
    weights -= weights.grad*1e-5
    bias -= bias.grad*1e-5
    weights.grad.zero_()
    bias.grad.zero_()

In [56]:
preds = model(inputs, weights, bias)

In [57]:
preds

tensor([[ 57.0609,  70.7343],
        [ 82.4694,  95.9387],
        [118.2619, 143.0327],
        [ 21.0487,  39.8850],
        [102.2867, 109.0487]], grad_fn=<AddBackward0>)

In [58]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [None]:
#after training the model for over a 1000 epochs we are getting better results, these results are better given the size of dataset

In [None]:
#we will solve a similar probelm using torch built-ins

In [59]:
import torch.nn as nn

In [60]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70], 
                   [74, 66, 43], 
                   [91, 87, 65], 
                   [88, 134, 59], 
                   [101, 44, 37], 
                   [68, 96, 71], 
                   [73, 66, 44], 
                   [92, 87, 64], 
                   [87, 135, 57], 
                   [103, 43, 36], 
                   [68, 97, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119],
                    [57, 69], 
                    [80, 102], 
                    [118, 132], 
                    [21, 38], 
                    [104, 118], 
                    [57, 69], 
                    [82, 100], 
                    [118, 134], 
                    [20, 38], 
                    [102, 120]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [62]:
from torch.utils.data import TensorDataset

train_data = TensorDataset(inputs, targets)

train_data[4:7]

(tensor([[69., 96., 70.],
         [74., 66., 43.],
         [91., 87., 65.]]), tensor([[103., 119.],
         [ 57.,  69.],
         [ 80., 102.]]))

In [64]:
from torch.utils.data import DataLoader

batch_size = 5
train_dl = DataLoader(train_data, batch_size, shuffle=True)

print(train_dl)

<torch.utils.data.dataloader.DataLoader object at 0x7ffb734e16d0>


In [67]:
for x, y in train_dl:
  print(x)
  print(y)
  break

tensor([[ 73.,  67.,  43.],
        [103.,  43.,  36.],
        [ 87., 135.,  57.],
        [102.,  43.,  37.],
        [ 88., 134.,  59.]])
tensor([[ 56.,  70.],
        [ 20.,  38.],
        [118., 134.],
        [ 22.,  37.],
        [118., 132.]])


In [72]:
#model
model = nn.Linear(3, 2)
list(model.parameters())

[Parameter containing:
 tensor([[ 0.5208, -0.5472,  0.1047],
         [-0.5581, -0.1801, -0.0708]], requires_grad=True),
 Parameter containing:
 tensor([ 0.0411, -0.2781], requires_grad=True)]

In [73]:
predicted = model(inputs)

predicted

tensor([[  5.8953, -56.1380],
        [  5.9758, -71.4552],
        [-21.9057, -77.0836],
        [ 33.5020, -67.5760],
        [ -9.2306, -61.0421],
        [  6.9633, -56.5160],
        [  6.6277, -71.3459],
        [-21.2803, -77.7126],
        [ 32.4341, -67.1980],
        [ -9.6467, -60.5548],
        [  6.5471, -56.0287],
        [  7.0438, -71.8332],
        [-22.5576, -77.1929],
        [ 33.9181, -68.0633],
        [-10.2985, -60.6641]], grad_fn=<AddmmBackward>)

In [75]:
# loss calculation

import torch.nn.functional as F

loss_fn = F.mse_loss
loss = loss_fn(predicted, targets)

loss

tensor(17376.6055, grad_fn=<MseLossBackward>)

In [76]:
#optimizer
opt = torch.optim.SGD(model.parameters(), lr=1e-5) # here model params are passed as an argument because to specify which values to update

In [77]:
def fit(model, n_epochs, train_dl, loss_fn, opt):

  for epoch in range(n_epochs):

    for x, y in train_dl:
      preds = model(x)

      loss = loss_fn(preds, y)

      loss.backward()

      opt.step()

      opt.zero_grad()
    if (epoch+1)%10==0:
      print(epoch+1, " loss = ", loss.item())

In [84]:
fit(model, 300, train_dl, loss_fn, opt) #300 epochs

10  loss =  6.6666998863220215
20  loss =  6.683084964752197
30  loss =  4.599188804626465
40  loss =  5.565306663513184
50  loss =  4.115866661071777
60  loss =  6.152379989624023
70  loss =  3.1166186332702637
80  loss =  3.8137192726135254
90  loss =  4.441690921783447
100  loss =  2.767148733139038


In [85]:
model(inputs)

tensor([[ 57.0875,  70.5629],
        [ 81.4351,  99.3694],
        [119.4505, 135.5342],
        [ 21.4134,  38.5853],
        [100.4726, 115.9034],
        [ 55.8336,  69.4612],
        [ 81.1744,  99.2310],
        [119.6823, 136.0122],
        [ 22.6674,  39.6870],
        [101.4659, 116.8666],
        [ 56.8269,  70.4245],
        [ 80.1811,  98.2677],
        [119.7112, 135.6727],
        [ 20.4201,  37.6220],
        [101.7265, 117.0051]], grad_fn=<AddmmBackward>)

In [86]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])

In [87]:
model(torch.tensor([23, 54, 21.]))

tensor([51.2971, 55.6723], grad_fn=<AddBackward0>)