In [1]:
# Source Doc: https://jovian.ml/aakashns/02-linear-regression

import numpy as np
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Feature matrix: one row per region, columns are (temp, rainfall, humidity)
inputs = torch.from_numpy(
    np.array(
        [
            [73, 67, 43],
            [91, 88, 64],
            [87, 134, 58],
            [102, 43, 37],
            [69, 96, 70],
        ],
        dtype='float32',
    )
)
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])

In [3]:
inputs.shape

torch.Size([5, 3])

In [4]:
# Target matrix: one row per region, columns are (apples, oranges)
targets = torch.from_numpy(
    np.array(
        [
            [56, 70],
            [81, 101],
            [119, 133],
            [22, 37],
            [103, 119],
        ],
        dtype='float32',
    )
)
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [5]:
targets.shape

torch.Size([5, 2])

>**Linear Regression**

    yield_apple  = w11 * temp + w12 * rainfall + w13 * humidity + b1
    yield_orange = w21 * temp + w22 * rainfall + w23 * humidity + b2

In [6]:
# Weights and biases
# Seed PyTorch's global RNG first so the random initialisation below (and
# every loss/gradient value derived from it) is the same on each
# Restart & Run All. The value 42 is arbitrary; any fixed integer works.
torch.manual_seed(42)
w = torch.randn(2, 3, requires_grad=True)  # 2 outputs x 3 features
b = torch.randn(2, requires_grad=True)     # one bias per output
print(w)
print(b)

tensor([[-0.3716, -1.0864, -0.0716],
        [ 1.7949, -1.0946, -0.3089]], requires_grad=True)
tensor([0.7005, 0.2067], requires_grad=True)


#### Regression Model in Matrix Notation
>### X * W<sup>T</sup> + B

In [7]:
# Define model
def reg_model(x):
    """Apply the linear model ``x @ w.T + b`` using the notebook-level ``w`` and ``b``."""
    weighted_sum = x.matmul(w.t())
    return weighted_sum + b

In [8]:
# Generate Predictions based on random initialisation of weights
preds = reg_model(inputs)
preds

tensor([[-102.2943,   44.6157],
        [-133.3009,   47.4516],
        [-181.3603,   -8.2244],
        [ -86.5678,  124.7891],
        [-134.2460,   -2.6449]], grad_fn=<AddBackward0>)

In [9]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [10]:
# MSE loss
def mse(t1, t2):
    """Return the mean squared error between two tensors.

    Equivalent to ``torch.sum(d * d) / d.numel()`` with ``d = t1 - t2``,
    where ``numel()`` is the total number of elements in the tensor.
    """
    squared_error = (t1 - t2) ** 2
    return squared_error.mean()

In [11]:
# Compute loss
loss  = mse(preds, targets)
loss

tensor(27523.1465, grad_fn=<MeanBackward0>)

In [12]:
# Compute gradients
loss.backward()

In [13]:
# Gradients for weights
print(w)
print(w.grad)

tensor([[-0.3716, -1.0864, -0.0716],
        [ 1.7949, -1.0946, -0.3089]], requires_grad=True)
tensor([[-16926.4219, -19431.3027, -11713.4082],
        [ -3690.2998,  -6648.0132,  -3595.3179]])


In [14]:
# Before proceeding, reset the gradients to zero with the in-place .zero_() method.
# This is needed because PyTorch accumulates gradients: the next call to
# .backward() on the loss adds the new gradient values to the existing ones,
# which may lead to unexpected results.
w.grad.zero_()
b.grad.zero_()
print(w.grad, b.grad, sep='\n')

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


### Adjust weights and biases using gradient descent

We'll reduce the loss and improve our model using the gradient descent optimization algorithm, which has the following steps:

1. Generate predictions
2. Calculate the loss
3. Compute gradients w.r.t. the weights and biases
4. Adjust the weights by subtracting a small quantity proportional to the gradient
5. Reset the gradients to zero

Let's implement the above step by step.

In [15]:
# Generate predictions
preds = reg_model(inputs)
print(preds)

tensor([[-102.2943,   44.6157],
        [-133.3009,   47.4516],
        [-181.3603,   -8.2244],
        [ -86.5678,  124.7891],
        [-134.2460,   -2.6449]], grad_fn=<AddBackward0>)


In [16]:
# Calculate the loss
loss = mse(preds, targets)
print(loss)

tensor(27523.1465, grad_fn=<MeanBackward0>)


In [17]:
# Compute gradients
loss.backward()
print(w.grad)
print(b.grad)

tensor([[-16926.4219, -19431.3027, -11713.4082],
        [ -3690.2998,  -6648.0132,  -3595.3179]])
tensor([-203.7539,  -50.8026])


In [18]:
# Adjust weights & reset gradients
with torch.no_grad():
    # One gradient-descent step (learning rate 1e-5), then clear the
    # accumulated gradient of each parameter.
    w.sub_(w.grad * 1e-5)
    w.grad.zero_()
    b.sub_(b.grad * 1e-5)
    b.grad.zero_()

A few things to note above:

- We use `torch.no_grad` to indicate to PyTorch that we shouldn't track, calculate or modify gradients while updating the weights and biases.

- We multiply the gradients by a really small number ($10^{-5}$ in this case) to ensure that we don't modify the weights by a really large amount, since we only want to take a small step in the downhill direction of the gradient. This number is called the *learning rate* of the algorithm.

- After we have updated the weights, we reset the gradients back to zero, to avoid affecting any future computations.

In [19]:
print(w)
print(b)

tensor([[-0.2024, -0.8921,  0.0456],
        [ 1.8318, -1.0281, -0.2729]], requires_grad=True)
tensor([0.7025, 0.2072], requires_grad=True)


In [20]:
# Calculate loss
preds = reg_model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(19579.8086, grad_fn=<MeanBackward0>)


In [21]:
# Train for 100 epochs
for epoch in range(100):
    # Forward pass and loss on the full dataset
    preds = reg_model(inputs)
    loss = mse(preds, targets)
    # Backward pass populates w.grad and b.grad
    loss.backward()
    with torch.no_grad():
        # Gradient-descent step (learning rate 1e-5), then clear each gradient
        w.sub_(w.grad * 1e-5)
        w.grad.zero_()
        b.sub_(b.grad * 1e-5)
        b.grad.zero_()

In [22]:
# calculate loss
preds = reg_model(inputs)
loss = mse(preds,targets)
loss

tensor(932.6644, grad_fn=<MeanBackward0>)

## Linear regression using PyTorch built-ins

In [23]:
# Input (temp, rainfall, humidity): the five base rows repeated three times
inputs = torch.from_numpy(
    np.tile(
        np.array(
            [[73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70]],
            dtype='float32',
        ),
        (3, 1),
    )
)

# Targets (apples, oranges): the matching five base rows repeated three times
targets = torch.from_numpy(
    np.tile(
        np.array(
            [[56, 70], [81, 101], [119, 133], [22, 37], [103, 119]],
            dtype='float32',
        ),
        (3, 1),
    )
)

In [24]:
# Define dataset
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [25]:
# Define data loader
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [26]:
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break

tensor([[ 73.,  67.,  43.],
        [ 69.,  96.,  70.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [ 73.,  67.,  43.]])
tensor([[ 56.,  70.],
        [103., 119.],
        [ 81., 101.],
        [119., 133.],
        [ 56.,  70.]])


In [27]:
# Define model
reg_model2 = nn.Linear(3, 2)
print(reg_model2.weight)
print(reg_model2.bias)

Parameter containing:
tensor([[-0.4432,  0.3406, -0.0900],
        [-0.0532, -0.3979, -0.2754]], requires_grad=True)
Parameter containing:
tensor([0.3045, 0.3222], requires_grad=True)


In [28]:
# Parameters
list(reg_model2.parameters())

[Parameter containing:
 tensor([[-0.4432,  0.3406, -0.0900],
         [-0.0532, -0.3979, -0.2754]], requires_grad=True),
 Parameter containing:
 tensor([0.3045, 0.3222], requires_grad=True)]

In [29]:
preds = reg_model2(inputs)
preds

tensor([[-13.0981, -42.0692],
        [-15.8127, -57.1679],
        [  2.1687, -73.6082],
        [-33.5863, -32.4102],
        [ -3.8769, -60.8324],
        [-13.0981, -42.0692],
        [-15.8127, -57.1679],
        [  2.1687, -73.6082],
        [-33.5863, -32.4102],
        [ -3.8769, -60.8324],
        [-13.0981, -42.0692],
        [-15.8127, -57.1679],
        [  2.1687, -73.6082],
        [-33.5863, -32.4102],
        [ -3.8769, -60.8324]], grad_fn=<AddmmBackward>)

In [30]:
# Define loss function
loss_fn = F.mse_loss
loss_mod2 = loss_fn(reg_model2(inputs),targets)
print(loss_mod2)

tensor(15973.0303, grad_fn=<MseLossBackward>)


In [31]:
# Define optimizer
# reg_model2.parameters() is handed to optim.SGD so that the optimizer knows
# which tensors (weight and bias) to modify during the update step.
opt = torch.optim.SGD(params=reg_model2.parameters(), lr=1e-5)

In [32]:
# Define a function to train a model.
# We work with batches of data instead of processing the entire training
# data in every iteration.
def fit_model(num_epochs, model, loss_fn, optimizer, train_loader=None):
    """Train ``model`` with mini-batch gradient descent.

    Args:
        num_epochs: Number of passes over the batches in ``train_loader``.
        model: Callable that maps an input batch to predictions. This is the
            model that is evaluated (the global ``reg_model2`` is no longer
            used implicitly).
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor
            on which ``.backward()`` can be called.
        optimizer: Object exposing ``step()`` and ``zero_grad()``; it should
            have been constructed over ``model``'s parameters.
        train_loader: Iterable yielding ``(inputs, targets)`` batches. When
            omitted, the notebook-level ``train_dl`` is used, which keeps
            existing four-argument calls working.
    """
    if train_loader is None:
        train_loader = train_dl

    # Repeat for given epochs
    for epoch in range(num_epochs):
        # Stays None if the loader yields no batches in this epoch
        loss = None

        # Train with batch data
        for xb, yb in train_loader:
            # Generate predictions with the model that was passed in
            pred = model(xb)
            # Calculate loss
            loss = loss_fn(pred, yb)
            # Compute gradients
            loss.backward()
            # Update parameters using gradients
            optimizer.step()
            # Reset gradients to zero
            optimizer.zero_grad()

        # Print the progress every 10th epoch; the value shown is the loss of
        # the last batch of that epoch, not an average over the epoch.
        if (epoch + 1) % 10 == 0 and loss is not None:
            # loss.item returns the actual value stored in the loss tensor
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

In [33]:
# Train model
fit_model(100, reg_model2, loss_fn, opt)

Epoch [10/100], Loss: 177.6140
Epoch [20/100], Loss: 86.6753
Epoch [30/100], Loss: 120.5985
Epoch [40/100], Loss: 116.1549
Epoch [50/100], Loss: 135.8946
Epoch [60/100], Loss: 33.7600
Epoch [70/100], Loss: 51.7913
Epoch [80/100], Loss: 41.3989
Epoch [90/100], Loss: 34.8453
Epoch [100/100], Loss: 20.6047


In [34]:
preds = reg_model2(inputs)
preds

tensor([[ 58.1252,  72.3234],
        [ 80.0964,  98.3822],
        [123.0555, 136.8588],
        [ 25.2073,  46.5031],
        [ 95.8218, 109.5451],
        [ 58.1252,  72.3234],
        [ 80.0964,  98.3822],
        [123.0555, 136.8588],
        [ 25.2073,  46.5031],
        [ 95.8218, 109.5451],
        [ 58.1252,  72.3234],
        [ 80.0964,  98.3822],
        [123.0555, 136.8588],
        [ 25.2073,  46.5031],
        [ 95.8218, 109.5451]], grad_fn=<AddmmBackward>)

In [35]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])