# Autograd
Computing Gradient in Pytorch

require_grad = True

Tells pytorch to compute the gradient for the tensor for the operation that has been performed
i.e. The backpropagation for the specific perceptron layer

Jacobian Matrix of Chain Rule

In [1]:
import torch

In [3]:
x = torch.randn(3, requires_grad=True)
print(x)

y = x + 2
print(y) # Will show the AddBackward
z = y * 2 
print(z) # Will show the MulBackward
z = z.mean()
print(z)

tensor([-0.7851, -0.5255, -0.8914], requires_grad=True)
tensor([1.2149, 1.4745, 1.1086], grad_fn=<AddBackward0>)
tensor([2.4299, 2.9490, 2.2173], grad_fn=<MulBackward0>)
tensor(2.5321, grad_fn=<MeanBackward0>)


In [4]:
# to initiate the computation of gradient w.r.t x
z.backward()
print(x.grad)

tensor([0.6667, 0.6667, 0.6667])


# To prevent gradient update

In [8]:
x = torch.randn(3, requires_grad=True)
print(x)

x.requires_grad_(False) # In-place
y = x.detach()
print(y)
with torch.no_grad():
    y = x + 2
    print(y)


tensor([ 0.4356, -2.1155, -0.8405], requires_grad=True)
tensor([ 0.4356, -2.1155, -0.8405])
tensor([ 2.4356, -0.1155,  1.1595])


# Simulate gradient update

In [12]:
import torch

weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    """
    optimizer = torch.optim.SGD(weights, lr=0.001)
    optimizer.step()
    optimizer.zero_grad()
    """
    model_output = (weights * 3).sum()
    
    # Inititate Backprop
    model_output.backward()

    # Show accumulated gradient
    print(weights.grad)

    # Need to zero the gradient before performing the next epoch
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


Manually Compute Simple Linear Regression
(Numpy)

In [4]:
import numpy as np

# We want our function
# f = W * x
# where W is 2

# Sample data
X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

# initialise initial weight = 0
w = 0.0

# model prediction
def forward(x):
    return w * x

# loss = MSE
def loss(y, y_predicted):
    return ((y_predicted-y)**2).mean()

# update gradient
# MSE = 1/N * (w*x -y)**2
# dJ/dw = 1/N * 2*x(w*x - y)
def gradient(x, y, y_predicted):
    # dJ/dw = 1/N * 2*x(w*x - y
    return np.dot(2*x, y_predicted-y).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# training 
learning_rate = 0.01
n_iters = 20
for epoch in range(n_iters):
    # Prediction = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # gradients
    dw = gradient(X,Y,y_pred)

    # update weights
    w -= learning_rate*dw

    if epoch % 2 == 0:
        print(f'epoch {epoch+1}: w= {w:.3f}, loss ={l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
epoch 1: w= 1.200, loss =30.00000000
epoch 3: w= 1.872, loss =0.76800019
epoch 5: w= 1.980, loss =0.01966083
epoch 7: w= 1.997, loss =0.00050332
epoch 9: w= 1.999, loss =0.00001288
epoch 11: w= 2.000, loss =0.00000033
epoch 13: w= 2.000, loss =0.00000001
epoch 15: w= 2.000, loss =0.00000000
epoch 17: w= 2.000, loss =0.00000000
epoch 19: w= 2.000, loss =0.00000000
Prediction after training: f(5) = 10.000


Manually Compute Linear Regression (Torch)

In [21]:
import torch

# We want our function
# f = W * x
# where W is 2

# Sample data
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

# initialise initial weight = 0
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model prediction
def forward(x):
    return w * x

# loss = MSE
def loss(y, y_predicted):
    return ((y_predicted-y)**2).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# training 
learning_rate = 0.0.1
n_iters = 100
for epoch in range(n_iters):
    # Prediction = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # gradients
    l.backward() #dl/dw

    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad
    
    # zero gradient
    w.grad.zero_()

    if epoch % 10 == 0:
        print(f'epoch {epoch+1}: w= {w:.3f}, loss ={l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
epoch 1: w= 3.000, loss =30.00000000
epoch 11: w= 2.001, loss =0.00002861
epoch 21: w= 2.000, loss =0.00000000
epoch 31: w= 2.000, loss =0.00000000
epoch 41: w= 2.000, loss =0.00000000
epoch 51: w= 2.000, loss =0.00000000
epoch 61: w= 2.000, loss =0.00000000
epoch 71: w= 2.000, loss =0.00000000
epoch 81: w= 2.000, loss =0.00000000
epoch 91: w= 2.000, loss =0.00000000
Prediction after training: f(5) = 10.000


Pytorch Training Pipeline
1. Design model (Input, Output size , forward pass)
2. Construct loss and optimizer
3. Training loop  
    a. forward pass: compute prediction  
    b. backward pass: gradients  
    c. update weights


In [36]:
import torch
import torch.nn as nn

# We want our function
# f = W * x
# where W is 2

# Sample data
X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)
X_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = X.shape
print(n_samples, n_features)

input_size = n_features
output_size = n_features
# initialise initial weight = 0
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model declaration
class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        self.lin = nn.Linear(in_features=input_dim,
                             out_features=output_dim)

    def forward(self, x):
        return self.lin(x)

model = LinearRegression(input_size, output_size)

print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

# Training 
learning_rate = 0.001
n_iters = 1000

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # Prediction = forward pass
    y_pred = model(X)

    # loss
    l = loss(Y, y_pred)

    # gradients
    l.backward() #dl/dw

    # update weights
    optimizer.step()
    
    # zero gradient
    optimizer.zero_grad()

    if epoch % 100 == 0:
        [w, b] = model.parameters()

        print(f'epoch {epoch+1}: w= {w[0][0].item():.3f}, loss ={l:.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

4 1
Prediction before training: f(5) = -4.454
epoch 1: w= -0.696, loss =67.17584229
epoch 101: w= 1.456, loss =2.31811523
epoch 201: w= 1.856, loss =0.08370954
epoch 301: w= 1.932, loss =0.00651644
epoch 401: w= 1.947, loss =0.00364620
epoch 501: w= 1.951, loss =0.00334841
epoch 601: w= 1.953, loss =0.00315082
epoch 701: w= 1.955, loss =0.00296757
epoch 801: w= 1.956, loss =0.00279507
epoch 901: w= 1.957, loss =0.00263260
Prediction after training : f(5) = 9.915
