### Performing Gradient Descent manually on numpy inputs

In [5]:
import numpy as np

# f = w * x
# f = 2 * x

X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0

# Model Prediction
def forward(x):
    return w*x

# Loss = MSE 
def loss(y, y_predicted):
    return ((y_predicted-y)**2).mean()

# Gradient 
# MSE = ((y_predicted-y)**2).mean() = 1/N * (w*x - y)**2
# dJ/dw = 1/N (2x*(w*x - y))

def gradient(x, y, y_predicted):
    return np.dot(2*x, y_predicted-y).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # prediction = forward pass 
    y_pred = forward(X)

    # loss 
    l = loss(Y, y_pred)

    # gradients
    dw = gradient(X, Y, y_pred)

    # update weights
    w -= learning_rate * dw 

    if epoch % 2 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
epoch 1: w = 1.200, loss = 30.00000000
epoch 3: w = 1.872, loss = 0.76800019
epoch 5: w = 1.980, loss = 0.01966083
epoch 7: w = 1.997, loss = 0.00050331
epoch 9: w = 1.999, loss = 0.00001288
epoch 11: w = 2.000, loss = 0.00000033
epoch 13: w = 2.000, loss = 0.00000001
epoch 15: w = 2.000, loss = 0.00000000
epoch 17: w = 2.000, loss = 0.00000000
epoch 19: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


### Performing Gradient Descent using Autograd on Torch inputs

In [9]:
import torch 
# Model Prediction

X = torch.tensor([1, 2, 3, 4], dtype = torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype = torch.float32)

w = torch.tensor(0.0, dtype = torch.float32, requires_grad=True) 

def forward(x):
    return w*x

# Loss = MSE 
def loss(y, y_predicted):
    return ((y_predicted-y)**2).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction = forward pass 
    y_pred = forward(X)

    # loss 
    l = loss(Y, y_pred)

    # gradients = backward pass
    l.backward() # dl/dw

    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad

    # zero gradients
    w.grad.zero_()

    if epoch % 10 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


#### Performing Gradient descend using Autograd on Torch Inputs and also using Pytorch loss and optimizer functions

1. Design Model (input, output_size, forward_pass)
2. construct loss and optimizer
3. Training loop

- Forward pass: Compute prediction
- backward pass: gradients
- update weights

In [9]:
import torch 
import torch.nn as nn

X = torch.tensor([[1], [2], [3], [4]], dtype = torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

x_test = torch.tensor([5], dtype = torch.float32)
n_samples, n_features = X.shape
print(n_samples, n_features)

input_size = n_features
output_size = n_features
model = nn.Linear(input_size, output_size)

print(f'Prediction before training: f(5)= {model(x_test).item()}')

# Training
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)

    #loss
    l = loss(Y, y_pred)
    
    # gradients = backward pass
    l.backward()

    # update weights
    optimizer.step()

    # zero gradients
    optimizer.zero_grad()
    
    if epoch % 10 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction before training: f(5)= {model(x_test).item()}')


4 1
Prediction before training: f(5)= 1.5337207317352295
epoch 1: w = 0.404, loss = 19.23918915
epoch 11: w = 1.414, loss = 0.71782839
epoch 21: w = 1.586, loss = 0.22582799
epoch 31: w = 1.623, loss = 0.20103538
epoch 41: w = 1.638, loss = 0.18903276
epoch 51: w = 1.650, loss = 0.17802233
epoch 61: w = 1.660, loss = 0.16766024
epoch 71: w = 1.670, loss = 0.15790151
epoch 81: w = 1.680, loss = 0.14871079
epoch 91: w = 1.689, loss = 0.14005506
Prediction before training: f(5)= 9.377370834350586


In [10]:
import torch 
import torch.nn as nn

X = torch.tensor([[1], [2], [3], [4]], dtype = torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

x_test = torch.tensor([5], dtype = torch.float32)
n_samples, n_features = X.shape
print(n_samples, n_features)

input_size = n_features
output_size = n_features
model = nn.Linear(input_size, output_size)

print(f'Prediction before training: f(5)= {model(x_test).item()}')

# Training
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)

    #loss
    l = loss(Y, y_pred)
    
    # gradients = backward pass
    l.backward()

    # update weights
    optimizer.step()

    # zero gradients
    optimizer.zero_grad()
    
    if epoch % 10 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction before training: f(5)= {model(x_test).item()}')

4 1
Prediction before training: f(5)= 0.1034095287322998
epoch 1: w = 0.241, loss = 28.03874016
epoch 11: w = 1.460, loss = 0.85980058
epoch 21: w = 1.663, loss = 0.14879555
epoch 31: w = 1.704, loss = 0.12303431
epoch 41: w = 1.717, loss = 0.11543064
epoch 51: w = 1.726, loss = 0.10870057
epoch 61: w = 1.734, loss = 0.10237332
epoch 71: w = 1.742, loss = 0.09641462
epoch 81: w = 1.750, loss = 0.09080279
epoch 91: w = 1.757, loss = 0.08551763
Prediction before training: f(5)= 9.513472557067871
