Gradient Descent and Training Pipeline for a quick example about linear regression

In [1]:
# doing everything manually Linear Regression
import numpy as np

# f = w*x

# f = 2*x

X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0


# model prediction
def forward(x):
    return w * x


# loss MSE loss function
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()


# gradient
# MSE = 1/N * (w*x - y) ** 2
# gradient: dj/dw = 1/n * 2 * (w*x-y) * x = mean * 2 * (y_pred-y) * x


def gradient(x, y, y_pred):
    return np.dot(2 * x, y_pred - y).mean()


print(f"Prediction before training: f(5) = {forward(5):.3f}")

# training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    # loss
    l = loss(Y, y_pred)
    # gradient
    dw = gradient(X, Y, y_pred)
    # update weights
    w -= learning_rate * dw

    if epoch % 1 == 0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")

print(f"Prediction after training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
epoch 1: w = 1.200, loss = 30.00000000
epoch 2: w = 1.680, loss = 4.80000067
epoch 3: w = 1.872, loss = 0.76800019
epoch 4: w = 1.949, loss = 0.12288000
epoch 5: w = 1.980, loss = 0.01966083
epoch 6: w = 1.992, loss = 0.00314574
epoch 7: w = 1.997, loss = 0.00050332
epoch 8: w = 1.999, loss = 0.00008053
epoch 9: w = 1.999, loss = 0.00001288
epoch 10: w = 2.000, loss = 0.00000206
epoch 11: w = 2.000, loss = 0.00000033
epoch 12: w = 2.000, loss = 0.00000005
epoch 13: w = 2.000, loss = 0.00000001
epoch 14: w = 2.000, loss = 0.00000000
epoch 15: w = 2.000, loss = 0.00000000
epoch 16: w = 2.000, loss = 0.00000000
epoch 17: w = 2.000, loss = 0.00000000
epoch 18: w = 2.000, loss = 0.00000000
epoch 19: w = 2.000, loss = 0.00000000
epoch 20: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


In [2]:
# use autograd for computing the gradient, others are manually
import torch

# f = w*x

# f = 2*x

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)


# model prediction
def forward(x):
    return w * x


# loss MSE loss function
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()


print(f"Prediction before training: f(5) = {forward(5):.3f}")

# training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    # loss
    l = loss(Y, y_pred)
    # gradient = backward pass, then the grad will stored in w.grad()
    l.backward()
    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad
    # zero grad
    w.grad.zero_()

    if epoch % 10 == 0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")

print(f"Prediction after training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


In [3]:
# only the prediction is manually, we will use loss function  and optimizer from pytorch
"""
Steps of training model in pytorch
1. design model (input, output size, forward pass)
2. construct loss and optimizer
3. training loop
    - forward pass: compute prediction
    - backward pass: gradients
    - update weight
"""

import torch
import torch.nn as nn

# f = w*x

# f = 2*x

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)


# model prediction
def forward(x):
    return w * x


print(f"Prediction before training: f(5) = {forward(5):.3f}")

# training
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()  # use the neural network mse loss
optimizer = torch.optim.SGD(
    [w], lr=learning_rate
)  # use the sochastic gradient descent optimizer

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    # loss
    l = loss(Y, y_pred)
    # gradient = backward pass
    l.backward()
    # update weights - optimization
    optimizer.step()
    # still have to clear the gradient before continuing
    optimizer.zero_grad()

    if epoch % 10 == 0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")

print(f"Prediction after training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


In [None]:
# everything will use pytorch lib (forward, backward, optimizer)
"""
Steps of training model in pytorch
1. design model (input, output size, forward pass)
2. construct loss and optimizer
3. training loop
    - forward pass: compute prediction using linear model
    - backward pass: gradients
    - update weight
"""

import torch
import torch.nn as nn

# f = w*x

# f = 2*x

X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)
n_samples, n_features = X.shape

input_size = n_features
output_size = n_features


model = nn.Linear(input_size, output_size)


# how to define/create your own model with pytorch
class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.lin(x)


print(f"Prediction before training: f(5) = {model(X_test).item():.3f}")

# training
learning_rate = 0.1
n_iters = 100

loss = nn.MSELoss()  # use the neural network mse loss
optimizer = torch.optim.SGD(
    model.parameters(), lr=learning_rate
)  # use the sochastic gradient descent optimizer

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)
    # loss
    l = loss(Y, y_pred)
    # gradient = backward pass
    l.backward()
    # update weights - optimization
    optimizer.step()
    # still have to clear the gradient before continuing
    optimizer.zero_grad()

    if epoch % 10 == 0:
        [w, b] = model.parameters()
        print(f"epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}, ")

print(f"Prediction after training: f(5) = {model(X_test).item():.3f}")

Prediction before training: f(5) = -1.201
epoch 1: w = 3.421, loss = 41.18035889, 
epoch 11: w = 2.035, loss = 0.01383961, 
epoch 21: w = 2.007, loss = 0.00007626, 
epoch 31: w = 2.005, loss = 0.00003904, 
epoch 41: w = 2.004, loss = 0.00002125, 
epoch 51: w = 2.003, loss = 0.00001157, 
epoch 61: w = 2.002, loss = 0.00000630, 
epoch 71: w = 2.001, loss = 0.00000343, 
epoch 81: w = 2.001, loss = 0.00000187, 
epoch 91: w = 2.001, loss = 0.00000102, 
Prediction after training: f(5) = 10.001
