# Gradients Example

**Part 1 (from scratch):**
- Prediction (Manually)
- Gradients Computation (Manually)
- Loss Computation (Manually)
- Parameter Updates (Manually)

**Part 2:**
- Prediction (Manually)
- Gradients Computation (Autograd)
- Loss Computation (Manually)
- Parameter Updates (Manually)

**Part 3:**
- Prediction (Manually)
- Gradients Computation (Autograd)
- Loss Computation (PyTorch loss)
- Parameter Updates (PyTorch optimizer)

**Part 4 (everything automated by PyTorch):**
- Prediction (PyTorch model)
- Gradients Computation (Autograd)
- Loss Computation (PyTorch loss)
- Parameter Updates (PyTorch optimizer)

# Code Implementation

In [14]:
import numpy as np
import torch
import torch.nn as nn


# 1. Design model (input, output size, forward pass)
# 2. Construct loss and optimizer
# 3. Training loop:
#  - forward pass: compute prediction
#  - backward pass: compute gradient
#  - update weights

## Part 1

In [2]:
# f = w * x
# f = 2 * x

X = np.array([1, 2, 3, 4], dtype=np.float32)
y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0

In [3]:
# Calculate model prediction
def forward(X):
    return w * X

# Loss = MSE = 1/N * (w*x - y)^2 = K
def loss(y, y_hat):
    return ((y_hat-y)**2).mean()

# Gradient
# dK/dw = 1/N * 2x * (w*x - y)
def gradient(x, y, y_hat):
    return np.dot(2*x, y_hat-y).mean()

In [4]:
print(f'Prediction before training: f(5) = {forward(5)}')

Prediction before training: f(5) = 0.0


In [5]:
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # prediction = forward pass
    y_hat = forward(X)
    
    # loss
    l = loss(y, y_hat)
    
    # gradients
    dw = gradient(X, y, y_hat)
    
    # update weights
    w -= learning_rate * dw
    
    if epoch % 2 == 0:
        print(f'epoch {epoch + 1}: w = {w}, loss = {l}')

epoch 1: w = 1.2, loss = 30.0
epoch 3: w = 1.8720000267028807, loss = 0.7680001854896545
epoch 5: w = 1.9795200514793394, loss = 0.019660834223031998
epoch 7: w = 1.9967232251167295, loss = 0.000503324146848172
epoch 9: w = 1.9994757366180418, loss = 1.2884394891443662e-05
epoch 11: w = 1.9999160981178281, loss = 3.297340072094812e-07
epoch 13: w = 1.99998655796051, loss = 8.421768171729127e-09
epoch 15: w = 1.9999978733062742, loss = 2.1679014139408537e-10
epoch 17: w = 1.999999668598175, loss = 5.076827847005916e-12
epoch 19: w = 1.9999999547004699, loss = 1.3145040611561853e-13


In [6]:
print(f'Prediction after training: f(5) = {forward(5)}')

Prediction after training: f(5) = 9.99999977350235


# Part 2

In [7]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [8]:
print(f'Prediction before training: f(5) = {forward(5)}')

Prediction before training: f(5) = 0.0


In [9]:
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction = forward pass
    y_hat = forward(X)
    
    # loss
    l = loss(y, y_hat)
    
    # gradients, dl/dw
    l.backward()
    
    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad
    
    # zero gradients
    w.grad.zero_()
    
    if epoch % 10 == 0:
        print(f'epoch {epoch + 1}: w = {w}, loss = {l}')

epoch 1: w = 0.29999998211860657, loss = 30.0
epoch 11: w = 1.6653136014938354, loss = 1.1627856492996216
epoch 21: w = 1.934108853340149, loss = 0.0450688973069191
epoch 31: w = 1.987027645111084, loss = 0.0017468547448515892
epoch 41: w = 1.9974461793899536, loss = 6.770494655938819e-05
epoch 51: w = 1.9994971752166748, loss = 2.6243997126584873e-06
epoch 61: w = 1.9999010562896729, loss = 1.0175587306093803e-07
epoch 71: w = 1.9999804496765137, loss = 3.9741685498029256e-09
epoch 81: w = 1.999996304512024, loss = 1.4670220593870908e-10
epoch 91: w = 1.9999992847442627, loss = 5.076827847005916e-12


In [10]:
print(f'Prediction after training: f(5) = {forward(5)}')

Prediction after training: f(5) = 9.999998092651367


# Part 3

In [17]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [None]:
print(f'Prediction before training: f(5) = {forward(5)}')

In [18]:
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_hat = forward(X)
    
    # loss
    l = loss(y, y_hat)
    
    # gradients, dl/dw
    l.backward()
    
    # update weights
    optimizer.step()
    
    # zero gradients
    optimizer.zero_grad()
    
    if epoch % 10 == 0:
        print(f'epoch {epoch + 1}: w = {w}, loss = {l}')

epoch 1: w = 0.29999998211860657, loss = 30.0
epoch 11: w = 1.6653136014938354, loss = 1.1627856492996216
epoch 21: w = 1.934108853340149, loss = 0.0450688973069191
epoch 31: w = 1.987027645111084, loss = 0.0017468547448515892
epoch 41: w = 1.9974461793899536, loss = 6.770494655938819e-05
epoch 51: w = 1.9994971752166748, loss = 2.6243997126584873e-06
epoch 61: w = 1.9999010562896729, loss = 1.0175587306093803e-07
epoch 71: w = 1.9999804496765137, loss = 3.9741685498029256e-09
epoch 81: w = 1.999996304512024, loss = 1.4670220593870908e-10
epoch 91: w = 1.9999992847442627, loss = 5.076827847005916e-12


In [16]:
print(f'Prediction after training: f(5) = {forward(5)}')

Prediction after training: f(5) = 9.999998092651367


# Part 4

In [22]:
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

n_samples, n_features = X.shape

n_samples, n_features

(4, 1)

In [23]:
input_size = n_features
ouput_size = n_features
test_tensor = torch.tensor([[5]], dtype=torch.float32)

model = nn.Linear(input_size, ouput_size)

In [25]:
print(f'Prediction before training: f(5) = {model(test_tensor).item()}')

Prediction before training: f(5) = 2.141002893447876


In [31]:
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_hat = model(X)
    
    # loss
    l = loss(y, y_hat)
    
    # gradients, dl/dw
    l.backward()
    
    # update weights
    optimizer.step()
    
    # zero gradients
    optimizer.zero_grad()
    
    if epoch % 10 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item()}, loss = {l}')

epoch 1: w = 1.9835059642791748, loss = 0.000395130249671638
epoch 11: w = 1.9839930534362793, loss = 0.0003721289394889027
epoch 21: w = 1.9844659566879272, loss = 0.0003504731575958431
epoch 31: w = 1.9849247932434082, loss = 0.00033007271122187376
epoch 41: w = 1.98537015914917, loss = 0.0003108636010438204
epoch 51: w = 1.9858022928237915, loss = 0.0002927668974734843
epoch 61: w = 1.986221432685852, loss = 0.00027572826365940273
epoch 71: w = 1.986628532409668, loss = 0.0002596823906060308
epoch 81: w = 1.9870237112045288, loss = 0.00024456510436721146
epoch 91: w = 1.9874069690704346, loss = 0.0002303271903656423


In [32]:
print(f'Prediction after training: f(5) = {model(test_tensor).item()}')

Prediction after training: f(5) = 9.974750518798828


# Custom Linear Regression Model

In [34]:
class LinearRegression(nn.Module):
    
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        
        # Define layers
        self.lin = nn.Linear(input_dim, output_dim)
        
    def forward(self, X):
        return self.lin(X)
    
model = LinearRegression(input_size, ouput_size)

In [35]:
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

n_samples, n_features = X.shape

n_samples, n_features

(4, 1)

In [36]:
input_size = n_features
ouput_size = n_features
test_tensor = torch.tensor([[5]], dtype=torch.float32)

model = LinearRegression(input_size, ouput_size)

In [37]:
print(f'Prediction before training: f(5) = {model(test_tensor).item()}')

Prediction before training: f(5) = 0.0273057222366333


In [38]:
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_hat = model(X)
    
    # loss
    l = loss(y, y_hat)
    
    # gradients, dl/dw
    l.backward()
    
    # update weights
    optimizer.step()
    
    # zero gradients
    optimizer.zero_grad()
    
    if epoch % 10 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item()}, loss = {l}')

epoch 1: w = 0.3071673512458801, loss = 29.882219314575195
epoch 11: w = 1.563792109489441, loss = 0.8276547193527222
epoch 21: w = 1.7707607746124268, loss = 0.07276983559131622
epoch 31: w = 1.8087502717971802, loss = 0.050250012427568436
epoch 41: w = 1.8194210529327393, loss = 0.04685216769576073
epoch 51: w = 1.8255629539489746, loss = 0.044112883508205414
epoch 61: w = 1.8308454751968384, loss = 0.04154498502612114
epoch 71: w = 1.8358629941940308, loss = 0.03912683576345444
epoch 81: w = 1.8407148122787476, loss = 0.03684947267174721
epoch 91: w = 1.8454203605651855, loss = 0.03470461815595627


In [39]:
print(f'Prediction after training: f(5) = {model(test_tensor).item()}')

Prediction after training: f(5) = 9.690062522888184
