In [5]:
import torch
import math

In [6]:
# create dummy dataset
# 100 evenly spaced inputs on [-5, 5], laid out as a (100, 1) column
x = torch.linspace(-5, 5, steps=100).unsqueeze(1)
# noiseless linear targets: slope 2, intercept 1
y = x * 2 + 1

In [7]:
# Initialize Parameters
# Fixed seed so the random weight draws below are reproducible.
torch.manual_seed(0)

# Hidden layer: 1 input feature -> 10 units (random weights, zero bias).
W1, b1 = torch.randn(1, 10), torch.zeros(10)

# Output layer: 10 units -> 1 output (random weights, zero bias).
# W2 must be drawn after W1 so the seeded values stay the same.
W2, b2 = torch.randn(10, 1), torch.zeros(1)

In [8]:
# Activation Function
def relu(z):
    """Element-wise ReLU: negative entries of ``z`` become 0, the rest pass through.

    Returns a new tensor; ``z`` itself is not modified.
    """
    return z.clamp(min=0)

def relu_grad(z):
    """Element-wise derivative of ReLU evaluated at ``z``.

    Returns a float32 tensor shaped like ``z`` holding 1.0 where ``z > 0`` and
    0.0 everywhere else (the derivative at exactly 0 is taken to be 0).
    """
    is_positive = z > 0
    return is_positive.to(torch.float32)

In [9]:
# Forward Pass
def forward(x):
    """Run the two-layer network on ``x`` using the notebook-level parameters.

    Reads the module-level ``W1``, ``b1``, ``W2`` and ``b2`` and applies
    affine -> ReLU -> affine.

    Returns:
        A tuple ``(z1, a1, z2)``: the hidden pre-activation, the hidden
        activation ``relu(z1)``, and the network output.
    """
    hidden_pre = torch.matmul(x, W1) + b1
    hidden_act = relu(hidden_pre)
    output = torch.matmul(hidden_act, W2) + b2
    return hidden_pre, hidden_act, output

In [10]:
# Mean Squared Error
def mse_loss(y_pred, y_true):
    """Mean squared error between ``y_pred`` and ``y_true``.

    The squared differences are averaged over every element, giving a
    zero-dimensional tensor.
    """
    residual = y_pred - y_true
    return residual.pow(2).mean()

In [11]:
# Backward Pass
def backward(x, y, z1, a1, y_pred):
    """Hand-derived gradients of the MSE loss for the two-layer network.

    Reads the notebook-level ``W2`` (never assigns it, so no ``global``
    declaration is needed) and calls ``relu_grad``.

    Args:
        x: Input batch that was passed to ``forward``.
        y: Targets; expected to have the same shape as ``y_pred``.
        z1: Hidden pre-activations returned by ``forward``.
        a1: Hidden activations returned by ``forward``.
        y_pred: Network output returned by ``forward``.

    Returns:
        ``(dW1, db1, dW2, db2)``: gradients of the loss with respect to the
        hidden weights/bias and the output weights/bias, in that order.
    """
    # mse_loss averages over every element of the prediction, so the gradient
    # is normalised by the element count. For a single-column output this
    # equals the batch size x.shape[0].
    n = y_pred.numel()

    # d(loss)/d(y_pred) for loss = mean((y_pred - y) ** 2)
    dloss_dy = 2 * (y_pred - y) / n

    # Output layer: z2 = a1 @ W2 + b2
    dW2 = a1.t() @ dloss_dy
    db2 = dloss_dy.sum(0)

    # Back through the output weights, then gate by the ReLU derivative.
    da1 = dloss_dy @ W2.t()
    dz1 = da1 * relu_grad(z1)

    # Hidden layer: z1 = x @ W1 + b1
    dW1 = x.t() @ dz1
    db1 = dz1.sum(0)

    return dW1, db1, dW2, db2

In [12]:
# Training
# Full-batch gradient descent. The updates below modify W1/b1/W2/b2 in place,
# so re-running this cell continues from the current parameter values.
lr = 0.01      # step size
epochs = 500   # number of passes over the whole dataset

for epoch in range(epochs):
    # Forward pass and loss on the entire dataset.
    z1, a1, y_pred = forward(x)
    loss = mse_loss(y_pred, y)

    dW1, db1, dW2, db2 = backward(x, y, z1, a1, y_pred)

    # Gradient-descent step applied in place to the notebook-level parameters.
    W1.sub_(lr * dW1)
    b1.sub_(lr * db1)
    W2.sub_(lr * dW2)
    b2.sub_(lr * db2)

    # Report the loss (computed before this epoch's update) every 50th epoch.
    if not epoch % 50:
        print("Epoch", epoch, "Loss", loss.item())

Epoch 0 Loss 50.44289016723633
Epoch 50 Loss 0.0275622196495533
Epoch 100 Loss 0.009179612621665001
Epoch 150 Loss 0.003178477520123124
Epoch 200 Loss 0.0012737560318782926
Epoch 250 Loss 0.0006423292797990143
Epoch 300 Loss 0.00041676583350636065
Epoch 350 Loss 0.0003278276417404413
Epoch 400 Loss 0.00028294147341512144
Epoch 450 Loss 0.00025406747590750456


In [13]:
# Testing
# The dataset was generated from y = 2x + 1, so the exact target for x = 4 is 9.
test_x = torch.tensor([[4.0]])
# forward() returns (z1, a1, z2); take the output by index instead of unpacking
# into `_`, which would shadow IPython's last-output variable in this session.
pred = forward(test_x)[2]
print("Prediction", pred.item())

Prediction 8.998025894165039
