In [None]:
from random import normalvariate, random, seed

import plotly.express as px
# import torch
from torch import Tensor, nn, optim
from torch.nn import functional as F

In [37]:
# One-feature affine model y = w * x + b. Overwrite the default random
# initialisation with w = 1, b = 0 so the starting point is fixed.
lin = nn.Linear(in_features=1, out_features=1, bias=True)
nn.init.constant_(lin.weight, 1.0)
nn.init.constant_(lin.bias, 0.0)
lin.weight, lin.bias

(Parameter containing:
 tensor([[1.]], requires_grad=True),
 Parameter containing:
 tensor([0.], requires_grad=True))

In [32]:
# Synthetic regression data: y = TRUE_WEIGHT * x + TRUE_BIAS + Gaussian noise.
# The fitted weight/bias can later be compared against these ground-truth values.
SEED = 0
N_SAMPLES = 100
TRUE_WEIGHT, TRUE_BIAS = 0.8, -1.2
NOISE_STD = 0.1

# Re-seed every time this cell runs so the dataset is identical across
# re-runs and kernel restarts (the cell is idempotent).
seed(SEED)

xs = Tensor([[random()] for _ in range(N_SAMPLES)])  # inputs drawn uniformly from [0, 1)
noise = Tensor([[normalvariate(0, NOISE_STD)] for _ in range(N_SAMPLES)])
ys = TRUE_WEIGHT * xs + TRUE_BIAS + noise
xs.shape, ys.shape

(torch.Size([100, 1]), torch.Size([100, 1]))

In [33]:
# Visual sanity check of the raw (x, y) samples; squeeze drops the trailing
# size-1 dimension so plotly receives flat sequences.
scatter_fig = px.scatter(
    x=xs.squeeze(),
    y=ys.squeeze(),
    template="plotly_dark",
)
scatter_fig.update_traces(marker={"size": 5})
scatter_fig.show()

In [38]:
# Mean-squared error of the model's predictions with its current parameters.
preds = lin(xs)
F.mse_loss(input=preds, target=ys, reduction="mean")

tensor(1.6744, grad_fn=<MseLossBackward0>)

In [14]:
# Inspect the gradients currently stored on the two parameters
# (weight first, then bias).
tuple(param.grad for param in (lin.weight, lin.bias))

(None, None)

In [39]:
n_steps = 200
opt = optim.Adam(lin.parameters(), lr=0.1)
lossi = []  # per-step loss history

for i in range(1, n_steps + 1):
    # Drop gradients left over from the previous iteration.
    opt.zero_grad(set_to_none=True)

    # Forward pass on the full dataset (no mini-batching).
    preds = lin(xs)
    loss = F.mse_loss(preds, ys)

    # Backward pass and parameter update.
    loss.backward()
    opt.step()

    lossi.append(loss.item())

    # Report on the first step and every tenth step. The loss shown was
    # computed before this step's update; weight and bias are read after it.
    if i % 10 == 0 or i == 1:
        weight, bias = lin.weight.item(), lin.bias.item()
        print(f"Step {i}: loss = {loss.item():.4f}, weight = {weight:.4f}, bias = {bias:.4f}")

Step 1: loss = 1.6744, weight = 0.9000, bias = -0.1000
Step 10: loss = 0.0419, weight = 0.1182, bias = -0.9016
Step 20: loss = 0.1605, weight = 0.0441, bias = -1.1256
Step 30: loss = 0.0248, weight = 0.4461, bias = -0.9121
Step 40: loss = 0.0273, weight = 0.5561, bias = -0.9694
Step 50: loss = 0.0146, weight = 0.5377, bias = -1.1248
Step 60: loss = 0.0086, weight = 0.6571, bias = -1.1168
Step 70: loss = 0.0084, weight = 0.7202, bias = -1.1303
Step 80: loss = 0.0077, weight = 0.7247, bias = -1.1719
Step 90: loss = 0.0075, weight = 0.7550, bias = -1.1675
Step 100: loss = 0.0075, weight = 0.7601, bias = -1.1730
Step 110: loss = 0.0075, weight = 0.7583, bias = -1.1768
Step 120: loss = 0.0075, weight = 0.7607, bias = -1.1729
Step 130: loss = 0.0074, weight = 0.7568, bias = -1.1740
Step 140: loss = 0.0074, weight = 0.7563, bias = -1.1724
Step 150: loss = 0.0074, weight = 0.7552, bias = -1.1721
Step 160: loss = 0.0074, weight = 0.7548, bias = -1.1720
Step 170: loss = 0.0074, weight = 0.7549, 

In [40]:
# Plot training loss.
# Pass an explicit 1-based x so that (a) the 'x' key in `labels` refers to an
# argument that was actually supplied, and (b) the axis matches the step
# numbers used elsewhere (first step is 1, not 0).
fig = px.line(
    x=list(range(1, len(lossi) + 1)),
    y=lossi,
    labels={'x': 'Training Step', 'y': 'MSE Loss'},
    title="Training Loss over Time",
)
fig.show()