In [1]:
import torch
import math

In [2]:
dtype = torch.float
device = torch.device('cpu')

In [3]:
#random input and o/p
x = torch.linspace(-math.pi,math.pi,2000)
y = torch.sin(x)

In [4]:
#randomly init weights
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

In [5]:
learning_rate = 1e-6
for t in range(2000):
    #Forward pass
    y_pred = a + b*x + c*x**2 + d*x**3
    
    #compute loss 
    loss = (y_pred-y).pow(2).sum().item()
    if t%100 == 99:
        print(t,loss)
    
    #backpropagate to compute gradient of a,b,c,d wrt loss
    grad_y_pred = 2.0*(y_pred-y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred*x).sum()
    grad_c = (grad_y_pred*x**2).sum()
    grad_d = (grad_y_pred*x**3).sum()
    
    #update weight using gradient descent
    a -= learning_rate*grad_a
    b -= learning_rate*grad_b
    c -= learning_rate*grad_c
    d -= learning_rate*grad_d
    
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 5296.33056640625
199 3559.50439453125
299 2395.149169921875
399 1613.981201171875
499 1089.4815673828125
599 737.028564453125
699 499.98675537109375
799 340.4244689941406
899 232.9198760986328
999 160.4210205078125
1099 111.48267364501953
1199 78.41535186767578
1299 56.04933547973633
1399 40.9061279296875
1499 30.64220428466797
1599 23.677936553955078
1699 18.947446823120117
1799 15.730602264404297
1899 13.540682792663574
1999 12.048158645629883
Result: y = -0.04287254810333252 + 0.8179735541343689 x + 0.007396227680146694 x^2 + -0.08781607449054718 x^3


# Aurograd

In [9]:
#random input and o/p
x = torch.linspace(-math.pi,math.pi,2000,device=device,dtype=dtype)
y = torch.sin(x)

In [10]:
#randomly init weights
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

In [13]:
learning_rate = 1e-6
for t in range(2000):
    #Forward pass
    y_pred = a + b*x + c*x**2 + d*x**3
    
    #compute loss 
    loss = (y_pred-y).pow(2).sum()
    if t%100 == 99:
        print(t,loss.item())
    
    loss.backward()
    
    with torch.no_grad():
        a -= learning_rate*grad_a
        b -= learning_rate*grad_b
        c -= learning_rate*grad_c
        d -= learning_rate*grad_d
        
        #Manually zero the grad after updating the weight
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 713601.75
199 713607.9375
299 713614.25
399 713620.9375
499 713627.8125
599 713634.9375
699 713642.25
799 713649.75
899 713657.5625
999 713665.5
1099 713673.6875
1199 713682.125
1299 713690.75
1399 713699.625
1499 713708.75
1599 713718.125
1699 713727.6875
1799 713737.5
1899 713747.625
1999 713757.875
Result: y = 1.0860239267349243 + 0.40834811329841614 x + 0.8347156643867493 x^2 + -1.5916025638580322 x^3


# NN Module

In [14]:
# tensor (x, x^2, x^3).
p = torch.tensor([1,2,3])
xx = x.unsqueeze(-1).pow(p)

In [18]:
model = torch.nn.Sequential(
    torch.nn.Linear(3,1),
    torch.nn.Flatten(0,1)
)

In [19]:
loss_fn = torch.nn.MSELoss(reduction='sum')


In [20]:
learning_rate = 1e-6
for t in range(2000):
    y_pred = model(xx)
    
    loss = loss_fn(y_pred,y)
    if t%100==99:
        print(t,loss.item())
    
    model.zero_grad()
    
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate*param.grad

# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]

# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')
        

TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'

# Optim

In [21]:
model = torch.nn.Sequential(
    torch.nn.Linear(3,1),
    torch.nn.Flatten(0,1)
)

In [22]:
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-3

optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)



In [23]:
for t in range(2000):
    y_pred = model(xx)
    
    loss = loss_fn(y_pred,y)
    if t%100==99:
        print(t,loss.item())
    optimizer.zero_grad()
    
    loss.backward()
    
    optimizer.step()
    

99 29957.072265625
199 14522.5615234375
299 6435.462890625
399 2615.17919921875
499 1286.90771484375
599 989.86865234375
699 876.6983642578125
799 751.5184936523438
899 616.2755126953125
999 486.0467834472656
1099 369.83026123046875
1199 270.88580322265625
1299 189.72262573242188
1399 125.6277847290039
1499 77.58940887451172
1599 44.237274169921875
1699 23.742752075195312
1799 13.365751266479492
1899 9.638579368591309
1999 8.912074089050293


In [24]:
# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]

# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')
        

Result: y = -0.0004854503786191344 + 0.8494857549667358 x + -0.0004854584112763405 x^2 + -0.09260600805282593 x^3
