In [66]:
import torch
import math

In [67]:
# Global tensor settings shared by the cells below: run on the CPU and
# use 32-bit floating point (torch.float is an alias of torch.float32).
device = torch.device("cpu")
dtype = torch.float32


In [68]:
# Build the training data: 2000 evenly spaced inputs covering [-pi, pi]
# (torch.linspace returns a 1-D tensor of `steps` equally spaced points
# between start and end), with sin(x) as the regression target.
x = torch.linspace(-math.pi, math.pi, steps=2000, dtype=dtype, device=device)
y = x.sin()

In [69]:
# Randomly initialize the four scalar weights from a standard normal
# distribution. requires_grad=True makes autograd track them, so that
# loss.backward() fills in a.grad, b.grad, c.grad and d.grad.
a, b, c, d = (
    torch.randn((), device=device, dtype=dtype, requires_grad=True)
    for _ in range(4)
)


In [70]:
# Step size applied to each gradient in the manual gradient-descent updates.
learning_rate = 1e-6

In [71]:
# Fit the cubic a + b*x + c*x^2 + d*x^3 to y with 2000 steps of
# hand-written gradient descent.
for step in range(2000):
    # Forward pass: evaluate the polynomial at every input point.
    y_pred = a + (b * x) + (c * x ** 2) + (d * x ** 3)

    # Loss is the sum of squared errors over all points.
    residual = y_pred - y
    loss = residual.pow(2).sum()

    # Report progress once every 100 iterations (steps 99, 199, ...).
    if (step + 1) % 100 == 0:
        print(step, loss.item())

    # Backward pass: autograd computes the gradient of the loss with respect
    # to every Tensor created with requires_grad=True, storing the results
    # in a.grad, b.grad, c.grad and d.grad.
    loss.backward()

    # The update itself must not be recorded by autograd, hence no_grad().
    with torch.no_grad():
        for weight in (a, b, c, d):
            weight -= learning_rate * weight.grad
            # Discard the gradient so the next backward() starts fresh
            # instead of accumulating onto this one.
            weight.grad = None

print(f"\nResult: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3")
    

99 753.1223754882812
199 504.23077392578125
299 338.703369140625
399 228.57781982421875
499 155.28262329101562
599 106.4803466796875
699 73.97210693359375
799 52.307899475097656
899 37.863548278808594
999 28.227968215942383
1099 21.796829223632812
1199 17.502073287963867
1299 14.63230037689209
1399 12.713552474975586
1499 11.429835319519043
1599 10.57037353515625
1699 9.994599342346191
1799 9.60855484008789
1899 9.349535942077637
1999 9.175618171691895

Result: y = -0.010473142378032207 + 0.8410497903823853 x + 0.0018067918717861176 x^2 + -0.09109847247600555 x^3


### Custom Autograd Function

In [72]:
class LegendrePolynomial3(torch.autograd.Function):
    """
    Custom autograd Function computing P3(x) = 0.5 * (5 * x^3 - 3 * x).

    Subclassing torch.autograd.Function lets us provide both the forward
    computation and a hand-written backward pass; both operate on Tensors.
    """

    @staticmethod
    def forward(ctx, input):
        """
        Receive the input Tensor and return the output Tensor.

        ctx is a context object used to stash information for the backward
        computation. The input is cached on it via ctx.save_for_backward so
        that backward() can read it back.
        """
        ctx.save_for_backward(input)
        cubic_term = 5 * input ** 3
        linear_term = 3 * input
        return 0.5 * (cubic_term - linear_term)

    @staticmethod
    def backward(ctx, grad_output):
        """
        Receive the gradient of the loss with respect to the output and
        return the gradient of the loss with respect to the input.
        """
        (input,) = ctx.saved_tensors
        # Chain rule: dP3/dx = 0.5 * (15 * x^2 - 3), scaled by the incoming
        # gradient.
        local_derivative = 15 * input ** 2 - 3
        return grad_output * 0.5 * local_derivative
        

In [73]:
# To apply the custom Function we use its .apply method, aliased as "P3".
# The alias never changes, so it is created once instead of on every
# iteration.
P3 = LegendrePolynomial3.apply

# NOTE: a, b, c, d and learning_rate are not re-created in this cell; it
# starts from whatever values the earlier cells left in the kernel.
for t in range(2000):

    # Forward pass: compute predicted y as a + b * P3(c + d * x).
    y_pred = a + b * P3(c + d * x)

    # Compute and print the loss (sum of squared errors).
    loss = (y_pred - y).pow(2).sum()

    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass. This call computes the
    # gradient of the loss with respect to all Tensors with
    # requires_grad=True. Afterwards a.grad, b.grad, c.grad and d.grad hold
    # the gradient of the loss with respect to a, b, c and d respectively.
    loss.backward()

    # Manually update the weights using gradient descent. The update is
    # wrapped in torch.no_grad() so autograd does not track it.
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Manually reset the gradients after updating the weights.
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

# Report the model that was actually fitted, a + b * P3(c + d x), rather
# than a plain cubic in x.
print(f"\nResult: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)")

99 364.7557373046875
199 336.39178466796875
299 311.39404296875
399 288.35430908203125
499 267.107421875
599 247.50640869140625
699 229.41806030273438
799 212.721435546875
899 197.30628967285156
999 183.07151794433594
1099 169.92442321777344
1199 157.78001403808594
1299 146.5602264404297
1399 136.19332885742188
1499 126.61357116699219
1599 117.75997924804688
1699 109.57664489746094
1799 102.01229858398438
1899 95.01947021484375
1999 88.55453491210938

Result: y = -3.423275984459906e-06 + 1.5930246114730835 x + 4.4686668587701206e-08 x^2 + -0.2502686679363251 x^3


## In this example we use the nn package to implement our polynomial model network:

In [101]:
import torch 
import math

# Input and target tensors: 3000 evenly spaced points in [-pi, pi] and
# their sine.
x = torch.linspace(-math.pi, math.pi, steps=3000)
y = x.sin()

# The output y is modelled as a linear function of (x, x^2, x^3), so the
# model can be a single linear layer fed with those three powers.
# x.unsqueeze(-1) has shape (3000, 1) and p has shape (3,), so broadcasting
# produces a tensor of shape (3000, 3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1) ** p

# nn.Sequential is a Module that contains other Modules and applies them in
# order. Linear computes its output from the input with a linear function
# and holds internal Tensors for its weight and bias. Flatten collapses the
# linear layer's output to a 1-D tensor so that it matches the shape of y.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=3, out_features=1),
    torch.nn.Flatten(start_dim=0, end_dim=1),
)

# The nn package also provides common loss functions. Mean Squared Error is
# used here, with reduction='sum' so the squared errors are summed rather
# than averaged.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Step size for the manual parameter updates.
learning_rate = 1e-6


In [102]:
# Train the nn model for 3000 steps, applying gradient descent by hand.
for step in range(3000):
    # Forward pass: calling the model on the inputs yields the predictions.
    y_pred = model(xx)

    # Compute the loss from predictions and targets.
    loss = loss_fn(y_pred, y)

    # Report progress once every 100 iterations (steps 99, 199, ...).
    if (step + 1) % 100 == 0:
        print(step, loss.item())

    # Clear the gradients before running the backward pass.
    model.zero_grad()

    # Backward pass: compute the gradient of the loss with respect to the
    # model's parameters.
    loss.backward()

    # Gradient-descent update. Each parameter is a Tensor, so its gradient
    # is read from .grad; no_grad() keeps the update out of the autograd
    # graph.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

# The first layer of the Sequential model is the linear layer; its
# parameters are stored as `weight` and `bias`.
linear_layer = model[0]

print(f"\nResult: y = {linear_layer.bias.item()} + {linear_layer.weight[:,0].item()} x + {linear_layer.weight[:,1].item()} x^2 + {linear_layer.weight[:,2].item()} x^3")

99 436.988525390625
199 241.79603576660156
299 136.55018615722656
399 79.78561401367188
499 49.15975570678711
599 32.630332946777344
699 23.705530166625977
799 18.884628295898438
899 16.279359817504883
999 14.870682716369629
1099 14.108551025390625
1199 13.695989608764648
1299 13.472505569458008
1399 13.35135269165039
1499 13.285618782043457
1599 13.249917030334473
1699 13.230517387390137
1799 13.219962120056152
1899 13.214214324951172
1999 13.211074829101562
2099 13.20936393737793
2199 13.208427429199219
2299 13.207916259765625
2399 13.207633018493652
2499 13.207480430603027
2599 13.20739459991455
2699 13.20734691619873
2799 13.20732307434082
2899 13.207306861877441
2999 13.207298278808594

Result: y = -5.291602064971812e-05 + 0.8567584156990051 x + 9.130395483225584e-06 x^2 + -0.09334048628807068 x^3


### In this example we will use the nn package to define our model, but we will optimize the model using the RMSprop algorithm provided by the `optim` package

In [108]:
import torch 
import math

# Input and target tensors: 3000 evenly spaced points in [-pi, pi] and
# their sine.
x = torch.linspace(-math.pi, math.pi, steps=3000)
y = x.sin()

# Feature matrix (x, x^2, x^3): x.unsqueeze(-1) has shape (3000, 1) and p
# has shape (3,), so broadcasting gives a tensor of shape (3000, 3). With
# these features y can be modelled by a single linear layer.
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1) ** p

# nn.Sequential applies its child Modules in order: Linear maps the three
# features to one output using its internal weight and bias Tensors, and
# Flatten turns that output into a 1-D tensor matching the shape of y.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=3, out_features=1),
    torch.nn.Flatten(start_dim=0, end_dim=1),
)

# Mean Squared Error loss from the nn package, with reduction='sum' so the
# squared errors are summed rather than averaged.
loss_fn = torch.nn.MSELoss(reduction='sum')

# RMSprop from the optim package updates the model's parameters; it is
# given the parameters to update and the learning rate.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(params=model.parameters(), lr=learning_rate)

In [109]:
# Train the nn model for 3000 steps, letting the optimizer apply updates.
for step in range(3000):
    # Forward pass: calling the model on the inputs yields the predictions.
    y_pred = model(xx)

    # Compute the loss from predictions and targets.
    loss = loss_fn(y_pred, y)

    # Report progress once every 100 iterations (steps 99, 199, ...).
    if (step + 1) % 100 == 0:
        print(step, loss.item())

    # Clear the gradients of the optimizer's parameters before the backward
    # pass.
    optimizer.zero_grad()

    # Backward pass: compute the gradient of the loss with respect to the
    # model's parameters.
    loss.backward()

    # Let the optimizer update the parameters from their gradients.
    optimizer.step()

# The first layer of the Sequential model is the linear layer; its
# parameters are stored as `weight` and `bias`.
linear_layer = model[0]

print(f"\nResult: y = {linear_layer.bias.item()} + {linear_layer.weight[:,0].item()} x + {linear_layer.weight[:,1].item()} x^2 + {linear_layer.weight[:,2].item()} x^3")

99 27459.169921875
199 11208.5712890625
299 4106.93603515625
399 1621.7874755859375
499 1079.236328125
599 945.0682373046875
699 810.76953125
799 657.9105224609375
899 505.5042724609375
999 368.8593444824219
1099 255.0446319580078
1199 165.7698211669922
1299 99.91158294677734
1399 55.59292984008789
1499 29.57561492919922
1599 17.510820388793945
1699 13.816474914550781
1799 13.349454879760742
1899 13.300216674804688
1999 13.297232627868652
2099 13.352880477905273
2199 13.393037796020508
2299 13.36282730102539
2399 13.352320671081543
2499 13.362598419189453
2599 13.366425514221191
2699 13.362287521362305
2799 13.361160278320312
2899 13.362712860107422
2999 13.363043785095215

Result: y = -0.0005000153323635459 + 0.8573224544525146 x + -0.0005000244709663093 x^2 + -0.09284860640764236 x^3
