In [1]:
import numpy as np
import math

In [5]:
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

In [6]:
x, x.shape, y.shape

(array([-3.14159265, -3.13844949, -3.13530633, ...,  3.13530633,
         3.13844949,  3.14159265]),
 (2000,),
 (2000,))

In [7]:
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

In [8]:
learning_rate = 1e-6

In [14]:
for t in range(200):
#     print(t)
    y_pred = a+b*x+c*x**2+d*x**3
    
    loss = np.square(y_pred - y).sum()
    
    if t%100 == 99:
        print(t, loss)
    
    grad_y_pred = 2.0*(y_pred-y)
    
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()
    
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d
    
print(f'Results: y = {a} + {b}x + {c}x^2+{d}x^3')

99 3414.0000637180647
199 2295.35263135234
Results: y = -0.7717203336888667 + -0.431037286382271x + 0.13313460820615472x^2+0.0898448358039112x^3


In [12]:
99 % 100

99

In [16]:
import torch
dtype = torch.float

In [17]:
device = torch.device("cpu")

In [18]:
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

In [20]:
type(x), x.shape, y.shape

(torch.Tensor, torch.Size([2000]), torch.Size([2000]))

In [28]:
a = torch.randn((), device=device, dtype=dtype, requires_grad = True)
b = torch.randn((), device=device, dtype=dtype, requires_grad = True)
c = torch.randn((), device=device, dtype=dtype, requires_grad = True)
d = torch.randn((), device=device, dtype=dtype, requires_grad = True)

In [32]:
learning_rate = 1e-6
for t in range(2000):
    
    y_pred = a+b*x+c*x**2+d*x**3
    
    loss = (y_pred-y).pow(2).sum()
    if t% 100 == 99:
        print(t, loss)
#     grad_y_pred = 2.0*(y_pred-y)
    loss.backward()
#     grad_a = grad_y_pred.sum()
#     grad_b = (grad_y_pred * x).sum()
#     grad_c = (grad_y_pred * x ** 2).sum()
#     grad_d = (grad_y_pred * x ** 3).sum()
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad
    
    
print(f'Result f = {a}+{b}*x+{c}*x^2+{d}*x^3')

99 tensor(63394.6992, grad_fn=<SumBackward0>)
199 tensor(104852.0625, grad_fn=<SumBackward0>)
299 tensor(43428.5859, grad_fn=<SumBackward0>)
399 tensor(86801.5234, grad_fn=<SumBackward0>)
499 tensor(25483.4883, grad_fn=<SumBackward0>)
599 tensor(62461.6602, grad_fn=<SumBackward0>)
699 tensor(15263.6777, grad_fn=<SumBackward0>)
799 tensor(42625.2617, grad_fn=<SumBackward0>)
899 tensor(20206.1074, grad_fn=<SumBackward0>)
999 tensor(31638.0156, grad_fn=<SumBackward0>)
1099 tensor(41114.6641, grad_fn=<SumBackward0>)
1199 tensor(33504.0469, grad_fn=<SumBackward0>)
1299 tensor(69373.7891, grad_fn=<SumBackward0>)
1399 tensor(44823.1641, grad_fn=<SumBackward0>)
1499 tensor(97262.4375, grad_fn=<SumBackward0>)
1599 tensor(56411.7500, grad_fn=<SumBackward0>)
1699 tensor(114052.3984, grad_fn=<SumBackward0>)
1799 tensor(64220.4570, grad_fn=<SumBackward0>)
1899 tensor(111358.0547, grad_fn=<SumBackward0>)
1999 tensor(64031.6680, grad_fn=<SumBackward0>)
Result f = 0.8448341488838196+-0.229703396558761

In [37]:
class LegendrePolynomial3(torch.autograd.Function):
    
    @staticmethod
    def forward(ctx, input):
        
        ctx.save_for_backward(input)
        return 0.5*(5*input**3 - 3*input)
    
    @staticmethod
    def backward(ctx, grad_output):
        
        input,  = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input **2 -1)

In [39]:
for t in range(2000):
    
    P3 = LegendrePolynomial3.apply
    
    y_pred = a + b * P3(c+d*x)
    
    loss = (y_pred-y).pow(2).sum()
    
    if t % 100 == 99:
        print(t, loss.item())
        
    loss.backward()
    
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad
        
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None
        
print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 439.35931396484375
199 437.4896545410156
299 435.80694580078125
399 434.2837219238281
499 432.8973388671875
599 431.6296691894531
699 430.4652404785156
799 429.391845703125
899 428.39825439453125
999 427.4754943847656
1099 426.6161193847656
1199 425.8133544921875
1299 425.06146240234375
1399 424.35565185546875
1499 423.69110107421875
1599 423.064697265625
1699 422.47259521484375
1799 421.9120788574219
1899 421.3804016113281
1999 420.87542724609375
Result: y = -0.618481457233429 + -0.4685993492603302 * P3(-1.022571325302124 + -0.09171546995639801 x)


In [42]:
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)


tensor([[ -3.1416,   9.8696, -31.0063],
        [ -3.1384,   9.8499, -30.9133],
        [ -3.1353,   9.8301, -30.8205],
        ...,
        [  3.1353,   9.8301,  30.8205],
        [  3.1384,   9.8499,  30.9133],
        [  3.1416,   9.8696,  31.0063]])

In [49]:
x.shape, x.unsqueeze(-1).pow(torch.tensor([1, 2, 3]))

(torch.Size([2000]),
 tensor([[ -3.1416,   9.8696, -31.0063],
         [ -3.1384,   9.8499, -30.9133],
         [ -3.1353,   9.8301, -30.8205],
         ...,
         [  3.1353,   9.8301,  30.8205],
         [  3.1384,   9.8499,  30.9133],
         [  3.1416,   9.8696,  31.0063]]))

In [50]:
model = torch.nn.Sequential(torch.nn.Linear(3, 1),
                           torch.nn.Flatten(0, 1)
                           )

loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6

for t in range(2000):
    
    y_pred = model(xx)
    
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss)
    model.zero_grad()
    
    loss.backward()
    
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate*param.grad

99 tensor(233.2124, grad_fn=<MseLossBackward>)
199 tensor(161.6628, grad_fn=<MseLossBackward>)
299 tensor(113.0458, grad_fn=<MseLossBackward>)
399 tensor(79.9742, grad_fn=<MseLossBackward>)
499 tensor(57.4520, grad_fn=<MseLossBackward>)
599 tensor(42.0965, grad_fn=<MseLossBackward>)
699 tensor(31.6154, grad_fn=<MseLossBackward>)
799 tensor(24.4532, grad_fn=<MseLossBackward>)
899 tensor(19.5532, grad_fn=<MseLossBackward>)
999 tensor(16.1971, grad_fn=<MseLossBackward>)
1099 tensor(13.8959, grad_fn=<MseLossBackward>)
1199 tensor(12.3161, grad_fn=<MseLossBackward>)
1299 tensor(11.2303, grad_fn=<MseLossBackward>)
1399 tensor(10.4833, grad_fn=<MseLossBackward>)
1499 tensor(9.9687, grad_fn=<MseLossBackward>)
1599 tensor(9.6139, grad_fn=<MseLossBackward>)
1699 tensor(9.3690, grad_fn=<MseLossBackward>)
1799 tensor(9.1997, grad_fn=<MseLossBackward>)
1899 tensor(9.0826, grad_fn=<MseLossBackward>)
1999 tensor(9.0015, grad_fn=<MseLossBackward>)


In [53]:
linear_layer = model[0]

In [54]:
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

Result: y = -0.012291478924453259 + 0.8499004244804382 x + 0.0021204836666584015 x^2 + -0.09235739707946777 x^3


In [55]:
model = torch.nn.Sequential(torch.nn.Linear(3, 1),
                           torch.nn.Flatten(0, 1))

loss_fn = torch.nn.MSELoss(reduction='sum')

optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)

In [None]:
for t in range(2000):
    
    y_pred = model(xx)
    
    loss = loss_fn(y_pred,y)
    
    optimizer.zero_grad()
    
    