# Introduction to PyTorch

Firsts tests to get used with `torch` library.

https://docs.pytorch.org/tutorials/beginner/pytorch_with_examples.html

In [2]:
import torch
import math
import numpy as np

## Warm-up with Numpy

In [3]:
x = np.linspace(-math.pi , math.pi , 2000)
y = np.sin(x)

a,b,c,d = np.random.randn(4)

learning_rate = 1e-6

np.random.seed(0)

for t in range(2000):
    y_pred = a + b*x + c*(x**2) + d*(x**3)

    loss = np.square(y-y_pred).sum()

    if t%100 == 99:
        print(f"{t+1:4} : Loss = {loss}")

    grad_y_pred = 2.0 * (y_pred-y)
    
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x**2).sum()
    grad_d = (grad_y_pred * x**3).sum()


    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f" y = {a:2.3} + {b:2.3} * x + {c:2.3} * x**2 + {d:2.3} * x**3")

 100 : Loss = 1008.9237898035005
 200 : Loss = 671.5664533444124
 300 : Loss = 448.0717400673915
 400 : Loss = 299.990105563784
 500 : Loss = 201.86183767919056
 600 : Loss = 136.82642126022023
 700 : Loss = 93.71695144579269
 800 : Loss = 65.13665343363905
 900 : Loss = 46.18545199203122
1000 : Loss = 33.616833464055745
1100 : Loss = 25.279555057408373
1200 : Loss = 19.74793327494139
1300 : Loss = 16.07698871972965
1400 : Loss = 13.640264278036721
1500 : Loss = 12.022390398525037
1600 : Loss = 10.947908021556774
1700 : Loss = 10.234106600802937
1800 : Loss = 9.759770212612985
1900 : Loss = 9.444462778551056
2000 : Loss = 9.234796351462531
 y = -0.00695 + 0.838 * x + 0.0012 * x**2 + -0.0907 * x**3


## Tensors

In [4]:
torch.manual_seed(0)

dtype  = torch.float
device = torch.device("cpu")

x = torch.linspace(-math.pi , math.pi , 2000 , device=device , dtype=dtype)
y = torch.sin(x)

a,b,c,d = torch.randn(4 , device=device , dtype=dtype)

learning_rate = 1e-6

for t in range(2000):
    y_pred = a + b*x + c*(x**2) + d*(x**3)

    loss = (y_pred - y).pow(2).sum().item()

    if t%100 == 99:
        print(f"{t+1:4} : Loss {loss:.2}")

    grad_y_pred = 2.0 * (y_pred-y)

    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x**2).sum()
    grad_d = (grad_y_pred * x**3).sum()


    a -= grad_a*learning_rate
    b -= grad_b*learning_rate
    c -= grad_c*learning_rate
    d -= grad_d*learning_rate




print(f"y = {a.item():2.3} + {b.item():2.3} * x + {c.item():2.3} * x^2 + {d.item():2.3} * x^3")

 100 : Loss 3.2e+03
 200 : Loss 2.2e+03
 300 : Loss 1.6e+03
 400 : Loss 1.1e+03
 500 : Loss 7.6e+02
 600 : Loss 5.3e+02
 700 : Loss 3.7e+02
 800 : Loss 2.6e+02
 900 : Loss 1.8e+02
1000 : Loss 1.3e+02
1100 : Loss 9.4e+01
1200 : Loss 6.9e+01
1300 : Loss 5.1e+01
1400 : Loss 3.8e+01
1500 : Loss 2.9e+01
1600 : Loss 2.3e+01
1700 : Loss 1.9e+01
1800 : Loss 1.6e+01
1900 : Loss 1.4e+01
2000 : Loss 1.2e+01
y = 0.0586 + 0.837 * x + -0.0101 * x^2 + -0.0906 * x^3


## Autograd

In [5]:
torch.manual_seed(0)

dtype  = torch.float
device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else "cpu"

print(f"Using {device} device")

torch.device(device)

x = torch.linspace(-math.pi , math.pi , 2000 , dtype=dtype)
y = torch.sin(x)

a = torch.randn(() , dtype=dtype , requires_grad=True)
b = torch.randn(() , dtype=dtype , requires_grad=True)
c = torch.randn(() , dtype=dtype , requires_grad=True)
d = torch.randn(() , dtype=dtype , requires_grad=True)

learning_rate = 1e-6

for t in range(2000):
    y_pred = a + b*x + c*(x**2) + d*(x**3)

    loss = (y_pred - y).pow(2).sum()

    if t%100 == 99:
        print(f"{t+1:4} : Loss {loss.item():.2}")

    loss.backward()

    with torch.no_grad():
        a -= learning_rate*a.grad
        b -= learning_rate*b.grad
        c -= learning_rate*c.grad
        d -= learning_rate*d.grad

        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None


print(f"y = {a.item():2.3} + {b.item():2.3} * x + {c.item():2.3} * x^2 + {d.item():2.3} * x^3")

Using cpu device
 100 : Loss 3.2e+03
 200 : Loss 2.2e+03
 300 : Loss 1.6e+03
 400 : Loss 1.1e+03
 500 : Loss 7.6e+02
 600 : Loss 5.3e+02
 700 : Loss 3.7e+02
 800 : Loss 2.6e+02
 900 : Loss 1.8e+02
1000 : Loss 1.3e+02
1100 : Loss 9.4e+01
1200 : Loss 6.9e+01
1300 : Loss 5.1e+01
1400 : Loss 3.8e+01
1500 : Loss 2.9e+01
1600 : Loss 2.3e+01
1700 : Loss 1.9e+01
1800 : Loss 1.6e+01
1900 : Loss 1.4e+01
2000 : Loss 1.2e+01
y = 0.0586 + 0.837 * x + -0.0101 * x^2 + -0.0906 * x^3


### Custom Autograd

In [6]:
torch.manual_seed(0)

class LegandrePolynomial3(torch.autograd.Function):
    @staticmethod
    def forward(ctx , input):
        ctx.save_for_backward(input)
        return 0.5 * (5 * input**3 - 3 * input)
    
    @staticmethod
    def backward(ctx , grad_outputs):
        input, = ctx.saved_tensors
        return grad_outputs * 1.5 * (5 * input**2 - 1)
    
dtype  = torch.float
device = torch.device("cpu")

x = torch.linspace(-math.pi , math.pi , 2000 , device=device , dtype=dtype)
y = torch.sin(x)

a = torch.full((), 0.0 , device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0 , device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3 , device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6

for t in range(2000):
    P3 = LegandrePolynomial3.apply

    y_pred = a + b * P3(c + d*x)

    loss = (y_pred - y).pow(2).sum()

    if t%100 == 99:
        print(f"{t+1:4} : Loss {loss.item():.2}")

    loss.backward()

    with torch.no_grad():
        a -= learning_rate*a.grad
        b -= learning_rate*b.grad
        c -= learning_rate*c.grad
        d -= learning_rate*d.grad

        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None


print(f"y = {a.item():2.3} + {b.item():2.3} * P3( {c.item():2.3} + {d.item():2.3} * x )")

 100 : Loss 2.1e+02
 200 : Loss 1.4e+02
 300 : Loss 1e+02
 400 : Loss 7.1e+01
 500 : Loss 5.1e+01
 600 : Loss 3.7e+01
 700 : Loss 2.8e+01
 800 : Loss 2.2e+01
 900 : Loss 1.8e+01
1000 : Loss 1.5e+01
1100 : Loss 1.3e+01
1200 : Loss 1.2e+01
1300 : Loss 1.1e+01
1400 : Loss 1e+01
1500 : Loss 9.7
1600 : Loss 9.4
1700 : Loss 9.2
1800 : Loss 9.1
1900 : Loss 9.0
2000 : Loss 8.9
y = -6.71e-10 + -2.21 * P3( -3.39e-10 + 0.255 * x )


## `nn` module

In [7]:
torch.manual_seed(0)

x = torch.linspace(-math.pi , math.pi , 2000)
y = torch.sin(x)

p  = torch.tensor([1 , 2 , 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3 , 1) ,
    torch.nn.Flatten(0 , 1)
)

loss_fn = torch.nn.MSELoss(reduction="sum")

learning_rate = 1e-6

for t in range(2000):
    y_pred = model(xx)

    loss = loss_fn(y_pred , y)

    if t%100 == 99:
        print(f"{t+1:4} : Loss {loss.item():.2}")

    model.zero_grad()

    loss.backward()

    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

linear_layer = model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

 100 : Loss 5.8e+02
 200 : Loss 3.9e+02
 300 : Loss 2.7e+02
 400 : Loss 1.8e+02
 500 : Loss 1.3e+02
 600 : Loss 8.8e+01
 700 : Loss 6.3e+01
 800 : Loss 4.5e+01
 900 : Loss 3.3e+01
1000 : Loss 2.5e+01
1100 : Loss 2e+01
1200 : Loss 1.6e+01
1300 : Loss 1.4e+01
1400 : Loss 1.2e+01
1500 : Loss 1.1e+01
1600 : Loss 1e+01
1700 : Loss 9.9
1800 : Loss 9.6
1900 : Loss 9.3
2000 : Loss 9.2
Result: y = -0.01462855190038681 + 0.8441022634506226 x + 0.0025236690416932106 x^2 + -0.09153266251087189 x^3


### optim

In [15]:
torch.manual_seed(0)

x = torch.linspace(-math.pi , math.pi , 2000)
y = torch.sin(x)

p  = torch.tensor([1 , 2 , 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3 , 1) ,
    torch.nn.Flatten(0 , 1)
)

loss_fn = torch.nn.MSELoss(reduction = "sum")

learning_rate = 1e-3

optimizer = torch.optim.RMSprop(model.parameters() , lr=learning_rate)

for t in range(2000):
    y_pred = model(xx)

    loss = loss_fn(y_pred , y)

    if t%100 == 99:
        print(f"{t+1:4} : Loss = {loss.item():.2}")

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

 100 : Loss = 2.5e+04
 200 : Loss = 1.1e+04
 300 : Loss = 4.1e+03
 400 : Loss = 1.2e+03
 500 : Loss = 3.3e+02
 600 : Loss = 1.9e+02
 700 : Loss = 1.6e+02
 800 : Loss = 1.4e+02
 900 : Loss = 1.1e+02
1000 : Loss = 7.6e+01
1100 : Loss = 4.8e+01
1200 : Loss = 2.7e+01
1300 : Loss = 1.5e+01
1400 : Loss = 1e+01
1500 : Loss = 8.9
1600 : Loss = 8.8
1700 : Loss = 8.8
1800 : Loss = 8.8
1900 : Loss = 8.9
2000 : Loss = 8.9
Result: y = 3.1972486880249562e-09 + 0.8573270440101624 x + -1.537604887857924e-08 x^2 + -0.09273074567317963 x^3


### Custom `nn` Modules

In [18]:
torch.manual_seed(0)

class Polynomial3(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self , x):
        return self.a + self.b * x + self.c * (x**2) + self.d * (x**3)
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
    
x = torch.linspace(-math.pi , math.pi , 2000)
y = torch.sin(x)

model = Polynomial3()

criterion = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters() , lr = 1e-6)

for t in range(2000):
    y_pred = model(x)
    loss   = criterion(y_pred , y)

    if t%100 == 99:
        print(f' {t:4} : Loss = {loss.item():.2}')

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

print(model.string())


   99 : Loss = 3.2e+03
  199 : Loss = 2.2e+03
  299 : Loss = 1.6e+03
  399 : Loss = 1.1e+03
  499 : Loss = 7.6e+02
  599 : Loss = 5.3e+02
  699 : Loss = 3.7e+02
  799 : Loss = 2.6e+02
  899 : Loss = 1.8e+02
  999 : Loss = 1.3e+02
 1099 : Loss = 9.4e+01
 1199 : Loss = 6.9e+01
 1299 : Loss = 5.1e+01
 1399 : Loss = 3.8e+01
 1499 : Loss = 2.9e+01
 1599 : Loss = 2.3e+01
 1699 : Loss = 1.9e+01
 1799 : Loss = 1.6e+01
 1899 : Loss = 1.4e+01
 1999 : Loss = 1.2e+01
y = 0.058623358607292175 + 0.8372474908828735 x + -0.010113503783941269 x^2 + -0.09055762737989426 x^3


### Control Flow + Weight Sharing

In [22]:
import random

torch.manual_seed(0)
random.seed(0)

class DynamicNet(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))

    def forward(self , x):
        y = self.a + self.b * x + self.c * (x**2) + self.d * (x**3)

        for exp in range(4 , random.randint(4,6)):
            y = y + self.e * (x**exp)
        return y
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'
    
x = torch.linspace(-math.pi , math.pi , 2000)
y = torch.sin(x)

model = DynamicNet()

criterion = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters() , lr = 1e-8 , momentum = 0.9)

for t in range(30000):
    y_pred = model(x)

    loss = criterion(y_pred , y)
    if t%2000 == 1999:
        print(f' {t:4} : Loss = {loss.item():.2}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(model.string())

 1999 : Loss = 2.2e+03
 3999 : Loss = 1.1e+03
 5999 : Loss = 5.2e+02
 7999 : Loss = 2.6e+02
 9999 : Loss = 1.3e+02
 11999 : Loss = 6.8e+01
 13999 : Loss = 4.5e+01
 15999 : Loss = 2.3e+01
 17999 : Loss = 1.6e+01
 19999 : Loss = 1.2e+01
 21999 : Loss = 1.1e+01
 23999 : Loss = 9.8
 25999 : Loss = 9.3
 27999 : Loss = 9.1
 29999 : Loss = 8.9
y = 0.011253771372139454 + 0.8554857969284058 x + -0.0025563975796103477 x^2 + -0.09336294233798981 x^3 + 0.00011736536544049159 x^4 ? + 0.00011736536544049159 x^5 ?
