# 예제로 배우는 파이토치

## 텐서

### 준비운동: numpy

In [2]:
import numpy as np
import math

x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b + x + c * x ** 2 + d * x ** 3

    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f"Result: y = {a} + {b}x + {c}x^2 + {d}X^3")

99 2396.0763544671104
199 1770.8972180132923
299 1319.1483079716904
399 991.5956926215016
499 753.1808115854881
599 578.8958957329253
699 450.87716704873947
799 356.3428286926286
899 286.12966592262643
999 233.65392497705454
1099 194.17299631855855
1199 164.26043632053816
1299 141.43235664990522
1399 123.8812643011216
1499 110.28622105111519
1599 99.67724748244078
1699 91.33831260659039
1799 84.73779631971153
1899 79.4785340845968
1999 75.2618376348419
Result: y = 1.603985272729849 + -1.828428970900139x + 0.03807204379784221x^2 + -0.1136313537541581X^3


## 파이토치(Pytorch): 텐서(Tensor)

In [3]:
import torch
import math

dtype = torch.float
device = torch.device("cpu")

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b*x + c*x**2 + d*x**3

    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1041.2509765625
199 712.8997802734375
299 489.52764892578125
399 337.3980712890625
499 233.67098999023438
599 162.8655242919922
699 114.47737121582031
799 81.37104034423828
899 58.694244384765625
999 43.143497467041016
1099 32.46736145019531
1199 25.12946891784668
1299 20.080347061157227
1399 16.602294921875
1499 14.203780174255371
1599 12.547990798950195
1699 11.403715133666992
1799 10.612112045288086
1899 10.063936233520508
1999 9.68395709991455
Result: y = -0.02690296061336994 + 0.8422964215278625 x + 0.004641206935048103 x^2 + -0.09127578884363174 x^3


## Autograd
### PyTorch: 텐서와 autograd

In [5]:
import torch
import math

dtype = torch.float
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.set_default_device(device)

x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), dtype=dtype, requires_grad=True)
b = torch.randn((), dtype=dtype, requires_grad=True)
c = torch.randn((), dtype=dtype, requires_grad=True)
d = torch.randn((), dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b*x + c*x**2 + d*x**3

    loss = (y_pred -y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())
    
    loss.backward()

    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f"Result: y = {a.item()} + {b.item}x + {c.item}x^2 + {d.item}x^3")

99 1137.4031982421875
199 785.8331298828125
299 544.3726806640625
399 378.34771728515625
499 264.06298828125
599 185.30755615234375
699 130.97682189941406
799 93.45578002929688
899 67.51637268066406
999 49.565223693847656
1099 37.129913330078125
1199 28.507122039794922
1299 22.522258758544922
1399 18.364484786987305
1499 15.473392486572266
1599 13.461374282836914
1699 12.059953689575195
1799 11.083028793334961
1899 10.401504516601562
1999 9.925701141357422
Result: y = 0.03225264698266983 + <built-in method item of Tensor object at 0x7fa843aec590>x + <built-in method item of Tensor object at 0x7fa7920e6ab0>x^2 + <built-in method item of Tensor object at 0x7fa7920e6c90>x^3


### Pytorch: 새 autograd Function 정의하기

In [9]:
import torch
import math

class LegendrePolynomial3(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)
    
    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 - 1)
    
dtype = torch.float
device = torch.device("cpu")

# 입력값과 출력값을 갖는 텐서들을 생성
x = torch.linspace(-math.pi, math.pi, 2000, device = device, dtype = dtype)
y = torch.sin(x)

# 가중치를 갖는 임의의 텐서를 생성, 3차 다항식이므로 4개의 가중치가 필요

a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
    P3 = LegendrePolynomial3.apply

    y_pred = a + b * P3(c + d * x)
    
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    loss.backward()

    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 209.95834350585938
199 144.66018676757812
299 100.70249938964844
399 71.03519439697266
499 50.978511810302734
599 37.403133392333984
699 28.206867218017578
799 21.97318458557129
899 17.7457275390625
999 14.877889633178711
1099 12.93176555633545
1199 11.610918045043945
1299 10.71425724029541
1399 10.10548210144043
1499 9.692105293273926
1599 9.411375999450684
1699 9.220745086669922
1799 9.091285705566406
1899 9.003361701965332
1999 8.943641662597656
Result: y = -6.71270206087371e-10 + -2.208526849746704 * P3(-3.392665037793563e-10 + 0.2554861009120941 x)


## nn모듈

In [12]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-6
for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    model.zero_grad()

    loss.backward()

    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

linear_layer = model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 1389.775390625
199 922.7545166015625
299 613.709716796875
399 409.19244384765625
499 273.8414306640625
599 184.2598114013672
699 124.96697235107422
799 85.7191162109375
899 59.737815856933594
999 42.53746032714844
1099 31.149349212646484
1199 23.60885238647461
1299 18.615478515625
1399 15.308577537536621
1499 13.118315696716309
1599 11.66740608215332
1699 10.70625114440918
1799 10.069391250610352
1899 9.647388458251953
1999 9.367691040039062
Result: y = 0.005132834892719984 + 0.8344249725341797 x + -0.0008854999905452132 x^2 + -0.09015615284442902 x^3


## PyTorch: optim

In [14]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)
model_fn = torch.nn.MSELoss(reduction = 'sum')

learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)
for t in range(2000):
    # 순전파 단계: 모델에 x를 전달하여 예측값 y를 계산합니다.
    y_pred = model(xx)

    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()

    #역전파 단계
    loss.backward()
    optimizer.step()

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 13218.296875
199 4496.5830078125
299 1419.328369140625
399 641.0751953125
499 498.46990966796875
599 425.03192138671875
699 350.4445495605469
799 274.3376159667969
899 201.9947509765625
999 138.92855834960938
1099 88.4695816040039
1199 51.748779296875
1299 28.026073455810547
1399 15.205949783325195
1499 10.11483383178711
1599 9.077219009399414
1699 8.843135833740234
1799 8.908886909484863
1899 8.979445457458496
1999 8.905778884887695
Result: y = -3.39617116651425e-08 + 0.8562856912612915 x + -3.366927003867204e-08 x^2 + -0.09381669014692307 x^3


## 사용자 정의 nn 모듈

In [24]:
import torch
import math

class Polynomial3(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
         return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    y_pred = model(x)
    
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 1103.344970703125
199 735.394287109375
299 491.2627868652344
399 329.2471923828125
499 221.70132446289062
599 150.29440307617188
699 102.86955261230469
799 71.36370849609375
899 50.42681884765625
999 36.5090446472168
1099 27.253990173339844
1199 21.097396850585938
1299 17.000333786010742
1399 14.272804260253906
1499 12.456226348876953
1599 11.245759010314941
1699 10.438852310180664
1799 9.900655746459961
1899 9.54153060913086
1999 9.30174446105957
Result: y = 0.009769970551133156 + 0.8373141288757324 x + -0.0016854822169989347 x^2 + -0.09056710451841354 x^3


## PyTorch: 제어흐름 + 가중치 공유

In [25]:
import random
import torch
import math

class DynamicNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x **3
        for exp in range(4, random.randint(4, 6)):
            y = y + self.e * x ** exp
        return y
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()}x + {self.c.item()}x^2 + {self.d.item()}x^3 + {self.e.item()}x^4 + {self.e.item()}x^5 ?'
    
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = DynamicNet()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
    y_pred = model(x)

    loss = criterion(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

1999 1054.506591796875
3999 450.39984130859375
5999 214.65936279296875
7999 100.25425720214844
9999 49.076385498046875
11999 26.642013549804688
13999 16.75096893310547
15999 12.294881820678711
17999 10.304195404052734
19999 9.523414611816406
21999 8.960441589355469
23999 8.773473739624023
25999 8.89212417602539
27999 8.629688262939453
29999 8.853524208068848
Result: y = 0.0023367558605968952 + 0.8545345067977905x + -0.0010018933098763227x^2 + -0.09332714974880219x^3 + 0.0001054343010764569x^4 + 0.0001054343010764569x^5 ?
