<a href="https://colab.research.google.com/github/jungeun919/Pytorch_study/blob/main/Pytorch_Tutorial/Learn_Pytorch_with_Examples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Numpy로 네트워크 구현

- Numpy는 GPU를 사용하여 수치 계산을 가속화 할 수 없음

In [1]:
import numpy as np
import math


# random input and output data
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# 가중치 랜덤 초기화
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate= 1e-6
for t in range(2000):
    # y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    # 역전파
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # 가중치 업데이트
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f"Result: y = {a} + {b}x + {c}x^2 + {d}x^3")

99 58.5792012617506
199 42.25006490902771
299 31.2982962184495
399 23.94721073642355
499 19.009009659247702
599 15.688925600013523
699 13.454812389408442
799 11.950113456264061
899 10.935747551891556
999 10.25128113310081
1099 9.788970856730401
1199 9.476399095041078
1299 9.264850080349525
1399 9.121523434253717
1499 9.02431445960672
1599 8.958312433031477
1699 8.913449593983099
1799 8.882921412235918
1899 8.8621242088965
1999 8.847940075617029
Result: y = -0.004226769240617829 + 0.8604841886350437x + 0.0007291880277503295x^2 + -0.09386284734934805x^3


# Tensor로 네트워크 구현

In [2]:
import torch
import math


dtype = torch.float
device = torch.device("cpu")
# GPU에서 실행할 경우
# device = torch.device("cuda:0")

# random input and output data
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# 가중치 랜덤 초기화
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # 역전파
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # 가중치 업데이트
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f"Result: y = {a.item()} + {b.item()}x + {c.item()}x^2 + {d.item()}x^3")

99 969.1831665039062
199 681.3494262695312
299 480.0590515136719
399 339.1997985839844
499 240.56826782226562
599 171.46426391601562
699 123.0206298828125
799 89.04234313964844
899 65.19779968261719
999 48.456504821777344
1099 36.69710159301758
1199 28.4334716796875
1299 22.62396240234375
1399 18.538135528564453
1499 15.663555145263672
1599 13.640427589416504
1699 12.216072082519531
1799 11.212965965270996
1899 10.50631332397461
1999 10.008365631103516
Result: y = -0.035617757588624954 + 0.8492801785469055x + 0.0061446549370884895x^2 + -0.0922691747546196x^3


# Autograd

- 네트워크의 순방향: 계산 그래프 정의
- 네트워크의 역방향: 기울기 계산
- autograd를 사용하면 자동미분을 통해 역방향 계산을 자동화 할 수 있음

In [3]:
import torch
import math


dtype = torch.float
device = torch.device("cpu")

# 입출력을 저장할 tensor 생성
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# 가중치 랜덤 생성
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    # forward
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    loss.backward()
    
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # 가중치 업데이트 후 gradient 0으로 초기화
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f"Result: y = {a.item()} + {b.item()}x + {c.item()}x^2 + {d.item()}x^3")

99 1687.673828125
199 1120.4041748046875
299 744.8753662109375
399 496.25836181640625
499 331.6490478515625
599 222.6516571044922
699 150.4713134765625
799 102.66726684570312
899 71.00382232666016
999 50.02885055541992
1099 36.13262939453125
1199 26.924983978271484
1299 20.823148727416992
1399 16.77890396118164
1499 14.097983360290527
1599 12.320512771606445
1699 11.141830444335938
1799 10.360054016113281
1899 9.841438293457031
1999 9.497323989868164
Result: y = -0.006986424792557955 + 0.83221435546875x + 0.0012052766978740692x^2 + -0.08984170854091644x^3


# nn Module

In [4]:
import torch
import math


x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# input tensor (x, x^2, x^3)
# x.unsqueeze(-1) -> (2000, 1)
# p -> (3,) -> (브로드캐스팅) (2000, 3)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for t in range(2000):
    # forward: x를 모델에 전달하여 예측값 y_pred를 계산
    y_pred = model(xx)

    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # gradient 0으로 초기화
    model.zero_grad()

    # backward: loss의 기울기 계산
    loss.backward()

    # 경사하강법으로 가중치 업데이트
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad


linear_layer = model[0]

print(f"Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0]}x +\
      {linear_layer.weight[:, 1].item()}x^2 + {linear_layer.weight[:, 2].item()}x^3")

99 1412.4658203125
199 940.0906982421875
299 626.823486328125
399 419.03179931640625
499 281.1739501953125
599 189.6933135986328
699 128.97349548339844
799 88.66118621826172
899 61.89052200317383
999 44.10751724243164
1099 32.2913703918457
1199 24.437511444091797
1299 19.215492248535156
1399 15.742103576660156
1499 13.430985450744629
1599 11.892620086669922
1699 10.868165016174316
1799 10.18569564819336
1899 9.730833053588867
1999 9.427485466003418
Result: y = -0.010259432718157768 + tensor([0.8347], grad_fn=<SelectBackward>)x +      0.0017699227901175618x^2 + -0.09018833935260773x^3


# 최적화

In [5]:
import torch
import math


x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# input tensor (x, x^2, x^3)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')

# 가중치를 업데이트 할 optimizer: RMSprop
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

for t in range(2000):
    # forward
    y_pred = model(xx)

    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()

    # backward
    loss.backward()
    optimizer.step()


linear_layer = model[0]

print(f"Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0]}x +\
      {linear_layer.weight[:, 1].item()}x^2 + {linear_layer.weight[:, 2].item()}x^3")

99 12076.09765625
199 4148.8369140625
299 1298.625732421875
399 583.816650390625
499 471.20318603515625
599 410.79815673828125
699 337.67108154296875
799 258.92864990234375
899 185.15484619140625
999 123.33406066894531
1099 76.03056335449219
1199 43.09998321533203
1299 22.95142364501953
1399 12.926902770996094
1499 9.482279777526855
1599 8.889137268066406
1699 8.847436904907227
1799 8.88914966583252
1899 8.98213005065918
1999 8.934618949890137
Result: y = 0.0005021418328396976 + tensor([0.8562], grad_fn=<SelectBackward>)x +      0.0005021427641622722x^2 + -0.09385624527931213x^3


# Custom nn Module

In [6]:
import torch
import math


class Polynomial3(torch.nn.Module):
    def __init__(self):
        """
        생성자에서 4 개의 매개 변수를 인스턴스화하고 멤버변수로 할당한다.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        return self.a + self.b * x + self.c + x ** 2 + self.d * x ** 3

    def string(self):
        """
        PyTorch 모듈에서 사용자 지정 메서드를 정의 할 수도 있다.
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'


x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)

for t in range(2000):
    y_pred = model(x)

    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f"Result: {model.string()}")

99 19698.212890625
199 17865.951171875
299 17486.072265625
399 17401.716796875
499 17379.40625
599 17371.3671875
699 17367.396484375
799 17365.04296875
899 17363.5390625
999 17362.556640625
1099 17361.91015625
1199 17361.48046875
1299 17361.19921875
1399 17361.01171875
1499 17360.88671875
1599 17360.8046875
1699 17360.751953125
1799 17360.716796875
1899 17360.693359375
1999 17360.677734375
Result: y = -1.9387927055358887 + 0.8513712882995605 x + -1.354352355003357 x^2 + -0.09256662428379059 x^3


# 제어 흐름 + 가중치 공유

In [7]:
import random
import torch
import math


class DynamicNet(torch.nn.Module):
    def __init__(self):
        """
        생성자에서 5개의 매개변수를 인스턴스화하고 멤버로 할당한다.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x **3
        for exp in range(4, random.randint(4, 6)):
            y = y + self.e * x ** exp
        return y

    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 +\
        {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'


x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = DynamicNet()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)

for t in range(30000):
    y_pred = model(x)

    loss = criterion(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f"Result: {model.string()}")

1999 3686.485595703125
3999 1656.988525390625
5999 744.3120727539062
7999 343.7672119140625
9999 152.99716186523438
11999 75.1351318359375
13999 38.67062759399414
15999 22.256267547607422
17999 14.87530517578125
19999 11.528840065002441
21999 11.587810516357422
23999 9.400721549987793
25999 9.083019256591797
27999 8.967328071594238
29999 8.902935028076172
Result: y = 0.005774965509772301 + 0.8520311117172241 x + -0.001510755275376141 x^2 +        -0.09294140338897705 x^3 + 0.0001134744961746037 x^4 ? + 0.0001134744961746037 x^5 ?
