# NumPy
- [PyTorch tutorial: two-layer network implemented with NumPy](https://tutorials.pytorch.kr/beginner/examples_tensor/two_layer_net_numpy.html)

In [1]:
import numpy as np

In [2]:
# Problem dimensions for the two-layer network.
N = 64         # batch size
D_in = 1000    # input dimension
H = 100        # hidden-layer dimension
D_out = 10     # output dimension

In [3]:
# Seed NumPy's global RNG so that a fresh "Restart & Run All" regenerates the
# same data and initial weights, and therefore the same loss curve.
np.random.seed(0)

# Random input batch (N x D_in) and target batch (N x D_out)
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Random initial weights: w1 maps input -> hidden, w2 maps hidden -> output
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

In [4]:
# Step size for plain gradient descent
learning_rate = 1e-6

for t in range(501):
    # Forward pass:
    #   h      = x . w1
    #   h_relu = max(h, 0)
    #   y_pred = h_relu . w2
    h = np.dot(x, w1)
    h_relu = np.maximum(h, 0)
    y_pred = np.dot(h_relu, w2)

    # Sum-of-squared-errors loss, printed every 10th iteration
    loss = np.sum(np.square(y_pred - y))
    if t % 10 == 0:
        print(t, f"{loss:.4f}")

    # Backward pass: chain rule from the loss back to w1 and w2
    grad_y_pred = 2.0 * (y_pred - y)
    # grad_y_pred is the upstream gradient of the second matrix product
    grad_w2 = np.dot(h_relu.T, grad_y_pred)
    grad_h_relu = np.dot(grad_y_pred, w2.T)
    # ReLU passes the gradient through unchanged, except it is zero where h < 0
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = np.dot(x.T, grad_h)

    # Update both weight matrices in place
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 27356899.0647
10 2200733.4902
20 235291.2504
30 76192.7672
40 29665.8436
50 12776.0463
60 5909.9944
70 2874.6869
80 1447.4187
90 747.9379
100 394.0387
110 210.7956
120 114.3980
130 62.7363
140 34.6977
150 19.3271
160 10.8321
170 6.1032
180 3.4551
190 1.9640
200 1.1206
210 0.6415
220 0.3683
230 0.2121
240 0.1224
250 0.0708
260 0.0411
270 0.0238
280 0.0139
290 0.0081
300 0.0047
310 0.0028
320 0.0016
330 0.0009
340 0.0006
350 0.0003
360 0.0002
370 0.0001
380 0.0001
390 0.0000
400 0.0000
410 0.0000
420 0.0000
430 0.0000
440 0.0000
450 0.0000
460 0.0000
470 0.0000
480 0.0000
490 0.0000
500 0.0000


# Tensor
- [PyTorch tutorial: two-layer network implemented with PyTorch tensors](https://tutorials.pytorch.kr/beginner/examples_tensor/two_layer_net_tensor.html)

In [5]:
import torch

In [6]:
# Element type and device shared by every tensor created below.
dtype = torch.float32  # same dtype object as torch.float
device = torch.device("cpu")
# To run on the first GPU instead, use the following line:
# device = torch.device("cuda:0")

In [7]:
# Problem dimensions for the two-layer network.
N = 64         # batch size
D_in = 1000    # input dimension
H = 100        # hidden-layer dimension
D_out = 10     # output dimension

In [8]:
# Seed PyTorch's RNG so that a fresh "Restart & Run All" regenerates the same
# data and initial weights, and therefore the same loss curve.
torch.manual_seed(0)

# Random input batch (N x D_in) and target batch (N x D_out)
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Random initial weights: w1 maps input -> hidden, w2 maps hidden -> output
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

In [9]:
# Step size for plain gradient descent
learning_rate = 1e-6
for t in range(501):
    # Forward pass: matrix product, clamp at zero (ReLU), matrix product
    h = torch.mm(x, w1)
    h_relu = torch.clamp(h, min=0)
    y_pred = torch.mm(h_relu, w2)

    # Sum-of-squared-errors loss; .item() extracts the scalar value
    loss = torch.sum(torch.pow(y_pred - y, 2)).item()
    if t % 10 == 0:
        print(t, f"{loss:.4f}")

    # Backward pass: manual chain rule from the loss back to w1 and w2
    grad_y_pred = 2. * (y_pred - y)
    grad_w2 = torch.mm(h_relu.t(), grad_y_pred)
    grad_h_relu = torch.mm(grad_y_pred, w2.t())
    # Copy first so that zeroing the entries where h < 0 leaves grad_h_relu intact
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = torch.mm(x.t(), grad_h)

    # Update both weight tensors in place
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
    

0 35550796.0000
10 1010266.6875
20 227799.6250
30 77326.8906
40 32102.8184
50 15031.2969
60 7670.0991
70 4188.3511
80 2403.4258
90 1434.7886
100 883.6292
110 558.9139
120 361.2712
130 237.8179
140 158.9673
150 107.6461
160 73.6961
170 50.9278
180 35.4751
190 24.8799
200 17.5519
210 12.4444
220 8.8620
230 6.3346
240 4.5428
250 3.2673
260 2.3557
270 1.7023
280 1.2325
290 0.8938
300 0.6491
310 0.4721
320 0.3437
330 0.2506
340 0.1829
350 0.1336
360 0.0976
370 0.0714
380 0.0524
390 0.0384
400 0.0282
410 0.0208
420 0.0153
430 0.0114
440 0.0084
450 0.0063
460 0.0048
470 0.0036
480 0.0028
490 0.0021
500 0.0016
