# Numpy
- [Link to Page](https://tutorials.pytorch.kr/beginner/examples_tensor/two_layer_net_numpy.html)

In [1]:
import numpy as np

In [2]:
# Network dimensions used throughout the NumPy example.
N = 64        # batch size
D_in = 1000   # input dimension
H = 100       # hidden-layer dimension
D_out = 10    # output dimension

In [3]:
# Seed NumPy's global RNG so that a fresh "Restart & Run All" draws the same
# data and initial weights, and therefore reproduces the same loss curve.
np.random.seed(0)

# Random input and target data (standard-normal samples).
x = np.random.randn(N, D_in)   # inputs,  shape (N, D_in)
y = np.random.randn(N, D_out)  # targets, shape (N, D_out)

# Randomly initialised weights.
w1 = np.random.randn(D_in, H)   # input -> hidden,  shape (D_in, H)
w2 = np.random.randn(H, D_out)  # hidden -> output, shape (H, D_out)

In [4]:
# Step size for plain (full-batch) gradient descent.
learning_rate = 1e-6

for t in range(500):
    # Forward pass:
    #   h      = x . w1        (hidden pre-activation)
    #   h_relu = max(h, 0)     (ReLU)
    #   y_pred = h_relu . w2   (prediction)
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Loss: sum of squared errors over every element of the batch.
    loss = np.square(y_pred - y).sum()
    # Report the loss on every 10th iteration (t = 9, 19, ..., 499).
    if t % 10 == 9:
        print(t,"{:.4f}".format(loss))

    # Backward pass: gradients of the loss with respect to w1 and w2.
    grad_y_pred = 2.0 * (y_pred - y)          # d(loss)/d(y_pred)
    grad_w2 = h_relu.T.dot(grad_y_pred)       # upstream gradient routed through the matmul
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()               # ReLU passes the gradient through unchanged ...
    grad_h[ h < 0 ] = 0                       # ... except where its input was negative
    grad_w1 = x.T.dot(grad_h)

    # Gradient-descent update, in place on the weight arrays.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

9 2339826.0278
19 264878.1227
29 82255.2141
39 31119.1982
49 13319.5620
59 6207.4764
69 3067.2312
79 1584.8695
89 848.5967
99 467.4283
109 263.4418
119 151.2725
129 88.2027
139 52.0815
149 31.0732
159 18.6983
169 11.3321
179 6.9090
189 4.2335
199 2.6052
209 1.6091
219 0.9970
229 0.6195
239 0.3858
249 0.2408
259 0.1506
269 0.0943
279 0.0592
289 0.0372
299 0.0234
309 0.0147
319 0.0093
329 0.0059
339 0.0037
349 0.0023
359 0.0015
369 0.0009
379 0.0006
389 0.0004
399 0.0002
409 0.0002
419 0.0001
429 0.0001
439 0.0000
449 0.0000
459 0.0000
469 0.0000
479 0.0000
489 0.0000
499 0.0000


# Tensor
- [Link to Page](https://tutorials.pytorch.kr/beginner/examples_tensor/two_layer_net_tensor.html)

In [5]:
import torch

In [6]:
# Where the tensors live. CPU by default.
device = torch.device("cpu")
# device = torch.device("cuda:0")  # uncomment to run on the GPU instead

# Element type shared by every tensor below (torch.float is an alias of torch.float32).
dtype = torch.float32

In [7]:
# Network dimensions used throughout the Tensor example.
N = 64        # batch size
D_in = 1000   # input dimension
H = 100       # hidden-layer dimension
D_out = 10    # output dimension

In [8]:
# Seed PyTorch's RNG so that a fresh "Restart & Run All" draws the same data
# and initial weights, and therefore reproduces the same loss curve.
torch.manual_seed(0)

# Random input and target data (standard-normal samples).
x = torch.randn(N, D_in, device = device, dtype= dtype )   # inputs,  shape (N, D_in)
y = torch.randn(N, D_out, device = device, dtype= dtype)   # targets, shape (N, D_out)

# Randomly initialised weights.
w1 = torch.randn(D_in, H, device = device, dtype= dtype)   # input -> hidden,  shape (D_in, H)
w2 = torch.randn(H, D_out, device = device, dtype= dtype)  # hidden -> output, shape (H, D_out)

In [9]:
# Step size for plain (full-batch) gradient descent.
learning_rate = 1e-6
for t in range(500):
    # Forward pass: matmul -> ReLU (clamp at 0) -> matmul.
    h = x.mm(w1)
    h_relu = h.clamp(min = 0)
    y_pred = h_relu.mm(w2)

    # Loss: sum of squared errors; .item() extracts the scalar as a Python number.
    loss = (y_pred - y).pow(2).sum().item()
    # Report the loss on every 10th iteration (t = 9, 19, ..., 499).
    if t % 10 == 9:
        print(t,"{:.4f}".format(loss))

    # Backward pass: manually computed gradients of the loss w.r.t. w1 and w2.
    grad_y_pred = 2. * (y_pred - y)           # d(loss)/d(y_pred)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()              # ReLU passes the gradient through unchanged ...
    grad_h[ h < 0 ] = 0                       # ... except where its input was negative
    grad_w1 = x.t().mm(grad_h)

    # Gradient-descent update, in place on the weight tensors.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
    

9 2889501.0000
19 264929.9375
29 86883.1094
39 34666.5039
49 15199.4824
59 7023.6533
69 3362.4287
79 1654.9215
89 831.9147
99 425.8176
109 221.3177
119 116.6317
129 62.2507
139 33.6157
149 18.3562
159 10.1276
169 5.6439
179 3.1750
189 1.8024
199 1.0323
209 0.5964
219 0.3473
229 0.2039
239 0.1206
249 0.0719
259 0.0432
269 0.0261
279 0.0160
289 0.0099
299 0.0062
309 0.0039
319 0.0026
329 0.0017
339 0.0012
349 0.0008
359 0.0006
369 0.0005
379 0.0003
389 0.0003
399 0.0002
409 0.0002
419 0.0001
429 0.0001
439 0.0001
449 0.0001
459 0.0001
469 0.0001
479 0.0001
489 0.0000
499 0.0000
