# [3] PyTorch Basic

In [None]:
import torch

torch.__version__

'1.6.0+cu101'

In [None]:
# Fix the RNG seed so the random inputs, targets and initial weights created
# below are the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Default floating-point type used for every tensor in this notebook.
dtype = torch.float
# Prefer the GPU when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device

device(type='cuda', index=0)

### (1) Parameter 설정
* N : Batch size
* D_in : Input dimension / H : Hidden dimension / D_out : Output dimension


In [None]:
# Batch size: number of samples in one batch.
N = 64

# Layer widths: input features, hidden units, output features.
D_in = 1000
H = 100
D_out = 10

### (2) input (x) / target (y) 설정

In [None]:
# Random input batch and random regression targets, created directly on `device`.
x = torch.randn((N, D_in), dtype=dtype, device=device)
y = torch.randn((N, D_out), dtype=dtype, device=device)

# Display both shapes as the cell output.
x.shape, y.shape

(torch.Size([64, 1000]), torch.Size([64, 10]))

In [None]:
# Peek at the first 20 features of the first sample.
x[0][:20]

tensor([ 0.9284,  1.6228,  1.7541, -2.7384, -0.2308, -1.0436,  0.1682, -0.0155,
         2.3583,  1.2076,  0.1973, -0.6411,  0.2044,  1.1255, -0.1575, -0.5051,
         1.5404, -0.1795, -1.3157, -0.8399], device='cuda:0')

### (3) Model / Loss function / Optimizer 정의

In [None]:
# Two-layer MLP built with the Sequential container: Linear -> ReLU -> Linear.
layers = [
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
]
simple_model = torch.nn.Sequential(*layers)

# Move the parameters to `device`; .to() returns the module, which is displayed.
simple_model.to(device)

Sequential(
  (0): Linear(in_features=1000, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

In [None]:
class TwoLayerNet(torch.nn.Module):
  """Fully connected network: Linear -> clamp-at-zero (ReLU) -> Linear.

  Args:
    D_in: number of input features of the first linear layer.
    H: number of hidden units (output of layer 1, input of layer 2).
    D_out: number of output features of the second linear layer.
  """

  def __init__(self, D_in, H, D_out):
    super().__init__()

    # Assigning Linear modules as attributes registers their parameters
    # with this module, so model.parameters() will yield them.
    self.linear1 = torch.nn.Linear(D_in, H)
    self.linear2 = torch.nn.Linear(H, D_out)

  def forward(self, x):
    """Return the network output for input `x`."""
    hidden = self.linear1(x)
    # Clamping at a minimum of 0 zeroes out negative activations.
    activated = torch.clamp(hidden, min=0)
    return self.linear2(activated)

In [None]:
# Build the custom network and move its parameters to `device`.
# Module.to() returns the module itself, so the call can be chained.
model = TwoLayerNet(D_in, H, D_out).to(device)

model

TwoLayerNet(
  (linear1): Linear(in_features=1000, out_features=100, bias=True)
  (linear2): Linear(in_features=100, out_features=10, bias=True)
)

In [None]:
# Squared-error loss; reduction="sum" adds up the element-wise errors
# instead of averaging them.
loss_fn = torch.nn.MSELoss(reduction="sum")

In [None]:
# Step size used by the optimizer.
learning_rate = 1e-4

# Adam over every parameter registered on `model`.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

### (4) Training

In [None]:
# Put the model in training mode before optimizing.
model.train()

for epoch in range(10):
  # Clear gradients left over from the previous iteration; backward()
  # accumulates into .grad rather than overwriting it.
  optimizer.zero_grad()

  # Forward pass on the full batch and loss against the targets.
  y_pred = model(x)
  loss = loss_fn(y_pred, y)
  print(f'[Epoch {epoch}] loss: {loss.item()}')

  # Compute the gradient of the loss with respect to each weight.
  loss.backward()

  # Update the weights using the gradients just computed.
  optimizer.step()

[Epoch 0] loss: 710.5137939453125
[Epoch 1] loss: 692.9041748046875
[Epoch 2] loss: 675.7921142578125
[Epoch 3] loss: 659.0867919921875
[Epoch 4] loss: 642.8419189453125
[Epoch 5] loss: 627.0905151367188
[Epoch 6] loss: 611.8078002929688
[Epoch 7] loss: 597.099365234375
[Epoch 8] loss: 582.84228515625
[Epoch 9] loss: 569.018798828125


### (5) Inference

In [None]:
# Fresh random batch, shaped like the training input, to run inference on.
x_i = torch.randn((N, D_in), dtype=dtype, device=device)

x_i.shape

torch.Size([64, 1000])

In [None]:
# eval(): switches the model to evaluation mode, where Dropout is disabled and
# Batch Normalization uses the statistics saved during training.
model.eval()

# eval() does not turn off autograd. Run the forward pass under no_grad() so
# no computation graph is built and the result does not require gradients.
with torch.no_grad():
  y_i = model(x_i)

In [None]:
# Shape of the predictions: one row per input sample.
y_i.shape

torch.Size([64, 10])