In [2]:
import torch

In [5]:
t = torch.FloatTensor([0., 1., 2., 3., 4., 5.])

print(t)
print(t.dim())
print(t.shape)
print(t.size())

tensor([0., 1., 2., 3., 4., 5.])
1
torch.Size([6])
torch.Size([6])


# 선형회귀 구현

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

### x, y dataset define

In [21]:
torch.manual_seed(1)

x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[2], [4], [6]])

print("x_train => \n",x_train)
print("y_train => \n",y_train)

x_train => 
 tensor([[1.],
        [2.],
        [3.]])
y_train => 
 tensor([[2.],
        [4.],
        [6.]])


### Weight, bias define

In [22]:
W = torch.zeros(1, requires_grad=True)
# requires_grad=True는 train을 통해 값이 계속 변하는 변수임을 의미함
print(W)

b = torch.zeros(1, requires_grad=True)
print(b)

# y = Wx + b  ==>   y = 0 * x + 0 인 상태

tensor([0.], requires_grad=True)
tensor([0.], requires_grad=True)


### loss function define
###### H(x) = Wx + b
###### cost(W,b)=1/n* n∑i=1 [y(i)−H(x(i))]2
###### 평균 제곱 오차 MSE

In [26]:
hypothesis = x_train * W + b
print(hypothesis)

cost = torch.mean((hypothesis - y_train) ** 2)
print(cost)

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)
tensor(18.6667, grad_fn=<MeanBackward0>)


### optimizer function define

In [27]:
optimizer = optim.SGD([W,b], lr=0.01)
print(optimizer)
print(W)
print(b)
# gradient를 0으로 초기화
optimizer.zero_grad()
print(optimizer)
print(W)
print(b)
# cost function을 미분하여 gradient 계산
print(cost)
cost.backward()
print(cost)
print(W)
print(b)

# W와 b 업데이트
optimizer.step()
print(optimizer)
print(W)
print(b)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)
tensor([0.], requires_grad=True)
tensor([0.], requires_grad=True)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)
tensor([0.], requires_grad=True)
tensor([0.], requires_grad=True)
tensor(18.6667, grad_fn=<MeanBackward0>)
tensor(18.6667, grad_fn=<MeanBackward0>)
tensor([0.], requires_grad=True)
tensor([0.], requires_grad=True)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)
tensor([0.1867], requires_grad=True)
tensor([0.0800], requires_grad=True)


# training

In [69]:
x_train = torch.FloatTensor([[1],[2],[3]])
y_train = torch.FloatTensor([[2],[4],[6]])

W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = optim.SGD([W, b], lr = 0.01)

epochs = 2000

for epoch in range(epochs + 1) :
    
    hypothesis = x_train * W + b
    
    cost = torch.mean((hypothesis - y_train) ** 2)

#     optimizer.zero_grad()
    grad_hypo = 2 * (hypothesis - y_train)
    print("grad_hypo : ",grad_hypo)
    grad_w = x_train * grad_hypo + b
    print("grad_w : ",grad_w)
    grad_b = hypothesis - x_train * grad_w
    
    W = W - 0.01 * grad_w
    b = b - 0.01 * grad_b
    
    #cost.backward()
    #optimizer.step()
    
    # .item()으로 tensor 형식의 변수에서 값만 가져올 수 있음
    if epoch % 100 == 0 :
#         print(W)
#         print(b)
#         print(cost)
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, epochs, W.sum().item(), b.sum().item(), cost.item()))

grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[ -4.],
        [-16.],
        [-36.]], grad_fn=<AddBackward0>)
Epoch    0/2000 W: 0.560, b: -1.440 Cost: 18.666666
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[ -4.0400],
        [-16.3200],
        [-37.0800]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[ -4.0804],
        [-16.6464],
        [-38.1924]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[ -4.1212],
        [-16.9793],
        [-39.3382]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[ -4.1624],
        [-17.3189],
        [-40.5183]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-

        [-12.0010]], grad_fn=<MulBackward0>)
grad_w :  tensor([[  -16.9301],
        [ -282.5822],
        [-2616.3801]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ -8.0000],
        [-12.0010]], grad_fn=<MulBackward0>)
grad_w :  tensor([[  -17.0994],
        [ -288.2338],
        [-2694.8716]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ -8.0000],
        [-12.0010]], grad_fn=<MulBackward0>)
grad_w :  tensor([[  -17.2704],
        [ -293.9985],
        [-2775.7178]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ -8.0000],
        [-12.0010]], grad_fn=<MulBackward0>)
grad_w :  tensor([[  -17.4431],
        [ -299.8785],
        [-2858.9893]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ -8.0000],
        [-12.0010]], grad_fn=<MulBackward0>)
grad_w :  tensor([[  -17.6175],
        [ -305.8760],
        [-2944.7590]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ -8.0000],
     

        [-11.9375]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-6.9550e+01],
        [-4.7029e+03],
        [-1.7401e+05]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ -8.0000],
        [-11.9688]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-7.0245e+01],
        [-4.7970e+03],
        [-1.7924e+05]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ -8.0000],
        [-11.9375]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-7.0947e+01],
        [-4.8929e+03],
        [-1.8461e+05]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ -8.0000],
        [-11.9375]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-7.1657e+01],
        [-4.9908e+03],
        [-1.9015e+05]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ -8.0000],
        [-11.9688]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-7.2374e+01],
        [-5.0906e+03],
        [-1.9586e+05]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.0000],
        [ 

        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-2.7731e+02],
        [-7.3755e+04],
        [-1.0592e+07]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-2.8008e+02],
        [-7.5230e+04],
        [-1.0909e+07]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-2.8288e+02],
        [-7.6735e+04],
        [-1.1237e+07]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-2.8571e+02],
        [-7.8270e+04],
        [-1.1574e+07]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-14.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-2.8857e+02],
        [-7.9835e+04],
        [-1.1921e+07]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w : 

        [-5.0891e+08]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-76.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0313e+03],
        [-1.0070e+06],
        [-5.2417e+08]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0416e+03],
        [-1.0271e+06],
        [-5.3990e+08]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[  -4.],
        [  -8.],
        [-140.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0520e+03],
        [-1.0476e+06],
        [-5.5610e+08]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[  -4.],
        [  -8.],
        [-140.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0626e+03],
        [-1.0686e+06],
        [-5.7278e+08]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0732e+03],
        [-1.0900e+06],
        [-5.8996e+08]], grad_fn=<AddBackward

        [-2.1725e+10]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 4.0840e+03]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-3.6492e+03],
        [-1.2452e+07],
        [-2.2377e+10]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 4.0840e+03]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-3.6857e+03],
        [-1.2701e+07],
        [-2.3048e+10]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 4.0840e+03]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-3.7226e+03],
        [-1.2955e+07],
        [-2.3740e+10]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-3.7598e+03],
        [-1.3214e+07],
        [-2.4452e+10]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 4.0840e+03]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-3.

grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 2.6213e+05]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.3707e+04],
        [-1.7340e+08],
        [-1.1407e+12]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 2.6213e+05]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.3844e+04],
        [-1.7687e+08],
        [-1.1749e+12]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 2.6213e+05]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.3983e+04],
        [-1.8041e+08],
        [-1.2101e+12]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.4122e+04],
        [-1.8402e+08],
        [-1.2464e+12]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 2.6213e+05]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.4264e+04],
        [-1.8770e+08],
        [-1.28

grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 5.0332e+07]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-6.4085e+04],
        [-3.7332e+09],
        [-1.1141e+14]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 5.0332e+07]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-6.4726e+04],
        [-3.8079e+09],
        [-1.1475e+14]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 5.0332e+07]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-6.5373e+04],
        [-3.8841e+09],
        [-1.1819e+14]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 6.7109e+07]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-6.6027e+04],
        [-3.9617e+09],
        [-1.2174e+14]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 6.7109e+07]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-6.6687e+04],
        [-4.0410

grad_w :  tensor([[-3.0870e+05],
        [-8.5293e+10],
        [-1.1891e+16]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-6.4425e+09]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-3.1178e+05],
        [-8.6999e+10],
        [-1.2247e+16]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-4.2950e+09]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-3.1490e+05],
        [-8.8739e+10],
        [-1.2615e+16]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-4.2950e+09]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-3.1805e+05],
        [-9.0514e+10],
        [-1.2993e+16]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-2.1475e+09]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-3.2123e+05],
        [-9.2324e+10],
        [-1.3383e+16]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000

        [-1.6076e+18]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-2.7488e+11]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.6263e+06],
        [-2.3289e+12],
        [-1.6558e+18]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-2.7488e+11]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.6426e+06],
        [-2.3754e+12],
        [-1.7055e+18]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.6590e+06],
        [-2.4229e+12],
        [-1.7567e+18]], grad_fn=<AddBackward0>)
Epoch 1300/2000 W: 603126932933967872.000, b: -1809379561851322368.000 Cost: 18.666666
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.6756e+06],
        [-2.4714e+12],
        [-1.8094e+18]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], g

grad_w :  tensor([[-8.4830e+06],
        [-6.2341e+13],
        [-2.2387e+20]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-3.5184e+13]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-8.5679e+06],
        [-6.3587e+13],
        [-2.3059e+20]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-3.5184e+13]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-8.6535e+06],
        [-6.4859e+13],
        [-2.3750e+20]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-3.5184e+13]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-8.7401e+06],
        [-6.6156e+13],
        [-2.4463e+20]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-8.8275e+06],
        [-6.7479e+13],
        [-2.5197e+20]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 3.51

grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-4.5036e+15]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-4.2947e+07],
        [-1.5725e+15],
        [-2.7699e+22]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-4.5036e+15]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-4.3377e+07],
        [-1.6040e+15],
        [-2.8530e+22]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-9.0072e+15]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-4.3810e+07],
        [-1.6361e+15],
        [-2.9386e+22]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-4.5036e+15]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-4.4249e+07],
        [-1.6688e+15],
        [-3.0267e+22]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [-4.5036e+15]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-4.4691e+07],
        [-1.7022

grad_w :  tensor([[-2.0483e+08],
        [-3.5223e+16],
        [-2.8702e+24]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 5.7646e+17]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-2.0688e+08],
        [-3.5927e+16],
        [-2.9563e+24]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 1.1529e+18]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-2.0895e+08],
        [-3.6646e+16],
        [-3.0450e+24]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 1.1529e+18]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-2.1103e+08],
        [-3.7379e+16],
        [-3.1363e+24]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 1.1529e+18]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-2.1315e+08],
        [-3.8126e+16],
        [-3.2304e+24]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000

        [-3.2499e+26]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0166e+09],
        [-8.5399e+17],
        [-3.3474e+26]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0267e+09],
        [-8.7107e+17],
        [-3.4478e+26]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[ -4.],
        [ -8.],
        [-12.]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0370e+09],
        [-8.8849e+17],
        [-3.5512e+26]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 7.3787e+19]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0474e+09],
        [-9.0626e+17],
        [-3.6577e+26]], grad_fn=<AddBackward0>)
grad_hypo :  tensor([[-4.0000e+00],
        [-8.0000e+00],
        [ 7.3787e+19]], grad_fn=<MulBackward0>)
grad_w :  tensor([[-1.0578e+09],
        [-9.2439e+17],
        

In [38]:
x_train = torch.FloatTensor([[1],[2],[3]])
y_train = torch.FloatTensor([[2],[4],[6]])

W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = optim.SGD([W, b], lr = 0.01)

epochs = 2000

for epoch in range(epochs + 1) :
    
    hypothesis = x_train * W + b
    
    cost = torch.mean((hypothesis - y_train) ** 2)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # .item()으로 tensor 형식의 변수에서 값만 가져올 수 있음
    if epoch % 100 == 0 :
#         print(W)
#         print(b)
#         print(cost)
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, epochs, W.item(), b.item(), cost.item()))

Epoch    0/2000 W: 0.187, b: 0.080 Cost: 18.666666
Epoch  100/2000 W: 1.746, b: 0.578 Cost: 0.048171
Epoch  200/2000 W: 1.800, b: 0.454 Cost: 0.029767
Epoch  300/2000 W: 1.843, b: 0.357 Cost: 0.018394
Epoch  400/2000 W: 1.876, b: 0.281 Cost: 0.011366
Epoch  500/2000 W: 1.903, b: 0.221 Cost: 0.007024
Epoch  600/2000 W: 1.924, b: 0.174 Cost: 0.004340
Epoch  700/2000 W: 1.940, b: 0.136 Cost: 0.002682
Epoch  800/2000 W: 1.953, b: 0.107 Cost: 0.001657
Epoch  900/2000 W: 1.963, b: 0.084 Cost: 0.001024
Epoch 1000/2000 W: 1.971, b: 0.066 Cost: 0.000633
Epoch 1100/2000 W: 1.977, b: 0.052 Cost: 0.000391
Epoch 1200/2000 W: 1.982, b: 0.041 Cost: 0.000242
Epoch 1300/2000 W: 1.986, b: 0.032 Cost: 0.000149
Epoch 1400/2000 W: 1.989, b: 0.025 Cost: 0.000092
Epoch 1500/2000 W: 1.991, b: 0.020 Cost: 0.000057
Epoch 1600/2000 W: 1.993, b: 0.016 Cost: 0.000035
Epoch 1700/2000 W: 1.995, b: 0.012 Cost: 0.000022
Epoch 1800/2000 W: 1.996, b: 0.010 Cost: 0.000013
Epoch 1900/2000 W: 1.997, b: 0.008 Cost: 0.000008

## optimizer.zero_grad() 가 필요한 이유

    pytorch는 gradient를 loss.backward()를 통해서 계산하는데, 
    이 함수는 gradient를 덮어서 새로 계산하는 것이 아니라
    이전 gradient에 누적하여 계산한다.

    이렇게 누적해서 계산하는 이유는 RNN 모델을 구현 할 때 필요하기 때문이다.
    그래서 RNN 모델이 아닌 다른 모델은 gradient를 누적 할 필요가 없기 때문에
    model에 input을 통과 시키기전에 optimizer.zero_grad()를

## .backward()가 어떤 것인지와 진행되는 방법

    backpropagation과 같음.