## Imports

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
torch.manual_seed(1)

<torch._C.Generator at 0x2b3a34023d0>

## Data

In [4]:
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

In [5]:
print(x_train)
print(x_train.shape)

tensor([[1.],
        [2.],
        [3.]])
torch.Size([3, 1])


In [6]:
print(y_train)
print(y_train.shape)

tensor([[1.],
        [2.],
        [3.]])
torch.Size([3, 1])


## Weight Initialization
requires_grad : True여야만 학습할 수 있음. (미분)

In [7]:
W = torch.zeros(1, requires_grad=True)
print(W)

b = torch.zeros(1, requires_grad=True)
print(b)

tensor([0.], requires_grad=True)
tensor([0.], requires_grad=True)


## Hypothesis
H(x) = Wx + b

In [8]:
hypothesis = x_train * W + b
print(hypothesis)

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)


## Cost

In [9]:
print(hypothesis)
print(y_train)
print(hypothesis - y_train)
print((hypothesis - y_train) ** 2)

# MSE loss 
cost = torch.mean((hypothesis - y_train) ** 2)
print(cost)

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)
tensor([[1.],
        [2.],
        [3.]])
tensor([[-1.],
        [-2.],
        [-3.]], grad_fn=<SubBackward0>)
tensor([[1.],
        [4.],
        [9.]], grad_fn=<PowBackward0>)
tensor(4.6667, grad_fn=<MeanBackward0>)


## Gradient Descent

In [10]:
optimizer=optim.SGD([W, b], lr=0.01)

optimizer.zero_grad
cost.backward()
optimizer.step()

print(W)
print(b)

hypothesis = x_train * W + b
print(hypothesis)

cost = torch.mean((hypothesis - y_train) ** 2)
print(cost)

tensor([0.0933], requires_grad=True)
tensor([0.0400], requires_grad=True)
tensor([[0.1333],
        [0.2267],
        [0.3200]], grad_fn=<AddBackward0>)
tensor(3.6927, grad_fn=<MeanBackward0>)


## Training with Full Code

In [11]:
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = optim.SGD([W, b], lr=0.01)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    
    hypothesis = W * x_train + b
    cost = torch.mean((hypothesis - y_train) ** 2)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, nb_epochs, W.item(), b.item(), cost.item()
        ))

Epoch    0/1000 W: 0.093, b: 0.040 Cost: 4.666667
Epoch  100/1000 W: 0.873, b: 0.289 Cost: 0.012043
Epoch  200/1000 W: 0.900, b: 0.227 Cost: 0.007442
Epoch  300/1000 W: 0.921, b: 0.179 Cost: 0.004598
Epoch  400/1000 W: 0.938, b: 0.140 Cost: 0.002842
Epoch  500/1000 W: 0.951, b: 0.110 Cost: 0.001756
Epoch  600/1000 W: 0.962, b: 0.087 Cost: 0.001085
Epoch  700/1000 W: 0.970, b: 0.068 Cost: 0.000670
Epoch  800/1000 W: 0.976, b: 0.054 Cost: 0.000414
Epoch  900/1000 W: 0.981, b: 0.042 Cost: 0.000256
Epoch 1000/1000 W: 0.985, b: 0.033 Cost: 0.000158


## High-level 구현
nn.Module 이용하기

In [12]:
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

linear regression 모델 구성

In [14]:
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1)
    
    def forward(self, x):
        return self.linear(x)
    

model = LinearRegressionModel()

hypothesis : y = wx + b

In [15]:
hypothesis = model(x_train)
print(hypothesis)

tensor([[0.0739],
        [0.5891],
        [1.1044]], grad_fn=<AddmmBackward0>)


cost

In [16]:
print(hypothesis)
print(y_train)

cost = F.mse_loss(hypothesis, y_train)
print(cost)

tensor([[0.0739],
        [0.5891],
        [1.1044]], grad_fn=<AddmmBackward0>)
tensor([[1.],
        [2.],
        [3.]])
tensor(2.1471, grad_fn=<MseLossBackward0>)


gradient descent 
: PyTorch의 torch.optim에 있는 optimizer 중 하나 사용 가능 

In [17]:
optimizer = optim.SGD(model.parameters(), lr = 0.01)

optimizer.zero_grad()
cost.backward()
optimizer.step()

full code

In [18]:
# 데이터
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

# 모델 초기화
model = LinearRegressionModel()

# optimizer 설정
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epochs = 1000
for epoch in range(nb_epochs):

    prediction = model(x_train)
    cost = F.mse_loss(prediction, y_train)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        params = list(model.parameters())
        W = params[0].item()
        b = params[1].item()
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f}, Cost: {:.6f}'.format(epoch, nb_epochs, W, b, cost.item()))

Epoch    0/1000 W: -0.101, b: 0.508, Cost: 4.630286
Epoch  100/1000 W: 0.713, b: 0.653, Cost: 0.061555
Epoch  200/1000 W: 0.774, b: 0.514, Cost: 0.038037
Epoch  300/1000 W: 0.822, b: 0.404, Cost: 0.023505
Epoch  400/1000 W: 0.860, b: 0.317, Cost: 0.014525
Epoch  500/1000 W: 0.890, b: 0.250, Cost: 0.008975
Epoch  600/1000 W: 0.914, b: 0.196, Cost: 0.005546
Epoch  700/1000 W: 0.932, b: 0.154, Cost: 0.003427
Epoch  800/1000 W: 0.947, b: 0.121, Cost: 0.002118
Epoch  900/1000 W: 0.958, b: 0.095, Cost: 0.001309
