# Lab 2: Linear Regression

### Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Fix PyTorch's global RNG seed so randomly initialised values repeat across runs.
SEED = 1
torch.manual_seed(SEED)

<torch._C.Generator at 0x1e49977acf0>

### Data

In [3]:
# Toy dataset: three samples with one feature each; every target equals its input (y = x).
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

# Show each tensor followed by its shape, separated by a blank line.
print(x_train, x_train.shape, sep="\n")
print()
print(y_train, y_train.shape, sep="\n")

tensor([[1.],
        [2.],
        [3.]])
torch.Size([3, 1])

tensor([[1.],
        [2.],
        [3.]])
torch.Size([3, 1])


### Weight Initialization

In [4]:
# requires_grad=True declares both tensors as trainable, so autograd tracks them.
W = torch.zeros((1,), requires_grad=True)
b = torch.zeros((1,), requires_grad=True)

### Hypothesis

> - $H(x) = Wx + b$

In [5]:
# Linear model H(x) = W * x + b; the one-element W and b broadcast over the rows of x_train.
hypothesis = W * x_train + b
print(hypothesis, hypothesis.shape, sep="\n")

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)
torch.Size([3, 1])


### Cost
> - $cost(W, b) = \frac{1}{m} \sum_{i=1}^{m}(H(x^{(i)}) - y^{(i)})^2$

In [6]:
# Mean squared error: average of the squared differences between prediction and target.
squared_error = (hypothesis - y_train) ** 2
cost = torch.mean(squared_error)
print(cost)

tensor(4.6667, grad_fn=<MeanBackward0>)


### Gradient Descent

In [7]:
# Stochastic gradient descent over the two trainable tensors, learning rate 0.01.
optimizer = optim.SGD([W, b], lr=.01)

# One update step; the order of these three calls matters.
optimizer.zero_grad()  # clear any gradients left on W and b
cost.backward()        # backpropagate: fill W.grad and b.grad from the cost
optimizer.step()       # move W and b against their gradients

In [8]:
# Bare expression: the notebook displays the optimizer's repr, which lists its hyperparameters.
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [9]:
# Display the current values of both parameters.
W, b

(tensor([0.0933], requires_grad=True), tensor([0.0400], requires_grad=True))

In [10]:
# Re-evaluate H(x) = W * x + b with the current parameters; the bare name displays the result.
hypothesis = W * x_train + b
hypothesis

tensor([[0.1333],
        [0.2267],
        [0.3200]], grad_fn=<AddBackward0>)

In [11]:
# Recompute the mean squared error for the new predictions.
squared_error = (hypothesis - y_train) ** 2
cost = torch.mean(squared_error)
print(cost)  # the cost has decreased

tensor(3.6927, grad_fn=<MeanBackward0>)


### Training with Full Code

In [21]:
# Data: three (x, y) pairs lying on the line y = x
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

# Trainable parameters, both starting at zero
W = torch.zeros((1,), requires_grad=True)
b = torch.zeros((1,), requires_grad=True)

# Plain SGD over the two parameters
optimizer = optim.SGD([W, b], lr=0.01)

nb_epochs = 1000
log_template = "Epoch {:4d}/{} W : {:.3f}, b : {:.3f} Cost : {:.6f}"
for epoch in range(nb_epochs + 1):
    # Forward pass: H(x) = W * x + b
    hypothesis = x_train * W + b

    # Mean squared error between prediction and target
    squared_error = (hypothesis - y_train) ** 2
    cost = torch.mean(squared_error)

    # Weight update: clear old gradients, backpropagate, take one SGD step
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Print a log line every 100 epochs.
    # Note: W and b are read after the step, while cost was computed before it.
    if epoch % 100 != 0:
        continue
    print(log_template.format(epoch, nb_epochs, W.item(), b.item(), cost.item()))

Epoch    0/1000 W : 0.093, b : 0.040 Cost : 4.666667
Epoch  100/1000 W : 0.873, b : 0.289 Cost : 0.012043
Epoch  200/1000 W : 0.900, b : 0.227 Cost : 0.007442
Epoch  300/1000 W : 0.921, b : 0.179 Cost : 0.004598
Epoch  400/1000 W : 0.938, b : 0.140 Cost : 0.002842
Epoch  500/1000 W : 0.951, b : 0.110 Cost : 0.001756
Epoch  600/1000 W : 0.962, b : 0.087 Cost : 0.001085
Epoch  700/1000 W : 0.970, b : 0.068 Cost : 0.000670
Epoch  800/1000 W : 0.976, b : 0.054 Cost : 0.000414
Epoch  900/1000 W : 0.981, b : 0.042 Cost : 0.000256
Epoch 1000/1000 W : 0.985, b : 0.033 Cost : 0.000158


### High-level Implementation with `nn.Module`

In [30]:
# Data: three samples with one feature each; every target equals its input (y = x)
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

In [31]:
class LinearRegressionModel(nn.Module):
    """Linear regression model backed by a single ``nn.Linear`` layer.

    Args:
        input_dim: Number of features per input sample. Defaults to 1.
        output_dim: Number of values predicted per sample. Defaults to 1.
    """

    def __init__(self, input_dim: int = 1, output_dim: int = 1) -> None:
        super().__init__()
        # The layer owns the trainable weight and bias.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the layer's prediction for ``x``.

        The last dimension of ``x`` must equal ``input_dim``.
        """
        return self.linear(x)

In [32]:
# Create a model instance; its weight and bias start from nn.Linear's default initialisation.
model = LinearRegressionModel()

### Hypothesis

In [33]:
# Calling the model runs its forward() on the inputs and returns the predictions.
hypothesis = model(x_train)

print(hypothesis)

tensor([[-0.3417],
        [-1.2832],
        [-2.2246]], grad_fn=<AddmmBackward>)


### Cost

In [34]:
# Mean squared error between prediction and target, using the functional API.
cost = F.mse_loss(hypothesis, y_train)
print(cost)

tensor(13.2919, grad_fn=<MseLossBackward>)


### Gradient Descent

In [35]:
# SGD over every parameter registered on the model, learning rate 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

# One update step; the order of these three calls matters.
optimizer.zero_grad()  # clear any gradients left on the parameters
cost.backward()        # backpropagate the cost into the parameters' .grad
optimizer.step()       # apply the gradient-descent update

### Training with Full Code

In [40]:
# Data: three samples with one feature each; every target equals its input (y = x)
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

# Start from a freshly initialised model
model = LinearRegressionModel()

# SGD over every parameter registered on the model
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    # Forward pass: H(x)
    prediction = model(x_train)

    # Mean squared error between prediction and target
    cost = F.mse_loss(prediction, y_train)

    # Weight update: clear old gradients, backpropagate, take one SGD step
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Print a log line every 100 epochs.
    # Note: the parameters are read after the step, while cost was computed before it.
    if epoch % 100 == 0:
        # Use dedicated names so the notebook-level tensors W and b from the
        # earlier cells are not overwritten with plain Python floats.
        weight, bias = (param.item() for param in model.parameters())
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, nb_epochs, weight, bias, cost.item()
        ))

Epoch    0/1000 W: -0.114, b: 0.547 Cost: 4.589475
Epoch  100/1000 W: 0.700, b: 0.683 Cost: 0.067199
Epoch  200/1000 W: 0.764, b: 0.537 Cost: 0.041525
Epoch  300/1000 W: 0.814, b: 0.422 Cost: 0.025660
Epoch  400/1000 W: 0.854, b: 0.332 Cost: 0.015856
Epoch  500/1000 W: 0.885, b: 0.261 Cost: 0.009798
Epoch  600/1000 W: 0.910, b: 0.205 Cost: 0.006055
Epoch  700/1000 W: 0.929, b: 0.161 Cost: 0.003741
Epoch  800/1000 W: 0.944, b: 0.127 Cost: 0.002312
Epoch  900/1000 W: 0.956, b: 0.100 Cost: 0.001429
Epoch 1000/1000 W: 0.966, b: 0.078 Cost: 0.000883
