# Loss & Optimizer

In [3]:
# Typical PyTorch Pipeline
# 1) Design model (input, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
#    - forward pass: compute prediction
#    - backward pass: gradients
#    - update weights

import torch
import torch.nn as nn

In [4]:
# Target function: f(x) = w * x (linear regression without a bias term).
# The samples follow f(x) = 2 * x, so training should drive w towards 2.
X = torch.arange(1, 5, dtype=torch.float32)   # inputs  1, 2, 3, 4
y = 2 * X                                     # targets 2, 4, 6, 8

# Scalar weight, initialised to 0. requires_grad=True makes autograd track it
# so that backward() can populate w.grad.
w = torch.zeros((), dtype=torch.float32, requires_grad=True)


def forward(x):
    """Return the model prediction ``w * x`` using the module-level weight ``w``."""
    prediction = w * x
    return prediction


# A hand-written MSE, ((y_pred - y) ** 2).mean(), is not defined here:
# the training cell uses nn.MSELoss instead.

In [5]:
# Prediction of the untrained model for x = 5.
print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training hyperparameters
learning_rate = 0.01
n_iters = 100

# Built-in mean squared error. The criterion is called as loss(input, target):
# the model prediction comes first, the ground truth second.
loss = nn.MSELoss()
# Stochastic gradient descent; it receives the parameters to optimise
# (here only w) and the learning rate.
optimizer = torch.optim.SGD([w], lr=learning_rate)

for epoch in range(n_iters):
    # Forward pass: compute the prediction
    y_pred = forward(X)

    # Loss between prediction (input) and ground truth (target)
    l = loss(y_pred, y)

    # Backward pass: autograd computes dl/dw and stores it in w.grad
    l.backward()

    # Gradient-descent update of w, performed by the optimizer
    optimizer.step()

    # Reset the gradient; backward() accumulates into w.grad otherwise
    optimizer.zero_grad()

    # Report progress every 10th epoch
    if epoch % 10 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


# Model

In [17]:
# Target function: f(x) = w * x; the samples follow f(x) = 2 * x.
# The data is 2-D here: one row per sample, one column per feature,
# i.e. shape (n_samples, n_features).
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

n_samples, n_features = X.shape
print(n_samples, n_features)

# The input width comes from X and the output width from y. Deriving the
# output width from n_features would only be correct while both happen to be 1.
input_size = n_features
output_size = y.shape[1]

# The manually managed weight tensor and forward() function are not needed
# in this section; the layer below owns its parameters and its forward pass.

# Single-layer model.
# NOTE: `model` is rebound to a LinearRegression instance later in this cell.
model = nn.Linear(input_size, output_size)

# Minimal linear-regression model written as a custom nn.Module.
# It wraps a single nn.Linear layer, so it behaves like nn.Linear itself.
class LinearRegression(nn.Module):
    """Linear regression model consisting of one ``nn.Linear`` layer.

    Args:
        input_dim: number of input features, passed to ``nn.Linear``.
        output_dim: number of output features, passed to ``nn.Linear``.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only layer of the model
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.lin(x)
        return prediction

# Instantiate the custom module; this is the model used from here on.
model = LinearRegression(input_dim=input_size, output_dim=output_size)

4 1


In [18]:
# Single test sample x = 5, used to compare predictions before and after training.
X_test = torch.tensor([5], dtype=torch.float32)
print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

# Training hyperparameters
learning_rate = 0.01
n_iters = 100

# Built-in mean squared error. The criterion is called as loss(input, target):
# the model prediction comes first, the ground truth second.
loss = nn.MSELoss()
# Stochastic gradient descent over all parameters registered on the model.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # Forward pass: the module computes the prediction
    y_pred = model(X)

    # Loss between prediction (input) and ground truth (target)
    l = loss(y_pred, y)

    # Backward pass: autograd computes the gradients of l w.r.t. the parameters
    l.backward()

    # Gradient-descent update, performed by the optimizer
    optimizer.step()

    # Reset the gradients; backward() accumulates into .grad otherwise
    optimizer.zero_grad()

    # Report progress every 10th epoch
    if epoch % 10 == 0:
        # Read the first parameter (the weight, listed before the bias) under a
        # local name. Unpacking into `w` would rebind the global `w` that the
        # forward() function of the earlier section depends on.
        weight = next(model.parameters())
        print(f'epoch {epoch+1}: w = {weight[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')

Prediction before training: f(5) = 2.712
epoch 1: w = 0.574, loss = 13.67382050
epoch 11: w = 1.425, loss = 0.59571403
epoch 21: w = 1.573, loss = 0.24327002
epoch 31: w = 1.606, loss = 0.22088894
epoch 41: w = 1.621, loss = 0.20781927
epoch 51: w = 1.633, loss = 0.19571762
epoch 61: w = 1.644, loss = 0.18432567
epoch 71: w = 1.654, loss = 0.17359692
epoch 81: w = 1.664, loss = 0.16349272
epoch 91: w = 1.674, loss = 0.15397660
Prediction after training: f(5) = 9.347
