# 선형 회귀 모델의 학습에서 다양한 옵티마이저를 적용해보기

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

### Boston data loader

In [2]:
# 데이터 로드 
boston = load_boston()
X = boston.data # 학습데이터
y = boston.target # 라벨데이터

# 데이터 스케일링
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print("X_train len >> ",len(X_train))
print("X_test len >> ",len(X_test))
print("y_train len >> ",len(y_train))
print("y_test len >> ",len(y_test))

X_train len >>  354
X_test len >>  152
y_train len >>  354
y_test len >>  152



    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np

        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_ho

## 모델 생성 및 하이퍼파라미터 설정

In [3]:
input_dim = X.shape[1]
output_dim = 1
learning_rate = 0.0001
num_epochs = 1000

# 모델 생성
model = nn.Linear(input_dim, output_dim)

### 다양한 옵티마이저 설정

In [4]:
optimizers = {"SGD" : optim.SGD(model.parameters(), lr = learning_rate),
             "Momentum": optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9),
             "Adagrad": optim.Adagrad(model.parameters(), lr = learning_rate),
             "RMSprop": optim.RMSprop(model.parameters(), lr = learning_rate),
             "Adam": optim.Adam(model.parameters(), lr = learning_rate)}

In [5]:
# 모델 학습
for optimizer_name, optimizer in optimizers.items():
    criterion = nn.MSELoss()
    optimizer.zero_grad()
    
    for epoch in range(num_epochs):
        inputs = torch.tensor(X_train, dtype=torch.float32)
        labels = torch.tensor(y_train, dtype=torch.float32)
#         print(inputs)
#         print(labels)
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        loss.backward()
        optimizer.step()
        
        # Print progress
        if (epoch+1) % 100 == 0:
            print(f"{optimizer_name} - Epoch[{epoch+1}/{num_epochs}], Loss:{loss.item():.4f}")

  return F.mse_loss(input, target, reduction=self.reduction)


SGD - Epoch[100/1000], Loss:104.9936
SGD - Epoch[200/1000], Loss:565.5245
SGD - Epoch[300/1000], Loss:218.5592
SGD - Epoch[400/1000], Loss:417.5939
SGD - Epoch[500/1000], Loss:382.2391
SGD - Epoch[600/1000], Loss:256.2150
SGD - Epoch[700/1000], Loss:525.0541
SGD - Epoch[800/1000], Loss:145.6483
SGD - Epoch[900/1000], Loss:592.8229
SGD - Epoch[1000/1000], Loss:127.4958
Momentum - Epoch[100/1000], Loss:152.6374
Momentum - Epoch[200/1000], Loss:182.0156
Momentum - Epoch[300/1000], Loss:46139.2109
Momentum - Epoch[400/1000], Loss:11078895.0000
Momentum - Epoch[500/1000], Loss:817977408.0000
Momentum - Epoch[600/1000], Loss:2258617856.0000
Momentum - Epoch[700/1000], Loss:8778222665728.0000
Momentum - Epoch[800/1000], Loss:3207783029669888.0000
Momentum - Epoch[900/1000], Loss:327495001531482112.0000
Momentum - Epoch[1000/1000], Loss:5116460811623071744.0000
Adagrad - Epoch[100/1000], Loss:8742050926191181824.0000
Adagrad - Epoch[200/1000], Loss:8742050926191181824.0000
Adagrad - Epoch[300/