### Preparation

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

### Load Dataset: Boston Housing Price

In [2]:
# Load dataset
boston_dataset = load_boston()
print(boston_dataset)

{'data': array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]]), 'target': array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
       18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
       15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
       13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
       21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
       35.4, 24.7, 31.6, 23.3, 19.6, 1


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np

        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_ho

In [3]:
# Get values
x_data = boston_dataset.data    # Learning Data
y_data = boston_dataset.target  # Label Data
print(x_data)
print(y_data)

[[6.3200e-03 1.8000e+01 2.3100e+00 ... 1.5300e+01 3.9690e+02 4.9800e+00]
 [2.7310e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9690e+02 9.1400e+00]
 [2.7290e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9283e+02 4.0300e+00]
 ...
 [6.0760e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 5.6400e+00]
 [1.0959e-01 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9345e+02 6.4800e+00]
 [4.7410e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 7.8800e+00]]
[24.  21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 15.  18.9 21.7 20.4
 18.2 19.9 23.1 17.5 20.2 18.2 13.6 19.6 15.2 14.5 15.6 13.9 16.6 14.8
 18.4 21.  12.7 14.5 13.2 13.1 13.5 18.9 20.  21.  24.7 30.8 34.9 26.6
 25.3 24.7 21.2 19.3 20.  16.6 14.4 19.4 19.7 20.5 25.  23.4 18.9 35.4
 24.7 31.6 23.3 19.6 18.7 16.  22.2 25.  33.  23.5 19.4 22.  17.4 20.9
 24.2 21.7 22.8 23.4 24.1 21.4 20.  20.8 21.2 20.3 28.  23.9 24.8 22.9
 23.9 26.6 22.5 22.2 23.6 28.7 22.6 22.  22.9 25.  20.6 28.4 21.4 38.7
 43.8 33.2 27.5 26.5 18.6 19.3 20.1 19.5 19.5 20.4 19.8 19.

### Data Pre-Processing

In [4]:
# Standardization
scaler = StandardScaler()
s_data = scaler.fit_transform(x_data)

In [5]:
# Split data
x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                   y_data,
                                                   test_size = 0.1,
                                                   random_state = 85)

print('x_train len: ', len(x_train))
print('x_test len: ', len(x_test))
print('y_train len: ', len(y_train))
print('y_test len: ', len(y_test))

x_train len:  455
x_test len:  51
y_train len:  455
y_test len:  51


### Create Models

In [6]:
# Set parameters
#print(x_data.shape)
input_dim = x_data.shape[1]  # 13
output_dim = 1
lr = 0.00000000005  # 0.0000025 - SGD only
                # 0.00000025 - Momentum 1st only
num_epochs = 1000

# Create a model
model = nn.Linear(input_dim, output_dim)  # Linear Regression
print(model)

Linear(in_features=13, out_features=1, bias=True)


### Set Optimizers

In [7]:
optimizers = {'SGD': optim.SGD(model.parameters(), lr = lr),
              'Momentum': optim.SGD(model.parameters(), lr = lr, momentum = 0.9),
              'Adagrad': optim.Adagrad(model.parameters(), lr = lr),
              'RMSprop': optim.RMSprop(model.parameters(), lr = lr),
              'Adam': optim.Adam(model.parameters(), lr = lr)
             }
#print(optimizers)

### Fit Models

In [9]:
# Fit models
#print(optimizers.items())
for optimizer_name, optimizer in optimizers.items():
    
    # Set Loss functions
    criterion = nn.MSELoss()
    optimizer.zero_grad()
    
    # Fit a model
    for epoch in range(num_epochs):
        # Get inputs and labels
        inputs = torch.tensor(x_train, dtype = torch.float32)
        labels = torch.tensor(y_train, dtype = torch.float32)
        #print(inputs, labels)
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        #print(outputs, loss)
        
        # Backward and optimize
        loss.backward()
        optimizer.step()
        
        # Display progress
        if (epoch + 1) % 100 == 0:
            print(f'{optimizer_name}: Epoch - [{epoch + 1} / {num_epochs}], Loss - {loss.item():.4f}')

SGD: Epoch - [100 / 1000], Loss - 121.2258
SGD: Epoch - [200 / 1000], Loss - 119.2226
SGD: Epoch - [300 / 1000], Loss - 118.4763
SGD: Epoch - [400 / 1000], Loss - 119.7500
SGD: Epoch - [500 / 1000], Loss - 121.5019
SGD: Epoch - [600 / 1000], Loss - 121.6216
SGD: Epoch - [700 / 1000], Loss - 119.8077
SGD: Epoch - [800 / 1000], Loss - 117.9077
SGD: Epoch - [900 / 1000], Loss - 117.8383
SGD: Epoch - [1000 / 1000], Loss - 119.4280
Momentum: Epoch - [100 / 1000], Loss - 116.8616
Momentum: Epoch - [200 / 1000], Loss - 166.1118
Momentum: Epoch - [300 / 1000], Loss - 121.8707
Momentum: Epoch - [400 / 1000], Loss - 183.2732
Momentum: Epoch - [500 / 1000], Loss - 155.0927
Momentum: Epoch - [600 / 1000], Loss - 181.9057
Momentum: Epoch - [700 / 1000], Loss - 245.9613
Momentum: Epoch - [800 / 1000], Loss - 150.6368
Momentum: Epoch - [900 / 1000], Loss - 418.8108
Momentum: Epoch - [1000 / 1000], Loss - 109.6309
Adagrad: Epoch - [100 / 1000], Loss - 108.7004
Adagrad: Epoch - [200 / 1000], Loss - 108