# Regression on the Boston Housing dataset

In [None]:
from sklearn.datasets import load_boston

In [None]:
# Load the Boston housing data through scikit-learn's load_boston helper.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the installed version still provides it.
boston_data = load_boston()

In [None]:
# List the fields available on the loaded dataset object.
print(boston_data.keys())

In [None]:
# Rows of the feature matrix are samples, columns are features.
N, D = boston_data.data.shape
for label, count in (("Number of samples: ", N), ("Number of features: ", D)):
    print(label, count)

In [None]:
# Show the description text that ships with the dataset object.
print(boston_data.DESCR)

In [None]:
import pandas as pd
# Wrap the raw feature matrix in a DataFrame for inspection; column labels are
# assigned in a later cell.
bos = pd.DataFrame(boston_data.data)

In [None]:
# Preview the first rows of the frame as initially constructed.
bos.head()

In [None]:
# Label the columns with the dataset's feature names.
bos.columns = boston_data.feature_names

In [None]:
# Preview the first rows again after the column labels were set.
bos.head()

In [None]:
# Append the regression target as a 'price' column.
bos['price'] = boston_data.target

In [None]:
# Preview the first rows with the 'price' column included.
bos.head()

# Data preprocessing

In [None]:
# Load packages
from sklearn.preprocessing import StandardScaler
# Scaler instance used below to standardize the feature matrix.
scaler = StandardScaler()

In [None]:
import numpy as np
# Feature matrix as a NumPy array, then standardized by the scaler.
# NOTE(review): the scaler is fit here on every row, including rows that later
# land in the test split.
X = np.array(boston_data.data)
X = scaler.fit_transform(X)

In [None]:
# Targets as a column vector: one row per sample, a single price column.
Y = np.array(boston_data.target).reshape(-1, 1)

##### Split the dataset into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split the raw (unscaled) features, then fit the scaler on the training rows
# only and reuse those statistics for the test rows. Fitting the scaler on the
# full dataset before splitting would let test-set means/variances leak into
# preprocessing and make the evaluation optimistic.
X_train, X_test, Y_train, Y_test = train_test_split(
    np.array(boston_data.data), Y, test_size=0.3
)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Sanity-check the split sizes: features first, then targets.
print(*(split.shape for split in (X_train, X_test, Y_train, Y_test)))

## Data Loading

### If a loader is already defined for a standard dataset, use it

In [None]:
import torch
from torchvision import datasets, transforms
# Demo of a ready-made torchvision dataset: MNIST training images, converted
# to tensors and normalized, served in shuffled batches of 32.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    # Mean and std are both one-element tuples (one entry per image channel);
    # the original passed std as a bare float, `(0.3081)`.
    transforms.Normalize((0.1307,), (0.3081,)),
])
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST("../data", train=True, download=True,
                   transform=mnist_transform),
    batch_size=32,
    shuffle=True,
)

In [None]:
# Inspect the dataset object wrapped by the loader.
train_loader.dataset

### Custom Dataset (for use with a DataLoader)

## Three steps
1. Define the initialization method (\_\_init\_\_)
2. Define the length method (\_\_len\_\_)
3. Define the method that returns the item at a given index (\_\_getitem\_\_)

In [None]:
from torch.utils.data import Dataset

class RegressionDataset(Dataset):
    """Map-style dataset pairing each feature row with its regression target.

    Args:
        data: Sized, indexable collection of feature rows.
        output: Sized, indexable collection of targets, aligned with ``data``.

    Raises:
        ValueError: If ``data`` and ``output`` differ in length.
    """

    def __init__(self, data, output):
        # __len__ reports len(data) only, so a shorter `output` would surface
        # as an IndexError in the middle of an epoch; fail fast instead.
        if len(data) != len(output):
            raise ValueError(
                "data and output must have the same length, got "
                f"{len(data)} and {len(output)}"
            )
        self.data = data
        self.output = output

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, ind):
        """Return the ``(features, target)`` pair stored at index ``ind``."""
        return self.data[ind], self.output[ind]

In [None]:
train_dataset = RegressionDataset(X_train, Y_train)
test_dataset = RegressionDataset(X_test, Y_test)

In [None]:
from torch.utils.data import DataLoader

In [None]:
# Serve both splits in batches of 50; only the training loader shuffles.
# Note: this rebinds train_loader, replacing the MNIST loader created earlier.
train_loader = DataLoader(train_dataset, batch_size=50, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=50)

# Define Model

In [None]:
from torch import nn
from torch.nn import functional as F

In [None]:
class RegressionModel(nn.Module):
    """Single-hidden-layer regressor: linear -> ELU -> linear.

    Args:
        feature_dim: Number of input features per sample.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of values predicted per sample.
    """

    def __init__(self, feature_dim, hidden_dim, output_dim):
        super().__init__()
        self.hidden = nn.Linear(feature_dim, hidden_dim)
        self.predict = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the prediction for the input batch ``x``."""
        activated = F.elu(self.hidden(x))
        return self.predict(activated)

In [None]:
# D input features -> 50 hidden units -> 1 predicted value per sample.
model = RegressionModel(D, 50, 1)

In [None]:
# Print the module's summary representation.
print(model)

In [None]:
from draw_neural_net import draw_neural_net
from matplotlib import pyplot as plt
%matplotlib inline

# Training Strategy

- We need a loss function to compute the prediction error
- and an optimizer to update the model's parameters

##### Let's define the values that set up the training process

In [None]:
# Number of full passes over the training data.
num_epochs = 100 
# Learning rate handed to the optimizer.
lr = 0.01

In [None]:
# Squared error summed (not averaged) over each batch; the training loop
# divides the accumulated total by the dataset size to report a per-sample loss.
criterion = torch.nn.MSELoss(reduction='sum')
# Adam over all model parameters, using the learning rate defined above.
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
from tqdm import trange

In [None]:
# Training loop: one optimizer step per mini-batch, one recorded loss per epoch.
losses = []
epochs = trange(num_epochs, desc="Training Loss")
for epoch in epochs:
    running_loss = 0
    for inputs, outputs in train_loader:
        # Cast both tensors to float32 before the forward pass and loss.
        inputs = inputs.float()
        outputs = outputs.float()

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        loss = criterion(model(inputs), outputs)
        loss.backward()
        optimizer.step()

        # The criterion sums over the batch, so this accumulates a total.
        running_loss += loss.item()

    # Per-sample average loss for this epoch, shown on the progress bar.
    epoch_loss = round(running_loss / len(train_loader.dataset), 4)
    epochs.set_description("Training_loss: %g" % epoch_loss)
    losses.append(epoch_loss)

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Plot the recorded per-epoch training loss.
plt.plot(losses)
plt.show()

#### Evaluate the model

In [None]:
# Inference only: switch the model to eval mode and disable gradient tracking.
model.eval()
preds = []
with torch.no_grad():
    for inputs, _targets in test_loader:
        batch_pred = model(inputs.float())
        preds.append(batch_pred.numpy())

# Stack the per-batch outputs in loader order (test_loader does not shuffle).
prediction = np.concatenate(preds)

In [None]:
from sklearn.metrics import r2_score
# r2_score takes (y_true, y_pred). R² is not symmetric in its arguments, so
# the ground-truth prices must come first.
print(r2_score(Y_test, prediction))

In [None]:
# Scatter the true test prices (x-axis) against the model's predictions (y-axis).
plt.scatter(Y_test, prediction)
plt.xlabel("Prices: $Y_i$")
plt.ylabel(r"Predicted prices: $\hat{Y}_i$")
plt.title(r"Prices vs Predicted prices: $Y_i$ vs $\hat{Y}_i$")