In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn as nn
import pandas as pd
import numpy as np
import torch

url = "https://raw.githubusercontent.com/mwardeh-png/Intro-to-ML/main/Housing.csv"
hf = pd.read_csv(url)

variable_list = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']

def binary_mapping(x):
  return x.map({'no' : 0, 'yes' : 1})

# **Pre-processing and Training**


> **Separating desired Features and Outputs**


In [2]:
hf[variable_list] = hf[variable_list].apply(binary_mapping)
hf = hf.drop('furnishingstatus', axis=1)

y = hf.pop('price').values.reshape(-1,1)
features = hf.columns.tolist()
x = hf.values


> **Splitting Training and Validation Set**


In [3]:
x_n = torch.tensor(StandardScaler().fit_transform(x), dtype=torch.float32)
y_n = torch.tensor(StandardScaler().fit_transform(y), dtype=torch.float32)


x_train, x_valid, y_train, y_valid = train_test_split(x_n, y_n, test_size=0.8, random_state=42)


> **Training Loop**


In [5]:
def training(epochs, optimizer, model, loss_fn, x_train, x_valid, y_train, y_valid):
  for epoch in range(1, epochs+1):
    h_train = model(x_train)
    train_loss = loss_fn(h_train, y_train)

    h_valid = model(x_valid)
    valid_loss = loss_fn(h_valid, y_valid)

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    if epoch % 1000 == 0:
      print(f"Epoch {epoch}. Training loss {train_loss.item()}, Validation loss {valid_loss.item()}")


**1a. SGD Optimized Linear Model with 1 hidden layer**

In [6]:
seq_model = nn.Sequential(
    nn.Linear(11, 32),
    nn.Tanh(),
    nn.Linear(32, 1))

optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)
training(
    epochs = 5000,
    optimizer = optimizer,
    model = seq_model,
    loss_fn = nn.MSELoss(),
    x_train = x_train,
    x_valid = x_valid,
    y_train = y_train,
    y_valid = y_valid)

Epoch 1000. Training loss 0.35265854001045227, Validation loss 0.39989182353019714
Epoch 2000. Training loss 0.31663796305656433, Validation loss 0.41963985562324524
Epoch 3000. Training loss 0.29788681864738464, Validation loss 0.4310794174671173
Epoch 4000. Training loss 0.2831394672393799, Validation loss 0.4347243010997772
Epoch 5000. Training loss 0.26930317282676697, Validation loss 0.43406355381011963


**1b. SGD Optimized Linear Model with 3 hidden layer**

In [7]:
seq_model = nn.Sequential(
    nn.Linear(11, 32),
    nn.Tanh(),
    nn.Linear(32, 64),
    nn.Tanh(),
    nn.Linear(64, 16),
    nn.Tanh(),
    nn.Linear(16,1))

optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)
training(
    epochs = 5000,
    optimizer = optimizer,
    model = seq_model,
    loss_fn = nn.MSELoss(),
    x_train = x_train,
    x_valid = x_valid,
    y_train = y_train,
    y_valid = y_valid)

Epoch 1000. Training loss 0.36544960737228394, Validation loss 0.4467775225639343
Epoch 2000. Training loss 0.33395904302597046, Validation loss 0.45538777112960815
Epoch 3000. Training loss 0.3176318109035492, Validation loss 0.46148884296417236
Epoch 4000. Training loss 0.30381396412849426, Validation loss 0.46185576915740967
Epoch 5000. Training loss 0.2900848090648651, Validation loss 0.459676057100296
