# Homework #6
---
Student Name: Sam Crane

Student ID: 801101091

GitHub: https://github.com/samofuture/Intro-to-ML

In [17]:
# %matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch
import torch.optim as optim
import torch.nn as nn
# use seaborn plotting defaults
import seaborn as sns; sns.set()

## Problem 1

In [18]:
def prep_data() -> pd.DataFrame:
    df = pd.read_csv("Housing.csv")
    furnish_encoder = LabelEncoder()
    df['mainroad'] = df['mainroad'].apply(lambda x: 1 if x == 'yes' else 0)
    df['guestroom'] = df['guestroom'].apply(lambda x: 1 if x == 'yes' else 0)
    df['basement'] = df['basement'].apply(lambda x: 1 if x == 'yes' else 0)
    df['hotwaterheating'] = df['hotwaterheating'].apply(lambda x: 1 if x == 'yes' else 0)
    df['airconditioning'] = df['airconditioning'].apply(lambda x: 1 if x == 'yes' else 0)
    df['prefarea'] = df['prefarea'].apply(lambda x: 1 if x == 'yes' else 0)
    # df['furnishingstatus'] = df['furnishingstatus'].apply(lambda x: 2 if x == 'furnished' else 0)
    df['furnishingstatus'] = furnish_encoder.fit_transform(df['furnishingstatus'])

    return df

In [19]:
df = prep_data()
price = df.pop('price').to_numpy()
df

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,7420,4,2,3,1,0,0,0,1,2,1,0
1,8960,4,4,4,1,0,0,0,1,3,0,0
2,9960,3,2,2,1,0,1,0,0,2,1,1
3,7500,4,2,2,1,0,1,0,1,3,1,0
4,7420,4,1,2,1,1,1,0,1,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
540,3000,2,1,1,1,0,1,0,0,2,0,2
541,2400,3,1,1,0,0,0,0,0,0,0,1
542,3620,2,1,1,1,0,0,0,0,0,0,2
543,2910,3,1,1,0,0,0,0,0,0,0,0


In [20]:
scaler_x = StandardScaler()
data = scaler_x.fit_transform(df)

scaler_y = StandardScaler()
price = scaler_y.fit_transform(price.reshape(-1, 1))

X_train, X_test, Y_train, Y_test = train_test_split(data, price, test_size=0.2, random_state=15)

train_inputs = torch.tensor(X_train).float()
train_outputs = torch.tensor(Y_train).float()
Y_test = torch.tensor(Y_test).float()
X_test = torch.tensor(X_test).float()

In [21]:
def loss_fn(t_p, t_c):
    squared_diffs = (t_p - t_c)**2
    return squared_diffs.mean()

In [22]:
def training_loop(num_epochs, optimizer, model, t_in, t_out, v_in, v_out):
    for epoch in range(1, num_epochs+1):
        t_p = model(t_in)
        train_loss = loss_fn(t_p, t_out)
        v_p = model(v_in)
        val_loss = loss_fn(v_p, v_out)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        if epoch % 500 == 0:
            print(f"Epoch {epoch}:")
            print(f"\tTraining Loss: {float(train_loss)}")
            print(f"\tValidation Loss: {float(val_loss)}")

    return model.parameters()

In [31]:
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import KFold

def training_loop_with_cv(num_epochs, optimizer, model, loss_fn, inputs, targets, num_folds=5):
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    for fold, (train_idx, val_idx) in enumerate(kf.split(inputs, targets)):
        t_in, v_in = inputs[train_idx], inputs[val_idx]
        t_out, v_out = targets[train_idx], targets[val_idx]

        print(f"Training Fold {fold + 1}/{num_folds}:")

        for epoch in range(1, num_epochs + 1):
            # Training
            model.train()
            t_p = model(t_in)
            train_loss = loss_fn(t_p, t_out)

            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

            # Validation
            model.eval()
            v_p = model(v_in)
            val_loss = loss_fn(v_p, v_out)

            if epoch % 500 == 0:
                print(f"Epoch {epoch}:")
                print(f"\tTraining Loss: {float(train_loss)}")
                print(f"\tValidation Loss: {float(val_loss)}")

    return model

In [29]:
housing_model = nn.Sequential(
                nn.Linear(len(df.columns), 32),
                nn.ReLU(),
                nn.Linear(32, 1))
optimizer = optim.SGD(housing_model.parameters(), lr=0.001)

training_loop(5000, optimizer, housing_model, train_inputs, train_outputs, X_test, Y_test)

Epoch 500:
	Training Loss: 0.5303583145141602
	Validation Loss: 0.5774089097976685
Epoch 1000:
	Training Loss: 0.3847433030605316
	Validation Loss: 0.4511083662509918
Epoch 1500:
	Training Loss: 0.3361700177192688
	Validation Loss: 0.413506418466568
Epoch 2000:
	Training Loss: 0.31517893075942993
	Validation Loss: 0.39737868309020996
Epoch 2500:
	Training Loss: 0.30296996235847473
	Validation Loss: 0.38794344663619995
Epoch 3000:
	Training Loss: 0.2946121096611023
	Validation Loss: 0.3811964690685272
Epoch 3500:
	Training Loss: 0.2884846031665802
	Validation Loss: 0.3763652741909027
Epoch 4000:
	Training Loss: 0.2837289869785309
	Validation Loss: 0.373212069272995
Epoch 4500:
	Training Loss: 0.27984151244163513
	Validation Loss: 0.3709720969200134
Epoch 5000:
	Training Loss: 0.27657550573349
	Validation Loss: 0.3696404695510864


<generator object Module.parameters at 0x00000253835530A0>

In [33]:
housing_model = nn.Sequential(
                nn.Linear(len(df.columns), 32),
                nn.ReLU(),
                nn.Linear(32, 1))
optimizer = optim.SGD(housing_model.parameters(), lr=0.001)

training_loop_with_cv(5000, optimizer, housing_model, loss_fn, train_inputs, train_outputs)

Training Fold 1/5:
Epoch 500:
	Training Loss: 0.4473831355571747
	Validation Loss: 0.47375765442848206
Epoch 1000:
	Training Loss: 0.3494022786617279
	Validation Loss: 0.3721732199192047
Epoch 1500:
	Training Loss: 0.31816479563713074
	Validation Loss: 0.34029075503349304
Epoch 2000:
	Training Loss: 0.3020055890083313
	Validation Loss: 0.3241982161998749
Epoch 2500:
	Training Loss: 0.291962206363678
	Validation Loss: 0.314543753862381
Epoch 3000:
	Training Loss: 0.2848953306674957
	Validation Loss: 0.3083204925060272
Epoch 3500:
	Training Loss: 0.279646635055542
	Validation Loss: 0.3044143617153168
Epoch 4000:
	Training Loss: 0.2755013108253479
	Validation Loss: 0.30214956402778625
Epoch 4500:
	Training Loss: 0.27199235558509827
	Validation Loss: 0.3008939027786255
Epoch 5000:
	Training Loss: 0.2689095735549927
	Validation Loss: 0.3002000153064728
Training Fold 2/5:
Epoch 500:
	Training Loss: 0.2680773437023163
	Validation Loss: 0.2865132987499237
Epoch 1000:
	Training Loss: 0.26497277

Sequential(
  (0): Linear(in_features=12, out_features=32, bias=True)
  (1): ReLU()
  (2): Linear(in_features=32, out_features=1, bias=True)
)