<a href="https://colab.research.google.com/github/mgersins-design/Machine-Learning/blob/main/HW5/Housing_Prediction_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import torch
# Compact tensor printing: fewer edge items per dimension, lines wrapped at 75 chars.
torch.set_printoptions(edgeitems=2, linewidth=75)

# Mount Google Drive so the CSV stored there can be read from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Location of the dataset inside the mounted drive.
file_path = '/content/drive/MyDrive/Colab Notebooks/Housing.csv'

# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
housing = pd.read_csv(file_path)
housing.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [None]:
# Percentage of missing entries per column (missing count * 100 / number of rows).
# A value of 0.0 means the column has no NULL values.
housing.isna().sum().mul(100).div(housing.shape[0])

Unnamed: 0,0
price,0.0
area,0.0
bedrooms,0.0
bathrooms,0.0
stories,0.0
mainroad,0.0
guestroom,0.0
basement,0.0
hotwaterheating,0.0
airconditioning,0.0


In [None]:
# Columns that are passed through the yes/no -> 1/0 mapping
varlist = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
]

# Defining the map function
def binary_map(x):
    """Convert a Series of 'yes'/'no' labels into 1/0.

    Values that are already 1 or 0 are passed through unchanged, so applying
    the function a second time (e.g. when the notebook cell is re-run on the
    already converted columns) does not wipe the data out. Any other value
    maps to NaN.

    Parameters
    ----------
    x : pandas.Series
        Column to convert.

    Returns
    -------
    pandas.Series
        Series with the same index holding the mapped values.
    """
    # The 1 -> 1 and 0 -> 0 entries make the mapping idempotent.
    return x.map({'yes': 1, 'no': 0, 1: 1, 0: 0})

# Convert every column listed in varlist with binary_map and write the
# results back into the housing frame.
housing[varlist] = pd.DataFrame(
    {name: binary_map(housing[name]) for name in varlist}
)

housing.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,furnished
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,furnished
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,semi-furnished
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,furnished
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,furnished


In [None]:
# Keep the target (price) together with the five predictor columns used by the models.
relevant_data = housing[['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']]

# Train/validation split (80% / 20%)
n_samples = relevant_data.shape[0]                      # number of rows
n_val = int(0.2 * n_samples)                            # size of the validation part
n_train = n_samples - n_val                             # size of the training part

# A dedicated, seeded generator makes the split reproducible on every
# "Restart & Run All" without touching the global torch RNG state.
split_generator = torch.Generator().manual_seed(42)
shuffled_indices = torch.randperm(n_samples, generator=split_generator)

# Slice from the front using n_train. The negative-index form ([:-n_val] / [-n_val:])
# breaks when n_val == 0: it would yield an empty training set and put every
# row into validation.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

# Positional row selection; iloc takes the index tensors converted to numpy arrays.
train_df = relevant_data.iloc[train_indices.numpy()]
val_df = relevant_data.iloc[val_indices.numpy()]

In [None]:
from sklearn.preprocessing import StandardScaler

# Separate the predictors from the target column for both partitions.
X_train_df = train_df.drop(columns='price')
Y_train_df = train_df[['price']]
X_val_df = val_df.drop(columns='price')
Y_val_df = val_df[['price']]

# One scaler for the inputs and one for the target. Both are fitted on the
# training partition only and then reused to transform the validation partition.
x_scaler = StandardScaler()
y_scaler = StandardScaler()

scaled_X_train_np = x_scaler.fit_transform(X_train_df)
scaled_Y_train_np = y_scaler.fit_transform(Y_train_df)
scaled_X_val_np = x_scaler.transform(X_val_df)
scaled_Y_val_np = y_scaler.transform(Y_val_df)

# Convert the scaled numpy arrays into float32 tensors for the models.
X_train = torch.tensor(scaled_X_train_np, dtype=torch.float32)
X_val = torch.tensor(scaled_X_val_np, dtype=torch.float32)
Y_train = torch.tensor(scaled_Y_train_np, dtype=torch.float32)
Y_val = torch.tensor(scaled_Y_val_np, dtype=torch.float32)

In [None]:
import time

def training_loop(n_epochs, optimizer, model, loss_fn, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train ``model`` for ``n_epochs`` full-batch steps and print progress.

    Each epoch runs one forward/backward pass over the whole training set,
    evaluates the validation loss for monitoring only, and takes one
    optimizer step. Losses are printed for the first three epochs and every
    50th epoch; the elapsed wall time is printed at the end.

    Parameters
    ----------
    n_epochs : int
        Number of optimization steps to run.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to ``model``'s parameters.
    model : callable
        Maps an input tensor to predictions.
    loss_fn : callable
        ``loss_fn(predictions, targets)`` returning a scalar tensor.
    train_t_u, val_t_u : torch.Tensor
        Training / validation inputs.
    train_t_c, val_t_c : torch.Tensor
        Training / validation targets.

    Returns
    -------
    None
    """
    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments.
    start_time = time.perf_counter()

    print("Training is starting...")
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u)
        train_loss = loss_fn(train_t_p, train_t_c)

        # The validation loss is only reported, never back-propagated, so
        # skip building an autograd graph for it.
        with torch.no_grad():
            val_t_p = model(val_t_u)
            val_loss = loss_fn(val_t_p, val_t_c)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        if epoch <= 3 or epoch % 50 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

    total_time = time.perf_counter() - start_time

    print(f"Training done after {total_time:.2f} seconds")

In [None]:
import torch.nn as nn
# Single-hidden-layer regressor: 5 inputs -> 8 hidden units (ReLU) -> 1 output.
seq_model = nn.Sequential(
    nn.Linear(in_features=5, out_features=8),
    nn.ReLU(),
    nn.Linear(in_features=8, out_features=1),
)

seq_model

Sequential(
  (0): Linear(in_features=5, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [None]:
# Mean-squared-error objective, optimized with Adam at a learning rate of 1e-2.
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(seq_model.parameters(), lr=1e-2)

# Train the single-hidden-layer model for 200 epochs on the scaled data.
training_loop(
    n_epochs=200,
    optimizer=optimizer,
    model=seq_model,
    loss_fn=loss_fn,
    train_t_u=X_train,
    val_t_u=X_val,
    train_t_c=Y_train,
    val_t_c=Y_val,
)

Training is starting...
Epoch 1, Training loss 1.2196, Validation loss 1.0423
Epoch 2, Training loss 1.1535, Validation loss 0.9836
Epoch 3, Training loss 1.0923, Validation loss 0.9289
Epoch 50, Training loss 0.4252, Validation loss 0.3521
Epoch 100, Training loss 0.4053, Validation loss 0.3469
Epoch 150, Training loss 0.3931, Validation loss 0.3568
Epoch 200, Training loss 0.3802, Validation loss 0.3622
Training done after 0.42 seconds


In [None]:
# Deeper regressor with three hidden layers: 5 -> 13 -> 10 -> 8 -> 1,
# with a ReLU after each hidden layer.
seq_model_three_layer = nn.Sequential(
    nn.Linear(in_features=5, out_features=13),
    nn.ReLU(),
    nn.Linear(in_features=13, out_features=10),
    nn.ReLU(),
    nn.Linear(in_features=10, out_features=8),
    nn.ReLU(),
    nn.Linear(in_features=8, out_features=1),
)

seq_model_three_layer

Sequential(
  (0): Linear(in_features=5, out_features=13, bias=True)
  (1): ReLU()
  (2): Linear(in_features=13, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=8, bias=True)
  (5): ReLU()
  (6): Linear(in_features=8, out_features=1, bias=True)
)

In [None]:
# Same objective and optimizer settings as the smaller model: MSE loss, Adam, lr = 1e-2.
loss_fn = nn.MSELoss()
optimizer_three_layer = torch.optim.Adam(seq_model_three_layer.parameters(), lr=1e-2)

# Train the three-hidden-layer model for 200 epochs on the scaled data.
training_loop(
    n_epochs=200,
    optimizer=optimizer_three_layer,
    model=seq_model_three_layer,
    loss_fn=loss_fn,
    train_t_u=X_train,
    val_t_u=X_val,
    train_t_c=Y_train,
    val_t_c=Y_val,
)

Training is starting...
Epoch 1, Training loss 1.0825, Validation loss 0.8090
Epoch 2, Training loss 1.0435, Validation loss 0.7816
Epoch 3, Training loss 1.0085, Validation loss 0.7583
Epoch 50, Training loss 0.4117, Validation loss 0.3264
Epoch 100, Training loss 0.3645, Validation loss 0.3715
Epoch 150, Training loss 0.3198, Validation loss 0.4525
Epoch 200, Training loss 0.2724, Validation loss 0.5263
Training done after 0.67 seconds
