<a href="https://colab.research.google.com/github/mgersins-design/Machine-Learning/blob/main/HW5/Housing_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from google.colab import drive

# Keep printed tensors compact: 2 edge items per dimension, 75-char lines.
torch.set_printoptions(edgeitems=2, linewidth=75)

# Mount Google Drive so the dataset stored there is reachable from Colab.
drive.mount('/content/drive')
file_path = '/content/drive/MyDrive/Colab Notebooks/Housing.csv'

# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap
# is needed.
housing = pd.read_csv(file_path)
housing.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [14]:
# Percentage of missing entries per column
housing.isna().sum() * 100 / len(housing)
# No NULL values are reported for this dataset, so it needs no cleaning
# for missing data.

Unnamed: 0,0
price,0.0
area,0.0
bedrooms,0.0
bathrooms,0.0
stories,0.0
mainroad,0.0
guestroom,0.0
basement,0.0
hotwaterheating,0.0
airconditioning,0.0


In [15]:
# Columns holding 'yes'/'no' answers that will be mapped to 1/0
varlist = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
]

# Defining the map function
def binary_map(x):
    """Encode a yes/no Series as 1/0.

    Args:
        x: pandas Series whose values are 'yes' / 'no' (or already 1 / 0).

    Returns:
        A Series of the same length with 'yes' -> 1 and 'no' -> 0. Values
        that are already 1 or 0 are passed through unchanged, so applying
        the function a second time (e.g. when the cell is re-run on the
        already-encoded frame) does not turn the column into NaN. Any other
        value maps to NaN, as with a plain ``Series.map``.
    """
    # 1 and 0 are listed as their own images to make the mapping idempotent.
    return x.map({'yes': 1, 'no': 0, 1: 1, 0: 0})

# Encode each yes/no column with binary_map and write the result back
encoded_columns = housing[varlist].apply(binary_map)
housing[varlist] = encoded_columns

housing.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,furnished
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,furnished
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,semi-furnished
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,furnished
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,furnished


In [16]:
relevant_data = housing[['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']]

# Train-Val-Split
SPLIT_SEED = 42      # fixed so a fresh "Restart & Run All" reproduces the same split
VAL_FRACTION = 0.2   # share of the rows held out for validation

n_samples = relevant_data.shape[0]
n_val = int(VAL_FRACTION * n_samples)
n_train = n_samples - n_val

# A dedicated generator makes the permutation reproducible without touching
# the global torch RNG state.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(n_samples, generator=split_generator)

# Slice at n_train instead of -n_val: with n_val == 0, `[:-0]` would yield an
# empty training set.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices

(tensor([347, 484, 403, 177, 544,  85, 519,  99, 453, 336, 161, 430, 533,
         314, 356, 387,  83, 193, 459, 466,  67, 444, 223, 136, 442, 244,
         460, 373, 503, 111, 357, 286, 265, 433,   6,  14, 508,  53, 155,
         509, 239, 147, 152,  87, 441,  55, 514, 312, 413, 153, 362,  92,
         405, 375, 379, 238, 491, 401, 204,  94, 355, 243, 219, 287, 504,
         263, 435, 364, 132, 525, 293, 367, 392, 229, 167, 439, 335, 431,
         218, 192, 353, 269, 341,  44, 198,  47, 305, 393, 112,  43, 216,
         210, 418,  20, 450, 350, 321, 248,  96, 281,   0, 464, 162, 523,
         130, 486, 388, 174, 294, 190, 259, 487, 410,  18, 340,  16,   9,
         324, 201,  39,  84, 313, 205,  95, 468,  15, 261, 104, 398, 276,
          45, 382, 106, 165, 448,  13, 320,  71, 282, 488, 345, 187, 168,
         144,  66, 463, 148, 267, 184,  69, 203,  91, 157, 200,  88,  64,
         494, 513, 307, 447, 236, 528, 500, 426, 124,  17, 476, 512, 100,
         423, 268, 417, 252, 214, 179,

In [17]:
# Select the rows of each split using the shuffled index tensors
train_df = relevant_data.iloc[train_indices.numpy()]
val_df = relevant_data.iloc[val_indices.numpy()]

from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training rows only; the same fitted scaler is
# then applied to both splits.
scaler = StandardScaler().fit(train_df)
scaled_train_np, scaled_val_np = (
    scaler.transform(split_df) for split_df in (train_df, val_df)
)

# Convert the scaled arrays to float32 tensors
scaled_train_t = torch.tensor(scaled_train_np, dtype=torch.float32)
scaled_val_t = torch.tensor(scaled_val_np, dtype=torch.float32)

In [18]:
# Column 0 is the target; every remaining column is an input feature.
Y_train, X_train = scaled_train_t[:, :1], scaled_train_t[:, 1:]
Y_val, X_val = scaled_val_t[:, :1], scaled_val_t[:, 1:]

# Report the shape of each tensor
for description, split_t in [
    ("Train Inputs Shape:", X_train),
    ("Train Outputs Shape:", Y_train),
    ("Validation Inputs Shape:", X_val),
    ("Validation Outputs Shape:", Y_val),
]:
    print(description, split_t.shape)

Train Inputs Shape: torch.Size([436, 5])
Train Outputs Shape: torch.Size([436, 1])
Validation Inputs Shape: torch.Size([109, 5])
Validation Outputs Shape: torch.Size([109, 1])


In [19]:
def model(X, w, b):
    """Linear model: matrix product of X and w, plus the bias b."""
    weighted_sum = X @ w
    return weighted_sum + b

def loss_fn(Y_p, Y):
    """Mean squared error between predictions Y_p and targets Y."""
    residual = Y_p - Y
    return (residual ** 2).mean()

def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Run full-batch gradient descent and report train/validation loss.

    Every epoch performs one forward pass over the training inputs, one
    backward pass on the training loss and one optimizer step. The
    validation loss is computed with the parameters as they are before that
    epoch's step and is used for reporting only.

    Args:
        n_epochs: number of epochs to run (epochs are numbered from 1).
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called
            once per epoch.
        params: sequence unpacked into ``model`` after the inputs
            (``model(inputs, *params)``).
        train_t_u: training inputs.
        val_t_u: validation inputs.
        train_t_c: training targets.
        val_t_c: validation targets.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # Validation is never back-propagated, so evaluate it without
        # autograd tracking instead of building an unused graph every epoch.
        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)

        optimizer.zero_grad()
        train_loss.backward()  # gradients come from the training loss only
        optimizer.step()

        # Log the first three epochs, then every 500th.
        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")
    return params

In [20]:
import torch.optim as optim
# Exploratory: list every name exposed by torch.optim (the optimizer classes
# plus module internals). The training cells in this notebook build their
# optimizer through torch.optim.SGD directly.
dir(optim)

['ASGD',
 'Adadelta',
 'Adafactor',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_adafactor',
 '_functional',
 'lr_scheduler',
 'swa_utils']

In [21]:
learning_rate = 1e-1

# Start from all-ones parameters: a (5, 1) weight column and a scalar bias.
params = [
    torch.ones(5, 1, requires_grad=True),
    torch.ones(1, requires_grad=True),
]
w, b = params
optimizer = torch.optim.SGD(params, lr=learning_rate)

print("Training is starting...")
training_loop(
    n_epochs=5000, optimizer=optimizer, params=params,
    train_t_u=X_train, val_t_u=X_val,
    train_t_c=Y_train, val_t_c=Y_val,
)
print("Training done.")

Training is starting...
Epoch 1, Training loss 7.1063, Validation loss 7.2966
Epoch 2, Training loss 3.2223, Validation loss 3.3478
Epoch 3, Training loss 1.6671, Validation loss 1.7395
Epoch 500, Training loss 0.4408, Validation loss 0.3754
Epoch 1000, Training loss 0.4408, Validation loss 0.3754
Epoch 1500, Training loss 0.4408, Validation loss 0.3754
Epoch 2000, Training loss 0.4408, Validation loss 0.3754
Epoch 2500, Training loss 0.4408, Validation loss 0.3754
Epoch 3000, Training loss 0.4408, Validation loss 0.3754
Epoch 3500, Training loss 0.4408, Validation loss 0.3754
Epoch 4000, Training loss 0.4408, Validation loss 0.3754
Epoch 4500, Training loss 0.4408, Validation loss 0.3754
Epoch 5000, Training loss 0.4408, Validation loss 0.3754
Training done.


In [22]:
learning_rate = 1e-2

# Start from all-ones parameters: a (5, 1) weight column and a scalar bias.
params = [
    torch.ones(5, 1, requires_grad=True),
    torch.ones(1, requires_grad=True),
]
w, b = params
optimizer = torch.optim.SGD(params, lr=learning_rate)

print("Training is starting...")
training_loop(
    n_epochs=5000, optimizer=optimizer, params=params,
    train_t_u=X_train, val_t_u=X_val,
    train_t_c=Y_train, val_t_c=Y_val,
)
print("Training done.")

Training is starting...
Epoch 1, Training loss 7.1063, Validation loss 7.2966
Epoch 2, Training loss 6.6382, Validation loss 6.8230
Epoch 3, Training loss 6.2041, Validation loss 6.3835
Epoch 500, Training loss 0.4408, Validation loss 0.3754
Epoch 1000, Training loss 0.4408, Validation loss 0.3754
Epoch 1500, Training loss 0.4408, Validation loss 0.3754
Epoch 2000, Training loss 0.4408, Validation loss 0.3754
Epoch 2500, Training loss 0.4408, Validation loss 0.3754
Epoch 3000, Training loss 0.4408, Validation loss 0.3754
Epoch 3500, Training loss 0.4408, Validation loss 0.3754
Epoch 4000, Training loss 0.4408, Validation loss 0.3754
Epoch 4500, Training loss 0.4408, Validation loss 0.3754
Epoch 5000, Training loss 0.4408, Validation loss 0.3754
Training done.


In [23]:
learning_rate = 1e-3

# Start from all-ones parameters: a (5, 1) weight column and a scalar bias.
params = [
    torch.ones(5, 1, requires_grad=True),
    torch.ones(1, requires_grad=True),
]
w, b = params
optimizer = torch.optim.SGD(params, lr=learning_rate)

print("Training is starting...")
training_loop(
    n_epochs=5000, optimizer=optimizer, params=params,
    train_t_u=X_train, val_t_u=X_val,
    train_t_c=Y_train, val_t_c=Y_val,
)
print("Training done.")

Training is starting...
Epoch 1, Training loss 7.1063, Validation loss 7.2966
Epoch 2, Training loss 7.0587, Validation loss 7.2484
Epoch 3, Training loss 7.0115, Validation loss 7.2006
Epoch 500, Training loss 0.7055, Validation loss 0.7077
Epoch 1000, Training loss 0.4647, Validation loss 0.4162
Epoch 1500, Training loss 0.4441, Validation loss 0.3835
Epoch 2000, Training loss 0.4413, Validation loss 0.3774
Epoch 2500, Training loss 0.4409, Validation loss 0.3760
Epoch 3000, Training loss 0.4408, Validation loss 0.3756
Epoch 3500, Training loss 0.4408, Validation loss 0.3755
Epoch 4000, Training loss 0.4408, Validation loss 0.3755
Epoch 4500, Training loss 0.4408, Validation loss 0.3754
Epoch 5000, Training loss 0.4408, Validation loss 0.3754
Training done.


In [24]:
learning_rate = 1e-4

# Start from all-ones parameters: a (5, 1) weight column and a scalar bias.
params = [
    torch.ones(5, 1, requires_grad=True),
    torch.ones(1, requires_grad=True),
]
w, b = params
optimizer = torch.optim.SGD(params, lr=learning_rate)

print("Training is starting...")
training_loop(
    n_epochs=5000, optimizer=optimizer, params=params,
    train_t_u=X_train, val_t_u=X_val,
    train_t_c=Y_train, val_t_c=Y_val,
)
print("Training done.")

Training is starting...
Epoch 1, Training loss 7.1063, Validation loss 7.2966
Epoch 2, Training loss 7.1016, Validation loss 7.2917
Epoch 3, Training loss 7.0968, Validation loss 7.2869
Epoch 500, Training loss 5.1163, Validation loss 5.2799
Epoch 1000, Training loss 3.7363, Validation loss 3.8741
Epoch 1500, Training loss 2.7783, Validation loss 2.8918
Epoch 2000, Training loss 2.1103, Validation loss 2.2016
Epoch 2500, Training loss 1.6423, Validation loss 1.7137
Epoch 3000, Training loss 1.3126, Validation loss 1.3663
Epoch 3500, Training loss 1.0789, Validation loss 1.1170
Epoch 4000, Training loss 0.9121, Validation loss 0.9365
Epoch 4500, Training loss 0.7922, Validation loss 0.8047
Epoch 5000, Training loss 0.7052, Validation loss 0.7074
Training done.
