<a href="https://colab.research.google.com/github/ssarker21/Introduction-to-Machine-Learning/blob/main/Homework5_problem2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import torch
import torch.optim as optim

# -------------------------
# 0) Load dataset
# -------------------------
# Regression target and the five input columns (X1..X5, in this order).
col_y = "price"
cols_X = ["area", "bedrooms", "bathrooms", "stories", "parking"]

# The CSV is read straight from the URL by pandas, so this cell needs
# network access every time it runs.
url = "https://github.com/HamedTabkhi/Intro-to-ML/raw/main/Dataset/Housing.csv"
df = pd.read_csv(url)

# float32 tensors: X_full has one column per entry of cols_X, y_full holds
# the price column.
X_full = torch.tensor(df[cols_X].to_numpy(), dtype=torch.float32)
y_full = torch.tensor(df[col_y].to_numpy(), dtype=torch.float32)

# -------------------------
# 1) Split (80/20) using a random permutation of the row indices
# -------------------------
n_samples = X_full.shape[0]
n_val = int(0.2 * n_samples)        # validation size, rounded down
n_train = n_samples - n_val

# NOTE: no RNG seed is set in this cell before randperm, so the split (and
# every number reported afterwards) differs from run to run.
shuffled_indices = torch.randperm(n_samples)

# Slice on the explicit train count instead of -n_val: when n_val is 0
# (fewer than 5 rows) "[:-0]" is an empty slice and "[-0:]" is the whole
# permutation, which would put every row in validation and none in training.
train_indices = shuffled_indices[:n_train]
val_indices   = shuffled_indices[n_train:]

train_X = X_full[train_indices]
train_y = y_full[train_indices]

val_X = X_full[val_indices]
val_y = y_full[val_indices]

# -------------------------
# 2) Preprocessing (z-score only for input)
# -------------------------
# Per-column statistics come from the training rows only; the same values
# are then applied to the validation rows.
X_mu = train_X.mean(dim=0)
X_std = train_X.std(dim=0).add(1e-8)   # small offset guards against a zero std

train_Xs = train_X.sub(X_mu).div(X_std)
val_Xs = val_X.sub(X_mu).div(X_std)

# Targets are passed through unscaled, i.e. they stay in original units.
train_ys, val_ys = train_y, val_y

# -------------------------
# 3) Model & Loss
# -------------------------
def model(t_u, w5, w4, w3, w2, w1, b):
    """Evaluate the five-feature linear model on every row of ``t_u``.

    Columns of ``t_u`` are read by position: 0=area, 1=bedrooms,
    2=bathrooms, 3=stories, 4=parking.  ``w1``..``w5`` multiply columns
    0..4 respectively and ``b`` is the additive bias, giving one prediction
    per row.
    """
    # (column index, weight) pairs in the order their terms are accumulated
    # after the leading parking term.
    remaining_terms = (
        (3, w4),   # stories   (X4)
        (2, w3),   # bathrooms (X3)
        (1, w2),   # bedrooms  (X2)
        (0, w1),   # area      (X1)
    )

    prediction = w5 * t_u[:, 4]   # parking (X5)
    for column, weight in remaining_terms:
        prediction = prediction + weight * t_u[:, column]
    return prediction + b

def loss_fn(t_p, t_c):
    """Return the mean squared error between predictions and targets.

    ``t_p`` holds the predictions and ``t_c`` the targets; the result is the
    mean of the element-wise squared differences.
    """
    squared_error = (t_p - t_c).pow(2)
    return squared_error.mean()

def r2_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) as a Python float.

    Computed as ``1 - SS_res / SS_tot``, where SS_res is the sum of squared
    residuals and SS_tot is the sum of squared deviations of ``y_true`` from
    its mean.  A tiny constant is added to SS_tot so that a constant
    ``y_true`` does not cause a division by zero.
    """
    residual_ss = (y_true - y_pred).pow(2).sum()
    centered = y_true - y_true.mean()
    total_ss = centered.pow(2).sum()
    unexplained_fraction = residual_ss / (total_ss + 1e-12)
    return float(1.0 - unexplained_fraction)

# -------------------------
# 4) Training loop
# -------------------------
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c):
    """Fit ``params`` with full-batch gradient steps and log validation loss.

    Each epoch runs the model on the whole training set, backpropagates the
    training loss and takes one ``optimizer`` step.  The validation loss is
    printed for the first three epochs and for every 500th epoch; it is
    measured with the parameters as they are before that epoch's update.

    Args:
        n_epochs: Number of epochs to run.
        optimizer: Object whose ``zero_grad()`` and ``step()`` are called
            once per epoch; expected to have been built over ``params``.
        params: Parameters unpacked into ``model`` after the inputs.
        train_t_u: Training inputs.
        val_t_u: Validation inputs.
        train_t_c: Training targets.
        val_t_c: Validation targets.

    Returns:
        The same ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        # Forward pass on the training set.
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # Validation is only needed on epochs that are reported, so the extra
        # forward pass is skipped otherwise.  It runs before the optimizer
        # step and under no_grad so it never contributes to the gradients.
        if epoch <= 3 or epoch % 500 == 0:
            with torch.no_grad():
                val_loss = loss_fn(model(val_t_u, *params), val_t_c)
            print(
                f"Epoch {epoch}, "
                f"Validation loss {val_loss.item():.4f}, "
            )

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return params

# -------------------------
# 5) Four trainings (lr: 0.1, 0.01, 0.001, 0.0001)
# -------------------------
lrs = [1e-1, 1e-2, 1e-3, 1e-4]
results = {}   # lr -> {"params", "val_loss", "val_r2"} of that run

for lr in lrs:
    print(f"\n=== Training with lr={lr} ===")
    # Every run starts again from all-zero parameters.
    params = torch.zeros(6, requires_grad=True)  # [w5, w4, w3, w2, w1, b]
    opt = optim.SGD([params], lr=lr)

    final_params = training_loop(
        n_epochs = 5000,
        optimizer = opt,
        params = params,
        train_t_u = train_Xs,
        val_t_u   = val_Xs,
        train_t_c = train_ys,
        val_t_c   = val_ys
    )

    # Score the trained parameters once more on the validation set.
    with torch.no_grad():
        val_pred = model(val_Xs, *final_params)
        final_val_loss = float(loss_fn(val_pred, val_ys))
        final_val_r2   = r2_score(val_ys, val_pred)

    # Keep R^2 next to the loss so the summary below can report the metrics
    # of the selected run rather than of whichever run happened to be last.
    results[lr] = {
        "params": final_params.detach().clone(),
        "val_loss": final_val_loss,
        "val_r2": final_val_r2,
    }
    print(f"Final (lr={lr}) -> Val loss: {final_val_loss:.6f}, Val R^2: {final_val_r2:.6f}")

# -------------------------
# 6) Pick best model by lowest validation loss
# -------------------------
best_lr = min(results, key=lambda k: results[k]["val_loss"])
best = results[best_lr]
w5, w4, w3, w2, w1, b = best["params"]

print("\n================== Best Linear Model ==================")
print(f"Chosen lr: {best_lr}")
print(f"Best Val loss: {best['val_loss']:.6f}")
# Read R^2 from the stored result: final_val_r2 still holds the value of the
# last learning rate in the sweep, which is not necessarily the best one.
print(f"Best Val R^2 : {best['val_r2']:.6f}")
# NOTE(review): the weights were fitted on the z-scored inputs (train_Xs), so
# each one is the price change per one standard deviation of its feature, not
# per raw unit; only the target is in original units.  The label is left as
# printed -- confirm whether it should be reworded or the weights rescaled.
print("\nParameters in ORIGINAL UNITS (already scaled properly):")
print(f"W1 (area)      = {float(w1):.6f}")
print(f"W2 (bedrooms)  = {float(w2):.6f}")
print(f"W3 (bathrooms) = {float(w3):.6f}")
print(f"W4 (stories)   = {float(w4):.6f}")
print(f"W5 (parking)   = {float(w5):.6f}")
print(f"B (bias)       = {float(b):.6f}")


=== Training with lr=0.1 ===
Epoch 1, Validation loss 25051734736896.0000, 
Epoch 2, Validation loss 15890309447680.0000, 
Epoch 3, Validation loss 10393048055808.0000, 
Epoch 500, Validation loss 1584917774336.0000, 
Epoch 1000, Validation loss 1584917774336.0000, 
Epoch 1500, Validation loss 1584917774336.0000, 
Epoch 2000, Validation loss 1584917774336.0000, 
Epoch 2500, Validation loss 1584917774336.0000, 
Epoch 3000, Validation loss 1584917774336.0000, 
Epoch 3500, Validation loss 1584917774336.0000, 
Epoch 4000, Validation loss 1584917774336.0000, 
Epoch 4500, Validation loss 1584917774336.0000, 
Epoch 5000, Validation loss 1584917774336.0000, 
Final (lr=0.1) -> Val loss: 1584917774336.000000, Val R^2: 0.416596

=== Training with lr=0.01 ===
Epoch 1, Validation loss 25051734736896.0000, 
Epoch 2, Validation loss 24026042859520.0000, 
Epoch 3, Validation loss 23046916145152.0000, 
Epoch 500, Validation loss 1584905977856.0000, 
Epoch 1000, Validation loss 1584915283968.0000, 
Epo