In [3]:
import pandas as pd
df = pd.read_csv("USA_Housing.csv")

In [5]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [7]:
# Features and target
X = df.drop("Price", axis=1).values
y = df["Price"].values.reshape(-1, 1)

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [9]:


# Step 2: Train-Validation-Test Split (56%-14%-30%)
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.44, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=30/44, random_state=42)

# Add bias column (intercept term)
X_train_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_val_bias = np.c_[np.ones((X_val.shape[0], 1)), X_val]
X_test_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]

# Step 3: Gradient Descent function
def gradient_descent(X, y, lr=0.01, iterations=1000):
    m, n = X.shape
    beta = np.zeros((n, 1))   # Initialize coefficients
    for i in range(iterations):
        gradients = (2/m) * X.T @ (X @ beta - y)
        beta = beta - lr * gradients
    return beta

# Step 4: Try different learning rates
learning_rates = [0.001, 0.01, 0.1, 1]
results = {}

for lr in learning_rates:
    beta = gradient_descent(X_train_bias, y_train, lr=lr, iterations=1000)

    # Predictions
    y_val_pred = X_val_bias @ beta
    y_test_pred = X_test_bias @ beta

    # R2 Scores
    val_r2 = r2_score(y_val, y_val_pred)
    test_r2 = r2_score(y_test, y_test_pred)

    results[lr] = {"beta": beta, "val_r2": val_r2, "test_r2": test_r2}
    print(f"Learning Rate = {lr}: Validation R2 = {val_r2:.4f}, Test R2 = {test_r2:.4f}")

# Step 5: Find best learning rate
best_lr = max(results, key=lambda lr: results[lr]["val_r2"])
print("\nBest Learning Rate:", best_lr)
print("Best Validation R2:", results[best_lr]["val_r2"])
print("Corresponding Test R2:", results[best_lr]["test_r2"])


Learning Rate = 0.001: Validation R2 = 0.6451, Test R2 = 0.6537
Learning Rate = 0.01: Validation R2 = 0.9200, Test R2 = 0.9134
Learning Rate = 0.1: Validation R2 = 0.9200, Test R2 = 0.9134
Learning Rate = 1: Validation R2 = -inf, Test R2 = -inf

Best Learning Rate: 0.1
Best Validation R2: 0.9199649194854793
Corresponding Test R2: 0.9134494051887397


  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
