In [19]:
# ===============================
# House Price Prediction (Assignment 5)
# - Linear Regression & Random Forest
# - Using Cleaned Dataset
# ===============================


In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


In [22]:
# 1) Read the cleaned house-price CSV into a DataFrame.
#    The path is relative, so it is resolved against the kernel's working directory.
CSV_PATH = "clean_house_l5_dataset.csv"
df = pd.read_csv(filepath_or_buffer=CSV_PATH)
print(" Dataset Loaded! Shape:", df.shape)


 Dataset Loaded! Shape: (99, 13)


In [23]:
# 2) Separate the target (y) from the model inputs (X)
y = df["Price"]
# Every remaining column is a feature. "LogPrice" is removed together with
# "Price" (presumably it is derived from the target and would leak it -- confirm).
X = df.drop(["Price", "LogPrice"], axis=1)



In [32]:

# 3) Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [None]:
# 4) Helper to print metrics nicely
def evaluate_model(name, model, X_test, y_test):
    """Score ``model`` on the given test data and print a short metrics report.

    Args:
        name: Label shown in the report header.
        model: Object with a ``predict`` method; it is called once on ``X_test``.
        X_test: Feature data handed to ``model.predict``.
        y_test: True target values the predictions are compared against.

    Returns:
        The predictions produced by ``model.predict(X_test)``.
    """
    predictions = model.predict(X_test)

    # The three base metrics are computed independently; RMSE is derived from MSE.
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    # One print call with a newline separator emits the same five lines as
    # five separate print calls would.
    print(
        f"\n{name} Performance:",
        f"  R²   : {r2:.3f}",
        f"  MAE  : {mae:,.0f}",
        f"  MSE  : {mse:,.0f}",
        f"  RMSE : {rmse:,.0f}",
        sep="\n",
    )
    return predictions


In [None]:
# 5) Fit a linear regression on the training split, then report its test-set metrics
lr = LinearRegression().fit(X_train, y_train)  # fit() returns the fitted estimator itself
lr_preds = evaluate_model(
    name="Linear Regression",
    model=lr,
    X_test=X_test,
    y_test=y_test,
)



Linear Regression Performance:
  R²   : 0.848
  MAE  : 63,086
  MSE  : 5,718,940,941
  RMSE : 75,624


In [39]:
# 6) Fit a 100-tree random forest (seeded for reproducibility), then report its test-set metrics
rf = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)
rf_preds = evaluate_model(
    name="Random Forest",
    model=rf,
    X_test=X_test,
    y_test=y_test,
)



Random Forest Performance:
  R²   : 0.859
  MAE  : 52,524
  MSE  : 5,283,317,455
  RMSE : 72,686


In [None]:
# 7) Single-row sanity check: compare both models' predictions with the actual
#    price for one held-out row (positional index 3 of the test split)
i = 3
actual_price = y_test.iloc[i]
row = X_test.iloc[[i]]  # list indexer keeps `row` a one-row DataFrame rather than a Series
lr_price, rf_price = (model.predict(row)[0] for model in (lr, rf))

print(
    "\nSingle-row sanity check:",
    f"  Actual Price: ${actual_price:,.0f}",
    f"  LR Pred     : ${lr_price:,.0f}",
    f"  RF Pred     : ${rf_price:,.0f}",
    sep="\n",
)


Single-row sanity check:
  Actual Price: $554,800
  LR Pred     : $594,041
  RF Pred     : $557,028


In [None]:
# 8) Single-row sanity check: compare both models' predictions with the actual
#    price for one held-out row (positional index 7 of the test split)
i = 7
actual_price = y_test.iloc[i]
row = X_test.iloc[[i]]  # list indexer keeps `row` a one-row DataFrame rather than a Series
lr_price, rf_price = (model.predict(row)[0] for model in (lr, rf))

print(
    "\nSingle-row sanity check:",
    f"  Actual Price: ${actual_price:,.0f}",
    f"  LR Pred     : ${lr_price:,.0f}",
    f"  RF Pred     : ${rf_price:,.0f}",
    sep="\n",
)


Single-row sanity check:
  Actual Price: $743,700
  LR Pred     : $727,107
  RF Pred     : $724,944


In [None]:
# 9) Single-row sanity check: compare both models' predictions with the actual
#    price for one held-out row (positional index 10 of the test split)
i = 10
actual_price = y_test.iloc[i]
row = X_test.iloc[[i]]  # list indexer keeps `row` a one-row DataFrame rather than a Series
lr_price, rf_price = (model.predict(row)[0] for model in (lr, rf))

print(
    "\nSingle-row sanity check:",
    f"  Actual Price: ${actual_price:,.0f}",
    f"  LR Pred     : ${lr_price:,.0f}",
    f"  RF Pred     : ${rf_price:,.0f}",
    sep="\n",
)


Single-row sanity check:
  Actual Price: $366,000
  LR Pred     : $299,971
  RF Pred     : $311,571


In [None]:
# 10) Single-row sanity check: compare both models' predictions with the actual
#     price for one held-out row (positional index 15 of the test split)
i = 15
actual_price = y_test.iloc[i]
row = X_test.iloc[[i]]  # list indexer keeps `row` a one-row DataFrame rather than a Series
lr_price, rf_price = (model.predict(row)[0] for model in (lr, rf))

print(
    "\nSingle-row sanity check:",
    f"  Actual Price: ${actual_price:,.0f}",
    f"  LR Pred     : ${lr_price:,.0f}",
    f"  RF Pred     : ${rf_price:,.0f}",
    sep="\n",
)


Single-row sanity check:
  Actual Price: $806,000
  LR Pred     : $747,846
  RF Pred     : $779,024
