In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Read the cleaned housing dataset saved by an earlier lesson.
# Make sure the file name below matches the file you actually saved.
# NOTE(review): the previous comment said "Lesson 3" while the file name says "l5" — confirm which export is intended.
df = pd.read_csv('clean_house_l5_dataset.csv')

# Separate the target from the features.
# y is the Price column; X is every remaining column except Price and LogPrice.
# (LogPrice is presumably derived from Price, so keeping it would leak the target — verify.)
y = df['Price']
X = df.drop(columns=['Price', 'LogPrice'])

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many rows landed in each partition.
print(f"Training set size: {X_train.shape[0]}")
print(f"Testing set size: {X_test.shape[0]}")

Training set size: 79
Testing set size: 20


In [2]:
def evaluate_model(name, actual, predicted):
    """Print a short regression report (R², MAE, MSE, RMSE) for one model.

    Parameters
    ----------
    name : str
        Label shown in the report header.
    actual : array-like
        Ground-truth target values.
    predicted : array-like
        Model predictions, aligned element-for-element with ``actual``.

    Returns
    -------
    None
        The report is written to stdout; nothing is returned.
    """
    r2 = r2_score(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    mse = mean_squared_error(actual, predicted)
    # RMSE is the square root of MSE, which puts the error back in the target's units.
    rmse = np.sqrt(mse)

    # Assemble the whole report first, then emit it with a single print call.
    report_lines = [
        f"\n{name} Performance:",
        f"  R²   : {r2:.4f}",
        f"  MAE  : {mae:,.2f}",
        f"  MSE  : {mse:,.2f}",
        f"  RMSE : {rmse:,.2f}",
    ]
    print("\n".join(report_lines))

# Build and fit both regressors on the training split.
# fit() returns the estimator itself, so construction and training can be chained.
lr_model = LinearRegression().fit(X_train, y_train)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score each model on the held-out test split: predict, then print its metrics.
lr_preds = lr_model.predict(X_test)
evaluate_model("Linear Regression", y_test, lr_preds)

rf_preds = rf_model.predict(X_test)
evaluate_model("Random Forest", y_test, rf_preds)


Linear Regression Performance:
  R²   : 0.8478
  MAE  : 63,085.84
  MSE  : 5,718,940,940.60
  RMSE : 75,623.68

Random Forest Performance:
  R²   : 0.8594
  MAE  : 52,523.85
  MSE  : 5,283,317,454.95
  RMSE : 72,686.43


In [4]:
# Spot-check a single held-out example (position 5 here; any valid position works).
index = 5
# The double brackets keep the selection as a one-row DataFrame rather than a Series.
sample_x = X_test.iloc[[index]]
actual_price = y_test.iloc[index]

# Each predict() call yields a one-element array for the single row; unpack that element.
(lr_single_pred,) = lr_model.predict(sample_x)
(rf_single_pred,) = rf_model.predict(sample_x)

# Show the true price next to both models' predictions.
print(
    f"- Sanity Check (Row {index}) -",
    f"Actual Price      : {actual_price:,.2f}",
    f"LR Predicted Price: {lr_single_pred:,.2f}",
    f"RF Predicted Price: {rf_single_pred:,.2f}",
    sep="\n",
)

- Sanity Check (Row 5) -
Actual Price      : 419,200.00
LR Predicted Price: 411,139.22
RF Predicted Price: 297,368.00
