In [None]:
1. Import Libraries


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
2. Load Dataset



# Dataset location; a relative path, so it resolves against the current
# working directory.
CSV_PATH = 'dataset/clean_house_l5_dataset.csv'
df = pd.read_csv(filepath_or_buffer=CSV_PATH)

# Report (rows, columns), then preview the first five rows
# (five is the default for DataFrame.head).
print("Dataset Shape:", df.shape)
df.head()

3. Prepare Features and Target

# Target is the 'Price' column.
y = df['Price']
# Features are every remaining column except 'Price' and 'LogPrice'.
# NOTE(review): 'LogPrice' is presumably derived from the target and is
# excluded to avoid leaking it into the features -- confirm.
X = df.drop(['Price', 'LogPrice'], axis=1)

4. Split Data

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Confirm the sizes of the two partitions.
print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

5. Helper Function for Evaluation

def print_metrics(name, y_true, y_pred):
    """Print R-squared, MAE, MSE and RMSE for one set of predictions.

    Args:
        name: Label printed in the report header.
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned element-wise with ``y_true``.

    Returns:
        None. The report is written to stdout.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    # RMSE is the square root of MSE, which puts it back in the target's units.
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    print(f"\n{name} Performance:")
    # The superscript two is written as an escape so the label stays correct
    # no matter which encoding the source file is saved or read with
    # (it had previously been garbled into a two-character sequence).
    print(f"  R\u00b2   : {r2:.3f}")
    print(f"  MAE  : {mae:,.0f}")
    print(f"  MSE  : {mse:,.0f}")
    print(f"  RMSE : {rmse:,.0f}")

6. Train Models (LR & RF)

# Linear Regression: fit on the training split (fit() returns the estimator
# itself), then predict on the held-out rows.
lr = LinearRegression().fit(X_train, y_train)
lr_pred = lr.predict(X_test)

# Random Forest: 100 trees, seeded so repeated runs build the same forest.
rf = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)
rf_pred = rf.predict(X_test)

7. Evaluate Both Models

# Report the same metrics for both models against the same held-out targets.
print_metrics(name="Linear Regression", y_true=y_test, y_pred=lr_pred)
print_metrics(name="Random Forest", y_true=y_test, y_pred=rf_pred)

8. Single-row Sanity Check

# Positional offset (not index label) of the test row to inspect.
i = 2
y_true = y_test.iloc[i]
# A list indexer selects the row while keeping two dimensions, which is the
# input shape predict() is given everywhere else in this script.
x_one = X_test.iloc[[i]]

# Each predict() call yields a length-one array; take its only element.
p_lr, p_rf = (float(model.predict(x_one)[0]) for model in (lr, rf))

# Single print call; sep="\n" emits one line per argument.
print(
    "\nSingle-row Sanity Check:",
    f"  Actual Price : ${y_true:,.0f}",
    f"  LR Pred      : ${p_lr:,.0f}",
    f"  RF Pred      : ${p_rf:,.0f}",
    sep="\n",
)