In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Single seed shared by the split and both base learners so a run is repeatable.
RANDOM_STATE = 42

# --- Data preparation -------------------------------------------------------
# Load the raw table and discard the columns that are not used as features.
df = pd.read_csv("Usa Housing Dataset.csv")
df = df.drop(columns=['date', 'street', 'country'])

# Integer-encode the two remaining categorical columns. Every column gets its
# own encoder, stored in `encoders`, so each column's code -> label mapping
# stays available (e.g. `encoders['city'].inverse_transform(...)`). Refitting
# one shared encoder would silently throw away the 'city' mapping as soon as
# 'statezip' is fitted.
# NOTE(review): the encoders are fitted on the whole table, i.e. before the
# train/test split below -- confirm that is acceptable for this evaluation.
encoders = {}
for column in ('city', 'statezip'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Backward compatibility: `le` used to end up fitted on 'statezip'.
le = encoders['statezip']

# 'price' is the regression target; every other remaining column is a feature.
X = df.drop(columns='price')
y = df['price']

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# --- Models -----------------------------------------------------------------
# Two base learners plus a linear meta-model that combines their predictions.
rf = RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE)
xgb = XGBRegressor(random_state=RANDOM_STATE)
meta_model = LinearRegression()

stack = StackingRegressor(
    estimators=[('rf', rf), ('xgb', xgb)],
    final_estimator=meta_model,
)

# The standalone base learners are fitted as well, because they are scored on
# their own (not only as part of the stack) further down.
rf.fit(X_train, y_train)
xgb.fit(X_train, y_train)
stack.fit(X_train, y_train)

# --- Predictions on the held-out rows ---------------------------------------
rf_pred = rf.predict(X_test)
xgb_pred = xgb.predict(X_test)
stack_pred = stack.predict(X_test)

def get_metrics(y_true, y_pred):
    """Compute the regression scores for one set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        A four-element list ``[mse, rmse, mae, r2]``, where ``rmse`` is the
        square root of ``mse``.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return [
        squared_error,
        np.sqrt(squared_error),
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    ]

# Score each model's held-out predictions with the same set of metrics.
rf_scores, xgb_scores, stack_scores = (
    get_metrics(y_test, predictions)
    for predictions in (rf_pred, xgb_pred, stack_pred)
)

# One row per model, one column per metric (same order as get_metrics returns).
scores_by_model = {
    "Random Forest": rf_scores,
    "XGBoost": xgb_scores,
    "Stacked Model": stack_scores,
}
results_df = pd.DataFrame.from_dict(
    scores_by_model,
    orient="index",
    columns=["MSE", "RMSE", "MAE", "R² Score"],
)

print(results_df)

                        MSE           RMSE            MAE  R² Score
Random Forest  6.099665e+10  246975.003584  133137.746003  0.418168
XGBoost        1.829371e+11  427711.518661  135991.142701 -0.744993
Stacked Model  6.214739e+10  249293.791676  163187.102281  0.407191
