# Import required libraries

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.datasets import fetch_openml
from bartpy.sklearnmodel import SklearnModel as BART


In [29]:
# Load the Boston Housing dataset from OpenML.
# With as_frame=True, `boston.data` is already a DataFrame and `boston.target`
# a Series, so there is no need to re-wrap the features in pd.DataFrame.
boston = fetch_openml(name='boston', version=1, as_frame=True)

# Keep the declared feature columns and cast everything to float.
# NOTE(review): OpenML types some attributes of this dataset as nominal
# (CHAS and RAD, as far as I can tell), which pandas exposes as `category`
# columns; not every estimator accepts those, so make the dtype explicit here.
# A non-numeric category would raise at this point instead of failing later.
X = boston.data[boston.feature_names].astype(float)
y = boston.target.astype(float)


In [31]:
# Hold out 30% of the rows as a test set; the remaining 70% are used for
# training. The fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [32]:
# Standardized copies of the features, used only by Linear Regression.
# The scaler learns its statistics from the training rows alone and is then
# applied to both splits, so no test-set information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)


In [None]:
# Registry of the regressors to compare, keyed by the label shown in the
# results table. Insertion order is the order in which they are trained.
models = {
    "Linear Regression": LinearRegression(),
    "Bagging": BaggingRegressor(
        n_estimators=100,
        random_state=42,
    ),
    "Random Forest": RandomForestRegressor(
        n_estimators=100,
        max_features='sqrt',
        random_state=42,
    ),
    "Boosting (GBM)": GradientBoostingRegressor(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        random_state=42,
    ),
    # No seed is passed to BART here, unlike the other ensemble models.
    "BART": BART(
        n_trees=50,
        n_chains=3,
        n_samples=100,
    ),
}


In [None]:
# Fit each model on the training split and record its RMSE on the test split.
# `results` maps the model label to that test RMSE.
results = {}
for name, model in models.items():
    # Linear Regression is fed the standardized features; every other
    # (tree-based) model is fed the original, unscaled features.
    wants_scaled = name == "Linear Regression"
    fit_features = X_train_scaled if wants_scaled else X_train
    eval_features = X_test_scaled if wants_scaled else X_test

    model.fit(fit_features, y_train)
    y_pred = model.predict(eval_features)

    # RMSE = square root of the mean squared prediction error
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    results[name] = rmse


In [12]:
# Tabulate one row per method and print the table with the lowest
# (best) test RMSE first. `results_df` itself is left in insertion order.
results_df = pd.DataFrame(list(results.items()), columns=["Method", "Test RMSE"])
ranked = results_df.sort_values("Test RMSE")
print(ranked)


Empty DataFrame
Columns: [Method, Test RMSE]
Index: []
