In [1]:
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb

In [2]:
# Load the scikit-learn diabetes dataset; as_frame=True requests pandas containers.
diabetes = load_diabetes(as_frame=True)
X = diabetes.data
y = diabetes.target

# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)


In [3]:
# Candidate regressors to compare, keyed by the label reported in the results table.
# Each estimator is given random_state=42 so repeated runs start from the same seed.
models = {
    "Random Forest": RandomForestRegressor(
        n_estimators=200,
        random_state=42,
    ),
    "Gradient Boosting": GradientBoostingRegressor(
        n_estimators=200,
        learning_rate=0.05,
        max_depth=3,
        random_state=42,
    ),
    "XGBoost": xgb.XGBRegressor(
        n_estimators=300,
        learning_rate=0.05,
        max_depth=4,
        random_state=42,
    ),
}

In [4]:
# Fit every candidate on the training split and score it on the held-out split.
# One dict per model is collected so the rows can be turned into a table afterwards.
results = []
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    scores = dict(
        Model=name,
        R2=r2_score(y_test, preds),
        MSE=mean_squared_error(y_test, preds),
    )
    results.append(scores)

In [5]:
# Turn the per-model metric rows into a table (one row per model) and print it as plain text.
df_results = pd.DataFrame(data=results)
print(df_results)

               Model        R2          MSE
0      Random Forest  0.463272  2967.936805
1  Gradient Boosting  0.436688  3114.938568
2            XGBoost  0.372197  3471.556970
