In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np

# Regression dataset: California housing, returned as a (features, target) pair.
X, y = fetch_california_housing(return_X_y=True)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Base learners. The same three objects are handed to both ensembles below.
lr = LinearRegression()
rf = RandomForestRegressor(n_estimators=100, random_state=42)
gb = GradientBoostingRegressor(n_estimators=100, random_state=42)

# ----- Voting Regressor -----
# Combines the three base learners into a single voting ensemble.
voting_reg = VotingRegressor(
    estimators=[
        ("rf", rf),
        ("gb", gb),
        ("lr", lr),
    ]
)
voting_reg.fit(X_train, y_train)
y_pred_vote = voting_reg.predict(X_test)
# RMSE is the square root of the test-set mean squared error.
print("Voting RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_vote)))

# ----- Stacking Regressor -----
# The two tree ensembles are the stacked estimators; the linear model is the
# final estimator placed on top of them.
stack_reg = StackingRegressor(
    estimators=[
        ("rf", rf),
        ("gb", gb),
    ],
    final_estimator=lr,
)
stack_reg.fit(X_train, y_train)
y_pred_stack = stack_reg.predict(X_test)
print("Stacking RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_stack)))

Voting RMSE: 0.5556013345212151
Stacking RMSE: 0.5035326908553531


In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.dummy import DummyRegressor

# Baseline: a dummy regressor configured with strategy="mean" (always predict the mean).
dummy = DummyRegressor(strategy="mean")
dummy.fit(X_train, y_train)
y_pred_dummy = dummy.predict(X_test)

# Ensemble (Voting): recompute its test-set predictions inside this cell.
y_pred_vote = voting_reg.predict(X_test)

# ----- Metrics for the voting ensemble -----
rmse_model = np.sqrt(mean_squared_error(y_test, y_pred_vote))
r2_model = r2_score(y_test, y_pred_vote)

# ----- Metrics for the baseline -----
rmse_dummy = np.sqrt(mean_squared_error(y_test, y_pred_dummy))
r2_dummy = r2_score(y_test, y_pred_dummy)

print("Voting RMSE:", rmse_model)
print("Baseline RMSE:", rmse_dummy)
print("Voting R²:", r2_model)
print("Baseline R²:", r2_dummy)

# Verdict on error: the ensemble passes only with a strictly lower RMSE.
print(
    "✅ O ensemble tem erro menor que o baseline"
    if rmse_model < rmse_dummy
    else "⚠️ O ensemble não superou o baseline em termos de RMSE"
)

# Verdict on R²: the ensemble passes only with a strictly higher score.
print(
    "✅ O ensemble explica mais variância que o baseline"
    if r2_model > r2_dummy
    else "⚠️ O ensemble não melhora em relação ao baseline em R²"
)

Voting RMSE: 0.5556013345212151
Baseline RMSE: 1.1448563543099792
Voting R²: 0.764430152687029
Baseline R²: -0.00021908714592466794
✅ O ensemble tem erro menor que o baseline
✅ O ensemble explica mais variância que o baseline
