In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Generate synthetic regression data and hold out a test split.
X, y = make_regression(n_samples=500, n_features=10, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Total number of boosting iterations to track (one tree is added per iteration).
N_TREES = 100

# warm_start=True makes each fit() call reuse the trees already built and only
# add new ones up to the current n_estimators, so the loop below grows the
# ensemble one tree at a time instead of retraining from scratch.
gbr = GradientBoostingRegressor(n_estimators=N_TREES, learning_rate=0.1, warm_start=True, random_state=42)

# Ensemble sizes that get evaluated. The same sequence is used as the x-axis of
# the plot so that the point for "k trees" is drawn at x == k (plotting the
# loss lists alone would place the 1-tree model at x == 0).
tree_counts = range(1, N_TREES + 1)

# Track training and test loss, one entry per ensemble size.
train_losses = []
test_losses = []

for i in tree_counts:
    gbr.set_params(n_estimators=i)
    gbr.fit(X_train, y_train)

    # Predict and calculate MSE on both splits for the current ensemble size.
    y_train_pred = gbr.predict(X_train)
    y_test_pred = gbr.predict(X_test)

    train_mse = mean_squared_error(y_train, y_train_pred)
    test_mse = mean_squared_error(y_test, y_test_pred)

    train_losses.append(train_mse)
    test_losses.append(test_mse)

# Plot loss curves against the actual number of trees.
plt.plot(tree_counts, train_losses, label="Train MSE")
plt.plot(tree_counts, test_losses, label="Test MSE")
plt.xlabel("Number of Trees (Boosting Iterations)")
plt.ylabel("Mean Squared Error")
plt.title("Gradient Boosting Loss Over Time")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


Gradient boosting with scikit-learn:

In [None]:
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor

# y_train in this notebook is the continuous target produced by
# make_regression, which a classifier rejects at fit time (scikit-learn raises
# "Unknown label type" for continuous targets). Use the regressor so this cell
# runs on the data defined above. GradientBoostingClassifier stays imported for
# use with a dataset that has discrete class labels.
model = GradientBoostingRegressor()
model.fit(X_train, y_train)

Gradient boosting with XGBoost:

In [None]:
from xgboost import XGBClassifier, XGBRegressor

# y_train in this notebook is the continuous target produced by
# make_regression, so a regressor with a regression metric is required here;
# XGBClassifier with the multi-class 'mlogloss' metric expects discrete class
# labels. The deprecated use_label_encoder argument is dropped.
# XGBClassifier stays imported for use with a classification dataset.
model = XGBRegressor(eval_metric='rmse')
model.fit(X_train, y_train)

**TODO:** add a side-by-side comparison of XGBoost vs. scikit-learn Gradient Boosting on the same dataset.