# Comparing From-Scratch XGBoost with Standard Libraries

This notebook shows how to use our custom `advanced_xgboost` implementation and compares its test-set mean squared error (MSE) against `xgboost.XGBRegressor` and `sklearn.ensemble.GradientBoostingRegressor` on a synthetic regression dataset.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error

# Import our model
from advanced_xgboost import XGBoost

# Import library models for comparison
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor

# Apply seaborn's "whitegrid" style to the plots drawn later in the notebook.
sns.set_style(style="whitegrid")

## 1. Generate Data

In [None]:
# Synthetic regression problem: 1000 samples, 10 features, noise level 30.
# The fixed random_state makes the generated data identical on every run.
X, y = make_regression(n_samples=1000, n_features=10, noise=30, random_state=42)

# Hold out 20% of the rows as a test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 2. Train and Evaluate Models

In [None]:
# Hyperparameters shared by all three models, so each one is trained with the
# same number of trees, shrinkage and tree depth.
params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
}


def fit_and_score(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and evaluate it on the test split.

    Parameters
    ----------
    model : object
        Any estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train : array-like
        Training features and targets.
    X_test, y_test : array-like
        Held-out features and targets used only for scoring.

    Returns
    -------
    tuple
        ``(y_pred, mse)``: the predictions for ``X_test`` and their mean
        squared error against ``y_test``.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return y_pred, mean_squared_error(y_test, y_pred)


# --- Our Custom XGBoost ---
xgb_custom = XGBoost(**params, min_child_weight=1, lambda_reg=1.0)
y_pred_custom, mse_custom = fit_and_score(xgb_custom, X_train, y_train, X_test, y_test)

# --- Official XGBoost Library ---
# reg_lambda is set to the same value as lambda_reg on the custom model above.
xgb_official = XGBRegressor(
    **params,
    reg_lambda=1.0,
    objective='reg:squarederror',
    random_state=42,
)
y_pred_official, mse_official = fit_and_score(xgb_official, X_train, y_train, X_test, y_test)

# --- Scikit-learn Gradient Boosting ---
gbr_sklearn = GradientBoostingRegressor(**params, random_state=42)
y_pred_sklearn, mse_sklearn = fit_and_score(gbr_sklearn, X_train, y_train, X_test, y_test)

# Pad every label to the same width so the MSE values line up in one column.
LABEL_WIDTH = 26
for model_name, model_mse in [
    ("Custom XGBoost", mse_custom),
    ("Official XGBoost", mse_official),
    ("Scikit-learn GBR", mse_sklearn),
]:
    header = f"{model_name} MSE:"
    print(f"{header:<{LABEL_WIDTH}}{model_mse:.4f}")

## 3. Visualize Predictions

In [None]:
# Endpoints of the y = x reference line: a perfect model would place every
# point on it. Computed once and reused for both panels.
diag = [np.min(y_test), np.max(y_test)]

# One entry per panel: (model name, test predictions, test MSE, extra scatter kwargs).
# The first panel keeps matplotlib's default marker colour; the second is green.
panels = [
    ('Custom XGBoost', y_pred_custom, mse_custom, {}),
    ('Official XGBoost', y_pred_official, mse_official, {'color': 'green'}),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 7))

for ax, (panel_name, panel_pred, panel_mse, scatter_kwargs) in zip(axes, panels):
    ax.scatter(y_test, panel_pred, alpha=0.5, label=f'MSE: {panel_mse:.2f}', **scatter_kwargs)
    ax.plot(diag, diag, '--', color='red', lw=2)
    ax.set_title(f'{panel_name}: Predictions vs Actual')
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Predicted Values')
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()