In [1]:
from boosting import BoostRegressor
import numpy as np
import time
# ================ USAGE EXAMPLE ================
if __name__ == "__main__":
    # Demo cell: fit a BoostRegressor on synthetic data and report hold-out
    # error. The names assigned here (X_train, X_test, y_train, y_test,
    # start_time, model_level, ...) are module-level and are read by later
    # cells, so they are kept unchanged.

    # Generate sample data: y is the sum of the first three feature columns
    # plus Gaussian noise (std 0.1); the remaining columns do not enter y.
    np.random.seed(42)
    n_samples, n_features = 5000, 10
    X = np.random.randn(n_samples, n_features)
    y = np.sum(X[:, :3], axis=1) + 0.1 * np.random.randn(n_samples)

    # Split data: first 80% of the rows for training, last 20% held out.
    split_idx = int(0.8 * n_samples)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    print("🔥 Comparing Tree Growth Strategies")
    print("=" * 50)

    # 1. Leaf-wise growth.
    # The banner used to say "Level-wise", but the model is configured with
    # tree_learner="leaf" and its own training log reports leaf-wise trees,
    # so the label now matches what actually runs.
    # NOTE(review): if a level-wise run was intended, change `tree_learner`
    # instead — confirm the accepted value against BoostRegressor.
    print("\n1️⃣ Leaf-wise Tree Growth:")

    model_level = BoostRegressor(
        n_estimators=50,
        learning_rate=0.1,
        max_depth=6,
        tree_learner="leaf",
        tree_method="hist",
        verbose=True,
        batch_size=1
    )

    # A commented-out scikit-learn GradientBoostingRegressor baseline used to
    # sit here. It shared `start_time` with the run below, so re-enabling it
    # in place would have skewed both timings; add it as its own cell with
    # its own timer if the comparison is needed.

    # Start the clock immediately before fit() so that only training is timed
    # (model construction is excluded).
    start_time = time.time()
    # NOTE(review): the hold-out split doubles as eval_set, so the "Val"
    # column in the training log and the test MSE below are computed on the
    # same rows. If fit() uses eval_set for any model selection, the test MSE
    # is optimistic — confirm.
    model_level.fit(X_train, y_train, eval_set=(X_test, y_test))
    level_time = time.time() - start_time

    # Mean squared error on the held-out rows.
    level_pred = model_level.predict(X_test)
    level_mse = np.mean((y_test - level_pred) ** 2)

    print(f"   Time: {level_time:.2f}s")
    print(f"   Test MSE: {level_mse:.6f}")
    print(f"   Trees: {len(model_level.trees)}")
    

🔥 Comparing Tree Growth Strategies

1️⃣ Level-wise Tree Growth:
🚀 Training with Leaf-wise trees (DART=on, GOSS=on), batch_size=1
[  10] Train: 0.644340, Val: 0.721932, Time: 1.83s
[  20] Train: 0.141160, Val: 0.183251, Time: 1.86s
[  30] Train: 0.047511, Val: 0.077846, Time: 1.89s
[  40] Train: 0.088175, Val: 0.115822, Time: 1.93s
[  50] Train: 0.026139, Val: 0.047356, Time: 1.95s
✅ Training completed in 1.95s, 50 trees
   Time: 2.03s
   Test MSE: 0.047356
   Trees: 50


In [2]:
# Import the one helper this notebook uses by name instead of `from shap
# import *`, so the import cannot silently overwrite existing globals such as
# `np`, `time` or `BoostRegressor`.
from shap import add_shap_to_boostregressor

# Attach the SHAP methods used below (set_shap_background, shap_values,
# explain_prediction) and rebind the name to whatever the helper returns.
# NOTE(review): re-running this cell passes the already-extended class back
# into the helper — confirm that applying it twice is harmless.
BoostRegressor = add_shap_to_boostregressor(BoostRegressor)

model_level = BoostRegressor(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=6,
    tree_learner="leaf",  # leaf-wise growth, as reported by the training log
    tree_method="hist",
    verbose=True,
    batch_size=1
)

# Time this fit with its own clock. Previously `start_time` was the value left
# behind by the earlier training cell, so `level_time` measured the wall time
# since that cell ran rather than the duration of this fit.
start_time = time.time()
model_level.fit(X_train, y_train, eval_set=(X_test, y_test))
level_time = time.time() - start_time

# Set background for proper expected value
model_level.set_shap_background(X_train[:100])  # Use sample of training data

# Compute SHAP values for the first 10 held-out rows; debug=True makes the
# call print per-sample diagnostics (the "Adjusting SHAP sum" lines).
shap_values = model_level.shap_values(X_test[:10], debug=True)

# Explain an individual prediction (first held-out row).
explanation = model_level.explain_prediction(X_test[0])

# Optional follow-ups, left disabled:
# Get feature importance
#importance = model_level.shap_feature_importance(X_test[:100])
#print("Feature Importance:", importance)

# Analyze model behavior on a larger slice
#shap_values = model_level.shap_values(X_test[:50])

🚀 Training with Leaf-wise trees (DART=on, GOSS=on), batch_size=1
[  10] Train: 0.587451, Val: 0.662892, Time: 0.03s
[  20] Train: 0.125221, Val: 0.170599, Time: 0.07s
[  30] Train: 0.046902, Val: 0.075848, Time: 0.09s
[  40] Train: 0.029554, Val: 0.052284, Time: 0.11s
[  50] Train: 0.026333, Val: 0.045630, Time: 0.14s
✅ Training completed in 0.14s, 50 trees
[TreeSHAP] Computing SHAP for 10 samples, 10 features, 50 trees
  Sample 0: Adjusting SHAP sum from 0.47145656 to 0.49913301
  Sample 1: Adjusting SHAP sum from 1.37536788 to 1.40304432
  Sample 2: Adjusting SHAP sum from 1.54892413 to 1.57660058
  Sample 3: Adjusting SHAP sum from 0.16006203 to 0.18773847
  Sample 4: Adjusting SHAP sum from -0.12725600 to -0.09957956
  Sample 5: Adjusting SHAP sum from 2.99801349 to 3.02568993
  Sample 6: Adjusting SHAP sum from -0.89579542 to -0.86811897
  Sample 7: Adjusting SHAP sum from -1.93003967 to -1.90236323
  Sample 8: Adjusting SHAP sum from -2.29444136 to -2.26676491
  Sample 9: Adjusti

In [None]:
# Add SHAP to your model
# The helper is imported from `shap`, the module the executed SHAP cell above
# obtains it from; the previous `your_corrected_shap` was a placeholder name.
from shap import add_shap_to_boostregressor

# Keep the helper's return value, matching how the executed SHAP cell uses it,
# so the class instantiated below is the SHAP-enabled one.
# NOTE(review): if BoostRegressor was already extended by an earlier cell this
# applies the helper a second time — confirm that is harmless.
BoostRegressor = add_shap_to_boostregressor(BoostRegressor)

# Train model
model = BoostRegressor(n_estimators=100)
model.fit(X_train, y_train)



In [None]:
# Echo the SHAP array as the cell's last expression so the notebook renders
# its repr. `shap_values` is not defined in this cell: it must already exist
# in the kernel (an earlier cell assigns it from model_level.shap_values(...)),
# so this cell fails with NameError on a fresh kernel unless that cell ran.
shap_values

array([[ 1.91107196e-01,  4.20794623e-04, -8.77049296e-03, ...,
        -4.86752828e-08,  5.19424793e-12, -2.25141219e-10],
       [ 5.39337510e-01,  4.20436384e-04,  3.54941428e-02, ...,
         6.65237358e-27,  0.00000000e+00, -5.44576677e-25],
       [ 1.04287985e-02,  2.56604225e-01,  7.41113806e-02, ...,
        -4.26782522e-10,  0.00000000e+00,  2.08622529e-08],
       ...,
       [ 6.47171653e-03,  4.69619953e-02,  1.41931347e-03, ...,
        -4.91279500e-08,  5.19424793e-12,  2.06371118e-08],
       [ 2.42425815e-01, -1.79334008e-02, -1.95592688e-02, ...,
        -4.86752828e-08,  5.19424793e-12,  2.08622529e-08],
       [-1.37218747e-01, -2.01557929e-01, -2.90648754e-01, ...,
        -4.86752828e-08,  5.19424793e-12, -1.52744722e-08]])