In [None]:
from boosting import BoostRegressor
import numpy as np
import time
# ================ USAGE EXAMPLE ================
if __name__ == "__main__":
    # Generate sample data
    np.random.seed(42)
    n_samples, n_features = 5000, 10
    X = np.random.randn(n_samples, n_features)
    y = np.sum(X[:, :3], axis=1) + 0.1 * np.random.randn(n_samples)
    
    # Split data
    split_idx = int(0.8 * n_samples)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    
    print("🔥 Comparing Tree Growth Strategies")
    print("=" * 50)
    
    # 1. Level-wise (original)
    start_time = time.time()
    
    model_level = BoostRegressor(
        n_estimators=50,
        learning_rate=0.1,
        max_depth=6,
        tree_learner="level",  # Original approach
        tree_method="binned",
        binned_mode="hist",
        verbose=True,
        batch_size=1,
        use_gpu=False,
        adaptive_hist=False,
        use_goss=True,
        use_neural=False,  # Original approach
    )

    
    model_level.fit(X_train, y_train, eval_set=(X_test, y_test))
    level_time = time.time() - start_time
    level_pred = model_level.predict(X_test)
    level_mse = np.mean((y_test - level_pred) ** 2)
    print(f"   Time: {level_time:.2f}s")
    print(f"   Test MSE: {level_mse:.6f}")
    print(f"   Trees: {len(model_level.trees)}")
    
    # print feature importances
    importances = model_level.feature_importances()
    print("   Feature Importances:", importances)
    
    # # # # compare with scikit learn's GradientBoostingRegressor
    # from sklearn.ensemble import GradientBoostingRegressor
    # model_sklearn = GradientBoostingRegressor(
    #     n_estimators=50,
    #     learning_rate=0.1,
    #     max_depth=6,
    #     verbose=1,
    #     random_state=4#
    # )
    # model_sklearn.fit(X_train, y_train)
    # sklearn_time = time.time() - start_time
    # sklearn_pred = model_sklearn.predict(X_test)
    # sklearn_mse = np.mean((y_test - sklearn_pred) ** 2)
    # print(f"   Scikit-learn Time: {sklearn_time:.2f}s")
    # print(f"   Scikit-learn Test MSE: {sklearn_mse:.6f}")
    # print(f"   Scikit-learn Trees: {len(model_sklearn.estimators_)}")
    



🔥 Comparing Tree Growth Strategies
🚀 Training with Level-wise trees (DART=on, GOSS=on), batch_size=1
[  10] Train: 0.615273, Val: 0.688379, Time: 0.38s
[  20] Train: 0.116879, Val: 0.155410, Time: 0.76s
[  30] Train: 0.044363, Val: 0.071079, Time: 1.08s
[  40] Train: 0.027127, Val: 0.048741, Time: 1.39s
[  50] Train: 0.034554, Val: 0.049874, Time: 1.70s
✅ Training completed in 1.70s, 50 trees
   Time: 1.70s
   Test MSE: 0.045364
   Trees: 50
   Feature Importances: [3.46952902e-01 3.38097412e-01 3.12128474e-01 4.71444334e-04
 3.84886108e-04 3.56998600e-04 4.50378622e-04 5.66395126e-04
 1.44903493e-04 4.46205560e-04]


In [2]:
from shap import *

BoostRegressor = add_shap_to_boostregressor(BoostRegressor)

model_level = BoostRegressor(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=6,
    tree_learner="leaf",  # Original approach
    tree_method="hist",
    verbose=True,
    batch_size=1
)

model_level.fit(X_train, y_train, eval_set=(X_test, y_test))
level_time = time.time() - start_time
# Set background for proper expected value
model_level.set_shap_background(X_train[:100])  # Use sample of training data

# Compute SHAP values (should have much lower additivity errors)
shap_values = model_level.shap_values(X_test[:10], debug=True)

# Validate the fix
# Explain individual predictions
explanation = model_level.explain_prediction(X_test[0])
# Get feature importance
#importance = model_level.shap_feature_importance(X_test[:100])
#print("Feature Importance:", importance)

# Analyze model behavior
#shap_values = model_level.shap_values(X_test[:50])

🚀 Training with Leaf-wise trees (DART=on, GOSS=on), batch_size=1
[  10] Train: 0.560007, Val: 0.635823, Time: 0.12s
[  20] Train: 0.131810, Val: 0.169781, Time: 0.25s
[  30] Train: 0.048517, Val: 0.074908, Time: 0.34s
[  40] Train: 0.031832, Val: 0.053086, Time: 0.42s
[  50] Train: 0.046573, Val: 0.068918, Time: 0.51s
✅ Training completed in 0.51s, 50 trees
[TreeSHAP] Computing SHAP for 10 samples, 10 features, 50 trees
  Sample 0: Adjusting SHAP sum from 0.31848894 to 0.43675774
  Sample 1: Adjusting SHAP sum from 1.29052109 to 1.40878989
  Sample 2: Adjusting SHAP sum from 1.59079794 to 1.70906674
  Sample 3: Adjusting SHAP sum from 0.28618006 to 0.40444887
  Sample 4: Adjusting SHAP sum from -0.07780282 to -0.09129796
  Sample 5: Adjusting SHAP sum from 2.88544908 to 3.00371789
  Sample 6: Adjusting SHAP sum from -1.20012455 to -1.14608999
  Sample 7: Adjusting SHAP sum from -1.71232373 to -1.61104609
  Sample 8: Adjusting SHAP sum from -2.47165796 to -2.42312511
  Sample 9: Adjusti

In [3]:
# Add SHAP to your model
from your_corrected_shap import add_shap_to_boostregressor

add_shap_to_boostregressor(BoostRegressor)

# Train model
model = BoostRegressor(n_estimators=100)
model.fit(X_train, y_train)



ModuleNotFoundError: No module named 'your_corrected_shap'

In [None]:
shap_values