In [1]:
from boosting import BoostRegressor
import numpy as np
import time
# ================ USAGE EXAMPLE ================
if __name__ == "__main__":
    # Generate sample data
    np.random.seed(42)
    n_samples, n_features = 5000, 10
    X = np.random.randn(n_samples, n_features)
    y = np.sum(X[:, :3], axis=1) + 0.1 * np.random.randn(n_samples)
    
    # Split data
    split_idx = int(0.8 * n_samples)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    
    print("🔥 Comparing Tree Growth Strategies")
    print("=" * 50)
    
    # 1. Level-wise (original)
    start_time = time.time()
    
    model_level = BoostRegressor(
        n_estimators=50,
        learning_rate=0.1,
        adaptive_lr=False,
        lr_schedule="cosine_restart",  # Original approach
        max_depth=6,
        tree_learner="level",  # Original approach
        tree_method="binned",
        binned_mode="hist",
        verbose=True,
        batch_size=1,
        use_gpu=False,
        use_goss=True,
        use_neural=False,  # Original approach
        enable_interactions=False,
    )

    
    model_level.fit(X_train, y_train, eval_set=(X_test, y_test))
    level_time = time.time() - start_time
    level_pred = model_level.predict(X_test)
    level_mse = np.mean((y_test - level_pred) ** 2)
    print(f"   Time: {level_time:.2f}s")
    print(f"   Test MSE: {level_mse:.6f}")
    print(f"   Trees: {len(model_level.trees)}")
    
    # print feature importances
    importances = model_level.feature_importances()
    print("   Feature Importances:", importances)
    # # plot feature importances like shap
    # import matplotlib.pyplot as plt
    # plt.bar(range(n_features), importances)
    # plt.xlabel("Feature")
    # plt.ylabel("Importance")
    # plt.title("Feature Importances")
    # plt.show()
    
    # # # # compare with scikit learn's GradientBoostingRegressor
    # from sklearn.ensemble import GradientBoostingRegressor
    # model_sklearn = GradientBoostingRegressor(
    #     n_estimators=50,
    #     learning_rate=0.1,
    #     max_depth=6,
    #     verbose=1,
    #     random_state=4
    # )
    # model_sklearn.fit(X_train, y_train)
    # sklearn_time = time.time() - start_time
    # sklearn_pred = model_sklearn.predict(X_test)
    # sklearn_mse = np.mean((y_test - sklearn_pred) ** 2)
    # print(f"   Scikit-learn Time: {sklearn_time:.2f}s")
    # print(f"   Scikit-learn Test MSE: {sklearn_mse:.6f}")
    # print(f"   Scikit-learn Trees: {len(model_sklearn.estimators_)}")




🔥 Comparing Tree Growth Strategies
🚀 Training with Level-wise trees (batch_size=1
   Objective: reg:squarederror, Loss: mse, Base score: -0.0095
   NODE layers disabled.
   GOSS: top_rate=0.2, other_rate=0.1
   DART: rate_drop=0.1, skip_drop=0.5, normalize_type=tree, one_drop=no
   Binning: binned (hist), 256 bins
   Training on 4000 samples with 10 features
[  10] Train: 0.565381, Val: 0.652812, Time: 0.23s
[  20] Train: 0.130102, Val: 0.175642, Time: 0.38s
[  30] Train: 0.040024, Val: 0.067765, Time: 0.48s
[  40] Train: 0.027387, Val: 0.049963, Time: 0.55s
[  50] Train: 0.023890, Val: 0.044788, Time: 0.62s
✅ Training completed in 0.62s, 50 trees
   Time: 0.62s
   Test MSE: 0.044788
   Trees: 50
   Feature Importances: [3.55186823e-01 3.24687004e-01 3.18762310e-01 1.21449909e-04
 2.07482414e-04 3.63795373e-04 9.07822997e-05 2.41901777e-04
 1.82462106e-04 1.55989074e-04]


Testing PIQP Multistage Solver
Problem size: 28 variables, 15 equality, 56 inequality constraints
Detected multistage structure: 3 stages

Structure detection: Enabled
Detected structure: {'detected': True, 'num_stages': 3, 'block_sizes': [7, 7, 7], 'arrow_size': 7, 'total_vars': 28}
Iter 0: r_prim=0.00e+00, r_eq=8.46e-01, r_ineq=4.00e+00, r_comp=9.90e-01, μ=1.00e-01


RuntimeError: linalg.solve_triangular: The input tensor B must have at least 2 dimensions.

In [2]:
from shap import *

BoostRegressor = add_shap_to_boostregressor(BoostRegressor)

model_level = BoostRegressor(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=6,
    tree_learner="leaf",  # Original approach
    tree_method="hist",
    verbose=True,
    batch_size=1
)

model_level.fit(X_train, y_train, eval_set=(X_test, y_test))
level_time = time.time() - start_time
# Set background for proper expected value
model_level.set_shap_background(X_train[:100])  # Use sample of training data

# Compute SHAP values (should have much lower additivity errors)
shap_values = model_level.shap_values(X_test[:10], debug=True)

# Validate the fix
# Explain individual predictions
explanation = model_level.explain_prediction(X_test[0])
# Get feature importance
#importance = model_level.shap_feature_importance(X_test[:100])
#print("Feature Importance:", importance)

# Analyze model behavior
#shap_values = model_level.shap_values(X_test[:50])

🚀 Training with Leaf-wise trees (batch_size=1
   Objective: reg:squarederror, Loss: mse, Base score: -0.0095
   NODE layers disabled.
   GOSS: top_rate=0.2, other_rate=0.1
   DART: rate_drop=0.1, skip_drop=0.5, normalize_type=tree, one_drop=no
   Binning: hist (hist), 256 bins
   Training on 4000 samples with 10 features


ValueError: tree_method must be 'binned' or 'exact'

In [None]:
# Add SHAP to your model
from your_corrected_shap import add_shap_to_boostregressor

add_shap_to_boostregressor(BoostRegressor)

# Train model
model = BoostRegressor(n_estimators=100)
model.fit(X_train, y_train)



In [None]:
shap_values