Skip to content

Commit

Permalink
Fix deprecated use of xgboost early_stopping_rounds (#3306)
Browse files Browse the repository at this point in the history
* add test clustering

* run test only if xgboost is present

---------

Co-authored-by: connortann <71127464+connortann@users.noreply.github.com>
  • Loading branch information
CloseChoice and connortann committed Oct 5, 2023
1 parent 8f41a7e commit 5121282
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
8 changes: 4 additions & 4 deletions shap/utils/_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ def xgboost_distances_r2(X, y, learning_rate=0.6, early_stopping_rounds=2, subsa
test_preds = []
train_preds = []
for i in range(X.shape[1]):
model = xgboost.XGBRegressor(subsample=subsample, n_estimators=max_estimators, learning_rate=learning_rate, max_depth=1)
model.fit(X_train[:,i:i+1], y_train, eval_set=[(X_test[:,i:i+1], y_test)], early_stopping_rounds=early_stopping_rounds, verbose=False)
model = xgboost.XGBRegressor(subsample=subsample, n_estimators=max_estimators, learning_rate=learning_rate, max_depth=1, early_stopping_rounds=early_stopping_rounds)
model.fit(X_train[:,i:i+1], y_train, eval_set=[(X_test[:,i:i+1], y_test)], verbose=False)
train_preds.append(model.predict(X_train[:,i:i+1]))
test_preds.append(model.predict(X_test[:,i:i+1]))
train_preds = np.vstack(train_preds).T
Expand All @@ -132,8 +132,8 @@ def xgboost_distances_r2(X, y, learning_rate=0.6, early_stopping_rounds=2, subsa

# fit the model
else:
model = xgboost.XGBRegressor(subsample=subsample, n_estimators=max_estimators, learning_rate=learning_rate, max_depth=1)
model.fit(X_train[:,j:j+1], train_preds[:,i], eval_set=[(X_test[:,j:j+1], test_preds[:,i])], early_stopping_rounds=early_stopping_rounds, verbose=False)
model = xgboost.XGBRegressor(subsample=subsample, n_estimators=max_estimators, learning_rate=learning_rate, max_depth=1, early_stopping_rounds=early_stopping_rounds)
model.fit(X_train[:,j:j+1], train_preds[:,i], eval_set=[(X_test[:,j:j+1], test_preds[:,i])], verbose=False)
r2 = max(0, 1 - np.mean((test_preds[:,i] - model.predict(X_test[:,j:j+1]))**2) / preds_var)
dist[i,j] = 1 - r2

Expand Down
15 changes: 15 additions & 0 deletions tests/utils/test_clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import numpy as np
import pytest

from shap.utils import hclust


def test_hclust_runs():
    """Smoke test for ``hclust`` on a tiny two-feature dataset (GH 3290).

    Only checks that the call completes and hands back a NumPy array; the
    contents of the result are not inspected.
    """
    # Skip (rather than fail) when xgboost cannot be imported.
    pytest.importorskip('xgboost')

    # Two feature columns of nine rows each: 1..9 and 100..900.
    small_feature = np.arange(1, 10)
    large_feature = np.arange(100, 1000, step=100)
    X = np.column_stack((small_feature, large_feature))

    # Binary target: 1 where the first column exceeds 5, otherwise 0.
    y = np.where(X[:, 0] > 5, 1, 0)

    result = hclust(X, y)

    # Success criterion is simply that an ndarray came back.
    assert isinstance(result, np.ndarray)

0 comments on commit 5121282

Please sign in to comment.