MAINT: Minor simplifications and removal of duplication (#3567)
* Remove the redundant 0 start argument from range calls
* Remove repeated sections
* Simplify
connortann committed Apr 2, 2024
1 parent b1ec978 commit 70ef03b
Showing 6 changed files with 15 additions and 52 deletions.
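
A quick illustration of the first bullet (not part of this commit): Python's range starts at 0 by default, so the two-argument form with a leading 0 is exactly equivalent to the one-argument form.

# range(stop) and range(0, stop) describe the same sequence, so the
# explicit start argument adds noise without changing behavior.
assert range(5) == range(0, 5)
assert list(range(5)) == [0, 1, 2, 3, 4]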
2 changes: 1 addition & 1 deletion shap/_explanation.py
@@ -791,7 +791,7 @@ def _compute_shape(x):
     for i in range(1, len(x)):
         shape = _compute_shape(x[i])
         assert len(shape) == len(first_shape), "Arrays in Explanation objects must have consistent inner dimensions!"
-        for j in range(0, len(shape)):
+        for j in range(len(shape)):
             matches[j] &= shape[j] == first_shape[j]
     return (len(x),) + tuple(first_shape[j] if match else None for j, match in enumerate(matches))

8 changes: 4 additions & 4 deletions shap/explainers/_kernel.py
@@ -500,7 +500,7 @@ def not_equal(i, j):
     def varying_groups(self, x):
         if not scipy.sparse.issparse(x):
             varying = np.zeros(self.data.groups_size)
-            for i in range(0, self.data.groups_size):
+            for i in range(self.data.groups_size):
                 inds = self.data.groups[i]
                 x_group = x[0, inds]
                 if scipy.sparse.issparse(x_group):
@@ -518,7 +518,7 @@ def varying_groups(self, x):
             # if both background and evaluation are zero, the column does not vary
             varying_indices = np.unique(np.union1d(self.data.data.nonzero()[1], x.nonzero()[1]))
             remove_unvarying_indices = []
-            for i in range(0, len(varying_indices)):
+            for i in range(len(varying_indices)):
                 varying_index = varying_indices[i]
                 # now verify the nonzero values do vary
                 data_rows = self.data.data[:, [varying_index]]
@@ -556,7 +556,7 @@ def allocate(self):
             last_indptr_idx = indptr[len(indptr) - 1]
             indptr_wo_last = indptr[:-1]
             new_indptrs = []
-            for i in range(0, self.nsamples - 1):
+            for i in range(self.nsamples - 1):
                 new_indptrs.append(indptr_wo_last + (i * last_indptr_idx))
             new_indptrs.append(indptr + ((self.nsamples - 1) * last_indptr_idx))
             new_indptr = np.concatenate(new_indptrs)
@@ -623,7 +623,7 @@ def run(self):
         # find the expected value of each output
         for i in range(self.nsamplesRun, self.nsamplesAdded):
             eyVal = np.zeros(self.D)
-            for j in range(0, self.N):
+            for j in range(self.N):
                 eyVal += self.y[i * self.N + j, :] * self.data.weights[j]

             self.ey[i, :] = eyVal
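
Aside (not part of this commit): the inner loop in run() above accumulates a weighted average of the model outputs over the N background samples. Assuming y has shape (nsamples * N, D) and weights has shape (N,) and sums to 1, the same expectation could be sketched in vectorized NumPy as:

import numpy as np

def expected_values(y, weights, N, D):
    # Reshape to (nsamples, N, D), then contract the background axis with the
    # weights; this matches eyVal += y[i * N + j, :] * weights[j] summed over j.
    return np.einsum("inj,n->ij", y.reshape(-1, N, D), weights)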
2 changes: 1 addition & 1 deletion shap/explainers/_sampling.py
@@ -183,7 +183,7 @@ def sampling_estimate(self, j, f, x, X, nsamples=10):
         X_masked = self.X_masked[:nsamples * 2,:]
         inds = np.arange(X.shape[1])

-        for i in range(0, nsamples):
+        for i in range(nsamples):
             np.random.shuffle(inds)
             pos = np.where(inds == j)[0][0]
             rind = np.random.randint(X.shape[0])
49 changes: 6 additions & 43 deletions shap/explainers/_tree.py
@@ -794,19 +794,13 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             scaling = 1.0 / len(model.estimators_) # output is average of trees
             self.trees = [IsoTree(e.tree_, f, scaling=scaling, data=data, data_missing=data_missing) for e, f in zip(model.detector_.estimators_, model.detector_.estimators_features_)]
             self.tree_output = "raw_value"
-        elif safe_isinstance(model, "skopt.learning.forest.RandomForestRegressor"):
-            assert hasattr(model, "estimators_"), "Model has no `estimators_`! Have you called `model.fit`?"
-            self.internal_dtype = model.estimators_[0].tree_.value.dtype.type
-            self.input_dtype = np.float32
-            scaling = 1.0 / len(model.estimators_) # output is average of trees
-            self.trees = [SingleTree(e.tree_, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
-            self.objective = objective_name_map.get(model.criterion, None)
-            self.tree_output = "raw_value"
         elif safe_isinstance(
             model,
             [
                 "sklearn.ensemble.ExtraTreesRegressor",
                 "sklearn.ensemble.forest.ExtraTreesRegressor",
+                "skopt.learning.forest.RandomForestRegressor",
+                "skopt.learning.forest.ExtraTreesRegressor",
             ],
         ):
             assert hasattr(model, "estimators_"), "Model has no `estimators_`! Have you called `model.fit`?"
@@ -816,14 +810,6 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             self.trees = [SingleTree(e.tree_, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
             self.objective = objective_name_map.get(model.criterion, None)
             self.tree_output = "raw_value"
-        elif safe_isinstance(model, "skopt.learning.forest.ExtraTreesRegressor"):
-            assert hasattr(model, "estimators_"), "Model has no `estimators_`! Have you called `model.fit`?"
-            self.internal_dtype = model.estimators_[0].tree_.value.dtype.type
-            self.input_dtype = np.float32
-            scaling = 1.0 / len(model.estimators_) # output is average of trees
-            self.trees = [SingleTree(e.tree_, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
-            self.objective = objective_name_map.get(model.criterion, None)
-            self.tree_output = "raw_value"
         elif safe_isinstance(
             model,
             [
@@ -852,6 +838,8 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
         elif safe_isinstance(
             model,
             [
+                "sklearn.ensemble.ExtraTreesClassifier",
+                "sklearn.ensemble.forest.ExtraTreesClassifier",
                 "sklearn.ensemble.RandomForestClassifier",
                 "sklearn.ensemble.forest.RandomForestClassifier",
             ],
@@ -863,20 +851,6 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             self.trees = [SingleTree(e.tree_, normalize=True, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
             self.objective = objective_name_map.get(model.criterion, None)
             self.tree_output = "probability"
-        elif safe_isinstance(
-            model,
-            [
-                "sklearn.ensemble.ExtraTreesClassifier",
-                "sklearn.ensemble.forest.ExtraTreesClassifier",
-            ],
-        ): # TODO: add unit test for this case
-            assert hasattr(model, "estimators_"), "Model has no `estimators_`! Have you called `model.fit`?"
-            self.internal_dtype = model.estimators_[0].tree_.value.dtype.type
-            self.input_dtype = np.float32
-            scaling = 1.0 / len(model.estimators_) # output is average of trees
-            self.trees = [SingleTree(e.tree_, normalize=True, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
-            self.objective = objective_name_map.get(model.criterion, None)
-            self.tree_output = "probability"
         elif safe_isinstance(
             model,
             [
@@ -1085,18 +1059,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             # Some properties of the sklearn API are passed to a DMatrix object in
             # xgboost We need to make sure we do the same here - GH #3313
             self._xgb_dmatrix_props = get_xgboost_dmatrix_properties(model)
-        elif safe_isinstance(model, "xgboost.sklearn.XGBRegressor"):
-            self.original_model = model.get_booster()
-            self._set_xgboost_model_attributes(
-                data,
-                data_missing,
-                objective_name_map,
-                tree_output_name_map,
-            )
-            # Some properties of the sklearn API are passed to a DMatrix object in
-            # xgboost We need to make sure we do the same here - GH #3313
-            self._xgb_dmatrix_props = get_xgboost_dmatrix_properties(model)
-        elif safe_isinstance(model, "xgboost.sklearn.XGBRanker"):
+        elif safe_isinstance(model, ["xgboost.sklearn.XGBRegressor", "xgboost.sklearn.XGBRanker"]):
             self.original_model = model.get_booster()
             self._set_xgboost_model_attributes(
                 data,
@@ -1715,7 +1678,7 @@ def extract_data(node, tree):
     node_sample_weight = np.zeros(m,dtype="float64")
     values_lst = list(nodes_dict.values())
     keys_lst = list(nodes_dict.keys())
-    for i in range(0,len(keys_lst)):
+    for i in range(len(keys_lst)):
         value = values_lst[i]
         key = keys_lst[i]
         if "leaf" in value:
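
All of the _tree.py consolidations above rely on safe_isinstance accepting a list of fully qualified class names, which is what lets the formerly duplicated elif branches (the skopt forests, ExtraTreesClassifier, and XGBRegressor/XGBRanker) collapse into single branches. A simplified sketch of that pattern (not SHAP's exact implementation, which has additional guards):

import sys

def safe_isinstance_sketch(obj, class_path_str):
    # Accept a single dotted name or a list of them.
    paths = [class_path_str] if isinstance(class_path_str, str) else class_path_str
    for path in paths:
        module_name, _, class_name = path.rpartition(".")
        # Only match classes whose module has already been imported.
        module = sys.modules.get(module_name)
        cls = getattr(module, class_name, None) if module else None
        if cls is not None and isinstance(obj, cls):
            return True
    return False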
2 changes: 1 addition & 1 deletion shap/plots/_benchmark.py
@@ -175,7 +175,7 @@ def benchmark(benchmark, show=True):
         txt.set_path_effects([PathEffects.withStroke(linewidth=5, foreground='w')])

     ax = plt.gca()
-    ax.set_yticks([1 - i / (len(methods) - 1) for i in range(0, len(methods))])
+    ax.set_yticks([1 - i / (len(methods) - 1) for i in range(len(methods))])
     ax.set_yticklabels(methods, rotation=0, fontsize=11)

     ax.set_xticks(np.arange(len(metrics) + 1))
4 changes: 2 additions & 2 deletions shap/utils/_masked_model.py
@@ -372,7 +372,7 @@ def _build_fixed_single_output(averaged_outs, last_outs, outputs, batch_position
     #         averaged_outs[0] = np.mean(linearizing_weights * link(last_outs))
     #     else:
     #         averaged_outs[0] = link(np.mean(last_outs))
-    for i in range(0, len(averaged_outs)):
+    for i in range(len(averaged_outs)):
         if batch_positions[i] < batch_positions[i+1]:
             if num_varying_rows[i] == sample_count:
                 last_outs[:] = outputs[batch_positions[i]:batch_positions[i+1]]
@@ -390,7 +390,7 @@ def _build_fixed_multi_output(averaged_outs, last_outs, outputs, batch_positions
     # here we can assume that the outputs will always be the same size, and we need
     # to carry over evaluation outputs
     sample_count = last_outs.shape[0]
-    for i in range(0, len(averaged_outs)):
+    for i in range(len(averaged_outs)):
         if batch_positions[i] < batch_positions[i+1]:
             if num_varying_rows[i] == sample_count:
                 last_outs[:] = outputs[batch_positions[i]:batch_positions[i+1]]
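
Aside (not part of this commit): in both functions above, batch_positions behaves as an array of prefix offsets into outputs, so batch_positions[i] < batch_positions[i+1] tests whether mask i produced any model evaluations. A small illustration under that assumption (the data here is hypothetical):

import numpy as np

outputs = np.array([10.0, 20.0, 30.0])    # stacked model outputs for all masks
batch_positions = np.array([0, 2, 2, 3])  # mask 0 -> rows 0:2, mask 1 -> none, mask 2 -> rows 2:3
for i in range(len(batch_positions) - 1):
    lo, hi = batch_positions[i], batch_positions[i + 1]
    if lo < hi:  # this mask contributed at least one evaluation
        print(i, outputs[lo:hi].mean())
    # otherwise the previous outputs are carried over, as in the diff above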
