MAINT: Minor simplifications and removal of duplication (#3567)
* Remove the redundant 0 start argument from range calls
* Remove repeated sections
* Simplify
connortann committed Apr 2, 2024
1 parent b1ec978 commit 70ef03b
Showing 6 changed files with 15 additions and 52 deletions.
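
A quick illustration of the first bullet (not part of this commit): Python's range starts at 0 by default, so the two-argument form with a leading 0 is exactly equivalent to the one-argument form.

# range(stop) and range(0, stop) describe the same sequence, so the
# explicit start argument adds noise without changing behavior.
assert range(5) == range(0, 5)
assert list(range(5)) == [0, 1, 2, 3, 4]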
2 changes: 1 addition & 1 deletion shap/_explanation.py
@@ -791,7 +791,7 @@ def _compute_shape(x):
     for i in range(1, len(x)):
         shape = _compute_shape(x[i])
         assert len(shape) == len(first_shape), "Arrays in Explanation objects must have consistent inner dimensions!"
-        for j in range(0, len(shape)):
+        for j in range(len(shape)):
             matches[j] &= shape[j] == first_shape[j]
     return (len(x),) + tuple(first_shape[j] if match else None for j, match in enumerate(matches))

8 changes: 4 additions & 4 deletions shap/explainers/_kernel.py
@@ -500,7 +500,7 @@ def not_equal(i, j):
     def varying_groups(self, x):
         if not scipy.sparse.issparse(x):
             varying = np.zeros(self.data.groups_size)
-            for i in range(0, self.data.groups_size):
+            for i in range(self.data.groups_size):
                 inds = self.data.groups[i]
                 x_group = x[0, inds]
                 if scipy.sparse.issparse(x_group):
@@ -518,7 +518,7 @@ def varying_groups(self, x):
             # if both background and evaluation are zero, the column does not vary
             varying_indices = np.unique(np.union1d(self.data.data.nonzero()[1], x.nonzero()[1]))
             remove_unvarying_indices = []
-            for i in range(0, len(varying_indices)):
+            for i in range(len(varying_indices)):
                 varying_index = varying_indices[i]
                 # now verify the nonzero values do vary
                 data_rows = self.data.data[:, [varying_index]]
@@ -556,7 +556,7 @@ def allocate(self):
             last_indptr_idx = indptr[len(indptr) - 1]
             indptr_wo_last = indptr[:-1]
             new_indptrs = []
-            for i in range(0, self.nsamples - 1):
+            for i in range(self.nsamples - 1):
                 new_indptrs.append(indptr_wo_last + (i * last_indptr_idx))
             new_indptrs.append(indptr + ((self.nsamples - 1) * last_indptr_idx))
             new_indptr = np.concatenate(new_indptrs)
@@ -623,7 +623,7 @@ def run(self):
         # find the expected value of each output
         for i in range(self.nsamplesRun, self.nsamplesAdded):
             eyVal = np.zeros(self.D)
-            for j in range(0, self.N):
+            for j in range(self.N):
                 eyVal += self.y[i * self.N + j, :] * self.data.weights[j]

             self.ey[i, :] = eyVal
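
Aside (not part of this commit): the inner loop in run() above accumulates a weighted average of the model outputs over the N background samples. Assuming y has shape (nsamples * N, D) and weights has shape (N,) and sums to 1, the same expectation could be sketched in vectorized NumPy as:

import numpy as np

def expected_values(y, weights, N, D):
    # Reshape to (nsamples, N, D), then contract the background axis with the
    # weights; this matches eyVal += y[i * N + j, :] * weights[j] summed over j.
    return np.einsum("inj,n->ij", y.reshape(-1, N, D), weights)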
2 changes: 1 addition & 1 deletion shap/explainers/_sampling.py
@@ -183,7 +183,7 @@ def sampling_estimate(self, j, f, x, X, nsamples=10):
         X_masked = self.X_masked[:nsamples * 2,:]
         inds = np.arange(X.shape[1])

-        for i in range(0, nsamples):
+        for i in range(nsamples):
             np.random.shuffle(inds)
             pos = np.where(inds == j)[0][0]
             rind = np.random.randint(X.shape[0])
49 changes: 6 additions & 43 deletions shap/explainers/_tree.py
@@ -794,19 +794,13 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             scaling = 1.0 / len(model.estimators_) # output is average of trees
             self.trees = [IsoTree(e.tree_, f, scaling=scaling, data=data, data_missing=data_missing) for e, f in zip(model.detector_.estimators_, model.detector_.estimators_features_)]
             self.tree_output = "raw_value"
-        elif safe_isinstance(model, "skopt.learning.forest.RandomForestRegressor"):
-            assert hasattr(model, "estimators_"), "Model has no `estimators_`! Have you called `model.fit`?"
-            self.internal_dtype = model.estimators_[0].tree_.value.dtype.type
-            self.input_dtype = np.float32
-            scaling = 1.0 / len(model.estimators_) # output is average of trees
-            self.trees = [SingleTree(e.tree_, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
-            self.objective = objective_name_map.get(model.criterion, None)
-            self.tree_output = "raw_value"
         elif safe_isinstance(
             model,
             [
                 "sklearn.ensemble.ExtraTreesRegressor",
                 "sklearn.ensemble.forest.ExtraTreesRegressor",
+                "skopt.learning.forest.RandomForestRegressor",
+                "skopt.learning.forest.ExtraTreesRegressor",
             ],
         ):
             assert hasattr(model, "estimators_"), "Model has no `estimators_`! Have you called `model.fit`?"
@@ -816,14 +810,6 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             self.trees = [SingleTree(e.tree_, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
             self.objective = objective_name_map.get(model.criterion, None)
             self.tree_output = "raw_value"
-        elif safe_isinstance(model, "skopt.learning.forest.ExtraTreesRegressor"):
-            assert hasattr(model, "estimators_"), "Model has no `estimators_`! Have you called `model.fit`?"
-            self.internal_dtype = model.estimators_[0].tree_.value.dtype.type
-            self.input_dtype = np.float32
-            scaling = 1.0 / len(model.estimators_) # output is average of trees
-            self.trees = [SingleTree(e.tree_, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
-            self.objective = objective_name_map.get(model.criterion, None)
-            self.tree_output = "raw_value"
         elif safe_isinstance(
             model,
             [
@@ -852,6 +838,8 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
         elif safe_isinstance(
             model,
             [
+                "sklearn.ensemble.ExtraTreesClassifier",
+                "sklearn.ensemble.forest.ExtraTreesClassifier",
                 "sklearn.ensemble.RandomForestClassifier",
                 "sklearn.ensemble.forest.RandomForestClassifier",
             ],
@@ -863,20 +851,6 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             self.trees = [SingleTree(e.tree_, normalize=True, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
             self.objective = objective_name_map.get(model.criterion, None)
             self.tree_output = "probability"
-        elif safe_isinstance(
-            model,
-            [
-                "sklearn.ensemble.ExtraTreesClassifier",
-                "sklearn.ensemble.forest.ExtraTreesClassifier",
-            ],
-        ): # TODO: add unit test for this case
-            assert hasattr(model, "estimators_"), "Model has no `estimators_`! Have you called `model.fit`?"
-            self.internal_dtype = model.estimators_[0].tree_.value.dtype.type
-            self.input_dtype = np.float32
-            scaling = 1.0 / len(model.estimators_) # output is average of trees
-            self.trees = [SingleTree(e.tree_, normalize=True, scaling=scaling, data=data, data_missing=data_missing) for e in model.estimators_]
-            self.objective = objective_name_map.get(model.criterion, None)
-            self.tree_output = "probability"
         elif safe_isinstance(
             model,
             [
@@ -1085,18 +1059,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             # Some properties of the sklearn API are passed to a DMatrix object in
             # xgboost We need to make sure we do the same here - GH #3313
             self._xgb_dmatrix_props = get_xgboost_dmatrix_properties(model)
-        elif safe_isinstance(model, "xgboost.sklearn.XGBRegressor"):
-            self.original_model = model.get_booster()
-            self._set_xgboost_model_attributes(
-                data,
-                data_missing,
-                objective_name_map,
-                tree_output_name_map,
-            )
-            # Some properties of the sklearn API are passed to a DMatrix object in
-            # xgboost We need to make sure we do the same here - GH #3313
-            self._xgb_dmatrix_props = get_xgboost_dmatrix_properties(model)
-        elif safe_isinstance(model, "xgboost.sklearn.XGBRanker"):
+        elif safe_isinstance(model, ["xgboost.sklearn.XGBRegressor", "xgboost.sklearn.XGBRanker"]):
             self.original_model = model.get_booster()
             self._set_xgboost_model_attributes(
                 data,
@@ -1715,7 +1678,7 @@ def extract_data(node, tree):
     node_sample_weight = np.zeros(m,dtype="float64")
     values_lst = list(nodes_dict.values())
     keys_lst = list(nodes_dict.keys())
-    for i in range(0,len(keys_lst)):
+    for i in range(len(keys_lst)):
         value = values_lst[i]
         key = keys_lst[i]
         if "leaf" in value:
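
All of the _tree.py consolidations above rely on safe_isinstance accepting a list of fully qualified class names, which is what lets the formerly duplicated elif branches (the skopt forests, ExtraTreesClassifier, and XGBRegressor/XGBRanker) collapse into single branches. A simplified sketch of that pattern (not SHAP's exact implementation, which has additional guards):

import sys

def safe_isinstance_sketch(obj, class_path_str):
    # Accept a single dotted name or a list of them.
    paths = [class_path_str] if isinstance(class_path_str, str) else class_path_str
    for path in paths:
        module_name, _, class_name = path.rpartition(".")
        # Only match classes whose module has already been imported.
        module = sys.modules.get(module_name)
        cls = getattr(module, class_name, None) if module else None
        if cls is not None and isinstance(obj, cls):
            return True
    return False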
2 changes: 1 addition & 1 deletion shap/plots/_benchmark.py
@@ -175,7 +175,7 @@ def benchmark(benchmark, show=True):
         txt.set_path_effects([PathEffects.withStroke(linewidth=5, foreground='w')])

     ax = plt.gca()
-    ax.set_yticks([1 - i / (len(methods) - 1) for i in range(0, len(methods))])
+    ax.set_yticks([1 - i / (len(methods) - 1) for i in range(len(methods))])
     ax.set_yticklabels(methods, rotation=0, fontsize=11)

     ax.set_xticks(np.arange(len(metrics) + 1))
4 changes: 2 additions & 2 deletions shap/utils/_masked_model.py
@@ -372,7 +372,7 @@ def _build_fixed_single_output(averaged_outs, last_outs, outputs, batch_position
     #         averaged_outs[0] = np.mean(linearizing_weights * link(last_outs))
     #     else:
     #         averaged_outs[0] = link(np.mean(last_outs))
-    for i in range(0, len(averaged_outs)):
+    for i in range(len(averaged_outs)):
         if batch_positions[i] < batch_positions[i+1]:
             if num_varying_rows[i] == sample_count:
                 last_outs[:] = outputs[batch_positions[i]:batch_positions[i+1]]
@@ -390,7 +390,7 @@ def _build_fixed_multi_output(averaged_outs, last_outs, outputs, batch_positions
     # here we can assume that the outputs will always be the same size, and we need
     # to carry over evaluation outputs
     sample_count = last_outs.shape[0]
-    for i in range(0, len(averaged_outs)):
+    for i in range(len(averaged_outs)):
         if batch_positions[i] < batch_positions[i+1]:
             if num_varying_rows[i] == sample_count:
                 last_outs[:] = outputs[batch_positions[i]:batch_positions[i+1]]
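
Aside (not part of this commit): in both functions above, batch_positions behaves as an array of prefix offsets into outputs, so batch_positions[i] < batch_positions[i+1] tests whether mask i produced any model evaluations. A small illustration under that assumption (the data here is hypothetical):

import numpy as np

outputs = np.array([10.0, 20.0, 30.0])    # stacked model outputs for all masks
batch_positions = np.array([0, 2, 2, 3])  # mask 0 -> rows 0:2, mask 1 -> none, mask 2 -> rows 2:3
for i in range(len(batch_positions) - 1):
    lo, hi = batch_positions[i], batch_positions[i + 1]
    if lo < hi:  # this mask contributed at least one evaluation
        print(i, outputs[lo:hi].mean())
    # otherwise the previous outputs are carried over, as in the diff above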
