Make lots more files pass pylint

shap · Feb 25, 2021 · 330389f · 330389f
1 parent a542930
commit 330389f
Show file tree

Hide file tree

Showing 40 changed files with 734 additions and 614 deletions.
diff --git a/.pylintrc b/.pylintrc
@@ -142,7 +142,10 @@ disable=print-statement,
         invalid-name,
         invalid-unary-operand-type,
         assignment-from-no-return,
-        no-member
+        no-member,
+        not-callable,
+        missing-module-docstring,
+        arguments-differ
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
@@ -313,7 +316,7 @@ indent-after-paren=4
 indent-string='    '
 
 # Maximum number of characters on a single line.
-max-line-length=140
+max-line-length=150
 
 # Maximum number of lines in a module.
 max-module-lines=1000
@@ -406,7 +409,7 @@ contextmanager-decorators=contextlib.contextmanager
 # List of members which are set dynamically and missed by pylint inference
 # system, and so shouldn't trigger E1101 when accessed. Python regular
 # expressions are accepted.
-generated-members=
+generated-members=torch.tensor
 
 # Tells whether missing members accessed in mixin class should be ignored. A
 # mixin class is detected if its name ends with "mixin" (case insensitive).
@@ -503,16 +506,16 @@ valid-metaclass-classmethod-first-arg=cls
 [DESIGN]
 
 # Maximum number of arguments for function / method.
-max-args=8
+max-args=12
 
 # Maximum number of attributes for a class (see R0902).
-max-attributes=7
+max-attributes=20
 
 # Maximum number of boolean expressions in an if statement.
 max-bool-expr=5
 
 # Maximum number of branch for function / method body.
-max-branches=12
+max-branches=20
 
 # Maximum number of locals for function / method body.
 max-locals=50
@@ -527,7 +530,7 @@ max-public-methods=20
 max-returns=6
 
 # Maximum number of statements in function / method body.
-max-statements=50
+max-statements=80
 
 # Minimum number of public methods for a class (see R0903).
 min-public-methods=2

diff --git a/.travis.yml b/.travis.yml
@@ -20,7 +20,7 @@ install:
 - pip install tensorflow xgboost matplotlib ipython codecov torch torchvision pylint sentencepiece
 script:
   - python -m pytest tests
-  - pylint tests/explainers/test_tree.py tests/explainers/test_gpu_tree.py shap/explainers/_gpu_tree.py tests/maskers tests/models
+  - pylint tests shap/models shap/maskers shap/explainers/_gpu_tree.py
 after_success:
   - codecov
 deploy:

diff --git a/shap/explainers/_partition.py b/shap/explainers/_partition.py
@@ -1,6 +1,4 @@
 from ..utils import MaskedModel
-import pandas as pd
-import scipy as sp
 import numpy as np
 import warnings
 import time

diff --git a/shap/maskers/_composite.py b/shap/maskers/_composite.py
@@ -1,63 +1,66 @@
-import pandas as pd
-import numpy as np
-import scipy as sp
-import scipy.cluster
+# import pandas as pd
+# import numpy as np
+# import scipy as sp
 from ._masker import Masker
-from ..utils import safe_isinstance
 
 class Composite(Masker):
-    def __init__(self, *maskers, clustering=None):
-        """ This merges several maskers for different inputs together into a single composite masker. 
-        
-        Parameters
-        ----------
-        background_data : np.array, pandas.DataFrame
-            The background dataset that is used for masking. The number of samples coming out of
-            the masker (to be integrated over) matches the number of samples in this background
-            dataset. This means larger background dataset cause longer runtimes. Normally about
-            1, 10, 100, or 1000 background samples are reasonable choices.
-
-        clustering : "correlation", string or None (default)
-            The distance metric to use for creating the partition_tree of the features. The
-            distance function can be any valid scipy.spatial.distance.pdist's metric argument.
-            However we suggest using 'correlation' in most cases. The full list of options is
-            ‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘cityblock’, ‘correlation’, ‘cosine’, ‘dice’,
-            ‘euclidean’, ‘hamming’, ‘jaccard’, ‘jensenshannon’, ‘kulsinski’, ‘mahalanobis’,
-            ‘matching’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, ‘seuclidean’,
-            ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’. These are all
-            the options from scipy.spatial.distance.pdist's metric argument.
-        """
-
-        self.maskers = maskers
-
-        self.output_dataframe = False
-        if safe_isinstance(background_data, "pandas.core.frame.DataFrame"):
-            self.input_names = background_data.columns
-            background_data = background_data.values
-            self.output_dataframe = True
-
-        self.background_data = background_data
-        self.clustering = clustering
-
-        # compute the clustering of the data
-        if clustering is not None:
-            bg_no_nan = background_data.copy()
-            for i in range(bg_no_nan.shape[1]):
-                np.nan_to_num(bg_no_nan[:,i], nan=np.nanmean(bg_no_nan[:,i]), copy=False)
-            D = sp.spatial.distance.pdist(bg_no_nan.T + np.random.randn(*bg_no_nan.T.shape)*1e-8, metric=clustering)
-            self.partition_tree = sp.cluster.hierarchy.complete(D)
-        else:
-            self.partition_tree = None
-
-    def __call__(self, x, mask=None):
-
-        # if mask is not given then we mask all features
-        if mask is None:
-            mask = np.zeros(np.prod(x.shape), dtype=np.bool)
-
-        out = x * mask + self.background_data * np.invert(mask)
-
-        if self.output_dataframe:
-            return pd.DataFrame(out, columns=self.input_names)
-        else:
-            return out
+    """ This merges several maskers for different inputs together into a single composite masker.
+
+    This is not yet implemented.
+    """
+
+    # def __init__(self, *maskers, clustering=None):
+    #     """ This merges several maskers for different inputs together into a single composite masker.
+
+    #     Parameters
+    #     ----------
+    #     background_data : np.array, pandas.DataFrame
+    #         The background dataset that is used for masking. The number of samples coming out of
+    #         the masker (to be integrated over) matches the number of samples in this background
+    #         dataset. This means larger background datasets cause longer runtimes. Normally about
+    #         1, 10, 100, or 1000 background samples are reasonable choices.
+
+    #     clustering : "correlation", string or None (default)
+    #         The distance metric to use for creating the partition_tree of the features. The
+    #         distance function can be any valid scipy.spatial.distance.pdist's metric argument.
+    #         However we suggest using 'correlation' in most cases. The full list of options is
+    #         ‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘cityblock’, ‘correlation’, ‘cosine’, ‘dice’,
+    #         ‘euclidean’, ‘hamming’, ‘jaccard’, ‘jensenshannon’, ‘kulsinski’, ‘mahalanobis’,
+    #         ‘matching’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, ‘seuclidean’,
+    #         ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’. These are all
+    #         the options from scipy.spatial.distance.pdist's metric argument.
+    #     """
+
+    #     self.maskers = maskers
+
+    #     # self.output_dataframe = False
+    #     # if safe_isinstance(background_data, "pandas.core.frame.DataFrame"):
+    #     #     self.input_names = background_data.columns
+    #     #     background_data = background_data.values
+    #     #     self.output_dataframe = True
+
+    #     self.background_data = background_data
+    #     self.clustering = clustering
+
+    #     # compute the clustering of the data
+    #     if clustering is not None:
+    #         bg_no_nan = background_data.copy()
+    #         for i in range(bg_no_nan.shape[1]):
+    #             np.nan_to_num(bg_no_nan[:,i], nan=np.nanmean(bg_no_nan[:,i]), copy=False)
+    #         D = sp.spatial.distance.pdist(bg_no_nan.T + np.random.randn(*bg_no_nan.T.shape)*1e-8, metric=clustering)
+    #         self.partition_tree = sp.cluster.hierarchy.complete(D)
+    #     else:
+    #         self.partition_tree = None
+
+    # def __call__(self, x, mask=None):
+
+    #     # if mask is not given then we mask all features
+    #     if mask is None:
+    #         mask = np.zeros(np.prod(x.shape), dtype=np.bool)
+
+    #     out = x * mask + self.background_data * np.invert(mask)
+
+    #     if self.output_dataframe:
+    #         return pd.DataFrame(out, columns=self.input_names)
+    #     else:
+    #         return out
diff --git a/shap/maskers/_fixed.py b/shap/maskers/_fixed.py
@@ -1,6 +1,6 @@
+import pickle
 from ._masker import Masker
 
-
 class Fixed(Masker):
     """ This leaves the input unchanged during masking, and is used for things like scoring labels.
 
@@ -9,25 +9,24 @@ class Fixed(Masker):
     the labels. These "true" labels are inputs to the function we are explaining, but we don't want
     to attribute credit to them, instead we want to consider them fixed and assign all the credit to
     the model's input features. This is where the Fixed masker can help, since we can apply it to the
-    label inputs. 
+    label inputs.
     """
     def __init__(self):
         pass
-    
+
     def __call__(self, x, mask):
         return x
-    
-    def save(self, out_file, *args):
-        super(Fixed, self).save(out_file)
+
+    def save(self, out_file):
+        pickle.dump(type(self), out_file)
 
     @classmethod
     def load(cls, in_file):
         masker_type = pickle.load(in_file)
-        if not masker_type == cls:
+        if not masker_type == Fixed:
             print("Warning: Saved masker type not same as the one that's attempting to be loaded. Saved masker type: ", masker_type)
         return Fixed._load(in_file)
 
     @classmethod
-    def _load(cls, in_file):
-        fixed_masker = Fixed()
-        return fixed_masker
+    def _load(cls, _):
+        return Fixed() # note we have no parameters to load
diff --git a/shap/maskers/_fixed_composite.py b/shap/maskers/_fixed_composite.py
@@ -1,8 +1,11 @@
+import pickle
 import numpy as np
 from ._masker import Masker
-import pickle
 
 class FixedComposite(Masker):
+    """ A masker that outputs both the masked data and the original data as a pair.
+    """
+
     def __init__(self, masker):
         """ Creates a Composite masker from an underlying masker and returns the original args along with the masked output.
 
@@ -35,8 +38,8 @@ def __call__(self, mask, *args):
         if not isinstance(masked_X, tuple):
             masked_X = (masked_X,)
         return masked_X + wrapped_args
-    
-    def save(self, out_file, *args):
+
+    def save(self, out_file):
         super(FixedComposite, self).save(out_file)
         pickle.dump(type(self.masker), out_file)
         self.masker.save(out_file)
@@ -53,4 +56,4 @@ def _load(cls, in_file):
         masker_type = pickle.load(in_file)
         masker = masker_type.load(in_file)
         fixedcomposite_masker = FixedComposite(masker)
-        return fixedcomposite_masker
+        return fixedcomposite_masker