Skip to content

Commit

Permalink
Make lots more files pass pylint
Browse files Browse the repository at this point in the history
  • Loading branch information
slundberg committed Feb 25, 2021
1 parent a542930 commit 330389f
Show file tree
Hide file tree
Showing 40 changed files with 734 additions and 614 deletions.
17 changes: 10 additions & 7 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,10 @@ disable=print-statement,
invalid-name,
invalid-unary-operand-type,
assignment-from-no-return,
no-member
no-member,
not-callable,
missing-module-docstring,
arguments-differ

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifiers separated by commas (,) or put this option
Expand Down Expand Up @@ -313,7 +316,7 @@ indent-after-paren=4
indent-string=' '

# Maximum number of characters on a single line.
max-line-length=140
max-line-length=150

# Maximum number of lines in a module.
max-module-lines=1000
Expand Down Expand Up @@ -406,7 +409,7 @@ contextmanager-decorators=contextlib.contextmanager
# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=
generated-members=torch.tensor

# Tells whether missing members accessed in a mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
Expand Down Expand Up @@ -503,16 +506,16 @@ valid-metaclass-classmethod-first-arg=cls
[DESIGN]

# Maximum number of arguments for function / method.
max-args=8
max-args=12

# Maximum number of attributes for a class (see R0902).
max-attributes=7
max-attributes=20

# Maximum number of boolean expressions in an if statement.
max-bool-expr=5

# Maximum number of branches for function / method body.
max-branches=12
max-branches=20

# Maximum number of locals for function / method body.
max-locals=50
Expand All @@ -527,7 +530,7 @@ max-public-methods=20
max-returns=6

# Maximum number of statements in function / method body.
max-statements=50
max-statements=80

# Minimum number of public methods for a class (see R0903).
min-public-methods=2
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ install:
- pip install tensorflow xgboost matplotlib ipython codecov torch torchvision pylint sentencepiece
script:
- python -m pytest tests
- pylint tests/explainers/test_tree.py tests/explainers/test_gpu_tree.py shap/explainers/_gpu_tree.py tests/maskers tests/models
- pylint tests shap/models shap/maskers shap/explainers/_gpu_tree.py
after_success:
- codecov
deploy:
Expand Down
2 changes: 0 additions & 2 deletions shap/explainers/_partition.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from ..utils import MaskedModel
import pandas as pd
import scipy as sp
import numpy as np
import warnings
import time
Expand Down
123 changes: 63 additions & 60 deletions shap/maskers/_composite.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,66 @@
import pandas as pd
import numpy as np
import scipy as sp
import scipy.cluster
# import pandas as pd
# import numpy as np
# import scipy as sp
from ._masker import Masker
from ..utils import safe_isinstance

class Composite(Masker):
def __init__(self, *maskers, clustering=None):
""" This merges several maskers for different inputs together into a single composite masker.
Parameters
----------
background_data : np.array, pandas.DataFrame
The background dataset that is used for masking. The number of samples coming out of
the masker (to be integrated over) matches the number of samples in this background
dataset. This means a larger background dataset causes longer runtimes. Normally about
1, 10, 100, or 1000 background samples are reasonable choices.
clustering : "correlation", string or None (default)
The distance metric to use for creating the partition_tree of the features. The
distance function can be any valid scipy.spatial.distance.pdist's metric argument.
However we suggest using 'correlation' in most cases. The full list of options is
‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘cityblock’, ‘correlation’, ‘cosine’, ‘dice’,
‘euclidean’, ‘hamming’, ‘jaccard’, ‘jensenshannon’, ‘kulsinski’, ‘mahalanobis’,
‘matching’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, ‘seuclidean’,
‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’. These are all
the options from scipy.spatial.distance.pdist's metric argument.
"""

self.maskers = maskers

self.output_dataframe = False
if safe_isinstance(background_data, "pandas.core.frame.DataFrame"):
self.input_names = background_data.columns
background_data = background_data.values
self.output_dataframe = True

self.background_data = background_data
self.clustering = clustering

# compute the clustering of the data
if clustering is not None:
bg_no_nan = background_data.copy()
for i in range(bg_no_nan.shape[1]):
np.nan_to_num(bg_no_nan[:,i], nan=np.nanmean(bg_no_nan[:,i]), copy=False)
D = sp.spatial.distance.pdist(bg_no_nan.T + np.random.randn(*bg_no_nan.T.shape)*1e-8, metric=clustering)
self.partition_tree = sp.cluster.hierarchy.complete(D)
else:
self.partition_tree = None

def __call__(self, x, mask=None):

# if mask is not given then we mask all features
if mask is None:
mask = np.zeros(np.prod(x.shape), dtype=np.bool)

out = x * mask + self.background_data * np.invert(mask)

if self.output_dataframe:
return pd.DataFrame(out, columns=self.input_names)
else:
return out
""" This merges several maskers for different inputs together into a single composite masker.
This is not yet implemented.
"""

# def __init__(self, *maskers, clustering=None):
# """ This merges several maskers for different inputs together into a single composite masker.

# Parameters
# ----------
# background_data : np.array, pandas.DataFrame
# The background dataset that is used for masking. The number of samples coming out of
# the masker (to be integrated over) matches the number of samples in this background
# dataset. This means a larger background dataset causes longer runtimes. Normally about
# 1, 10, 100, or 1000 background samples are reasonable choices.

# clustering : "correlation", string or None (default)
# The distance metric to use for creating the partition_tree of the features. The
# distance function can be any valid scipy.spatial.distance.pdist's metric argument.
# However we suggest using 'correlation' in most cases. The full list of options is
# ‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘cityblock’, ‘correlation’, ‘cosine’, ‘dice’,
# ‘euclidean’, ‘hamming’, ‘jaccard’, ‘jensenshannon’, ‘kulsinski’, ‘mahalanobis’,
# ‘matching’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, ‘seuclidean’,
# ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’. These are all
# the options from scipy.spatial.distance.pdist's metric argument.
# """

# self.maskers = maskers

# # self.output_dataframe = False
# # if safe_isinstance(background_data, "pandas.core.frame.DataFrame"):
# # self.input_names = background_data.columns
# # background_data = background_data.values
# # self.output_dataframe = True

# self.background_data = background_data
# self.clustering = clustering

# # compute the clustering of the data
# if clustering is not None:
# bg_no_nan = background_data.copy()
# for i in range(bg_no_nan.shape[1]):
# np.nan_to_num(bg_no_nan[:,i], nan=np.nanmean(bg_no_nan[:,i]), copy=False)
# D = sp.spatial.distance.pdist(bg_no_nan.T + np.random.randn(*bg_no_nan.T.shape)*1e-8, metric=clustering)
# self.partition_tree = sp.cluster.hierarchy.complete(D)
# else:
# self.partition_tree = None

# def __call__(self, x, mask=None):

# # if mask is not given then we mask all features
# if mask is None:
# mask = np.zeros(np.prod(x.shape), dtype=np.bool)

# out = x * mask + self.background_data * np.invert(mask)

# if self.output_dataframe:
# return pd.DataFrame(out, columns=self.input_names)
# else:
# return out
19 changes: 9 additions & 10 deletions shap/maskers/_fixed.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pickle
from ._masker import Masker


class Fixed(Masker):
""" This leaves the input unchanged during masking, and is used for things like scoring labels.
Expand All @@ -9,25 +9,24 @@ class Fixed(Masker):
the labels. These "true" labels are inputs to the function we are explaining, but we don't want
to attribute credit to them, instead we want to consider them fixed and assign all the credit to
the model's input features. This is where the Fixed masker can help, since we can apply it to the
label inputs.
label inputs.
"""
def __init__(self):
pass

def __call__(self, x, mask):
return x
def save(self, out_file, *args):
super(Fixed, self).save(out_file)

def save(self, out_file):
pickle.dump(type(self), out_file)

@classmethod
def load(cls, in_file):
masker_type = pickle.load(in_file)
if not masker_type == cls:
if not masker_type == Fixed:
print("Warning: Saved masker type not same as the one that's attempting to be loaded. Saved masker type: ", masker_type)
return Fixed._load(in_file)

@classmethod
def _load(cls, in_file):
fixed_masker = Fixed()
return fixed_masker
def _load(cls, _):
return Fixed() # note we have no parameters to load
11 changes: 7 additions & 4 deletions shap/maskers/_fixed_composite.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import pickle
import numpy as np
from ._masker import Masker
import pickle

class FixedComposite(Masker):
""" A masker that outputs both the masked data and the original data as a pair.
"""

def __init__(self, masker):
""" Creates a Composite masker from an underlying masker and returns the original args along with the masked output.
Expand Down Expand Up @@ -35,8 +38,8 @@ def __call__(self, mask, *args):
if not isinstance(masked_X, tuple):
masked_X = (masked_X,)
return masked_X + wrapped_args
def save(self, out_file, *args):

def save(self, out_file):
super(FixedComposite, self).save(out_file)
pickle.dump(type(self.masker), out_file)
self.masker.save(out_file)
Expand All @@ -53,4 +56,4 @@ def _load(cls, in_file):
masker_type = pickle.load(in_file)
masker = masker_type.load(in_file)
fixedcomposite_masker = FixedComposite(masker)
return fixedcomposite_masker
return fixedcomposite_masker
Loading

0 comments on commit 330389f

Please sign in to comment.