Skip to content

Commit

Permalink
lime explainer done (#14)
Browse files Browse the repository at this point in the history
* lime explainer done

* getting rid of 2.7 check

* bugs, docs and tests

* added xgb test

* nn tests

* update gitignore

* update tests

* added doctest

* changes as per review
  • Loading branch information
aredier committed Oct 6, 2019
1 parent ff31530 commit ef62206
Show file tree
Hide file tree
Showing 14 changed files with 240 additions and 11 deletions.
45 changes: 45 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,48 @@ ENV/

# mypy
.mypy_cache/

**/*.rs.bk
Cargo.lock
.DS_Stores
.idea/
.cache/
.vscode/
*.pyc
__pycache__/
.python-version
/trelawney.egg-info
/.tox/
/.eggs/
.pytest_cache/
.ipynb_checkpoints/
.DS_Store
node_modules
/dist

# local env files
.env.local
.env.*.local

# Log files
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Editor directories and files
.idea
.vscode
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
.editorconfig
yarn.lock

# doc
docs/_build/*

# notebooks
*.ipynb
!project_template/{{cookiecutter.project_name}}/notebooks/*.ipynb
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ python:
- 3.7
- 3.6
- 3.5
- 2.7
install: pip install -U tox-travis
script: tox
deploy:
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ Before you submit a pull request, check that it meets these guidelines:
2. If the pull request adds functionality, the docs should be updated. Put
your new functionality into a function with a docstring, and add the
feature to the list in README.rst.
3. The pull request should work for Python 2.7, 3.5, 3.6 and 3.7, and for PyPy. Check
3. The pull request should work for Python 3.5, 3.6 and 3.7, and for PyPy. Check
https://travis-ci.org/skanderkam/trelawney/pull_requests
and make sure that the tests pass for all supported Python versions.

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ lint: ## check style with flake8
flake8 trelawney tests

test: ## run tests quickly with the default Python
pytest
py.test trelawney tests docs --doctest-modules --doctest-glob='*.rst'

test-all: ## run tests on every Python version with tox
tox
Expand Down
11 changes: 9 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,14 @@

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']
extensions = [
'sphinx.ext.autodoc',
# 'sphinx_autodoc_typehints',
'sphinx.ext.viewcode',
"sphinx.ext.doctest",
'sphinx.ext.intersphinx',
"sphinx_rtd_theme"
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
Expand Down Expand Up @@ -84,7 +91,7 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a
# theme further. For a list of options available for each theme, see the
Expand Down
8 changes: 8 additions & 0 deletions docs/trelawney.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ trelawney.base\_explainer module
:undoc-members:
:show-inheritance:

trelawney.lime\_explainer module
--------------------------------

.. automodule:: trelawney.lime_explainer
:members:
:undoc-members:
:show-inheritance:

trelawney.trelawney module
--------------------------

Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
pandas==0.25.1
scikit-learn==0.21.3
tqdm==4.36.1
lime==0.1.1.36
6 changes: 5 additions & 1 deletion requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,8 @@ Sphinx==1.8.5
twine==1.14.0

pytest==4.6.5
pytest-runner==5.1
pytest-runner==5.1

tensorflow==1.14.0
Keras==2.3.0
xgboost==0.90
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
setup(
author="Skander Kamoun",
author_email='skander.kam2@gmail.com',
python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*',
python_requires='>=3.5, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*',
classifiers=[
'Development Status :: 2 - Pre-Alpha',
'Intended Audience :: Developers',
Expand Down
33 changes: 33 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pandas as pd
import numpy as np
import pytest
from keras import layers, models
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.linear_model import LogisticRegression


@pytest.fixture
def fake_dataset():
    """
    Build a small binary-classification dataset shared by the explainer tests.

    The feature frame has 100 rows and two columns: ``real`` (the integers 0..99, which fully determine the
    target) and ``fake`` (gaussian noise unrelated to the target). The target is ``True`` for every row whose
    ``real`` value is strictly greater than 50.

    :return: a ``(features, target)`` tuple
    """
    # a local, seeded generator keeps the noise column identical from one run to the next (so the tests comparing
    # the importance of `real` and `fake` do not depend on a lucky draw) without touching numpy's global state
    noise_generator = np.random.RandomState(0)
    features = pd.DataFrame(
        [list(range(100)), noise_generator.normal(size=100).tolist()],
        index=['real', 'fake'],
    ).T
    target = np.array(range(100)) > 50
    return features, target


@pytest.fixture
def fitted_logistic_regression(fake_dataset):
    """Return a ``LogisticRegression`` classifier (default parameters) fitted on the ``fake_dataset`` fixture."""
    features, target = fake_dataset
    return LogisticRegression().fit(features, target)


@pytest.fixture
def fitted_neural_network(fake_dataset):
    """
    Return a scikit-learn style Keras classifier fitted on the ``fake_dataset`` fixture.

    The network is a single dense softmax layer with two inputs and two outputs, trained for 10 epochs with a
    batch size of 100.
    """

    def build_network():
        # factory handed to KerasClassifier, which calls it to obtain the compiled model
        output_layer = layers.Dense(2, input_shape=(2,), activation='softmax')
        network = models.Sequential([output_layer])
        network.compile(loss='categorical_crossentropy', optimizer='adam')
        return network

    features, target = fake_dataset
    classifier = KerasClassifier(build_network, epochs=10, batch_size=100)
    classifier.fit(features, target)
    return classifier
50 changes: 50 additions & 0 deletions tests/test_lime_explainer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import pandas as pd
import pytest

from xgboost import XGBClassifier

from trelawney.lime_explainer import LimeExplainer


def _do_explainer_test(explainer):
    """
    Shared check for a fitted explainer: explain two samples and verify that, for each of them, the ``real``
    feature carries more weight (in absolute value) than the ``fake`` one.
    """
    samples = pd.DataFrame([[5, 0.1], [95, -0.5]])
    explanations = explainer.explain_local(samples)
    assert len(explanations) == 2
    for contributions in explanations:
        assert abs(contributions['real']) > abs(contributions['fake'])


def test_lime_explainer_single(fake_dataset, fitted_logistic_regression):
    """Explaining a one-row frame yields exactly one explanation, dominated by the ``real`` feature."""
    features, target = fake_dataset
    explainer = LimeExplainer(class_names=['false', 'true'])
    explainer.fit(fitted_logistic_regression, features, target)

    single_sample = pd.DataFrame([[5, 0.1]])
    explanations = explainer.explain_local(single_sample)

    assert len(explanations) == 1
    contributions = explanations[0]
    assert abs(contributions['real']) > abs(contributions['fake'])


def test_lime_explainer_multiple(fake_dataset, fitted_logistic_regression):
    """Run the shared two-sample check against a logistic regression."""
    features, target = fake_dataset
    lime_explainer = LimeExplainer(class_names=['false', 'true'])
    lime_explainer.fit(fitted_logistic_regression, features, target)
    _do_explainer_test(lime_explainer)


def test_lime_xgb(fake_dataset):
    """
    Run the shared two-sample check against an XGBoost classifier, and verify that raw numpy input is rejected
    with a ``TypeError``.
    """
    features, target = fake_dataset
    booster = XGBClassifier()
    booster.fit(features.values, target)

    lime_explainer = LimeExplainer()
    lime_explainer.fit(booster, features, target)

    # only dataframes are accepted by explain_local
    with pytest.raises(TypeError):
        lime_explainer.explain_local(features.values)
    _do_explainer_test(lime_explainer)


def test_lime_nn(fake_dataset, fitted_neural_network):
    """Run the shared two-sample check against the Keras neural network fixture."""
    explainer = LimeExplainer(class_names=['false', 'true'])
    explainer.fit(fitted_neural_network, *fake_dataset)
    # same samples and assertions as the other models: reuse the shared helper instead of duplicating its body
    _do_explainer_test(explainer)
6 changes: 2 additions & 4 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@ setenv =
PYTHONPATH = {toxinidir}
deps =
-r{toxinidir}/requirements_dev.txt
; If you want to make tox run the tests with the same versions, create a
; requirements.txt with the pinned versions and uncomment the following line:
; -r{toxinidir}/requirements.txt
-r{toxinidir}/requirements.txt
commands =
pip install -U pip
pytest --basetemp={envtmpdir}
py.test trelawney tests docs --doctest-modules --doctest-glob='*.rst' --basetemp={envtmpdir}

80 changes: 80 additions & 0 deletions trelawney/lime_explainer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from typing import List, Optional, Dict

import pandas as pd
import sklearn
from lime import lime_tabular
from tqdm import tqdm

from trelawney.base_explainer import BaseExplainer


class LimeExplainer(BaseExplainer):
    """
    Lime stands for local interpretable model-agnostic explanations and is a package based on
    `this article <https://www.arxiv.org/abs/1602.04938>`_. Lime will explain a single prediction of your model
    by creating a local approximation of your model around said prediction.

    Only binary classification is handled: passing ``class_names`` with a length other than two raises
    ``NotImplementedError``.

    .. testsetup::

        >>> import pandas as pd
        >>> import numpy as np
        >>> from trelawney.lime_explainer import LimeExplainer
        >>> from sklearn.linear_model import LogisticRegression

    .. doctest::

        >>> X = pd.DataFrame([np.array(range(100)), np.random.normal(size=100).tolist()], index=['real', 'fake']).T
        >>> y = np.array(range(100)) > 50
        >>> # training the base model
        >>> model = LogisticRegression().fit(X, y)
        >>> # creating and fitting the explainer
        >>> explainer = LimeExplainer()
        >>> explainer.fit(model, X, y)
        >>> # explaining observation
        >>> explanation = explainer.explain_local(pd.DataFrame([[5, 0.1]]))[0]
        >>> abs(explanation['real']) > abs(explanation['fake'])
        True
    """

    def __init__(self, class_names: Optional[List[str]] = None, categorical_features: Optional[List[str]] = None):
        """
        :param class_names: names of the two classes, forwarded to LIME; ``None`` lets LIME use its defaults
        :param categorical_features: forwarded unchanged to ``LimeTabularExplainer(categorical_features=...)``.
            NOTE(review): LIME documents this argument as a list of column *indices*, so the ``List[str]``
            annotation may be inaccurate -- confirm against callers.
        :raises NotImplementedError: if ``class_names`` is given and does not contain exactly two names
        """
        self._explainer = None
        if class_names is not None and len(class_names) != 2:
            raise NotImplementedError('Trelawney only handles binary classification case for now. PR welcome ;)')
        self.class_names = class_names
        self._output_len = None
        self.categorical_features = categorical_features
        self._model_to_explain = None

    def fit(self, model: sklearn.base.BaseEstimator, x_train: pd.DataFrame, y_train: pd.DataFrame):
        """
        Store the model to explain and build the underlying LIME tabular explainer from the training data.

        :param model: the (already trained) model to explain; its ``predict_proba`` is used by ``explain_local``
        :param x_train: training features; the values feed LIME and the column names become the feature names
        :param y_train: training target, currently unused by this explainer
        """
        self._model_to_explain = model
        self._explainer = lime_tabular.LimeTabularExplainer(x_train.values, feature_names=x_train.columns,
                                                            class_names=self.class_names,
                                                            categorical_features=self.categorical_features,
                                                            discretize_continuous=True)

    def feature_importance(self, x_explain: pd.DataFrame, n_cols: Optional[int] = None) -> Dict[str, float]:
        """
        Global explanation is deliberately not provided for LIME.

        :raises NotImplementedError: always
        """
        raise NotImplementedError('we are not sure global explanation is mathematically sound for LIME, it is still'
                                  ' debated, refer to https://github.com/skanderkam/trelawney/issues/10')

    @staticmethod
    def _extract_col_from_explanation(col_explanation):
        """
        Recover the feature name from one of the feature descriptions produced by LIME.

        The descriptions handled are ``'<name> <= <x>'``, ``'<name> > <x>'``, ``'<low> < <name> <= <high>'``
        (discretized continuous features) and ``'<name>=<value>'`` (categorical features). Any other string is
        returned unchanged. Feature names may contain whitespace; names that themselves contain one of the
        comparison separators cannot be told apart from the bounds and are not supported.

        :param col_explanation: the description of a feature, as found in ``Explanation.as_list()``
        :return: the name of the feature the description is about
        """
        lower_bound_sep, upper_bound_sep, greater_sep = ' < ', ' <= ', ' > '
        if lower_bound_sep in col_explanation and upper_bound_sep in col_explanation:
            # two sided bin: drop the lower bound on the left, then the upper bound on the right
            without_lower_bound = col_explanation.split(lower_bound_sep, 1)[1]
            return without_lower_bound.rsplit(upper_bound_sep, 1)[0]
        for separator in (upper_bound_sep, greater_sep):
            if separator in col_explanation:
                # one sided bin: the name is everything before the comparison
                return col_explanation.rsplit(separator, 1)[0]
        if '=' in col_explanation:
            # categorical feature, described as 'name=value'
            return col_explanation.split('=', 1)[0]
        return col_explanation

    def explain_local(self, x_explain: pd.DataFrame, n_cols: Optional[int] = None) -> List[Dict[str, float]]:
        """
        Explain the prediction of the fitted model for every row of ``x_explain``.

        :param x_explain: the samples to explain, one per row, with columns in the same order as the training data
        :param n_cols: maximum number of features in each explanation; defaults to the number of columns
        :return: one dictionary per row, mapping feature names to the weight LIME gave them (as returned by
            ``Explanation.as_list()`` with its default label)
        :raises TypeError: if ``x_explain`` is not a dataframe
        :raises sklearn.exceptions.NotFittedError: if ``fit`` was not called first (this is a subclass of both
            ``ValueError`` and ``AttributeError``, so code catching the former ``AttributeError`` still works)
        """
        if not isinstance(x_explain, pd.DataFrame):
            raise TypeError('{} is not supported, please use dataframes'.format(type(x_explain)))
        if self._explainer is None or self._model_to_explain is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError('this LimeExplainer is not fitted yet, call `fit` before explaining samples')
        n_cols = n_cols or len(x_explain.columns)
        res = []
        # iterrows is a generator: give tqdm the number of rows so that it can display the actual progress
        for _, sample in tqdm(x_explain.iterrows(), total=len(x_explain)):
            # LIME documents `data_row` as a 1d numpy array, hence `.values` rather than the pandas Series
            individual_explanation = self._explainer.explain_instance(sample.values,
                                                                      self._model_to_explain.predict_proba,
                                                                      num_features=n_cols,
                                                                      top_labels=2)
            res.append({self._extract_col_from_explanation(col_explanation): col_value
                        for col_explanation, col_value in individual_explanation.as_list()})
        return res

0 comments on commit ef62206

Please sign in to comment.