Merge pull request #107 from oegedijk/dev

Dev 0.3.4
oegedijk · Apr 13, 2021 · 479746e
2 parents d46b8fc + ad5afed
commit 479746e
Show file tree

Hide file tree

Showing 8 changed files with 166 additions and 104 deletions.
diff --git a/.gitignore b/.gitignore
@@ -136,6 +136,11 @@ scratch_notebook.ipynb
 scratch_import.py
 show_and_tell_draft.md
 pydata_notebook.ipynb
+meetup_notebook.ipynb
+default_dashboard.yaml
+default_explainer.joblib
+custom_dashboard.yaml
+custom_explainer.joblib
 explainer.joblib
 explainerdashboard.yaml
 clicktest.py

diff --git a/.jovianrc b/.jovianrc
@@ -0,0 +1,3 @@
+{
+  "notebooks": {}
+}
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -1,4 +1,14 @@
 # Release Notes
+
+## Version 0.3.4:
+
+### Bug Fixes
+- Fixes incompatibility bug with dtreeviz >= 1.3
+
+
+### Improvements
+- Raises `ValueError` when passing `shap='deep'` (or `shap='skorch'`), as `shap.DeepExplainer` is not yet correctly supported
+
 ## Version 0.3.3:
 
 Highlights:

diff --git a/TODO.md b/TODO.md
@@ -1,7 +1,12 @@
 
 # TODO
 
-
+- try pytorch model and DeepExplainer
+- try keras model and DeepExplainer
+- do multiclass pdp
+- turn print statements into log statements
+- add ExtraTrees to tree support
+- add feature descriptions component
 ## Bugs:
 
 ## Plots:

diff --git a/explainerdashboard/dashboards.py b/explainerdashboard/dashboards.py
@@ -859,7 +859,7 @@ def run(self, port=None, host='0.0.0.0', use_waitress=False, mode=None, **kwargs
             else:
                 app = self.app
             if mode == 'external':
-                if not self.is_colab:
+                if not self.is_colab or self.mode == 'external':
                     print(f"Starting ExplainerDashboard on http://{get_local_ip_adress()}:{port}\n"
                         "You can terminate the dashboard with "
                         f"ExplainerDashboard.terminate({port})", flush=True)
@@ -1575,10 +1575,12 @@ def dashboard_decks(dashboards, n_cols):
             return card_decks
 
 
-        header = dbc.Jumbotron([
+        header = html.Div([
+            dbc.Jumbotron([
                 html.H1(self.title, className="display-3"),
                 html.Hr(className="my-2"),
                 html.P(self.description, className="lead"),
+            ])
         ], style=dict(marginTop=40))
 
         if self.masonry:

diff --git a/explainerdashboard/explainer_methods.py b/explainerdashboard/explainer_methods.py
@@ -1085,7 +1085,7 @@ def get_decisionpath_df(decision_tree, observation, pos_label=1):
         pd.DataFrame: columns=['node_id', 'average', 'feature', 
             'value', 'split', 'direction', 'left', 'right', 'diff']
     """
-    _, nodes = decision_tree.predict(observation)
+    nodes = decision_tree.predict_path(observation)
 
     decisiontree_df = pd.DataFrame(columns=['node_id', 'average', 'feature',
                                      'value', 'split', 'direction',

diff --git a/explainerdashboard/explainers.py b/explainerdashboard/explainers.py
@@ -253,8 +253,10 @@ def __init__(self, model, X:pd.DataFrame, y:pd.Series=None, permutation_metric:C
                     "compatible with e.g. shap.TreeExplainer or shap.LinearExplainer "
                     "then pass shap='tree' or shap='linear'!")
         else:
-            assert shap in ['tree', 'linear', 'deep', 'kernel'], \
-                ("Only shap='guess', 'tree', 'linear', 'deep', or ' kernel' are "
+            if shap in {'deep', 'skorch'}:
+                raise ValueError("shap.DeepExplainer is not supported for now but we're working on it!")
+            assert shap in ['tree', 'linear', 'deep', 'kernel', 'skorch'], \
+                ("Only shap='guess', 'tree', 'linear', 'deep', ' kernel' or 'skorch' are "
                  " supported for now!.")
             self.shap = shap
         if self.shap == 'kernel':
@@ -846,10 +848,25 @@ def shap_explainer(self):
                 print(f"Generating self.shap_explainer = shap.LinearExplainer(model{X_str})...")
                 self._shap_explainer = shap.LinearExplainer(self.model, 
                     self.X_background if self.X_background is not None else self.X)
-            elif self.shap=='deep':
+            elif self.shap == 'deep':
                 print(f"Generating self.shap_explainer = "
-                      f"shap.DeepExplainer(model{NoX_str})")
-                self._shap_explainer = shap.DeepExplainer(self.model)
+                      f"shap.DeepExplainer(model, X_background)")
+                print("Warning: shap values for shap.DeepExplainer get "
+                        "calculated against X_background, but paramater "
+                        "X_background=None, so using shap.sample(X, 5) instead")
+                self._shap_explainer = shap.DeepExplainer(self.model, 
+                        self.X_background if self.X_background is not None \
+                            else shap.sample(self.X, 5))
+            elif self.shap == 'skorch':
+                print(f"Generating self.shap_explainer = "
+                      f"shap.DeepExplainer(model, X_background)")
+                print("Warning: shap values for shap.DeepExplainer get "
+                        "calculated against X_background, but paramater "
+                        "X_background=None, so using shap.sample(X, 5) instead")
+                import torch
+                self._shap_explainer = shap.DeepExplainer(self.model.module_, 
+                        torch.tensor(self.X_background.values) if self.X_background is not None \
+                            else torch.tensor(shap.sample(self.X, 5).values))
             elif self.shap=='kernel': 
                 if self.X_background is None:
                     print(
@@ -889,7 +906,12 @@ def get_shap_values_df(self, pos_label=None):
         """SHAP values calculated using the shap library"""
         if not hasattr(self, '_shap_values_df'):
             print("Calculating shap values...", flush=True)
-            self._shap_values_df = pd.DataFrame(self.shap_explainer.shap_values(self.X), 
+            if self.shap == 'skorch':
+                import torch
+                self._shap_values_df = pd.DataFrame(self.shap_explainer.shap_values(torch.tensor(self.X.values)), 
+                                    columns=self.columns)
+            else:
+                self._shap_values_df = pd.DataFrame(self.shap_explainer.shap_values(self.X), 
                                     columns=self.columns)
             self._shap_values_df = merge_categorical_shap_values(
                     self._shap_values_df, self.onehot_dict, self.merged_cols).astype(self.precision)
@@ -2115,9 +2137,24 @@ def shap_explainer(self):
 
                 self._shap_explainer = shap.LinearExplainer(self.model, 
                                             self.X_background if self.X_background is not None else self.X)
-            elif self.shap=='deep':
-                print("Generating self.shap_explainer = shap.DeepExplainer(model{', X_background' if self.X_background is not None else ''})")
-                self._shap_explainer = shap.DeepExplainer(self.model, self.X_background)
+            elif self.shap == 'deep':
+                print(f"Generating self.shap_explainer = "
+                      f"shap.DeepExplainer(model, X_background)")
+                print("Warning: shap values for shap.DeepExplainer get "
+                        "calculated against X_background, but paramater "
+                        "X_background=None, so using shap.sample(X, 5) instead")
+                self._shap_explainer = shap.DeepExplainer(self.model, 
+                        self.X_background if self.X_background is not None \
+                            else shap.sample(self.X, 5))
+            elif self.shap == 'skorch':
+                print(f"Generating self.shap_explainer = "
+                      f"shap.DeepExplainer(model, X_background)")
+                print("Warning: shap values for shap.DeepExplainer get "
+                        "calculated against X_background, but paramater "
+                        "X_background=None, so using shap.sample(X, 5) instead")
+                self._shap_explainer = shap.DeepExplainer(self.model.module_, 
+                        self.X_background if self.X_background is not None \
+                            else shap.sample(self.X, 5).values)
             elif self.shap=='kernel': 
                 if self.X_background is None:
                     print(
@@ -2173,7 +2210,7 @@ def get_shap_values_df(self, pos_label=None):
         """SHAP Values"""
         if not hasattr(self, '_shap_values_df'):
             print("Calculating shap values...", flush=True)
-            _shap_values = self.shap_explainer.shap_values(self.X)
+            _shap_values = self.shap_explainer.shap_values(self.X.values)
 
             if len(self.labels) == 2:
                 if not isinstance(_shap_values, list):

diff --git a/setup.py b/setup.py
@@ -1,90 +1,90 @@
-from setuptools import setup, find_packages
-
-setup(
-    name='explainerdashboard',
-    version='0.3.3.1',
-    description='Quickly build Explainable AI dashboards that show the inner workings of so-called "blackbox" machine learning models.',
-    long_description="""
-
-This package makes it convenient to quickly deploy a dashboard web app
-that explains the workings of a (scikit-learn compatible) fitted machine 
-learning model. The dashboard provides interactive plots on model performance, 
-feature importances, feature contributions to individual predictions, 
-partial dependence plots, SHAP (interaction) values, visualisation of individual
-decision trees, etc. 
-
-
-The goal is manyfold:
-
-    - Make it easy for data scientists to quickly inspect the inner workings and 
-        performance of their model with just a few lines of code
-    - Make it possible for non data scientist stakeholders such as co-workers,
-        managers, directors, internal and external watchdogs to interactively 
-        inspect the inner workings of the model without having to depend 
-        on a data scientist to generate every plot and table
-    - Make it easy to build a custom application that explains individual 
-        predictions of your model for customers that ask for an explanation
-    - Explain the inner workings of the model to the people working with 
-        model in a human-in-the-loop deployment so that they gain understanding 
-        what the model does do and does not do. 
-        This is important so that they can gain an intuition for when the 
-        model is likely missing information and may have to be overruled.
-
-The dashboard includes:
-
-    - SHAP values (i.e. what is the contribution of each feature to each 
-        individual prediction?)
-    - Permutation importances (how much does the model metric deteriorate 
-        when you shuffle a feature?)
-    - Partial dependence plots (how does the model prediction change when 
-        you vary a single feature?
-    - Shap interaction values (decompose the shap value into a direct effect 
-        an interaction effects)
-    - For Random Forests and xgboost models: visualization of individual trees
-        in the ensemble.  
-    - Plus for classifiers: precision plots, confusion matrix, ROC AUC plot, 
-        PR AUC plot, etc
-    - For regression models: goodness-of-fit plots, residual plots, etc.
-
-The library is designed to be modular so that it is easy to design your 
-own custom dashboards so that you can focus on the layout and project specific 
-textual explanations of the dashboard. (i.e. design it so that it will be 
-interpretable for business users in your organization, not just data scientists)
-
-
-A deployed example can be found at http://titanicexplainer.herokuapp.com
-""",
-    license='MIT',
-    packages=find_packages(),
-    package_dir={'explainerdashboard': 'explainerdashboard'},  # the one line where all the magic happens
-    package_data={
-        'explainerdashboard': ['assets/*', 'datasets/*', 'static/*'],
-    },
-    classifiers=[
-        "Development Status :: 3 - Alpha",
-        "Environment :: Web Environment",
-        "Framework :: Dash",
-        "Framework :: Flask",
-        "Intended Audience :: Developers",
-        "Intended Audience :: Education",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence"],
-    install_requires=['dash>=1.19', 'dash-bootstrap-components', 'jupyter_dash', 'dash-auth',
-                    'dtreeviz>=1.1.4', 'numpy', 'pandas>=1.1', 'scikit-learn', 
-                    'shap>=0.37', 'joblib', 'oyaml', 'click', 'waitress',
-                    'flask_simplelogin'],
-    python_requires='>=3.6',
-    entry_points={
-        'console_scripts': [
-            'explainerdashboard = explainerdashboard.cli:explainerdashboard_cli',
-            'explainerhub = explainerdashboard.cli:explainerhub_cli',
-        ],
-    },
-    author='Oege Dijk',
-    author_email='oegedijk@gmail.com',
-    keywords=['machine learning', 'explainability', 'shap', 'feature importances', 'dash'],
-    url='https://github.com/oegedijk/explainerdashboard',
-    project_urls={
-        "Github page": "https://github.com/oegedijk/explainerdashboard/",
-        "Documentation": "https://explainerdashboard.readthedocs.io/",
-    },
-)
+from setuptools import setup, find_packages
+
+setup(
+    name='explainerdashboard',
+    version='0.3.4',
+    description='Quickly build Explainable AI dashboards that show the inner workings of so-called "blackbox" machine learning models.',
+    long_description="""
+
+This package makes it convenient to quickly deploy a dashboard web app
+that explains the workings of a (scikit-learn compatible) fitted machine 
+learning model. The dashboard provides interactive plots on model performance, 
+feature importances, feature contributions to individual predictions, 
+partial dependence plots, SHAP (interaction) values, visualisation of individual
+decision trees, etc. 
+
+
+The goal is manyfold:
+
+    - Make it easy for data scientists to quickly inspect the inner workings and 
+        performance of their model with just a few lines of code
+    - Make it possible for non data scientist stakeholders such as co-workers,
+        managers, directors, internal and external watchdogs to interactively 
+        inspect the inner workings of the model without having to depend 
+        on a data scientist to generate every plot and table
+    - Make it easy to build a custom application that explains individual 
+        predictions of your model for customers that ask for an explanation
+    - Explain the inner workings of the model to the people working with 
+        model in a human-in-the-loop deployment so that they gain understanding 
+        what the model does do and does not do. 
+        This is important so that they can gain an intuition for when the 
+        model is likely missing information and may have to be overruled.
+
+The dashboard includes:
+
+    - SHAP values (i.e. what is the contribution of each feature to each 
+        individual prediction?)
+    - Permutation importances (how much does the model metric deteriorate 
+        when you shuffle a feature?)
+    - Partial dependence plots (how does the model prediction change when 
+        you vary a single feature?
+    - Shap interaction values (decompose the shap value into a direct effect 
+        an interaction effects)
+    - For Random Forests and xgboost models: visualization of individual trees
+        in the ensemble.  
+    - Plus for classifiers: precision plots, confusion matrix, ROC AUC plot, 
+        PR AUC plot, etc
+    - For regression models: goodness-of-fit plots, residual plots, etc.
+
+The library is designed to be modular so that it is easy to design your 
+own custom dashboards so that you can focus on the layout and project specific 
+textual explanations of the dashboard. (i.e. design it so that it will be 
+interpretable for business users in your organization, not just data scientists)
+
+
+A deployed example can be found at http://titanicexplainer.herokuapp.com
+""",
+    license='MIT',
+    packages=find_packages(),
+    package_dir={'explainerdashboard': 'explainerdashboard'},  # the one line where all the magic happens
+    package_data={
+        'explainerdashboard': ['assets/*', 'datasets/*', 'static/*'],
+    },
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "Environment :: Web Environment",
+        "Framework :: Dash",
+        "Framework :: Flask",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Education",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence"],
+    install_requires=['dash>=1.19', 'dash-bootstrap-components', 'jupyter_dash', 'dash-auth',
+                    'dtreeviz>=1.3', 'numpy', 'pandas>=1.1', 'scikit-learn', 
+                    'shap>=0.37', 'joblib', 'oyaml', 'click', 'waitress',
+                    'flask_simplelogin'],
+    python_requires='>=3.6',
+    entry_points={
+        'console_scripts': [
+            'explainerdashboard = explainerdashboard.cli:explainerdashboard_cli',
+            'explainerhub = explainerdashboard.cli:explainerhub_cli',
+        ],
+    },
+    author='Oege Dijk',
+    author_email='oegedijk@gmail.com',
+    keywords=['machine learning', 'explainability', 'shap', 'feature importances', 'dash'],
+    url='https://github.com/oegedijk/explainerdashboard',
+    project_urls={
+        "Github page": "https://github.com/oegedijk/explainerdashboard/",
+        "Documentation": "https://explainerdashboard.readthedocs.io/",
+    },
+)