Merge branch 'master' of https://github.com/oegedijk/explainerdashboard into master
oegedijk · Apr 1, 2022 · 518cefc · 518cefc
2 parents 9bcb491 + 1d926d8
commit 518cefc
Show file tree

Hide file tree

Showing 4 changed files with 70 additions and 53 deletions.
diff --git a/explainerdashboard/explainer_methods.py b/explainerdashboard/explainer_methods.py
@@ -1,5 +1,6 @@
 __all__ = [
     'IndexNotFoundError',
+    'append_dict_to_df',
     'safe_isinstance',
     'guess_shap',
     'mape_score',
@@ -50,6 +51,22 @@
 from joblib import Parallel, delayed
 
 
+def append_dict_to_df(df, row_dict):
+    """Appends a row to the dataframe 'df' and returns the new
+    dataframe.
+    
+    Args:
+        df (pd.DataFrame): data frame to which the row is appended
+    
+        row_dict (dict): row data
+
+    Returns:
+        pd.DataFrame
+    """
+    return pd.concat([df, pd.DataFrame([row_dict])],
+                     ignore_index=True)
+
+
 class IndexNotFoundError(Exception):
     def __init__(self, message="Index not Found", index=None):
         if index is not None:
@@ -1157,8 +1174,8 @@ def get_contrib_summary_df(contrib_df, model_output="raw", round=2, units="", na
         else:
             effect +=  str(np.round(row['contribution'], round)) + f" {units}"
 
-        contrib_summary_df = contrib_summary_df.append(
-            dict(Reason=reason, Effect=effect), ignore_index=True)
+        contrib_summary_df = append_dict_to_df(contrib_summary_df,
+            dict(Reason=reason, Effect=effect))
 
     return contrib_summary_df.reset_index(drop=True)
 
@@ -1225,7 +1242,7 @@ def node_pred_proba(node):
             return node.class_counts()[pos_label]/ sum(node.class_counts())
         for node in nodes:
             if not node.isleaf():
-                decisiontree_df = decisiontree_df.append({
+                decisiontree_df = append_dict_to_df(decisiontree_df, {
                     'node_id' : node.id,
                     'average' : node_pred_proba(node),
                     'feature' : node.feature_name(),
@@ -1237,14 +1254,14 @@ def node_pred_proba(node):
                     'diff' : node_pred_proba(node.left) - node_pred_proba(node) \
                                 if observation[node.feature_name()] < node.split() \
                                 else node_pred_proba(node.right) - node_pred_proba(node)
-                }, ignore_index=True)
+                })
 
     else:
         def node_mean(node):
             return decision_tree.tree_model.tree_.value[node.id].item()
         for node in nodes:
             if not node.isleaf():
-                decisiontree_df = decisiontree_df.append({
+                decisiontree_df = append_dict_to_df(decisiontree_df, {
                     'node_id' : node.id,
                     'average' : node_mean(node),
                     'feature' : node.feature_name(),
@@ -1256,7 +1273,7 @@ def node_mean(node):
                     'diff' : node_mean(node.left) - node_mean(node) \
                                 if observation[node.feature_name()] < node.split() \
                                 else node_mean(node.right) - node_mean(node)
-                }, ignore_index=True)
+                })
     return decisiontree_df
 
 
@@ -1284,35 +1301,35 @@ def get_decisiontree_summary_df(decisiontree_df, classifier=False, round=2, unit
 
 
     decisiontree_summary_df = pd.DataFrame(columns=['Feature', 'Condition', 'Adjustment', 'New Prediction'])
-    decisiontree_summary_df = decisiontree_summary_df.append({
+    decisiontree_summary_df = append_dict_to_df(decisiontree_summary_df, {
                             'Feature' : "",
                             'Condition' : "",
                             'Adjustment' : "Starting average",
                             'New Prediction' : str(np.round(base_value, round)) + ('%' if classifier else f' {units}')
-                        }, ignore_index=True)
+                        })
 
     for _, row in decisiontree_df.iterrows():
         if classifier:
-            decisiontree_summary_df = decisiontree_summary_df.append({
+            decisiontree_summary_df = append_dict_to_df(decisiontree_summary_df, {
                             'Feature' : row['feature'],
                             'Condition' : str(row['value']) + str(' >= ' if row['direction'] == 'right' else ' < ') + str(row['split']).ljust(10),
                             'Adjustment' : str('+' if row['diff'] >= 0 else '') + str(np.round(100*row['diff'], round)) +'%',
                             'New Prediction' : str(np.round(100*(row['average']+row['diff']), round)) + '%'
-                        }, ignore_index=True)
+                        })
         else:
-            decisiontree_summary_df = decisiontree_summary_df.append({
+            decisiontree_summary_df = append_dict_to_df(decisiontree_summary_df, {
                             'Feature' : row['feature'],
                             'Condition' : str(row['value']) + str(' >= ' if row['direction'] == 'right' else ' < ') + str(row['split']).ljust(10),
                             'Adjustment' : str('+' if row['diff'] >= 0 else '') + str(np.round(row['diff'], round)),
                             'New Prediction' : str(np.round((row['average']+row['diff']), round)) + f" {units}"
-                        }, ignore_index=True)
+                        })
 
-    decisiontree_summary_df = decisiontree_summary_df.append({
+    decisiontree_summary_df = append_dict_to_df( decisiontree_summary_df, {
                         'Feature' : "",
                         'Condition' : "",
                         'Adjustment' : "Final Prediction",
                         'New Prediction' : str(np.round(prediction, round)) + ('%' if classifier else '') + f" {units}"
-                    }, ignore_index=True)
+                    })
 
     return decisiontree_summary_df
 
@@ -1379,20 +1396,20 @@ def get_xgboost_path_df(xgbmodel, X_row, n_tree=None):
 
     node = node_dict[0]
     while not node['is_leaf']:
-        prediction_path_df = prediction_path_df.append(
+        prediction_path_df = append_dict_to_df(prediction_path_df,
             dict(
                 node=node['node'], 
                 feature=node['feature'], 
                 cutoff=node['cutoff'], 
                 value=float(X_row[node['feature']])
-            ), ignore_index=True)
+            ))
         if np.isnan(X_row[node['feature']]) or X_row[node['feature']] < node['cutoff']:
             node = node_dict[node['left_node']]
         else:
             node = node_dict[node['right_node']]
 
     if node['is_leaf']:
-        prediction_path_df = prediction_path_df.append(dict(node=node['node'], feature="_PREDICTION", value=node['leaf_value']), ignore_index=True)
+        prediction_path_df = append_dict_to_df(prediction_path_df, dict(node=node['node'], feature="_PREDICTION", value=node['leaf_value']))
     return prediction_path_df
 
 
@@ -1411,25 +1428,25 @@ def get_xgboost_path_summary_df(xgboost_path_df, output="margin"):
 
     for row in xgboost_path_df.itertuples():
         if row.feature == "_PREDICTION":
-            xgboost_path_summary_df = xgboost_path_summary_df.append(
+            xgboost_path_summary_df = append_dict_to_df(xgboost_path_summary_df,
                 dict(
                     node=row.node, 
                     split_condition=f"prediction ({output}) = {row.value}" 
-                ), ignore_index=True
-            )   
+                )
+            )
         elif row.value < row.cutoff:
-            xgboost_path_summary_df = xgboost_path_summary_df.append(
+            xgboost_path_summary_df = append_dict_to_df(xgboost_path_summary_df,
                 dict(
                     node=row.node, 
                     split_condition=f"{row.feature} = {row.value} < {row.cutoff}"
-                ), ignore_index=True
+                )
             )
         else:
-            xgboost_path_summary_df = xgboost_path_summary_df.append(
+            xgboost_path_summary_df = append_dict_to_df(xgboost_path_summary_df,
                 dict(
                     node=row.node, 
                     split_condition=f"{row.feature} = {row.value} >= {row.cutoff}"
-                ), ignore_index=True
+                )
             )
     return xgboost_path_summary_df
 

diff --git a/explainerdashboard/explainers.py b/explainerdashboard/explainers.py
@@ -1191,10 +1191,9 @@ def size_to_string(num, suffix='B'):
 
         memory_df = pd.DataFrame(columns=['property', 'type', 'bytes', 'size'])
         for k, v in self.__dict__.items():
-            memory_df = memory_df.append(dict(
+            memory_df = append_dict_to_df(memory_df, dict(
                 property=f"self.{k}", type=v.__class__.__name__, 
-                bytes=get_size(v), size=size_to_string(get_size(v))), 
-                ignore_index=True)
+                bytes=get_size(v), size=size_to_string(get_size(v))))
 
         print("Explainer total memory usage (approximate): ", 
                     size_to_string(memory_df.bytes.sum()), flush=True)
@@ -2390,10 +2389,10 @@ def set_shap_values(self, base_value:List[float], shap_values:List):
                 raise ValueError(f"Expected shap values to have {len(self.X)} rows!")
             if sv.shape[1] != len(self.original_cols):
                 raise ValueError(f"Expected shap values to have {len(self.original_columns)} columns!")
-            self._shap_values_df.append(
+            self._shap_values_df = pd.concat([self._shap_values_df,
                 merge_categorical_shap_values(
                     pd.DataFrame(sv, columns=self.columns), 
-                    self.onehot_dict, self.merged_cols).astype(self.precision)
+                    self.onehot_dict, self.merged_cols).astype(self.precision)]
             )
         if len(self.labels) == 2:
             self._shap_values_df = self._shap_values_df[1]
@@ -3343,18 +3342,15 @@ def prediction_result_df(self, index=None, X_row=None, round=3):
             X_row = X_row.values.astype("float32")
         pred = self.model.predict(X_row).item()
         preds_df = pd.DataFrame(columns = ["", self.target])
-        preds_df = preds_df.append(
-                pd.Series(("Predicted", f"{pred:.{round}f} {self.units}"), 
-                        index=preds_df.columns), ignore_index=True)
+        preds_df = append_dict_to_df(preds_df,
+                {"": "Predicted", self.target: f"{pred:.{round}f} {self.units}"})
         if index is not None:
             try:
                 y_true = self.get_y(index)
-                preds_df = preds_df.append(
-                    pd.Series(("Observed", f"{y_true:.{round}f} {self.units}"), 
-                        index=preds_df.columns), ignore_index=True)
-                preds_df = preds_df.append(
-                    pd.Series(("Residual", f"{(y_true-pred):.{round}f} {self.units}"), 
-                        index=preds_df.columns), ignore_index=True)
+                preds_df = append_dict_to_df(preds_df,
+                    {"": "Observed", self.target: f"{y_true:.{round}f} {self.units}"})
+                preds_df = append_dict_to_df(preds_df,
+                    {"": "Residual", self.target: f"{(y_true-pred):.{round}f} {self.units}"})
             except Exception as e:
                 pass
         return preds_df
@@ -3635,10 +3631,10 @@ def graphviz_available(self):
         """ """
         if not hasattr(self, '_graphviz_available'):
             try:
-                import graphviz.backend as be
+                import graphviz.backend.execute as be
                 cmd = ["dot", "-V"]
-                stdout, stderr = be.run(cmd, capture_output=True, check=True, quiet=True)
-            except:
+                be.run_check(cmd, capture_output=True, check=True, quiet=True)
+            except Exception as e:
                 print("""
                 WARNING: you don't seem to have graphviz in your path (cannot run 'dot -V'), 
                 so no dtreeviz visualisation of decision trees will be shown on the shadow trees tab.

diff --git a/requirements.txt b/requirements.txt
@@ -1,14 +1,17 @@
-dash>=2
-dash-bootstrap-components
+click
 dash-auth
+dash-bootstrap-components<1
+dash>=2
+dtreeviz>=1.3
+flask_simplelogin
+graphviz>=0.18.2
+joblib
 jupyter_dash
-dtreeviz>=1.1.4
 numpy
-pandas>=1.2.1
+oyaml
+pandas>=1.1
 scikit-learn
 shap>=0.37
-joblib
-oyaml
-click
 waitress
-flask_simplelogin
+Werkzeug<=2.0.3
+
diff --git a/setup.py b/setup.py
@@ -1,5 +1,9 @@
 from setuptools import setup, find_packages
 
+with open("requirements.txt", "rt") as requirements_file:
+    requirements = list(filter(None, map(str.strip,
+                                         requirements_file.readlines())))
+
 setup(
     name='explainerdashboard',
     version='0.3.8.1',
@@ -68,10 +72,7 @@
         "Intended Audience :: Developers",
         "Intended Audience :: Education",
         "Topic :: Scientific/Engineering :: Artificial Intelligence"],
-    install_requires=['dash>=2', 'dash-bootstrap-components<1', 'jupyter_dash', 'dash-auth',
-                    'dtreeviz>=1.3', 'numpy', 'pandas>=1.1', 'scikit-learn', 
-                    'shap>=0.37', 'joblib', 'oyaml', 'click', 'waitress',
-                    'flask_simplelogin', 'orjson'],
+    install_requires=requirements,
     python_requires='>=3.6',
     entry_points={
         'console_scripts': [