Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
… into master
  • Loading branch information
oegedijk committed Apr 1, 2022
2 parents 9bcb491 + 1d926d8 commit 518cefc
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 53 deletions.
65 changes: 41 additions & 24 deletions explainerdashboard/explainer_methods.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
__all__ = [
'IndexNotFoundError',
'append_dict_to_df',
'safe_isinstance',
'guess_shap',
'mape_score',
Expand Down Expand Up @@ -50,6 +51,22 @@
from joblib import Parallel, delayed


def append_dict_to_df(df, row_dict):
    """Return a new dataframe made of the rows of 'df' plus one extra row.

    The extra row is built from 'row_dict' and placed after the existing
    rows. 'df' itself is left untouched; the result is produced by
    pd.concat with ignore_index=True, so its index is renumbered from 0.

    Args:
        df (pd.DataFrame): data frame to extend.
        row_dict (dict): row data, keyed by column name.

    Returns:
        pd.DataFrame
    """
    # Wrap the dict in a list so it is read as a single record (one row).
    new_row = pd.DataFrame([row_dict])
    return pd.concat([df, new_row], ignore_index=True)


class IndexNotFoundError(Exception):
def __init__(self, message="Index not Found", index=None):
if index is not None:
Expand Down Expand Up @@ -1157,8 +1174,8 @@ def get_contrib_summary_df(contrib_df, model_output="raw", round=2, units="", na
else:
effect += str(np.round(row['contribution'], round)) + f" {units}"

contrib_summary_df = contrib_summary_df.append(
dict(Reason=reason, Effect=effect), ignore_index=True)
contrib_summary_df = append_dict_to_df(contrib_summary_df,
dict(Reason=reason, Effect=effect))

return contrib_summary_df.reset_index(drop=True)

Expand Down Expand Up @@ -1225,7 +1242,7 @@ def node_pred_proba(node):
return node.class_counts()[pos_label]/ sum(node.class_counts())
for node in nodes:
if not node.isleaf():
decisiontree_df = decisiontree_df.append({
decisiontree_df = append_dict_to_df(decisiontree_df, {
'node_id' : node.id,
'average' : node_pred_proba(node),
'feature' : node.feature_name(),
Expand All @@ -1237,14 +1254,14 @@ def node_pred_proba(node):
'diff' : node_pred_proba(node.left) - node_pred_proba(node) \
if observation[node.feature_name()] < node.split() \
else node_pred_proba(node.right) - node_pred_proba(node)
}, ignore_index=True)
})

else:
def node_mean(node):
return decision_tree.tree_model.tree_.value[node.id].item()
for node in nodes:
if not node.isleaf():
decisiontree_df = decisiontree_df.append({
decisiontree_df = append_dict_to_df(decisiontree_df, {
'node_id' : node.id,
'average' : node_mean(node),
'feature' : node.feature_name(),
Expand All @@ -1256,7 +1273,7 @@ def node_mean(node):
'diff' : node_mean(node.left) - node_mean(node) \
if observation[node.feature_name()] < node.split() \
else node_mean(node.right) - node_mean(node)
}, ignore_index=True)
})
return decisiontree_df


Expand Down Expand Up @@ -1284,35 +1301,35 @@ def get_decisiontree_summary_df(decisiontree_df, classifier=False, round=2, unit


decisiontree_summary_df = pd.DataFrame(columns=['Feature', 'Condition', 'Adjustment', 'New Prediction'])
decisiontree_summary_df = decisiontree_summary_df.append({
decisiontree_summary_df = append_dict_to_df(decisiontree_summary_df, {
'Feature' : "",
'Condition' : "",
'Adjustment' : "Starting average",
'New Prediction' : str(np.round(base_value, round)) + ('%' if classifier else f' {units}')
}, ignore_index=True)
})

for _, row in decisiontree_df.iterrows():
if classifier:
decisiontree_summary_df = decisiontree_summary_df.append({
decisiontree_summary_df = append_dict_to_df(decisiontree_summary_df, {
'Feature' : row['feature'],
'Condition' : str(row['value']) + str(' >= ' if row['direction'] == 'right' else ' < ') + str(row['split']).ljust(10),
'Adjustment' : str('+' if row['diff'] >= 0 else '') + str(np.round(100*row['diff'], round)) +'%',
'New Prediction' : str(np.round(100*(row['average']+row['diff']), round)) + '%'
}, ignore_index=True)
})
else:
decisiontree_summary_df = decisiontree_summary_df.append({
decisiontree_summary_df = append_dict_to_df(decisiontree_summary_df, {
'Feature' : row['feature'],
'Condition' : str(row['value']) + str(' >= ' if row['direction'] == 'right' else ' < ') + str(row['split']).ljust(10),
'Adjustment' : str('+' if row['diff'] >= 0 else '') + str(np.round(row['diff'], round)),
'New Prediction' : str(np.round((row['average']+row['diff']), round)) + f" {units}"
}, ignore_index=True)
})

decisiontree_summary_df = decisiontree_summary_df.append({
decisiontree_summary_df = append_dict_to_df( decisiontree_summary_df, {
'Feature' : "",
'Condition' : "",
'Adjustment' : "Final Prediction",
'New Prediction' : str(np.round(prediction, round)) + ('%' if classifier else '') + f" {units}"
}, ignore_index=True)
})

return decisiontree_summary_df

Expand Down Expand Up @@ -1379,20 +1396,20 @@ def get_xgboost_path_df(xgbmodel, X_row, n_tree=None):

node = node_dict[0]
while not node['is_leaf']:
prediction_path_df = prediction_path_df.append(
prediction_path_df = append_dict_to_df(prediction_path_df,
dict(
node=node['node'],
feature=node['feature'],
cutoff=node['cutoff'],
value=float(X_row[node['feature']])
), ignore_index=True)
))
if np.isnan(X_row[node['feature']]) or X_row[node['feature']] < node['cutoff']:
node = node_dict[node['left_node']]
else:
node = node_dict[node['right_node']]

if node['is_leaf']:
prediction_path_df = prediction_path_df.append(dict(node=node['node'], feature="_PREDICTION", value=node['leaf_value']), ignore_index=True)
prediction_path_df = append_dict_to_df(prediction_path_df, dict(node=node['node'], feature="_PREDICTION", value=node['leaf_value']))
return prediction_path_df


Expand All @@ -1411,25 +1428,25 @@ def get_xgboost_path_summary_df(xgboost_path_df, output="margin"):

for row in xgboost_path_df.itertuples():
if row.feature == "_PREDICTION":
xgboost_path_summary_df = xgboost_path_summary_df.append(
xgboost_path_summary_df = append_dict_to_df(xgboost_path_summary_df,
dict(
node=row.node,
split_condition=f"prediction ({output}) = {row.value}"
), ignore_index=True
)
)
)
elif row.value < row.cutoff:
xgboost_path_summary_df = xgboost_path_summary_df.append(
xgboost_path_summary_df = append_dict_to_df(xgboost_path_summary_df,
dict(
node=row.node,
split_condition=f"{row.feature} = {row.value} < {row.cutoff}"
), ignore_index=True
)
)
else:
xgboost_path_summary_df = xgboost_path_summary_df.append(
xgboost_path_summary_df = append_dict_to_df(xgboost_path_summary_df,
dict(
node=row.node,
split_condition=f"{row.feature} = {row.value} >= {row.cutoff}"
), ignore_index=True
)
)
return xgboost_path_summary_df

Expand Down
30 changes: 13 additions & 17 deletions explainerdashboard/explainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1191,10 +1191,9 @@ def size_to_string(num, suffix='B'):

memory_df = pd.DataFrame(columns=['property', 'type', 'bytes', 'size'])
for k, v in self.__dict__.items():
memory_df = memory_df.append(dict(
memory_df = append_dict_to_df(memory_df, dict(
property=f"self.{k}", type=v.__class__.__name__,
bytes=get_size(v), size=size_to_string(get_size(v))),
ignore_index=True)
bytes=get_size(v), size=size_to_string(get_size(v))))

print("Explainer total memory usage (approximate): ",
size_to_string(memory_df.bytes.sum()), flush=True)
Expand Down Expand Up @@ -2390,10 +2389,10 @@ def set_shap_values(self, base_value:List[float], shap_values:List):
raise ValueError(f"Expected shap values to have {len(self.X)} rows!")
if sv.shape[1] != len(self.original_cols):
raise ValueError(f"Expected shap values to have {len(self.original_columns)} columns!")
self._shap_values_df.append(
self._shap_values_df = pd.concat([self._shap_values_df,
merge_categorical_shap_values(
pd.DataFrame(sv, columns=self.columns),
self.onehot_dict, self.merged_cols).astype(self.precision)
self.onehot_dict, self.merged_cols).astype(self.precision)]
)
if len(self.labels) == 2:
self._shap_values_df = self._shap_values_df[1]
Expand Down Expand Up @@ -3343,18 +3342,15 @@ def prediction_result_df(self, index=None, X_row=None, round=3):
X_row = X_row.values.astype("float32")
pred = self.model.predict(X_row).item()
preds_df = pd.DataFrame(columns = ["", self.target])
preds_df = preds_df.append(
pd.Series(("Predicted", f"{pred:.{round}f} {self.units}"),
index=preds_df.columns), ignore_index=True)
preds_df = append_dict_to_df(preds_df,
{"": "Predicted", self.target: f"{pred:.{round}f} {self.units}"})
if index is not None:
try:
y_true = self.get_y(index)
preds_df = preds_df.append(
pd.Series(("Observed", f"{y_true:.{round}f} {self.units}"),
index=preds_df.columns), ignore_index=True)
preds_df = preds_df.append(
pd.Series(("Residual", f"{(y_true-pred):.{round}f} {self.units}"),
index=preds_df.columns), ignore_index=True)
preds_df = append_dict_to_df(preds_df,
{"": "Observed", self.target: f"{y_true:.{round}f} {self.units}"})
preds_df = append_dict_to_df(preds_df,
{"": "Residual", self.target: f"{(y_true-pred):.{round}f} {self.units}"})
except Exception as e:
pass
return preds_df
Expand Down Expand Up @@ -3635,10 +3631,10 @@ def graphviz_available(self):
""" """
if not hasattr(self, '_graphviz_available'):
try:
import graphviz.backend as be
import graphviz.backend.execute as be
cmd = ["dot", "-V"]
stdout, stderr = be.run(cmd, capture_output=True, check=True, quiet=True)
except:
be.run_check(cmd, capture_output=True, check=True, quiet=True)
except Exception as e:
print("""
WARNING: you don't seem to have graphviz in your path (cannot run 'dot -V'),
so no dtreeviz visualisation of decision trees will be shown on the shadow trees tab.
Expand Down
19 changes: 11 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
dash>=2
dash-bootstrap-components
click
dash-auth
dash-bootstrap-components<1
dash>=2
dtreeviz>=1.3
flask_simplelogin
graphviz>=0.18.2
joblib
jupyter_dash
dtreeviz>=1.1.4
numpy
pandas>=1.2.1
oyaml
pandas>=1.1
scikit-learn
shap>=0.37
joblib
oyaml
click
waitress
flask_simplelogin
Werkzeug<=2.0.3

9 changes: 5 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from setuptools import setup, find_packages

with open("requirements.txt", "rt") as requirements_file:
requirements = list(filter(None, map(str.strip,
requirements_file.readlines())))

setup(
name='explainerdashboard',
version='0.3.8.1',
Expand Down Expand Up @@ -68,10 +72,7 @@
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Topic :: Scientific/Engineering :: Artificial Intelligence"],
install_requires=['dash>=2', 'dash-bootstrap-components<1', 'jupyter_dash', 'dash-auth',
'dtreeviz>=1.3', 'numpy', 'pandas>=1.1', 'scikit-learn',
'shap>=0.37', 'joblib', 'oyaml', 'click', 'waitress',
'flask_simplelogin', 'orjson'],
install_requires=requirements,
python_requires='>=3.6',
entry_points={
'console_scripts': [
Expand Down

0 comments on commit 518cefc

Please sign in to comment.