In [1]:
import pandas as pd
import numpy as np

from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show

## Add method to generate selector for mock data

In [2]:
def gen_global_selector(ebm):
    """Build the selector table for a global explanation.

    One row is produced per entry of ``ebm.feature_groups_``; the row carries
    the name and type of the *first* feature index in that group.

    Args:
        ebm: Object exposing ``feature_groups_`` (iterable of index
            sequences), ``feature_names`` and ``feature_types``.

    Returns:
        A pandas DataFrame with the columns ``Name`` and ``Type``.
    """
    rows = [
        {
            "Name": ebm.feature_names[group[0]],
            "Type": ebm.feature_types[group[0]],
        }
        for group in ebm.feature_groups_
    ]
    return pd.DataFrame.from_records(rows, columns=["Name", "Type"])

## Add Mock Preprocessor Object (EBMPreprocessor not exposed in interpret)

In [3]:
class AwesomePreprocessor:
    """Bare stand-in for a preprocessor object.

    The class body is intentionally empty: accessor methods and per-feature
    data are attached to it from outside the class definition.
    """

def _get_bin_labels(self, feature_index):
    min_val = self.col_min_[feature_index]
    cuts = self.col_bin_edges_[feature_index]
    max_val = self.col_max_[feature_index]
    return list(np.concatenate(([min_val], cuts, [max_val])))

def _get_hist_edges(self, feature_index):
    return list(self.hist_edges_[feature_index])

def _get_hist_counts(self, feature_index):
    return list(self.hist_counts_[feature_index])

# Attach the accessor functions to the class so instances expose them as methods.
AwesomePreprocessor._get_bin_labels = _get_bin_labels
AwesomePreprocessor._get_hist_edges = _get_hist_edges
AwesomePreprocessor._get_hist_counts = _get_hist_counts

# Mock preprocessor describing two continuous features (indexes 0 and 1).
preprocessor = AwesomePreprocessor()
preprocessor.col_bin_edges_ = {
    0: np.array([2, 3]),
    1: np.array([20, 25, 40]),
}
preprocessor.col_min_ = {
    0: np.float64(1.0),
    1: np.float64(10),
}
preprocessor.col_max_ = {
    0: np.float64(5),
    1: np.float64(55),
}
preprocessor.col_types_ = ['continuous', 'continuous']
# Histogram data is left empty for both features.
preprocessor.hist_edges_ = {
    0: np.array([]),
    1: np.array([]),
}
preprocessor.hist_counts_ = {
    0: np.array([]),
    1: np.array([]),
}

## Create EBM Classifier with all mock data

In [4]:
ebm = ExplainableBoostingClassifier()

# Hand-written per-term scores, one array per feature group.
ebm.additive_terms_ = [
    np.array([0, 1.0, -1.0, 2.0]),
    np.array([0, -2.0, 10, 0, 7]),
]

# Matching per-term standard deviations (same shapes as the score arrays).
ebm.term_standard_deviations_ = [
    np.array([0, .2, .1, .3]),
    np.array([0, .2, .1, .3, .5]),
]

ebm.classes_ = np.array([0, 1])

ebm.feature_names = ['feature_1', 'feature_2']
ebm.feature_types = ['continuous', 'continuous']
ebm.feature_groups_ = [[0], [1]]
ebm.feature_importances_ = [np.float64(.45), np.float64(.63)]

ebm.preprocessor_ = preprocessor
# The selector is derived from the names/types/groups assigned just above.
ebm.global_selector = gen_global_selector(ebm)

# Mark the estimator as fitted even though fit() was never called.
ebm.has_fitted_ = True




## Does show() work with mocked EBM Classifier?

In [5]:
# Build the global explanation from the hand-populated classifier and render it.
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)

## Declare `_VisualizationModelWrap`

In [6]:
import pandas as pd
import numpy as np
from typing import List
from interpret.glassbox import ExplainableBoostingClassifier as E
import inspect

class _VisualizationModelWrap:
    def __init__(self, inner_model):
        self._inner_model = inner_model

    @property
    def num_features(self) -> int:
        #return self._inner_model.numFeatures
        return 2

    @property
    def feature_names(self) -> List[str]:
         # return self._inner_model.featureNames
        return ['feature_1', 'feature_2']

    @property
    def feature_types(self) -> List[str]:
        # InterpretML has 'categorical', 'ordinal' and 'continuous', but you just have continuous so I think this will
        # be a list with |feature| 'continuous' strings for our purposes
        return ['continuous', 'continuous']

    @property
    def feature_importances(self) -> List[float]:
        # TODO: use T-EBMs bin counts and bin weights to calculate feature importances
        return [0.45, 0.63]

    def _gen_global_selector(self):
        """ Generates a Pandas DataFrame from used to render the selector section of a visualization that allows the
            user to choose different visualizations.

        Returns:
            A Pandas DataFrame used to render the selector section of a visualization that allows the user to choose
            different visualizations.
        """
        records = []

        for feature_idx in range(self.num_features):
            record = {}
            record["Name"] = self.feature_names[feature_idx]
            record["Type"] = self.feature_types[feature_idx]
            records.append(record)

        columns = ["Name", "Type"]
        df = pd.DataFrame.from_records(records, columns=columns)
        return df

    def _get_bounds(self):
        # calculate minimum and maximum of all the model scores; InterpretML implementation in comments below

        # Obtain min/max for model scores
#         lower_bound = np.inf
#         upper_bound = -np.inf
#         for feature_group_index, _ in enumerate(self.feature_groups_):
#             errors = self.term_standard_deviations_[feature_group_index]
#             scores = self.additive_terms_[feature_group_index]
#
#             lower_bound = min(lower_bound, np.min(scores - errors))
#             upper_bound = max(upper_bound, np.max(scores + errors))
#
#         bounds = (lower_bound, upper_bound)

        return (-2.2, 10.1)

    def _get_values_for_bin(self, feature_idx) -> List[float]:
        # self._inner_model.valuesForBin(feature_idx)
        if feature_idx == 0:
            return [1.0, -1.0, 2.0]
        else:
            return [-2.0, 10, 0, 7]

    def _get_stddev_for_bin(self, feature_idx) -> List[float]:
        # self._inner_model.stddevForBin(feature_idx)
        if feature_idx == 0:
            return [.2, .1, .3]
        else:
            return [.2, .1, .3, .5]

    def _get_feature_min(self, feature_idx) -> float:
        # self._inner_model.featureMins(feature_idx)
        if feature_idx == 0:
            return 1.0
        else:
            return 10.0

    def _get_feature_max(self, feature_idx) -> float:
        # self._inner_model.featureMaxes(feature_idx)
        if feature_idx == 0:
            return 5.0
        else:
            return 55.0

    def _get_bin_thresholds(self, feature_idx) -> List[float]:
        # self._inner_model.binThresholds(feature_idx)
        if feature_idx == 0:
            return [2.0, 3.0]
        else:
            return [20.0, 25.0, 40.0]

    def _get_bin_labels(self, feature_idx) -> List[float]:
        feature_type = self.feature_types[feature_idx]
        if feature_type == "continuous":
            min_val = self._get_feature_min(feature_idx)
            cuts = self._get_bin_thresholds(feature_idx)
            max_val = self._get_feature_max(feature_idx)
            return list(np.concatenate(([min_val], cuts, [max_val])))
        else:  # pragma: no cover
            raise Exception("Unknown feature type")

    def _create_internal_object(self):
        data_dicts = []
        feature_list = []

        for feature_idx in range(self.num_features):
            model_graph = np.array(self._get_values_for_bin(feature_idx))
            errors = np.array(self._get_stddev_for_bin(feature_idx))

            bin_labels = self._get_bin_labels(feature_idx)
            scores = list(model_graph)
            upper_bounds = list(model_graph + errors)
            lower_bounds = list(model_graph - errors)

            feature_dict = {
                "type": "univariate",
                "names": bin_labels,
                "scores": scores,
                "scores_range": self._get_bounds(),
                "upper_bounds": upper_bounds,
                "lower_bounds": lower_bounds,
            }

            feature_list.append(feature_dict)

            data_dict = {
                "type": "univariate",
                "names": bin_labels,
                "scores": model_graph,
                "scores_range": self._get_bounds(),
                "upper_bounds": model_graph + errors,
                "lower_bounds": model_graph - errors
            }

            if type(self) is _ClassificationWrap:
                data_dict["meta"] = {
                    "label_names": self.classes
                }

            data_dicts.append(data_dict)

        overall_dict = {
            "type": "univariate",
            "names": self.feature_names,
            "scores": self.feature_importances,
        }

        internal_obj = {
            "overall": overall_dict,
            "specific": data_dicts,
            "mli": [
                {
                    "explanation_type": "ebm_global",
                    "value": {"feature_list": feature_list}
                }
            ]
        }

        return internal_obj


    def explain_global(self, name=None):
        """ Provides global explanation for model.

        Args:
            name: User-defined explanation name.

        Returns:
            An explanation object,
            visualizing feature-value pairs as horizontal bar chart.
        """
        if name is None:
            name = "an intelligent default name for the explanation like 'EBM'"

        return inspect.getmodule(E).EBMExplanation(
            "global",
            self._create_internal_object(),
            feature_names=self.feature_names,
            feature_types=self.feature_types,
            name=name,
            selector=self._gen_global_selector()
        )

class _RegressionWrap(_VisualizationModelWrap):
    """Regression variant of the visualization wrapper.

    Adds nothing to the base class; construction is inherited unchanged and
    takes the wrapped ``inner_model``.
    """

class _ClassificationWrap(_VisualizationModelWrap):
    """Classification variant of the visualization wrapper.

    Construction is inherited unchanged and takes the wrapped
    ``inner_model``; the only addition is the ``classes`` property.
    """

    @property
    def classes(self) -> List[int]:
        """Class labels of the model; currently mocked as ``[0, 1]``."""
        # Intended source: an accessor on self._inner_model -- might still need to be implemented.
        return [0, 1]


## Does show() work with mocked `_VisualizationModelWrap`?

In [8]:
# No inner model is needed here: the wrapper's accessors return mock data.
mocked_viz_wrap = _ClassificationWrap(None)
ebm_global = mocked_viz_wrap.explain_global(name='EBM')
show(ebm_global)