diff --git a/erroranalysis/erroranalysis/_internal/constants.py b/erroranalysis/erroranalysis/_internal/constants.py
index 6e0ce53443..bd0c489d72 100644
--- a/erroranalysis/erroranalysis/_internal/constants.py
+++ b/erroranalysis/erroranalysis/_internal/constants.py
@@ -92,6 +92,29 @@ class Metrics(str, Enum):
     ERROR_RATE = 'error_rate'
 
 
+class MetricKeys(str, Enum):
+    """Provide keys for properties related to metrics.
+    """
+    METRIC_NAME = 'metricName'
+    METRIC_VALUE = 'metricValue'
+
+
+class RootKeys(str, Enum):
+    """Provide keys for the root cohort.
+    """
+    METRIC_NAME = MetricKeys.METRIC_NAME.value
+    METRIC_VALUE = MetricKeys.METRIC_VALUE.value
+    TOTAL_SIZE = 'totalSize'
+    ERROR_COVERAGE = 'errorCoverage'
+
+
+class TreeNode(str, Enum):
+    """Provide the tree node properties.
+    """
+    METRIC_NAME = MetricKeys.METRIC_NAME.value
+    METRIC_VALUE = MetricKeys.METRIC_VALUE.value
+
+
 metric_to_display_name = {
     Metrics.ACCURACY_SCORE: 'Accuracy score',
     Metrics.MEAN_ABSOLUTE_ERROR: 'Mean absolute error',
diff --git a/erroranalysis/erroranalysis/_internal/matrix_filter.py b/erroranalysis/erroranalysis/_internal/matrix_filter.py
index 1e9726f58e..4c316f3ed0 100644
--- a/erroranalysis/erroranalysis/_internal/matrix_filter.py
+++ b/erroranalysis/erroranalysis/_internal/matrix_filter.py
@@ -11,8 +11,8 @@
 from erroranalysis._internal.cohort_filter import filter_from_cohort
 from erroranalysis._internal.constants import (DIFF, PRED_Y, ROW_INDEX,
                                                TRUE_Y,
-                                               MatrixParams, Metrics,
-                                               ModelTask,
+                                               MatrixParams, MetricKeys,
+                                               Metrics, ModelTask,
                                                metric_to_display_name)
 from erroranalysis._internal.metrics import (get_ordered_classes,
                                              is_multi_agg_metric,
@@ -26,8 +26,8 @@
 INTERVAL_MIN = 'intervalMin'
 INTERVAL_MAX = 'intervalMax'
 MATRIX = 'matrix'
-METRIC_VALUE = 'metricValue'
-METRIC_NAME = 'metricName'
+METRIC_VALUE = MetricKeys.METRIC_VALUE
+METRIC_NAME = MetricKeys.METRIC_NAME
 VALUES = 'values'
 PRECISION = 100
 TP = 'tp'
diff --git a/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py b/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
index 193d7a10ce..0d66c79d7e 100644
--- a/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
+++ b/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
@@ -15,7 +15,7 @@
                                                PRED_Y, ROW_INDEX,
                                                SPLIT_FEATURE, SPLIT_INDEX,
                                                TRUE_Y, CohortFilterMethods,
-                                               Metrics, ModelTask,
+                                               Metrics, ModelTask, TreeNode,
                                                error_metrics, f1_metrics,
                                                metric_to_display_name,
                                                precision_metrics,
@@ -620,8 +620,8 @@ def get_json_node(arg, condition, error, nodeid, method, node_name,
         "size": float(total),
         "sourceRowKeyHash": "hashkey",  # Note: remove this eventually
         "success": float(success),  # Note: remove this eventually
-        "metricName": metric_name,
-        "metricValue": float(metric_value),
+        TreeNode.METRIC_NAME: metric_name,
+        TreeNode.METRIC_VALUE: float(metric_value),
         "isErrorMetric": is_error_metric
     }
 
diff --git a/erroranalysis/erroranalysis/analyzer/error_analyzer.py b/erroranalysis/erroranalysis/analyzer/error_analyzer.py
index 4e3430ac18..51041a81b0 100644
--- a/erroranalysis/erroranalysis/analyzer/error_analyzer.py
+++ b/erroranalysis/erroranalysis/analyzer/error_analyzer.py
@@ -12,9 +12,12 @@
                                        mutual_info_regression)
 from sklearn.preprocessing import OrdinalEncoder
 
-from erroranalysis._internal.constants import MatrixParams, Metrics, ModelTask
+from erroranalysis._internal.constants import (MatrixParams, Metrics,
+                                               ModelTask, RootKeys,
+                                               metric_to_display_name)
 from erroranalysis._internal.matrix_filter import \
     compute_matrix as _compute_matrix
+from erroranalysis._internal.metrics import metric_to_func
 from erroranalysis._internal.surrogate_error_tree import \
     compute_error_tree as _compute_error_tree
 from erroranalysis._internal.utils import generate_random_unique_indexes
@@ -23,6 +26,7 @@
 
 BIN_THRESHOLD = MatrixParams.BIN_THRESHOLD
 IMPORTANCES_THRESHOLD = 50000
+ROOT_COVERAGE = 100
 
 
 class BaseAnalyzer(ABC):
@@ -298,7 +302,8 @@ def create_error_report(self,
                             max_depth=None,
                             num_leaves=None,
                             min_child_samples=None,
-                            compute_importances=False):
+                            compute_importances=False,
+                            compute_root_stats=False):
         """Creates the error analysis ErrorReport.
 
         The ErrorReport contains the importances, heatmap and tree view json.
@@ -316,6 +321,9 @@
         :type min_child_samples: int
         :param compute_importances: If true, computes and adds the
             correlation of features and the error to the ErrorReport.
+        :type compute_importances: bool
+        :param compute_root_stats: If true, computes and adds the root stats.
+        :type compute_root_stats: bool
         :return: The computed error analysis ErrorReport.
         :rtype: dict
         """
@@ -333,10 +341,14 @@
         importances = None
         if compute_importances:
             importances = self.compute_importances()
+        root_stats = None
+        if compute_root_stats:
+            root_stats = self.compute_root_stats()
         return ErrorReport(tree, matrix,
                            tree_features=self.feature_names,
                            matrix_features=filter_features,
-                           importances=importances)
+                           importances=importances,
+                           root_stats=root_stats)
 
     def compute_importances(self):
         """Compute the importances or correlation between features and error.
@@ -375,6 +387,32 @@
         else:
             return mutual_info_regression(input_data, diff).tolist()
 
+    def compute_root_stats(self):
+        """Compute the root all data statistics.
+
+        :return: The computed root statistics.
+        :rtype: dict
+        """
+        if self.metric != Metrics.ERROR_RATE:
+            metric_func = metric_to_func[self.metric]
+            metric_value = metric_func(self.pred_y, self.true_y)
+        else:
+            total = len(self.true_y)
+            if total == 0:
+                metric_value = 0
+            else:
+                diff = self.get_diff()
+                error = sum(diff)
+                metric_value = (error / total) * 100
+        metric_name = metric_to_display_name[self.metric]
+        root_stats = {
+            RootKeys.METRIC_NAME: metric_name,
+            RootKeys.METRIC_VALUE: metric_value,
+            RootKeys.TOTAL_SIZE: len(self.true_y),
+            RootKeys.ERROR_COVERAGE: ROOT_COVERAGE
+        }
+        return root_stats
+
     def update_metric(self, metric):
         """Update the metric used by the error analyzer.
 
@@ -421,6 +459,15 @@
         """
         pass
 
+    @abstractmethod
+    def pred_y(self):
+        """Abstract method to get the predicted y labels.
+
+        :return: The predicted y labels.
+        :rtype: numpy.ndarray or list[] or pandas.Series
+        """
+        pass
+
 
 class ModelAnalyzer(BaseAnalyzer):
     """ModelAnalyzer Class.
@@ -524,6 +571,17 @@ def get_diff(self):
         else:
             return self.model.predict(self.dataset) - self.true_y
 
+    @property
+    def pred_y(self):
+        """Get the computed predicted y values.
+
+        Note for ModelAnalyzer these are computed on the fly.
+
+        :return: The computed predicted y values.
+        :rtype: numpy.ndarray or list[] or pandas.Series
+        """
+        return self.model.predict(self.dataset)
+
 
 class PredictionsAnalyzer(BaseAnalyzer):
     """PredictionsAnalyzer Class.
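Reviewer note (not part of the diff): a minimal usage sketch of the new `compute_root_stats` flag. The iris data, `LogisticRegression` model and variable names are illustrative assumptions; the `ModelAnalyzer` arguments mirror the ones used by the new test_root_stats.py test further down.

```python
# Illustrative sketch only: any trained classifier plus a test split works.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from erroranalysis.analyzer.error_analyzer import ModelAnalyzer

data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# model, evaluation dataset, true labels, feature names, categorical features
analyzer = ModelAnalyzer(model, X_test, y_test, data.feature_names, [])

# compute_root_stats=True adds the root "all data" cohort statistics
# alongside the tree, matrix and (optional) importances.
report = analyzer.create_error_report(compute_importances=True,
                                      compute_root_stats=True)
print(report.root_stats)  # metricName, metricValue, totalSize, errorCoverage
```

The root cohort always covers all of the error by definition, which is why its ERROR_COVERAGE is reported as the fixed ROOT_COVERAGE value of 100 rather than computed.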
diff --git a/erroranalysis/erroranalysis/report/error_report.py b/erroranalysis/erroranalysis/report/error_report.py
index 8daffe6906..aa7570b194 100644
--- a/erroranalysis/erroranalysis/report/error_report.py
+++ b/erroranalysis/erroranalysis/report/error_report.py
@@ -7,9 +7,11 @@
 _ErrorReportVersion1 = '1.0'
 _ErrorReportVersion2 = '2.0'
 _ErrorReportVersion3 = '3.0'
+_ErrorReportVersion4 = '4.0'
 _AllVersions = [_ErrorReportVersion1,
                 _ErrorReportVersion2,
-                _ErrorReportVersion3]
+                _ErrorReportVersion3,
+                _ErrorReportVersion4]
 
 _VERSION = 'version'
 TREE = 'tree'
@@ -19,6 +21,7 @@
 IMPORTANCES = 'importances'
 ID = 'id'
 METADATA = 'metadata'
+ROOT_STATS = 'root_stats'
 
 
 def json_converter(obj):
@@ -57,19 +60,18 @@ def as_error_report(error_dict):
             return ErrorReport(error_dict[TREE],
                                error_dict[MATRIX],
                                error_dict[ID])
-        elif IMPORTANCES not in error_dict:
-            return ErrorReport(error_dict[TREE],
-                               error_dict[MATRIX],
-                               error_dict[TREE_FEATURES],
-                               error_dict[MATRIX_FEATURES],
-                               id=error_dict[ID])
-        else:
-            return ErrorReport(error_dict[TREE],
-                               error_dict[MATRIX],
-                               error_dict[TREE_FEATURES],
-                               error_dict[MATRIX_FEATURES],
-                               importances=error_dict[IMPORTANCES],
-                               id=error_dict[ID])
+        extra_args = {}
+        if IMPORTANCES in error_dict:
+            extra_args[IMPORTANCES] = error_dict[IMPORTANCES]
+        if ROOT_STATS in error_dict:
+            extra_args[ROOT_STATS] = error_dict[ROOT_STATS]
+        if ID in error_dict:
+            extra_args[ID] = error_dict[ID]
+        return ErrorReport(error_dict[TREE],
+                           error_dict[MATRIX],
+                           error_dict[TREE_FEATURES],
+                           error_dict[MATRIX_FEATURES],
+                           **extra_args)
     else:
         return error_dict
 
@@ -93,6 +95,7 @@
                  tree_features=None,
                  matrix_features=None,
                  importances=None,
+                 root_stats=None,
                  id=None):
         """Defines the ErrorReport, which contains the tree and matrix filter.
 
@@ -109,6 +112,8 @@
         :param importances: The feature importances calculated using mutual
             information with the error on the true labels.
         :type importances: list[float]
+        :param root_stats: The statistics for the root all data cohort.
+        :type root_stats: dict
         :param id: The unique identifier for the ErrorReport. A new unique id
             is created if none is specified.
         :type id: str
@@ -119,7 +124,8 @@
         self._tree_features = tree_features
         self._matrix_features = matrix_features
         self._importances = importances
-        self._metadata = {_VERSION: _ErrorReportVersion3}
+        self._root_stats = root_stats
+        self._metadata = {_VERSION: _ErrorReportVersion4}
 
     @property
     def __dict__(self):
@@ -139,6 +145,8 @@
                              METADATA: self._metadata}
         if self._importances is not None:
             error_report_dict[IMPORTANCES] = self._importances
+        if self._root_stats is not None:
+            error_report_dict[ROOT_STATS] = self._root_stats
         return error_report_dict
 
     @property
@@ -197,6 +205,19 @@ def importances(self):
         """
         return self._importances
 
+    @property
+    def root_stats(self):
+        """Returns the root cohort statistics.
+
+        The root cohort statistics are displayed for both the
+        heatmap and tree view. They include the metric name
+        and value for the all data cohort.
+
+        :return: The root all data cohort statistics.
+        :rtype: dict
+        """
+        return self._root_stats
+
     @property
     def id(self):
         """Returns the unique identifier for this ErrorReport.
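A hedged sketch of the round trip that the updated test_error_report.py assertion below exercises. It assumes `json_converter` and `as_error_report` are applied as the `json.dumps` default and `json.loads` object hook respectively, and that `report` is an `ErrorReport` built with `compute_root_stats=True` as in the earlier sketch.

```python
import json

from erroranalysis.report.error_report import (as_error_report,
                                               json_converter)

# Serialize: the overridden __dict__ now carries 'root_stats' and
# version '4.0' in the metadata.
json_str = json.dumps(report, default=json_converter)

# Deserialize: a pre-4.0 payload simply has no 'root_stats' key, so
# as_error_report leaves root_stats as None and older reports still load.
deserialized = json.loads(json_str, object_hook=as_error_report)
assert deserialized.root_stats == report.root_stats
```

Folding importances, root_stats and id into a single `**extra_args` call keeps `as_error_report` from needing a new branch for every optional field a future version adds.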
diff --git a/erroranalysis/tests/test_error_report.py b/erroranalysis/tests/test_error_report.py
index 60a4c1b648..d9be0ea3cd 100644
--- a/erroranalysis/tests/test_error_report.py
+++ b/erroranalysis/tests/test_error_report.py
@@ -107,6 +107,7 @@ def run_error_analyzer(model, X_test, y_test, feature_names,
     assert ea_deserialized.tree_features == report1.tree_features
     assert ea_deserialized.matrix_features == report1.matrix_features
     assert ea_deserialized.importances == report1.importances
+    assert ea_deserialized.root_stats == report1.root_stats
 
     # validate error report does not modify original dataset in ModelAnalyzer
     if isinstance(X_test, pd.DataFrame):
diff --git a/erroranalysis/tests/test_root_stats.py b/erroranalysis/tests/test_root_stats.py
new file mode 100644
index 0000000000..a21552c3cc
--- /dev/null
+++ b/erroranalysis/tests/test_root_stats.py
@@ -0,0 +1,117 @@
+# Copyright (c) Microsoft Corporation
+# Licensed under the MIT License.
+
+import pytest
+from common_utils import (create_binary_classification_dataset,
+                          create_cancer_data, create_housing_data,
+                          create_iris_data, create_models_classification,
+                          create_models_regression, create_simple_titanic_data,
+                          create_titanic_pipeline)
+
+from erroranalysis._internal.constants import (Metrics, RootKeys,
+                                               metric_to_display_name)
+from erroranalysis._internal.error_analyzer import ModelAnalyzer
+from erroranalysis._internal.metrics import metric_to_func
+
+TOL = 1e-10
+
+
+class TestRootStats(object):
+
+    @pytest.mark.parametrize('metric', [Metrics.ERROR_RATE,
+                                        Metrics.MACRO_PRECISION_SCORE,
+                                        Metrics.MICRO_PRECISION_SCORE,
+                                        Metrics.MACRO_RECALL_SCORE,
+                                        Metrics.MICRO_RECALL_SCORE,
+                                        Metrics.MACRO_F1_SCORE,
+                                        Metrics.MICRO_F1_SCORE,
+                                        Metrics.ACCURACY_SCORE])
+    def test_importances_iris(self, metric):
+        X_train, X_test, y_train, y_test, feature_names, _ = create_iris_data()
+
+        models = create_models_classification(X_train, y_train)
+
+        for model in models:
+            categorical_features = []
+            run_error_analyzer(model, X_test, y_test, feature_names,
+                               categorical_features, metric)
+
+    @pytest.mark.parametrize('metric', [Metrics.ERROR_RATE,
+                                        Metrics.PRECISION_SCORE,
+                                        Metrics.RECALL_SCORE,
+                                        Metrics.ACCURACY_SCORE,
+                                        Metrics.F1_SCORE])
+    def test_importances_cancer(self, metric):
+        X_train, X_test, y_train, y_test, feature_names, _ = \
+            create_cancer_data()
+
+        models = create_models_classification(X_train, y_train)
+
+        for model in models:
+            categorical_features = []
+            run_error_analyzer(model, X_test, y_test, feature_names,
+                               categorical_features, metric)
+
+    @pytest.mark.parametrize('metric', [Metrics.ERROR_RATE,
+                                        Metrics.PRECISION_SCORE,
+                                        Metrics.RECALL_SCORE,
+                                        Metrics.ACCURACY_SCORE,
+                                        Metrics.F1_SCORE])
+    def test_importances_binary_classification(self, metric):
+        X_train, y_train, X_test, y_test, _ = \
+            create_binary_classification_dataset()
+        feature_names = list(X_train.columns)
+        models = create_models_classification(X_train, y_train)
+
+        for model in models:
+            categorical_features = []
+            run_error_analyzer(model, X_test, y_test, feature_names,
+                               categorical_features, metric=metric)
+
+    def test_importances_titanic(self):
+        X_train, X_test, y_train, y_test, numeric, categorical = \
+            create_simple_titanic_data()
+        feature_names = categorical + numeric
+        clf = create_titanic_pipeline(X_train, y_train)
+        categorical_features = categorical
+        run_error_analyzer(clf, X_test, y_test, feature_names,
+                           categorical_features)
+
+    @pytest.mark.parametrize('metric', [Metrics.MEAN_SQUARED_ERROR,
+                                        Metrics.MEAN_ABSOLUTE_ERROR])
+    def test_importances_housing(self, metric):
+        X_train, X_test, y_train, y_test, feature_names = \
+            create_housing_data()
+        models = create_models_regression(X_train, y_train)
+
+        for model in models:
+            categorical_features = []
+            run_error_analyzer(model, X_test, y_test, feature_names,
+                               categorical_features, metric)
+
+
+def run_error_analyzer(model, X_test, y_test, feature_names,
+                       categorical_features, metric=Metrics.ERROR_RATE):
+    model_analyzer = ModelAnalyzer(model, X_test, y_test,
+                                   feature_names,
+                                   categorical_features,
+                                   metric=metric)
+    root_stats = model_analyzer.compute_root_stats()
+    metric_name = metric_to_display_name[metric]
+
+    total = len(X_test)
+
+    assert root_stats[RootKeys.METRIC_NAME] == metric_name
+    assert root_stats[RootKeys.TOTAL_SIZE] == total
+    assert root_stats[RootKeys.ERROR_COVERAGE] == 100
+
+    if metric == Metrics.ERROR_RATE:
+        diff = model_analyzer.get_diff()
+        error = sum(diff)
+        metric_value = (error / total) * 100
+    else:
+        metric_func = metric_to_func[metric]
+        metric_value = metric_func(model_analyzer.pred_y,
+                                   model_analyzer.true_y)
+
+    assert root_stats[RootKeys.METRIC_VALUE] == metric_value
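For reference, a tiny worked example of the ERROR_RATE branch that `compute_root_stats` and the test helper above both evaluate; the row counts are made up for illustration.

```python
# 10 misclassified rows out of 80 give a root error rate of 12.5.
diff = [True] * 10 + [False] * 70   # get_diff(): one flag per misclassified row
total = len(diff)                   # 80  -> RootKeys.TOTAL_SIZE
error = sum(diff)                   # 10  (each True counts as 1)
metric_value = (error / total) * 100
assert metric_value == 12.5         # -> RootKeys.METRIC_VALUE
```

For every other metric the root value is simply `metric_to_func[metric](pred_y, true_y)`, which is why this change also adds the `pred_y` property to the analyzers.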