# Error analysis

> Understand your model's shortcomings.

In [None]:
# | default_exp error_analysis.error_analysis

In [None]:
# | hide
from nbdev.showdoc import *

In [None]:
# | export

from typing import Optional, Sequence, Union, Dict

import numpy as np
import pandas as pd
from sklearn.inspection import permutation_importance
from sklearn.base import BaseEstimator

from poniard.preprocessing import PoniardPreprocessor
from poniard.utils.utils import get_kwargs, non_default_repr
from poniard.utils.estimate import element_to_list_maybe, get_target_info
from poniard.estimators.core import PoniardBaseEstimator

Inspecting which classes or target ranges a model struggles with the most is a vital step in the iterative model-building process. `ErrorAnalyzer` aims to streamline this process.

In [None]:
# | export


class ErrorAnalyzer:
    """An error analyzer for predictive models.

    Compare ground truth and predicted target and rank the largest deviations
    (either by probabilities for classifiers and actual values for regressors).

    This class is tightly integrated with `PoniardBaseEstimator`, but does not require it.

    Parameters
    ----------
    task :
        The machine learning task. Either 'regression' or 'classification'.
    """

    def __init__(self, task: str) -> None:
        """Create a standalone analyzer for the given task.

        Parameters
        ----------
        task :
            The machine learning task. Either 'regression' or 'classification'.
        """
        # Captured before anything else is set; presumably `get_kwargs` inspects
        # the calling frame to record the constructor arguments — TODO confirm.
        self._init_params = get_kwargs()
        self.task = task
        # Stays `None` unless the instance is built through `from_poniard`.
        self._poniard: Optional["PoniardBaseEstimator"] = None

    @property
    def _has_poniard(self):
        return True if self._poniard is not None else False

    @classmethod
    def from_poniard(
        cls, poniard: "PoniardBaseEstimator", estimator_names: Union[str, Sequence[str]]
    ):
        """Build an `ErrorAnalyzer` on top of an existing Poniard instance.

        The task and the type of target are read from `poniard`, and the instance keeps
        a reference to it so later methods can pull data and predictions from it.

        Parameters
        ----------
        poniard :
            A `PoniardClassifier` or `PoniardRegressor` instance.
        estimator_names :
            Name or array of names of the estimators for which to compute errors.

        Returns
        -------
        ErrorAnalyzer :
            An instance of the class.
        """
        analyzer = cls(task=poniard.poniard_task)
        analyzer._poniard = poniard
        analyzer.type_of_target = poniard.target_info["type_"]
        # A single name is accepted; the helper is expected to wrap it in a list.
        analyzer.estimator_names = element_to_list_maybe(estimator_names)
        return analyzer

    def _compute_predictions(self):
        """Compute predictions for Poniard estimators."""
        predictions = self._poniard.predict(estimator_names=self.estimator_names)
        probas = None
        if self.type_of_target in ["binary", "multilabel-indicator", "multiclass"]:
            probas = self._poniard.predict_proba(estimator_names=self.estimator_names)
        return predictions, probas

    def rank_errors(
        self,
        y: Optional[Union[np.ndarray, pd.Series, pd.DataFrame]] = None,
        predictions: Optional[Union[np.ndarray, pd.Series, pd.DataFrame]] = None,
        probas: Optional[Union[np.ndarray, pd.Series, pd.DataFrame]] = None,
        exclude_correct: bool = True,
    ):
        """Compare the `y` ground truth with `predictions` and `probas` and sort by the largest deviations.

        If `ErrorAnalyzer.from_poniard` was used, no data needs to be passed to this method.

        In this context "error" refers to:

        * misclassified samples in binary and multiclass problems.
        * misclassified samples in any of the labels for multilabel problems.
        * samples with predicted values outside the `truth - 1SD <-> truth + 1SD`
        range for regression problems.
        * samples with predicted values outside the `truth - 1SD <-> truth + 1SD`
        range in any of the targets for multioutput regression problems.

        Parameters
        ----------
        y :
            Ground truth target.
        predictions :
            Predicted target.
        probas :
            Predicted probabilities for each class in classification tasks.
        exclude_correct :
            Whether to exclude correctly predicted samples in the output ranking. Default True.

        Returns
        -------
        Dict
            Ranked errors
        """
        if self._has_poniard:
            y = self._poniard.y
            predictions, probas = self._compute_predictions()
            type_of_target = self.type_of_target
            ranked_errors = {}
            for estimator in self.estimator_names:
                proc_probas = probas[estimator] if probas is not None else probas
                estimator_errors = self._target_redirect(type_of_target)(
                    y, predictions[estimator], proc_probas, exclude_correct
                )
                ranked_errors.update({estimator: estimator_errors})
            return ranked_errors
        else:
            self.type_of_target = get_target_info(y, task=self.task)["type_"]
            return self._target_redirect(self.type_of_target)(
                y, predictions, probas, exclude_correct
            )

    def _target_redirect(self, type_of_target: str):
        """A router for error ranking depending on the type of the target."""
        if type_of_target == "binary":
            return self._rank_errors_binary
        elif type_of_target == "multiclass":
            return self._rank_errors_multiclass
        elif type_of_target == "multilabel-indicator":
            return self._rank_errors_multilabel
        elif type_of_target == "continuous":
            return self._rank_errors_continuous
        elif type_of_target == "continuous-multioutput":
            return self._rank_errors_continuous_multioutput
        else:
            raise NotImplementedError("Type of target could not be determined.")

    def _rank_errors_binary(
        self,
        y: Union[np.ndarray, pd.Series, pd.DataFrame],
        predictions: Union[np.ndarray, pd.Series, pd.DataFrame],
        probas: Union[np.ndarray, pd.Series, pd.DataFrame],
        exclude_correct: bool = True,
    ):
        errors = pd.DataFrame(
            {
                "y": y,
                "prediction": predictions,
                "proba_0": probas[:, 0],
                "proba_1": probas[:, 1],
            }
        )
        if exclude_correct:
            errors = errors.query("y != prediction")
        errors = errors.assign(error=(errors["y"] - errors["proba_1"]).abs())
        ranked_errors = errors.sort_values("error", ascending=False)
        return {"values": ranked_errors, "idx": ranked_errors.index}

    def _rank_errors_multiclass(
        self,
        y: Union[np.ndarray, pd.Series, pd.DataFrame],
        predictions: Union[np.ndarray, pd.Series, pd.DataFrame],
        probas: Union[np.ndarray, pd.Series, pd.DataFrame],
        exclude_correct: bool = True,
    ):
        data = {"y": y, "prediction": predictions}
        data.update({f"proba_{i}": probas[:, i] for i in range(len(np.unique(y)))})
        errors = pd.DataFrame(data)
        if exclude_correct:
            errors = errors.query("y != prediction")
        errors = errors.assign(
            truth_proba=[
                errors[f"proba_{truth}"].iloc[idx]
                for idx, truth in enumerate(errors["y"])
            ]
        )
        errors = errors.assign(error=(1 - errors["truth_proba"]).abs())
        ranked_errors = errors.sort_values("error", ascending=False)
        return {"values": ranked_errors, "idx": ranked_errors.index}

    def _rank_errors_multilabel(
        self,
        y: Union[np.ndarray, pd.Series, pd.DataFrame],
        predictions: Union[np.ndarray, pd.Series, pd.DataFrame],
        probas: Union[np.ndarray, pd.Series, pd.DataFrame],
        exclude_correct: bool = True,
    ):
        truth = pd.DataFrame(y, columns=[f"y_{i}" for i in range(y.shape[1])])
        preds = pd.DataFrame(
            predictions,
            columns=[f"prediction_{i}" for i in range(y.shape[1])],
        )
        pro = pd.DataFrame(probas, columns=[f"proba_{i}" for i in range(y.shape[1])])
        errors = pd.concat([truth, preds, pro], axis=1)
        if exclude_correct:
            errors = errors.loc[~preds.eq(y).all(axis=1)]
        errors_per_label = (1 - errors[pro.columns]).abs()
        last = lambda x: x[-1]
        zero_array = np.zeros_like(errors_per_label)
        errors_per_label = errors_per_label.mask(
            errors[truth.columns]
            .rename(columns=last)
            .eq(errors[preds.columns].rename(columns=last))
            .values,
            zero_array,
        )
        errors_per_label.columns = [f"error_{i}" for i in range(y.shape[1])]
        errors_per_label = errors_per_label.assign(error=errors_per_label.mean(axis=1))
        errors = pd.concat([errors, errors_per_label], axis=1)
        ranked_errors = errors.sort_values("error", ascending=False)
        return {"values": ranked_errors, "idx": ranked_errors.index}

    def _rank_errors_continuous(
        self,
        y: Union[np.ndarray, pd.Series, pd.DataFrame],
        predictions: Union[np.ndarray, pd.Series, pd.DataFrame],
        probas=None,
        exclude_correct: bool = True,
    ):
        errors = pd.DataFrame({"y": y, "prediction": predictions})
        y_std = np.std(y)
        if exclude_correct:
            errors = errors.query("prediction < y - @y_std | prediction > y + @y_std")
        errors = errors.assign(error=(errors["y"] - errors["prediction"]).abs())
        ranked_errors = errors.sort_values("error", ascending=False)
        return {"values": ranked_errors, "idx": ranked_errors.index}

    def _rank_errors_continuous_multioutput(
        self,
        y: Union[np.ndarray, pd.Series, pd.DataFrame],
        predictions: Union[np.ndarray, pd.Series, pd.DataFrame],
        probas=None,
        exclude_correct: bool = True,
    ):
        truth = pd.DataFrame(y, columns=[f"y_{i}" for i in range(y.shape[1])])
        preds = pd.DataFrame(
            predictions,
            columns=[f"prediction_{i}" for i in range(y.shape[1])],
        )
        errors = pd.concat([truth, preds], axis=1)
        if exclude_correct:
            y_std = np.std(y, axis=0)
            errors = errors.loc[
                (
                    (preds.values < truth.values - y_std)
                    | (preds.values > truth.values + y_std)
                ).any(axis=1)
            ].loc[lambda x: ~x.index.duplicated()]
        errors_per_target = pd.DataFrame(
            np.abs(errors[truth.columns].values - errors[preds.columns].values)
        ).set_index(errors.index)
        errors_per_target.columns = [f"error_{i}" for i in range(y.shape[1])]
        errors_per_target = errors_per_target.assign(
            error=errors_per_target.mean(axis=1)
        )
        errors = pd.concat([errors, errors_per_target], axis=1)
        ranked_errors = errors.sort_values("error", ascending=False)
        return {"values": ranked_errors, "idx": ranked_errors.index}

    @staticmethod
    def merge_errors(errors: Dict[str, Dict[str, Union[pd.DataFrame, pd.Series]]]):
        """Merge multiple error rankings. This is particularly useful when evaluating multiple estimators.

        Compute how many estimators had the specific error and the average error between them.

        This method works best when using `ErrorAnalyzer.from_poniard`, since `errors` can be
        the output of `ErrorAnalyzer.rank_errors`. However, this is not required; as long as
        `errors` is properly defined (`{str: {str: pd.DataFrame, str: pd.Series}}`)

        Parameters
        ----------
        errors :
            Dictionary of errors and error indexes.

        Returns
        -------
        Dict
            Merged errors
        """
        concatenated = pd.concat(
            [
                error_dict["values"].assign(estimator=estimator)
                for estimator, error_dict in errors.items()
            ]
        ).reset_index()
        concatenated = (
            concatenated.groupby("index")
            .agg(
                mean_error=pd.NamedAgg(column="error", aggfunc=np.mean),
                freq=pd.NamedAgg(column="error", aggfunc=np.size),
                estimators=pd.NamedAgg(column="estimator", aggfunc=lambda x: list(x)),
            )
            .sort_values(["freq", "mean_error"], ascending=False)
        )
        return {"values": concatenated, "idx": concatenated.index}

    def analyze_target(
        self,
        errors_idx: pd.Series,
        y: Optional[Union[np.ndarray, pd.Series, pd.DataFrame]] = None,
        reg_bins: int = 5,
        as_ratio: bool = False,
        wrt_target: bool = False,
    ):
        """Analyze which target classes/ranges have the most errors and compare with observed
        target distribution.

        Parameters
        ----------
        errors_idx :
            Index of ranked errors.
        y :
            Ground truth. Not needed if using `ErrorAnalyzer.from_poniard`.
        reg_bins :
            Number of bins in which to place ground truth targets for regression tasks.
        as_ratio :
            Whether to show error ratios instead of error counts per class/bin. Default False.
        wrt_target :
            Whether to compute counts of errors or error ratios with respect
            to the ground truth. Default False.

        Returns
        -------
        pd.DataFrame
            Counts per error.
        """
        type_of_target = self.type_of_target
        if self._has_poniard:
            y = self._poniard.y
        y = pd.DataFrame(y)
        y_errors = y.iloc[errors_idx]

        if type_of_target in ["binary", "multiclass", "multilabel-indicator"]:
            target_names = y.columns.tolist()
        elif type_of_target == "continuous":
            bins = pd.qcut(y.squeeze(), q=reg_bins)
            y = y.assign(bins=bins)
            y_errors = y_errors.assign(bins=bins)
            target_names = "bins"
        elif type_of_target == "continuous-multioutput":
            bins = {
                f"bin_{target}": pd.qcut(y[target], q=reg_bins)
                for target in range(y.shape[1])
            }
            y = y.assign(**bins)
            y_errors = y_errors.assign(**bins)
            target_names = list(bins.keys())
        else:
            raise NotImplementedError("Type of target could not be determined.")
        errors_dist = y_errors.groupby(target_names).size()
        target_dist = y.groupby(target_names).size()
        if as_ratio:
            errors_dist = errors_dist / errors_dist.sum()
            target_dist = target_dist / target_dist.sum()
        if wrt_target:
            output = (errors_dist / target_dist).fillna(0).sort_values(ascending=False)
        else:
            output = pd.DataFrame(errors_dist).join(
                pd.DataFrame(target_dist),
                how="right",
                lsuffix="_errors",
                rsuffix="_target",
            )
            output = output.fillna(0).sort_values(by=output.columns[0], ascending=False)
        return output

    def analyze_features(
        self,
        errors_idx: pd.Series,
        X: Optional[Union[np.ndarray, pd.Series, pd.DataFrame]] = None,
        features: Optional[Sequence[Union[str, int]]] = None,
        estimator_name: Optional[Union[str, BaseEstimator]] = None,
        n_features: Optional[Union[int, float]] = None,
    ):
        """Cross tabulate features with prediction errors.

        Parameters
        ----------
        errors_idx :
            Index of ranked errors.
        X :
            Features array. Not needed if using `ErrorAnalyzer.from_poniard`.
        features :
            Array of features to analyze. If `None`, all features will be analyzed.
        estimator_name :
            Only valid if using `ErrorAnalyzer.from_poniard`. Allows using an estimator to
            compute permutation importances and analyzing only the top `n_features`.
        n_features :
            How many features to analyze based on permutation importances.

        Returns
        -------
        Dict[str, pd.DataFrame]
            Per feature summary.
        """
        if self._has_poniard:
            X = self._poniard.X
            feature_types = self._poniard.feature_types.items()
        else:
            feature_types = (
                PoniardPreprocessor(task="placeholder")
                .build(X, np.zeros((X.shape[0],)))
                .feature_types
            )
        inverted_feature_types = {}
        for k, v in feature_types:
            for i in v:
                inverted_feature_types[i] = k
        X = pd.DataFrame(X).assign(error=lambda x: x.index.isin(errors_idx).astype(int))

        if features:
            most_important_idx = []
        elif estimator_name:
            features = []
            model = self._poniard[estimator_name]
            X_train, X_test, y_train, y_test = self._poniard._train_test_split_from_cv()
            model.fit(X_train, y_train)
            scoring = self._poniard._first_scorer(sklearn_scorer=True)
            random_state = self._poniard.random_state
            importances = permutation_importance(
                model,
                X_test,
                y_test,
                n_repeats=10,
                scoring=scoring,
                random_state=random_state,
            )
            sorted_importances_idx = importances.importances_mean.argsort()[::-1]
            if n_features is None:
                n_features = 0.5
            if isinstance(n_features, float):
                assert 0 <= n_features <= 1
                n_features = round(n_features * X.shape[1])
            most_important_idx = sorted_importances_idx[:n_features].tolist()
        else:
            features = X.columns
            most_important_idx = []
        summary = {}
        for i, feature in enumerate(X.columns):
            if (i in most_important_idx or feature in features) and feature != "error":
                current_feature_type = inverted_feature_types[feature]
                if current_feature_type == "numeric":
                    feature_summary = X.groupby("error")[feature].describe()
                else:
                    feature_summary = (
                        X.groupby("error")[feature]
                        .value_counts(normalize=True, dropna=False)
                        .rename("data")
                        .reset_index(feature)
                        .pivot(columns=feature, values="data")
                    )
                summary[feature] = feature_summary
        return summary

    def __repr__(self):
        # Delegates to the shared Poniard helper; judging by its name it shows only
        # the parameters that differ from their defaults — TODO confirm.
        return non_default_repr(self)

In [None]:
show_doc(ErrorAnalyzer.from_poniard)

---

[source](https://github.com/rxavier/poniard/blob/master/poniard/error_analysis/error_analysis.py#L44){target="_blank" style="float:right; font-size:smaller"}

### ErrorAnalyzer.from_poniard

>      ErrorAnalyzer.from_poniard
>                                  (poniard:poniard.estimators.core.PoniardBaseE
>                                  stimator,
>                                  estimator_names:Union[str,Sequence[str]])

Use a Poniard instance to instantiate `ErrorAnalyzer`.

Automatically sets the task and gives access to the underlying data.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| poniard | PoniardBaseEstimator | A `PoniardClassifier` or `PoniardRegressor` instance. |
| estimator_names | typing.Union[str, typing.Sequence[str]] | Array of estimators for which to compute errors. |
| **Returns** |  | **An instance of the class.** |

In [None]:
show_doc(ErrorAnalyzer.rank_errors)

---

[source](https://github.com/rxavier/poniard/blob/master/poniard/error_analysis/error_analysis.py#L77){target="_blank" style="float:right; font-size:smaller"}

### ErrorAnalyzer.rank_errors

>      ErrorAnalyzer.rank_errors (y:Union[numpy.ndarray,pandas.core.series.Serie
>                                 s,pandas.core.frame.DataFrame,NoneType]=None, 
>                                 predictions:Union[numpy.ndarray,pandas.core.se
>                                 ries.Series,pandas.core.frame.DataFrame,NoneTy
>                                 pe]=None, probas:Union[numpy.ndarray,pandas.co
>                                 re.series.Series,pandas.core.frame.DataFrame,N
>                                 oneType]=None, exclude_correct:bool=True)

Compare the `y` ground truth with `predictions` and `probas` and sort by the largest deviations.

If `ErrorAnalyzer.from_poniard` was used, no data needs to be passed to this method.

In this context "error" refers to:

* misclassified samples in binary and multiclass problems.
* misclassified samples in any of the labels for multilabel problems.
* samples with predicted values outside the `truth - 1SD <-> truth + 1SD`
range for regression problems.
* samples with predicted values outside the `truth - 1SD <-> truth + 1SD`
range in any of the targets for multioutput regression problems.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| y | typing.Union[numpy.ndarray, pandas.core.series.Series, pandas.core.frame.DataFrame, NoneType] | None | Ground truth target. |
| predictions | typing.Union[numpy.ndarray, pandas.core.series.Series, pandas.core.frame.DataFrame, NoneType] | None | Predicted target. |
| probas | typing.Union[numpy.ndarray, pandas.core.series.Series, pandas.core.frame.DataFrame, NoneType] | None | Predicted probabilities for each class in classification tasks. |
| exclude_correct | bool | True | Whether to exclude correctly predicted samples in the output ranking. Default True. |
| **Returns** | **Dict** |  | **Ranked errors** |

`ErrorAnalyzer.rank_errors` works for simple classification...

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

from poniard import PoniardClassifier

In [None]:
# Binary classification example: two classifiers on the breast cancer dataset.
x, y = load_breast_cancer(return_X_y=True, as_frame=True)
pnd = (
    PoniardClassifier(estimators=[KNeighborsClassifier(), LogisticRegression()])
    .setup(x, y, show_info=False)
    .fit()
)
# Data and task are taken from the Poniard instance; only estimator names are needed.
error_analysis = ErrorAnalyzer.from_poniard(
    pnd, ["KNeighborsClassifier", "LogisticRegression"]
)
# One ranking per estimator, keyed by estimator name.
ranked_errors = error_analysis.rank_errors()
ranked_errors["LogisticRegression"]["values"]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0,y,prediction,proba_0,proba_1,error
297,0,1,0.002394,0.997606,0.997606
73,0,1,0.060207,0.939793,0.939793
40,0,1,0.062019,0.937981,0.937981
135,0,1,0.115223,0.884777,0.884777
190,0,1,0.21557,0.78443,0.78443
263,0,1,0.278617,0.721383,0.721383
68,1,0,0.694271,0.305729,0.694271
213,0,1,0.344298,0.655702,0.655702
146,0,1,0.397514,0.602486,0.602486
541,1,0,0.585451,0.414549,0.585451


As well as more complicated setups, such as multioutput regression.

In [None]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.multioutput import MultiOutputRegressor

from poniard import PoniardRegressor
from poniard.preprocessing import PoniardPreprocessor

In [None]:
# Multioutput regression example: synthetic data with two targets.
x, y = make_regression(
    n_samples=1000,
    n_features=10,
    n_targets=2,
    n_informative=3,
    noise=50,
    random_state=0,
)
# Shifts every feature by a single scalar drawn from the unseeded global NumPy RNG,
# so the exact feature values differ between runs.
x += np.random.normal()
prep = PoniardPreprocessor(numeric_threshold=10)
pnd = (
    PoniardRegressor(
        estimators={
            "lr": MultiOutputRegressor(LinearRegression()),
            "knn": MultiOutputRegressor(KNeighborsRegressor()),
        },
        custom_preprocessor=prep,
    )
    .setup(x, y, show_info=False)
    .fit()
)
# Estimators are referred to by the keys of the `estimators` dictionary.
error_analysis = ErrorAnalyzer.from_poniard(pnd, ["lr", "knn"])
ranked_errors = error_analysis.rank_errors()
ranked_errors["knn"]["values"]

  ) = self._setup_transformers()


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0,y_0,y_1,prediction_0,prediction_1,error_0,error_1,error
679,-285.183722,-361.210314,-83.567331,-174.502727,201.616391,186.707587,194.161989
580,-206.914531,-316.893779,-68.562490,-119.157106,138.352042,197.736673,168.044357
466,-193.559624,-270.201613,-47.648875,-83.285570,145.910749,186.916043,166.413396
543,166.559570,307.797957,44.254690,110.607538,122.304880,197.190419,159.747649
110,199.955678,175.857068,0.293321,62.923251,199.662358,112.933817,156.298087
...,...,...,...,...,...,...,...
563,138.150171,34.382971,60.332156,27.100214,77.818015,7.282756,42.550386
911,-94.526886,-32.665129,-15.892835,-27.263218,78.634051,5.401912,42.017982
441,-6.393304,65.144850,73.175490,61.991206,79.568794,3.153644,41.361219
582,1.808688,-64.975064,-76.728970,-61.481938,78.537658,3.493126,41.015392


It can also be used without Poniard.

In [None]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

from poniard import PoniardRegressor
from poniard.preprocessing import PoniardPreprocessor

In [None]:
# Standalone example: no Poniard instance, ground truth and predictions are passed in.
x, y = make_regression(
    n_samples=2000,
    n_features=10,
    n_targets=1,
    n_informative=3,
    noise=50,
    random_state=0,
)
x += np.random.normal()
# Start from perfect predictions, then overwrite up to 50 randomly chosen positions
# (indices may repeat) with one shared random value to create artificial errors.
y_pred = y.copy()
y_pred[np.random.randint(0, y.shape[0], 50)] = np.random.normal()

error_analysis = ErrorAnalyzer(task="regression")
# Without Poniard the result is a single {"values", "idx"} dictionary.
ranked_errors = error_analysis.rank_errors(y, y_pred)
ranked_errors["values"]

Unnamed: 0,y,prediction,error
152,-464.670855,-0.350707,464.320148
975,404.41895,-0.350707,404.769658
1766,315.697748,-0.350707,316.048455
1633,296.745975,-0.350707,297.096683
842,251.852262,-0.350707,252.20297
1207,244.731893,-0.350707,245.082601
1269,-239.847177,-0.350707,239.49647
55,-210.915148,-0.350707,210.56444
1236,-191.913223,-0.350707,191.562515
1076,-189.187016,-0.350707,188.836309


In [None]:
show_doc(ErrorAnalyzer.merge_errors)

---

[source](https://github.com/rxavier/poniard/blob/master/poniard/error_analysis/error_analysis.py#L269){target="_blank" style="float:right; font-size:smaller"}

### ErrorAnalyzer.merge_errors

>      ErrorAnalyzer.merge_errors (errors:Dict[str,Dict[str,Union[pandas.core.fr
>                                  ame.DataFrame,pandas.core.series.Series]]])

Merge multiple error rankings. This is particularly useful when evaluating multiple estimators.

Compute how many estimators had the specific error and the average error between them.

This method works best when using `ErrorAnalyzer.from_poniard`, since `errors` can be 
the output of `ErrorAnalyzer.rank_errors`. However, this is not required; as long as 
`errors` is properly defined (`{str: {str: pd.DataFrame, str: pd.Series}}`)

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| errors | typing.Dict[str, typing.Dict[str, typing.Union[pandas.core.frame.DataFrame, pandas.core.series.Series]]] | Dictionary of errors and error indexes. |
| **Returns** | **Dict** | **Merged errors** |

In [None]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier


from poniard import PoniardClassifier

In [None]:
# Multiclass example: compare the errors of three classifiers on the iris dataset.
x, y = load_iris(return_X_y=True, as_frame=True)
pnd = (
    PoniardClassifier(
        estimators=[
            LogisticRegression(),
            RandomForestClassifier(),
            HistGradientBoostingClassifier(),
        ]
    )
    .setup(x, y, show_info=False)
    .fit()
)
error_analysis = ErrorAnalyzer.from_poniard(
    pnd,
    ["RandomForestClassifier", "LogisticRegression", "HistGradientBoostingClassifier"],
)
ranked_errors = error_analysis.rank_errors()
# Combine the per-estimator rankings: how many estimators missed each sample and
# their mean error.
merged_errors = error_analysis.merge_errors(ranked_errors)
merged_errors["values"]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0_level_0,mean_error,freq,estimators
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
106,0.848222,3,"[RandomForestClassifier, LogisticRegression, H..."
70,0.842455,3,"[RandomForestClassifier, LogisticRegression, H..."
77,0.824483,3,"[RandomForestClassifier, LogisticRegression, H..."
119,0.81789,3,"[RandomForestClassifier, LogisticRegression, H..."
133,0.74106,3,"[RandomForestClassifier, LogisticRegression, H..."
83,0.909402,2,"[RandomForestClassifier, HistGradientBoostingC..."
72,0.793324,2,"[RandomForestClassifier, HistGradientBoostingC..."
129,0.763608,2,"[RandomForestClassifier, HistGradientBoostingC..."
138,0.61,1,[RandomForestClassifier]
134,0.529012,1,[LogisticRegression]


In [None]:
show_doc(ErrorAnalyzer.analyze_target)

---

[source](https://github.com/rxavier/poniard/blob/master/poniard/error_analysis/error_analysis.py#L305){target="_blank" style="float:right; font-size:smaller"}

### ErrorAnalyzer.analyze_target

>      ErrorAnalyzer.analyze_target (errors_idx:pandas.core.series.Series, y:Uni
>                                    on[numpy.ndarray,pandas.core.series.Series,
>                                    pandas.core.frame.DataFrame,NoneType]=None,
>                                    reg_bins:int=5, as_ratio:bool=False,
>                                    wrt_target:bool=False)

Analyze which target classes/ranges have the most errors and compare with observed
target distribution.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| errors_idx | Series |  | Index of ranked errors. |
| y | typing.Union[numpy.ndarray, pandas.core.series.Series, pandas.core.frame.DataFrame, NoneType] | None | Ground truth. Not needed if using `ErrorAnalyzer.from_poniard`. |
| reg_bins | int | 5 | Number of bins in which to place ground truth targets for regression tasks. |
| as_ratio | bool | False | Whether to show error ratios instead of error counts per class/bin. Default False. |
| wrt_target | bool | False | Whether to compute counts of errors or error ratios with respect<br>to the ground truth. Default False. |
| **Returns** | **pd.DataFrame** |  | **Counts per error.** |

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression

from poniard import PoniardRegressor

In [None]:
# Regression example: which target ranges concentrate the errors?
x, y = load_diabetes(return_X_y=True, as_frame=True)
pnd = PoniardRegressor(estimators=LinearRegression()).setup(x, y, show_info=False).fit()
error_analysis = ErrorAnalyzer.from_poniard(pnd, ["LinearRegression"])
ranked_errors = error_analysis.rank_errors()

# Error counts per target bin next to the number of samples in each bin.
error_analysis.analyze_target(ranked_errors["LinearRegression"]["idx"])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0_level_0,0_errors,0_target
bins,Unnamed: 1_level_1,Unnamed: 2_level_1
"(232.0, 346.0]",33,88
"(24.999, 77.0]",18,91
"(77.0, 115.0]",9,87
"(168.0, 232.0]",9,87
"(115.0, 168.0]",5,89


In [None]:
error_analysis.analyze_target(ranked_errors["LinearRegression"]["idx"], wrt_target=True)

bins
(232.0, 346.0]    0.375000
(24.999, 77.0]    0.197802
(77.0, 115.0]     0.103448
(168.0, 232.0]    0.103448
(115.0, 168.0]    0.056180
dtype: float64

In [None]:
show_doc(ErrorAnalyzer.analyze_features)

---

[source](https://github.com/rxavier/poniard/blob/master/poniard/error_analysis/error_analysis.py#L375){target="_blank" style="float:right; font-size:smaller"}

### ErrorAnalyzer.analyze_features

>      ErrorAnalyzer.analyze_features (errors_idx:pandas.core.series.Series, X:U
>                                      nion[numpy.ndarray,pandas.core.series.Ser
>                                      ies,pandas.core.frame.DataFrame,NoneType]
>                                      =None, features:Union[Sequence[Union[str,
>                                      int]],NoneType]=None, estimator_name:Unio
>                                      n[str,sklearn.base.BaseEstimator,NoneType
>                                      ]=None, n_features:Union[int,float,NoneTy
>                                      pe]=None)

Cross tabulate features with prediction errors.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| errors_idx | Series |  | Index of ranked errors. |
| X | typing.Union[numpy.ndarray, pandas.core.series.Series, pandas.core.frame.DataFrame, NoneType] | None | Features array. Not needed if using `ErrorAnalyzer.from_poniard`. |
| features | typing.Union[typing.Sequence[typing.Union[str, int]], NoneType] | None | Array of features to analyze. If `None`, all features will be analyzed. |
| estimator_name | typing.Union[str, sklearn.base.BaseEstimator, NoneType] | None | Only valid if using `ErrorAnalyzer.from_poniard`. Allows using an estimator to<br>compute permutation importances and analyzing only the top `n_features`. |
| n_features | typing.Union[int, float, NoneType] | None | How many features to analyze based on permutation importances. |
| **Returns** | **Dict[str, pd.DataFrame]** |  | **Per feature summary.** |

In [None]:
error_analysis.analyze_features(ranked_errors["LinearRegression"]["idx"])["age"]

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
error,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,368.0,0.000467,0.047873,-0.107226,-0.035483,0.005383,0.038076,0.110727
1,74.0,-0.002324,0.046585,-0.096328,-0.037299,0.005383,0.02627,0.110727


In [None]:
# | hide
import nbdev

nbdev.nbdev_export()