diff --git a/sktime/performance_metrics/forecasting/_classes.py b/sktime/performance_metrics/forecasting/_classes.py
index 389d2ceebb4..4a4113c3929 100644
--- a/sktime/performance_metrics/forecasting/_classes.py
+++ b/sktime/performance_metrics/forecasting/_classes.py
@@ -369,12 +369,11 @@ def _evaluate_by_index(self, y_true, y_pred, **kwargs):
 
         Parameters
         ----------
-        y_true : time series in sktime compatible data container format
+        y_true : time series in sktime compatible pandas based data container format
             Ground truth (correct) target values
             y can be in one of the following formats:
-            Series scitype: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
-            Panel scitype: pd.DataFrame with 2-level row MultiIndex,
-                3D np.ndarray, list of Series pd.DataFrame, or nested pd.DataFrame
+            Series scitype: pd.DataFrame
+            Panel scitype: pd.DataFrame with 2-level row MultiIndex
             Hierarchical scitype: pd.DataFrame with 3 or more level row MultiIndex
         y_pred :time series in sktime compatible data container format
             Forecasted values to evaluate
@@ -1107,8 +1106,23 @@ def get_test_params(cls, parameter_set="default"):
         return [params1, params2]
 
 
-class MeanAbsoluteError(BaseForecastingErrorMetricFunc):
-    """Mean absolute error (MAE).
+class MeanAbsoluteError(BaseForecastingErrorMetric):
+    r"""Mean absolute error (MAE).
+
+    For a univariate, non-hierarchical sample
+    of true values :math:`y_1, \dots, y_n` and
+    predicted values :math:`\widehat{y}_1, \dots, \widehat{y}_n` (in :math:`\mathbb{R}`),
+    at time indices :math:`t_1, \dots, t_n`,
+    `evaluate` or call returns the Mean Absolute Error,
+    :math:`\frac{1}{n}\sum_{i=1}^n |y_i - \widehat{y}_i|`.
+    (the time indices are not used)
+
+    `multioutput` and `multilevel` control averaging across variables and
+    hierarchy indices, see below.
+
+    `evaluate_by_index` returns, at a time index :math:`t_i`,
+    the absolute error at that time index, :math:`|y_i - \widehat{y}_i|`,
+    for all time indices :math:`t_1, \dots, t_n` in the input.
 
     MAE output is non-negative floating point. The best value is 0.0.
 
@@ -1124,6 +1138,11 @@ class MeanAbsoluteError(BaseForecastingErrorMetricFunc):
         If array-like, values used as weights to average the errors.
         If 'raw_values', returns a full set of errors in case of multioutput input.
         If 'uniform_average', errors of all outputs are averaged with uniform weight.
+    multilevel : {'raw_values', 'uniform_average', 'uniform_average_time'}
+        Defines how to aggregate metric for hierarchical data (with levels).
+        If 'uniform_average' (default), errors are mean-averaged across levels.
+        If 'uniform_average_time', errors are mean-averaged across rows.
+        If 'raw_values', does not average errors across levels, hierarchy is retained.
 
     See Also
     --------
@@ -1157,7 +1176,49 @@ class MeanAbsoluteError(BaseForecastingErrorMetricFunc):
     0.85
     """
 
-    func = mean_absolute_error
+    def _evaluate_by_index(self, y_true, y_pred, **kwargs):
+        """Return the metric evaluated at each time point.
+
+        private _evaluate_by_index containing core logic, called from evaluate_by_index
+
+        Parameters
+        ----------
+        y_true : time series in sktime compatible pandas based data container format
+            Ground truth (correct) target values
+            y can be in one of the following formats:
+            Series scitype: pd.DataFrame
+            Panel scitype: pd.DataFrame with 2-level row MultiIndex
+            Hierarchical scitype: pd.DataFrame with 3 or more level row MultiIndex
+        y_pred : time series in sktime compatible data container format
+            Forecasted values to evaluate
+            must be of same format as y_true, same indices and columns if indexed
+
+        Returns
+        -------
+        loss : pd.Series or pd.DataFrame
+            Calculated metric, by time point (default=jackknife pseudo-values).
+            pd.Series if self.multioutput="uniform_average" or array-like
+                index is equal to index of y_true
+                entry at index i is metric at time i, averaged over variables
+            pd.DataFrame if self.multioutput="raw_values"
+                index and columns equal to those of y_true
+                i,j-th entry is metric at time i, at variable j
+        """
+        multioutput = self.multioutput
+
+        raw_values = (y_true - y_pred).abs()
+
+        # only compare against the keywords if multioutput is a str:
+        # for array-like weights, == is elementwise and has no single truth value
+        if isinstance(multioutput, str):
+            if multioutput == "raw_values":
+                return raw_values
+
+            if multioutput == "uniform_average":
+                return raw_values.mean(axis=1)
+
+        # else, we expect multioutput to be array-like
+        return raw_values.dot(multioutput)
 
 
 class MedianAbsoluteError(BaseForecastingErrorMetricFunc):