[ENH] tests and fixes for numpy weights in performance metrics - probabilistic metrics (#5104)

Continuation fix of #5086 for probabilistic metrics.

This PR:

* fixes the ability to use `numpy` weights in probabilistic metrics - this was added in #5086 but not tested (a usage sketch follows below)
* adds a test for weighted probabilistic metrics, a generalized version of the doctest in #5068
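
For context, a minimal usage sketch of the fixed behavior, with data taken from the new test added below. `PinballLoss` stands in for any of the probabilistic metrics, and the import path is assumed from the changed file's location:

import numpy as np
import pandas as pd

from sktime.performance_metrics.forecasting.probabilistic import PinballLoss

# two variables, three quantile levels (0.05, 0.5, 0.95) per variable
y_true = pd.DataFrame({"var1": [3, -0.5, 2, 7, 2], "var2": [4, 0.5, 3, 8, 3]})
y_pred = pd.DataFrame(
    {
        ("var1", 0.05): [1.5, -1, 1, 4, 0.65],
        ("var1", 0.5): [2.5, 0, 2, 8, 1.25],
        ("var1", 0.95): [3.5, 4, 3, 12, 1.85],
        ("var2", 0.05): [2.5, 0, 2, 8, 1.25],
        ("var2", 0.5): [5.0, 1, 4, 16, 2.5],
        ("var2", 0.95): [7.5, 2, 6, 24, 3.75],
    }
)

# numpy variable weights as multioutput - the case this PR fixes and tests
loss = PinballLoss(score_average=True, multioutput=np.array([0.3, 0.7]))
print(loss(y_true, y_pred))  # single float: weighted over variables, averaged over quantiles

With `score_average=False`, the same call instead returns a `pd.Series` with one entry per quantile level, as the new test asserts.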
fkiraly committed Aug 15, 2023
1 parent 2cd9b80 commit 3ec8894
Showing 2 changed files with 87 additions and 10 deletions.
48 changes: 38 additions & 10 deletions sktime/performance_metrics/forecasting/probabilistic/_classes.py
@@ -117,14 +117,21 @@ def evaluate(self, y_true, y_pred, multioutput=None, **kwargs):
        # pass to inner function
        out = self._evaluate(y_true_inner, y_pred_inner, multioutput, **kwargs)

-       if self.score_average and multioutput == "uniform_average":
-           out = float(out.mean(axis=1).iloc[0])  # average over all
-       if self.score_average and multioutput == "raw_values":
-           out = out.groupby(axis=1, level=0).mean()  # average over scores
-       if not self.score_average and multioutput == "uniform_average":
-           out = out.groupby(axis=1, level=1).mean()  # average over variables
-       if not self.score_average and multioutput == "raw_values":
-           out = out  # don't average
+       if isinstance(multioutput, str):
+           if self.score_average and multioutput == "uniform_average":
+               out = float(out.mean(axis=1).iloc[0])  # average over all
+           if self.score_average and multioutput == "raw_values":
+               out = out.groupby(axis=1, level=0).mean()  # average over scores
+           if not self.score_average and multioutput == "uniform_average":
+               out = out.groupby(axis=1, level=1).mean()  # average over variables
+           if not self.score_average and multioutput == "raw_values":
+               out = out  # don't average
+       else:  # is np.array with weights
+           if self.score_average:
+               out_raw = out.groupby(axis=1, level=0).mean()
+               out = out_raw.dot(multioutput)[0]
+           else:
+               out = _groupby_dot(out, multioutput)

        if isinstance(out, pd.DataFrame):
            out = out.squeeze(axis=0)
@@ -216,9 +216,9 @@ def evaluate_by_index(self, y_true, y_pred, multioutput=None, **kwargs):
        else:  # numpy array
            if self.score_average:
                out_raw = out.groupby(axis=1, level=0).mean()
-               out = out_raw.groupby(axis=1, level=1).dot(multioutput)
+               out = out_raw.dot(multioutput)
            else:
-               out = out.groupby(axis=1, level=1).dot(multioutput)
+               out = _groupby_dot(out, multioutput)

        return out

@@ -382,6 +389,27 @@ def _handle_multioutput(self, loss, multioutput):
        return out


+def _groupby_dot(df, weights):
+    """Groupby dot product.
+
+    Groups df by axis 1, level 1, and applies dot product with weights.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        dataframe to groupby
+    weights : np.array
+        weights to apply to each group
+
+    Returns
+    -------
+    out : pd.DataFrame
+        dataframe with weighted groupby dot product
+    """
+    out = df.groupby(axis=1, level=1).apply(lambda x: x.dot(weights))
+    return out


class PinballLoss(_BaseProbaForecastingErrorMetric):
    """Pinball loss aka quantile loss for quantile/interval predictions.
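
For intuition on the new `_groupby_dot` helper: pandas' `DataFrameGroupBy` does not provide a `.dot` method, so the pre-fix `out.groupby(axis=1, level=1).dot(multioutput)` could not work as written; the helper applies `.dot` per group instead. A minimal self-contained sketch of what it computes (variable names hypothetical; `groupby(axis=1, ...)` as used in the commit predates its deprecation in pandas 2.x):

import numpy as np
import pandas as pd

# columns are (variable, quantile) pairs, like the inner output of evaluate
cols = pd.MultiIndex.from_product([["var1", "var2"], [0.05, 0.5, 0.95]])
df = pd.DataFrame(np.arange(12.0).reshape(2, 6), columns=cols)
weights = np.array([0.3, 0.7])  # one weight per variable

# grouping columns by level 1 collects, per quantile, one column per variable;
# each group's dot product with the weights is a weighted sum over variables
out = df.groupby(axis=1, level=1).apply(lambda x: x.dot(weights))
print(out.columns.to_list())  # [0.05, 0.5, 0.95] - one column per quantile level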
@@ -224,3 +224,52 @@ def test_evaluate_alpha_negative(metric, sample_data):
    # 0.3 not in test quantile data so raise error.
    Loss = metric.create_test_instance().set_params(alpha=0.3)
    res = Loss(y_true=y_true, y_pred=y_pred)  # noqa
+
+
+@pytest.mark.parametrize("metric", all_metrics)
+@pytest.mark.parametrize("score_average", [True, False])
+def test_multioutput_weighted(metric, score_average):
+    """Test output contracts for multioutput weights."""
+    y_true = pd.DataFrame({"var1": [3, -0.5, 2, 7, 2], "var2": [4, 0.5, 3, 8, 3]})
+    y_pred = pd.DataFrame(
+        {
+            ("var1", 0.05): [1.5, -1, 1, 4, 0.65],
+            ("var1", 0.5): [2.5, 0, 2, 8, 1.25],
+            ("var1", 0.95): [3.5, 4, 3, 12, 1.85],
+            ("var2", 0.05): [2.5, 0, 2, 8, 1.25],
+            ("var2", 0.5): [5.0, 1, 4, 16, 2.5],
+            ("var2", 0.95): [7.5, 2, 6, 24, 3.75],
+        }
+    )
+
+    weights = np.array([0.3, 0.7])
+
+    loss = metric.create_test_instance()
+    loss.set_params(score_average=score_average, multioutput=weights)
+
+    eval_loss = loss(y_true, y_pred)
+
+    if loss.get_tag("scitype:y_pred") == "pred_interval":
+        # 1 full interval, lower = 0.05, upper = 0.95
+        expected_score_ix = [0.9]
+    else:
+        # 3 quantile scores: 0.05, 0.5, 0.95
+        expected_score_ix = [0.05, 0.5, 0.95]
+    no_expected_scores = len(expected_score_ix)
+    expected_timepoints = len(y_pred)
+
+    if score_average:
+        assert isinstance(eval_loss, float)
+    else:
+        assert isinstance(eval_loss, pd.Series)
+        assert len(eval_loss) == no_expected_scores
+
+    eval_loss_by_index = loss.evaluate_by_index(y_true, y_pred)
+    assert len(eval_loss_by_index) == expected_timepoints
+
+    if score_average:
+        assert isinstance(eval_loss_by_index, pd.Series)
+    else:
+        assert isinstance(eval_loss_by_index, pd.DataFrame)
+        assert eval_loss_by_index.shape == (expected_timepoints, no_expected_scores)
+        assert eval_loss_by_index.columns.to_list() == expected_score_ix
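
To make the tested contract concrete, a short standalone sketch of the non-averaged, weighted case, shrunk from the fixtures above (`PinballLoss` again assumed as a representative quantile metric):

import numpy as np
import pandas as pd

from sktime.performance_metrics.forecasting.probabilistic import PinballLoss

y_true = pd.DataFrame({"var1": [3, -0.5], "var2": [4, 0.5]})
y_pred = pd.DataFrame(
    {
        ("var1", 0.05): [1.5, -1], ("var1", 0.5): [2.5, 0], ("var1", 0.95): [3.5, 4],
        ("var2", 0.05): [2.5, 0], ("var2", 0.5): [5.0, 1], ("var2", 0.95): [7.5, 2],
    }
)
weights = np.array([0.3, 0.7])

loss = PinballLoss(score_average=False, multioutput=weights)
assert isinstance(loss(y_true, y_pred), pd.Series)  # one entry per quantile level

by_index = loss.evaluate_by_index(y_true, y_pred)
assert by_index.shape == (2, 3)  # (time points, quantile levels)
assert by_index.columns.to_list() == [0.05, 0.5, 0.95]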