Skip to content

Commit

Permalink
[ENH] in ARCH, fix str coercion of pd.Series name (#5407)
Browse files Browse the repository at this point in the history
Related to #5349 (issue), #5384 (PR).


> After debugging for a while,
> 
> 1. Firstly, the function `_check_predict_proba` is only testing for
`y_train` generated from `_make_data(n_columns=n_columns)`. If the
forecaster is univariate, then the `y_train` generated would always have
column name `None` of type `str`. The `_check_predict_proba` checks
whether the `_predict_proba` is changing `None` to `0` internally. Since
every forecaster's `_predict_proba` doing it, all forecasters passed.
> 2. Actually `ARCH` should fail this test, because `_predict_proba`
function is deleted in this PR. But, the `type(y_train.name)` is `str`
(`'None'`), not `None`. Therefore, `None` case is skipped and checks
whether `(pred_cols == y_train.name).all()` which is `None` of `str`
equals `None` of `str` so, `ARCH` also passed.
> 
> I think it's my bad, there is no bug in `_check_predict_proba` as it
is not designed to check whether column names are preserved. Changes
should only be in `ARCH`'s `_predict_proba` like below:
> 
> 1. If the column name is `None` of type `None`/`str`, do
`pred_dist.name = pd.Index([0])`.
> 2. Otherwise, just return output of `Super()._predict_proba()` as it
is, i.e., column names are preserved by default.

I think there was some ambiguity in my explanation for #5349 (issue) in
#5384 (PR), but what I meant was exactly the changes in this PR.
  • Loading branch information
Vasudeva-bit committed Dec 13, 2023
1 parent 005eafa commit d3cb87c
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
16 changes: 11 additions & 5 deletions sktime/forecasting/arch/_uarch.py
Expand Up @@ -251,6 +251,7 @@ def _fit(self, y, X=None, fh=None):
if fh:
self._horizon = fh

y_name = y.name
self._forecaster = _ARCH(
y=y,
x=X,
Expand All @@ -277,6 +278,7 @@ def _fit(self, y, X=None, fh=None):
options=self.options,
backcast=self.backcast,
)
y.name = y_name
return self

def _get_arch_result_object(self, fh=None, X=None):
Expand Down Expand Up @@ -344,7 +346,7 @@ def _predict(self, fh, X=None):
y_pred = pd.Series(
ArchResultObject.mean.values[-1],
index=full_range,
name=str(self._y.name),
name=self._y.name,
)
y_pred = y_pred.loc[abs_idx.to_pandas()]
y_pred.index = self._horizon.to_absolute_index(self.cutoff)
Expand Down Expand Up @@ -398,19 +400,23 @@ def _predict_interval(self, fh, X, coverage):
upper_int = mean_forecast + (z_critical * std_err)
lower_df = pd.DataFrame(
lower_int,
columns=[y_col_name + " " + str(alpha) + " " + "lower"],
columns=[
y_col_name if y_col_name else "0" + " " + str(alpha) + " " + "lower"
],
)
upper_df = pd.DataFrame(
upper_int,
columns=[y_col_name + " " + str(alpha) + " " + "upper"],
columns=[
y_col_name if y_col_name else "0" + " " + str(alpha) + " " + "upper"
],
)
df_list.append(pd.concat((lower_df, upper_df), axis=1))
concat_df = pd.concat(df_list, axis=1)
concat_df_columns = list(
OrderedDict.fromkeys(
[
col_df
for col in y_col_name
for col in (y_col_name if y_col_name else "0")
for col_df in concat_df.columns
if col in col_df
]
Expand All @@ -425,7 +431,7 @@ def _predict_interval(self, fh, X, coverage):
final_columns = list(
itertools.product(
*[
[y_col_name],
[y_col_name if y_col_name else 0],
coverage,
df.columns.get_level_values(2).unique(),
]
Expand Down
2 changes: 1 addition & 1 deletion sktime/forecasting/tests/test_all_forecasters.py
Expand Up @@ -383,7 +383,7 @@ def get_expected_columns():

found = pred_ints.columns.to_flat_index()
msg = (
"columns of returned prediction interval DataFrame do not"
"columns of returned prediction interval DataFrame do not "
f"match up with expected columns. Expected: {expected},"
f"found: {found}"
)
Expand Down

0 comments on commit d3cb87c

Please sign in to comment.