Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG] fix predict output conversion failure in BaseClassifier, BaseRegressor, if y_inner_mtype tag is a list #5680

Merged
merged 7 commits into from Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
38 changes: 31 additions & 7 deletions sktime/base/_base_panel.py
Expand Up @@ -265,22 +265,29 @@ def _convert_X(self, X, X_mtype):
)
return X

def _check_y(self, y=None):
def _check_y(self, y=None, return_to_mtype=False):
"""Check and coerce X/y for fit/transform functions.

Parameters
----------
y : pd.DataFrame, pd.Series or np.ndarray
return_to_mtype : bool
whether to return the mtype of y output

Returns
-------
y : object of sktime compatible time series type
y_inner : object of sktime compatible time series type
can be Series, Panel, Hierarchical
y_metadata : dict
metadata of y, returned by check_is_scitype
y_mtype : str, only returned if return_to_mtype=True
mtype of y_inner, after convert
"""
if y is None:
return None
if return_to_mtype:
return None, None, None
else:
return None, None

capa_multioutput = self.get_tag("capability:multioutput")
y_inner_mtype = self.get_tag("y_inner_mtype")
Expand Down Expand Up @@ -314,24 +321,32 @@ def _check_y(self, y=None):
store=self._converter_store_y,
)
y_vec = VectorizedDF([y_df], iterate_cols=True)
return y_vec, y_metadata
if return_to_mtype:
return y_vec, y_metadata, "pd_DataFrame_Table"
else:
return y_vec, y_metadata

y_inner = convert(
y_inner, y_inner_mtype = convert(
y,
from_type=y_mtype,
to_type=y_inner_mtype,
as_scitype="Table",
store=self._converter_store_y,
return_to_mtype=True,
)

return y_inner, y_metadata
if return_to_mtype:
return y_inner, y_metadata, y_inner_mtype
else:
return y_inner, y_metadata

def _convert_output_y(self, y):
"""Convert output y to original format.

Parameters
----------
y : np.ndarray or pd.DataFrame
output to convert

Returns
-------
Expand All @@ -346,9 +361,18 @@ def _convert_output_y(self, y):
output_mtype = "numpy1D"
converter_store = None

# inner return mtype is what we convert from
# special treatment for 1D numpy array
# this can be returned in composites due to
# current downwards compatible choice "1D return is always numpy"
if isinstance(y, np.ndarray) and y.ndim == 1:
inner_return_mtype = "numpy1D"
else:
inner_return_mtype = self._y_inner_mtype

y = convert(
y,
from_type=self.get_tag("y_inner_mtype"),
from_type=inner_return_mtype,
to_type=output_mtype,
as_scitype="Table",
store=converter_store,
Expand Down
3 changes: 2 additions & 1 deletion sktime/classification/base.py
Expand Up @@ -180,8 +180,9 @@ class labels for fitting
start = int(round(time.time() * 1000))

# check and convert y for multioutput vectorization
y, y_metadata = self._check_y(y)
y, y_metadata, y_inner_mtype = self._check_y(y, return_to_mtype=True)
self._y_metadata = y_metadata
self._y_inner_mtype = y_inner_mtype
self._is_vectorized = isinstance(y, VectorizedDF)

if self._is_vectorized:
Expand Down
1 change: 1 addition & 0 deletions sktime/classification/dictionary_based/_boss.py
Expand Up @@ -667,6 +667,7 @@ def _shorten_bags(self, word_len, y):
new_boss.classes_ = self.classes_
new_boss._class_dictionary = self._class_dictionary
new_boss._y_metadata = self._y_metadata
new_boss._y_inner_mtype = self._y_inner_mtype
new_boss._is_fitted = True

return new_boss
Expand Down
12 changes: 9 additions & 3 deletions sktime/classification/early_classification/base.py
Expand Up @@ -630,20 +630,26 @@ def _convert_X(self, X, X_mtype):
_convert_X = BaseClassifier._convert_X
return _convert_X(self, X, X_mtype)

def _check_y(self, y=None):
def _check_y(self, y=None, return_to_mtype=False):
"""Check and coerce X/y for fit/transform functions.

Parameters
----------
y : pd.DataFrame, pd.Series or np.ndarray
return_to_mtype : bool
whether to return the mtype of y output

Returns
-------
y : object of sktime compatible time series type
y_inner : object of sktime compatible time series type
can be Series, Panel, Hierarchical
y_metadata : dict
metadata of y, returned by check_is_scitype
y_mtype : str, only returned if return_to_mtype=True
mtype of y_inner, after convert
"""
_check_y = BaseClassifier._check_y
return _check_y(self, y)
return _check_y(self, y, return_to_mtype=return_to_mtype)

def _check_input(self, X, y=None, enforce_min_instances=1, return_metadata=True):
"""Check whether input X and y are valid formats with minimum data.
Expand Down
4 changes: 2 additions & 2 deletions sktime/datatypes/_convert.py
Expand Up @@ -301,8 +301,8 @@ def _get_first_mtype_of_same_scitype(from_mtype, to_mtypes, varname="to_mtypes")
]
if len(same_scitype_mtypes) == 0:
raise TypeError(
f"{varname} contains no mtype compatible with the scitype of obj,"
f"which is {scitype}"
f"{varname} contains no mtype compatible with the scitype of obj, "
f"which is {scitype}. Value of {varname} is: {to_mtypes}"
)
to_type = same_scitype_mtypes[0]
return to_type
Expand Down
3 changes: 2 additions & 1 deletion sktime/regression/base.py
Expand Up @@ -174,8 +174,9 @@ class labels for fitting
start = int(round(time.time() * 1000))

# check and convert y for multioutput vectorization
y, y_metadata = self._check_y(y)
y, y_metadata, y_inner_mtype = self._check_y(y, return_to_mtype=True)
self._y_metadata = y_metadata
self._y_inner_mtype = y_inner_mtype
self._is_vectorized = isinstance(y, VectorizedDF)

if self._is_vectorized:
Expand Down