diff --git a/pyproject.toml b/pyproject.toml index 4d30c71..cb94c1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "tsml" -version = "0.0.5" +version = "0.0.6" description = "A toolkit for time series machine learning algorithms." authors = [ {name = "Matthew Middlehurst", email = "m.middlehurst@uea.ac.uk"}, diff --git a/tsml/__init__.py b/tsml/__init__.py index 06f4456..31ae2af 100644 --- a/tsml/__init__.py +++ b/tsml/__init__.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- """tsml.""" -__version__ = "0.0.5" +__version__ = "0.0.6" diff --git a/tsml/base.py b/tsml/base.py index a4b5d24..3f57a73 100644 --- a/tsml/base.py +++ b/tsml/base.py @@ -111,7 +111,10 @@ def _validate_data( return out def _convert_X( - self, X: Union[np.ndarray, List[np.ndarray]], concatenate_channels: bool = False + self, + X: Union[np.ndarray, List[np.ndarray]], + pad_unequal: bool = False, + concatenate_channels: bool = False, ) -> Union[np.ndarray, List[np.ndarray]]: dtypes = self._get_tags()["X_types"] @@ -123,9 +126,9 @@ def _convert_X( return X.reshape((X.shape[0], -1)) else: raise ValueError( - "Can only convert 3D numpy array with 1 channel to 2D numpy " - f"array if concatenate_channels is True, found {X.shape[1]} " - "channels." + "Can only convert 3D numpy array with more than 1 channel to " + "2D numpy array if concatenate_channels is True, found " + f"{X.shape[1]} channels." ) elif dtypes[0] == "np_list": return [x for x in X] @@ -142,6 +145,13 @@ def _convert_X( if "np_list" in dtypes: return X elif dtypes[0] == "3darray": + if not pad_unequal and not all(x.shape[1] == X[0].shape[1] for x in X): + raise ValueError( + "Can only convert list of 2D numpy arrays with unequal length " + "data to 3D numpy array if pad_unequal is True, found " + "different series lengths." + ) + max_len = max(x.shape[1] for x in X) arr = np.zeros((len(X), X[0].shape[0], max_len)) @@ -151,6 +161,15 @@ def _convert_X( return arr elif dtypes[0] == "2darray": if X[0].shape[0] == 1 or concatenate_channels: + if not pad_unequal and not all( + x.shape[1] == X[0].shape[1] for x in X + ): + raise ValueError( + "Can only convert list of 2D numpy arrays with unequal " + "length data to 2D numpy array if pad_unequal is True, " + "found different series lengths." + ) + max_len = max(x.shape[1] for x in X) arr = np.zeros((len(X), X[0].shape[0], max_len)) @@ -160,9 +179,9 @@ def _convert_X( return arr.reshape((arr.shape[0], -1)) else: raise ValueError( - "Can only convert list of 2D numpy arrays with 1 channel to 2D " - "numpy array if concatenate_channels is True, found " - f"{X[0].shape[0]} channels." + "Can only convert list of 2D numpy arrays with more than 1 " + "channel to 2D numpy array if concatenate_channels is True, " + f"found {X[0].shape[0]} channels." ) else: raise ValueError( diff --git a/tsml/dummy/_dummy.py b/tsml/dummy/_dummy.py index 0a323b3..9cb4035 100644 --- a/tsml/dummy/_dummy.py +++ b/tsml/dummy/_dummy.py @@ -112,7 +112,7 @@ def fit(self, X, y): random_state=self.random_state, constant=self.constant, ) - self._clf.fit(np.zeros(X.shape), y) + self._clf.fit(None, y) return self @@ -141,7 +141,10 @@ def predict_proba(self, X) -> np.ndarray: return self._clf.predict_proba(np.zeros(X.shape)) def _more_tags(self): - return {"X_types": ["3darray", "2darray", "np_list"]} + return { + "X_types": ["3darray", "2darray", "np_list"], + "equal_length_only": False, + } class DummyRegressor(RegressorMixin, BaseTimeSeriesEstimator): @@ -205,12 +208,12 @@ def __init__(self, strategy="mean", constant=None, quantile=None): def fit(self, X, y): """""" - X, y = self._validate_data(X=X, y=y, ensure_min_series_length=1) + _, y = self._validate_data(X=X, y=y, ensure_min_series_length=1) self._reg = SklearnDummyRegressor( strategy=self.strategy, constant=self.constant, quantile=self.quantile ) - self._reg.fit(np.zeros(X.shape), y) + self._reg.fit(None, y) return self @@ -223,7 +226,10 @@ def predict(self, X): return self._reg.predict(np.zeros(X.shape)) def _more_tags(self): - return {"X_types": ["3darray", "2darray", "np_list"]} + return { + "X_types": ["3darray", "2darray", "np_list"], + "equal_length_only": False, + } class DummyClusterer(ClusterMixin, BaseTimeSeriesEstimator): @@ -291,4 +297,7 @@ def predict(self, X): raise ValueError(f"Unknown strategy {self.strategy}") def _more_tags(self): - return {"X_types": ["3darray", "2darray", "np_list"]} + return { + "X_types": ["3darray", "2darray", "np_list"], + "equal_length_only": False, + } diff --git a/tsml/tests/_sklearn_checks.py b/tsml/tests/_sklearn_checks.py index 7525a53..ec01e37 100644 --- a/tsml/tests/_sklearn_checks.py +++ b/tsml/tests/_sklearn_checks.py @@ -1329,16 +1329,16 @@ def check_classifiers_train( "fit." ) - if not tags["no_validation"]: - if tags["pairwise"]: - with raises( - ValueError, - err_msg=msg_pairwise.format(name, "predict"), - ): - classifier.predict(X.reshape(-1, 1)) - else: - with raises(ValueError, err_msg=msg.format(name, "predict")): - classifier.predict(X.T) + # if not tags["no_validation"]: + # if tags["pairwise"]: + # with raises( + # ValueError, + # err_msg=msg_pairwise.format(name, "predict"), + # ): + # classifier.predict(X.reshape(-1, 1)) + # else: + # with raises(ValueError, err_msg=msg.format(name, "predict")): + # classifier.predict(X.T) if hasattr(classifier, "decision_function"): try: # decision_function agrees with predict @@ -1378,20 +1378,20 @@ def check_classifiers_train( assert_array_equal(np.argmax(y_prob, axis=1), y_pred) # check that probas for all classes sum to one assert_array_almost_equal(np.sum(y_prob, axis=1), np.ones(n_samples)) - if not tags["no_validation"]: - # raises error on malformed input for predict_proba - if tags["pairwise"]: - with raises( - ValueError, - err_msg=msg_pairwise.format(name, "predict_proba"), - ): - classifier.predict_proba(X.reshape(-1, 1)) - else: - with raises( - ValueError, - err_msg=msg.format(name, "predict_proba"), - ): - classifier.predict_proba(X.T) + # if not tags["no_validation"]: + # # raises error on malformed input for predict_proba + # if tags["pairwise"]: + # with raises( + # ValueError, + # err_msg=msg_pairwise.format(name, "predict_proba"), + # ): + # classifier.predict_proba(X.reshape(-1, 1)) + # else: + # with raises( + # ValueError, + # err_msg=msg.format(name, "predict_proba"), + # ): + # classifier.predict_proba(X.T) if hasattr(classifier, "predict_log_proba"): # predict_log_proba is a transformation of predict_proba y_log_prob = classifier.predict_log_proba(X) diff --git a/tsml/tests/test_interface.py b/tsml/tests/test_interface.py index ad5ef23..6ed827b 100644 --- a/tsml/tests/test_interface.py +++ b/tsml/tests/test_interface.py @@ -32,7 +32,7 @@ def _generate_conversion_test_X(data_type): def test_convert_X_to_3d_array(input_type): est = _3dArrayDummy() X, old_shape = _generate_conversion_test_X(input_type) - X = est._convert_X(X) + X = est._convert_X(X, pad_unequal=True) assert isinstance(X, np.ndarray) assert X.ndim == 3 @@ -45,7 +45,7 @@ def test_convert_X_to_3d_array(input_type): def test_convert_X_to_2d_array(input_type): est = _2dArrayDummy() X, old_shape = _generate_conversion_test_X(input_type) - X = est._convert_X(X, concatenate_channels=True) + X = est._convert_X(X, concatenate_channels=True, pad_unequal=True) assert isinstance(X, np.ndarray) assert X.ndim == 2