From 37c88a9f4308789dcbc3ea7685a8cd6fb6a964a1 Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Thu, 2 Mar 2023 19:13:59 +0000 Subject: [PATCH 1/4] actually change python version --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8bdaae7..e339999 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,7 +40,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "3.10" + python-version: ${{ matrix.python-version }} - name: Install run: python -m pip install .[dev,optional_dependencies] From 7c971df29ff95564c0ad7920ea8c2486ffb8fe2e Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Sun, 5 Mar 2023 00:52:11 +0000 Subject: [PATCH 2/4] dummy classifiers and sklearn lower bound change --- pyproject.toml | 7 +- tsml/__init__.py | 2 +- tsml/dummy/__init__.py | 8 + tsml/dummy/_dummy.py | 311 ++++++++++++++++++ tsml/feature_based/_catch22_classifier.py | 3 + tsml/interval_based/_base.py | 5 +- tsml/interval_based/_cif.py | 4 +- tsml/interval_based/_interval_pipelines.py | 2 +- tsml/interval_based/_rise.py | 2 +- tsml/interval_based/_tsf.py | 4 +- tsml/shapelet_based/_stc.py | 5 +- tsml/tests/_sklearn_checks.py | 8 +- tsml/transformations/catch22.py | 3 +- tsml/transformations/interval_extraction.py | 3 +- tsml/transformations/shapelet_transform.py | 3 +- tsml/utils/testing.py | 2 +- tsml/{sklearn => vector}/__init__.py | 4 +- tsml/{sklearn => vector}/_cit.py | 2 +- tsml/{sklearn => vector}/_rotation_forest.py | 5 +- tsml/{sklearn => vector}/tests/__init__.py | 0 .../tests/test_rotation_forest.py | 2 +- 21 files changed, 358 insertions(+), 27 deletions(-) create mode 100644 tsml/dummy/_dummy.py rename tsml/{sklearn => vector}/__init__.py (51%) rename tsml/{sklearn => vector}/_cit.py (99%) rename tsml/{sklearn => vector}/_rotation_forest.py (99%) rename tsml/{sklearn => vector}/tests/__init__.py (100%) rename tsml/{sklearn => vector}/tests/test_rotation_forest.py (94%) diff --git a/pyproject.toml b/pyproject.toml index 0633d94..8a460df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "tsml" -version = "0.0.2" +version = "0.0.3" description = "A toolkit for time series machine learning algorithms." 
authors = [ {name = "Matthew Middlehurst", email = "m.middlehurst@uea.ac.uk"}, @@ -37,7 +37,7 @@ classifiers = [ dependencies = [ "numba>=0.55", "numpy>=1.21.0", - "scikit-learn>=1.2.1", + "scikit-learn>=1.0.2", ] [project.optional-dependencies] @@ -76,7 +76,6 @@ include = ["tsml"] ignore = [ "examples/**", "docs/**", - "requirements.txt", "*.yaml", "*.yml", ".coveragerc", @@ -88,6 +87,8 @@ extend-ignore = ["E203"] [tool.pytest.ini_options] addopts = ''' + --ignore examples + --ignore docs --durations 10 --timeout 600 --showlocals diff --git a/tsml/__init__.py b/tsml/__init__.py index f4d0e00..68f6ccb 100644 --- a/tsml/__init__.py +++ b/tsml/__init__.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- """tsml.""" -__version__ = "0.0.1" +__version__ = "0.0.3" diff --git a/tsml/dummy/__init__.py b/tsml/dummy/__init__.py index 7b8c881..2983ded 100644 --- a/tsml/dummy/__init__.py +++ b/tsml/dummy/__init__.py @@ -1,2 +1,10 @@ # -*- coding: utf-8 -*- """Dummy estimators.""" + +__all__ = [ + "DummyClassifier", + "DummyRegressor", + "DummyClusterer", +] + +from tsml.dummy._dummy import DummyClassifier, DummyClusterer, DummyRegressor diff --git a/tsml/dummy/_dummy.py b/tsml/dummy/_dummy.py new file mode 100644 index 0000000..8e349ab --- /dev/null +++ b/tsml/dummy/_dummy.py @@ -0,0 +1,311 @@ +# -*- coding: utf-8 -*- +"""Dummy time series estimators.""" + +__author__ = ["MatthewMiddlehurst"] +__all__ = ["DummyClassifier", "DummyRegressor", "DummyClusterer"] + +import numpy as np +from sklearn.base import ClassifierMixin, ClusterMixin, RegressorMixin +from sklearn.dummy import DummyClassifier as SklearnDummyClassifier +from sklearn.dummy import DummyRegressor as SklearnDummyRegressor +from sklearn.utils import check_random_state +from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import check_is_fitted + +from tsml.base import BaseTimeSeriesEstimator + + +class DummyClassifier(ClassifierMixin, BaseTimeSeriesEstimator): + """DummyClassifier makes predictions that ignore the input features. + + This classifier serves as a simple baseline to compare against other more + complex classifiers. Do not use it for real problems. + + The specific behavior of the baseline is selected with the `strategy` + parameter. + + All strategies make predictions that ignore the input feature values passed + as the `X` argument to `fit` and `predict`. The predictions, however, + typically depend on values observed in the `y` parameter passed to `fit`. + + A wrapper for `sklearn.dummy.DummyClassifier` using the tsml interface. Functionally + identical. + + Parameters + ---------- + strategy : {"most_frequent", "prior", "stratified", "uniform", \ + "constant"}, default="prior" + Strategy to use to generate predictions. + + * "most_frequent": the `predict` method always returns the most + frequent class label in the observed `y` argument passed to `fit`. + The `predict_proba` method returns the matching one-hot encoded + vector. + * "prior": the `predict` method always returns the most frequent + class label in the observed `y` argument passed to `fit` (like + "most_frequent"). ``predict_proba`` always returns the empirical + class distribution of `y` also known as the empirical class prior + distribution. + * "stratified": the `predict_proba` method randomly samples one-hot + vectors from a multinomial distribution parametrized by the empirical + class prior probabilities. + The `predict` method returns the class label which got probability + one in the one-hot vector of `predict_proba`. 
+ Each sampled row of both methods is therefore independent and + identically distributed. + * "uniform": generates predictions uniformly at random from the list + of unique classes observed in `y`, i.e. each class has equal + probability. + * "constant": always predicts a constant label that is provided by + the user. This is useful for metrics that evaluate a non-majority + class. + random_state : int, RandomState instance or None, default=None + Controls the randomness to generate the predictions when + ``strategy='stratified'`` or ``strategy='uniform'``. + Pass an int for reproducible output across multiple function calls. + See :term:`Glossary `. + constant : int or str or array-like of shape (n_outputs,), default=None + The explicit constant as predicted by the "constant" strategy. This + parameter is useful only for the "constant" strategy. + + See Also + -------- + DummyRegressor : Regressor that makes predictions using simple rules. + + Examples + -------- + >>> from tsml.dummy import DummyClassifier + >>> from tsml.datasets import load_minimal_chinatown + >>> X_train, y_train = load_minimal_chinatown(split="train") + >>> X_test, y_test = load_minimal_chinatown(split="test") + >>> clf = DummyClassifier(strategy="most_frequent") + >>> clf.fit(X_train, y_train) + DummyClassifier(strategy='most_frequent') + >>> clf.score(X_test, y_test) + 0.5 + """ + + def __init__(self, strategy="prior", random_state=None, constant=None): + self.strategy = strategy + self.random_state = random_state + self.constant = constant + + super(DummyClassifier, self).__init__() + + def fit(self, X, y): + """""" + X, y = self._validate_data(X=X, y=y) + + check_classification_targets(y) + + self.n_instances_, self.n_dims_, self.series_length_ = X.shape + self.classes_ = np.unique(y) + self.n_classes_ = self.classes_.shape[0] + self.class_dictionary_ = {} + for index, classVal in enumerate(self.classes_): + self.class_dictionary_[classVal] = index + + if len(self.classes_) == 1: + return self + + self._clf = SklearnDummyClassifier( + strategy=self.strategy, + random_state=self.random_state, + constant=self.constant, + ) + self._clf.fit(np.zeros(X.shape), y) + + return self + + def predict(self, X) -> np.ndarray: + """""" + check_is_fitted(self) + + # treat case of single class seen in fit + if self.n_classes_ == 1: + return np.repeat(list(self.class_dictionary_.keys()), X.shape[0], axis=0) + + X = self._validate_data(X=X, reset=False) + + return self._clf.predict(np.zeros(X.shape)) + + def predict_proba(self, X) -> np.ndarray: + """""" + check_is_fitted(self) + + # treat case of single class seen in fit + if self.n_classes_ == 1: + return np.repeat([[1]], X.shape[0], axis=0) + + X = self._validate_data(X=X, reset=False) + + return self._clf.predict_proba(np.zeros(X.shape)) + + +class DummyRegressor(RegressorMixin, BaseTimeSeriesEstimator): + """DummyRegressor makes predictions that ignore the input features. + + This regressor is useful as a simple baseline to compare with other + (real) regressors. Do not use it for real problems. + + The specific behavior of the baseline is selected with the `strategy` + parameter. + + All strategies make predictions that ignore the input feature values passed + as the `X` argument to `fit` and `predict`. The predictions, however, + typically depend on values observed in the `y` parameter passed to `fit`. + + A wrapper for `sklearn.dummy.DummyRegressor` using the tsml interface. Functionally + identical. 
+ + Parameters + ---------- + strategy : {"mean", "median", "quantile", "constant"}, default="mean" + Strategy to use to generate predictions. + + * "mean": always predicts the mean of the training set + * "median": always predicts the median of the training set + * "quantile": always predicts a specified quantile of the training set, + provided with the quantile parameter. + * "constant": always predicts a constant value that is provided by + the user. + constant : int or float or array-like of shape (n_outputs,), default=None + The explicit constant as predicted by the "constant" strategy. This + parameter is useful only for the "constant" strategy. + quantile : float in [0.0, 1.0], default=None + The quantile to predict using the "quantile" strategy. A quantile of + 0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the + maximum. + + See Also + -------- + DummyClassifier : Classifier that makes predictions using simple rules. + + Examples + -------- + >>> from tsml.dummy import DummyRegressor + >>> from tsml.datasets import load_minimal_gas_prices + >>> X_train, y_train = load_minimal_gas_prices(split="train") + >>> X_test, y_test = load_minimal_gas_prices(split="test") + >>> reg = DummyRegressor() + >>> reg.fit(X_train, y_train) + DummyRegressor() + >>> reg.score(X_test, y_test) + -0.07184048625633688 + """ + + def __init__(self, strategy="mean", constant=None, quantile=None): + self.strategy = strategy + self.constant = constant + self.quantile = quantile + + super(DummyRegressor, self).__init__() + + def fit(self, X, y): + """""" + X, y = self._validate_data(X=X, y=y) + + self._reg = SklearnDummyRegressor( + strategy=self.strategy, constant=self.constant, quantile=self.quantile + ) + self._reg.fit(np.zeros(X.shape), y) + + return self + + def predict(self, X): + """""" + check_is_fitted(self) + + X = self._validate_data(X=X, reset=False) + + return self._reg.predict(np.zeros(X.shape)) + + +class DummyClusterer(ClusterMixin, BaseTimeSeriesEstimator): + """DummyClusterer makes predictions that ignore the input features. + + This clusterer makes no effort to form reasonable clusters, and is primarily used + for interface testing. Do not use it for real problems. + + All strategies make predictions that ignore the input feature values passed + as the `X` argument to `fit` and `predict`.
+ + todo example adjusted_rand_score + + Examples + -------- + >>> from tsml.dummy import DummyClusterer + >>> from tsml.datasets import load_minimal_chinatown + >>> X_train, _ = load_minimal_chinatown(split="train") + >>> X_test, _ = load_minimal_chinatown(split="test") + >>> clu = DummyClusterer(random_state=0) + >>> clu.fit(X_train) + DummyClusterer(random_state=0) + >>> clu.labels_ + array([0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1]) + >>> clu.predict(X_test) + array([0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1]) + """ + + def __init__(self, strategy="single", n_clusters=2, random_state=None): + self.strategy = strategy + self.n_clusters = n_clusters + self.random_state = random_state + + super(DummyClusterer, self).__init__() + + def fit(self, X, y=None): + """""" + X = self._validate_data(X=X) + + if self.strategy == "single": + self.labels_ = np.zeros(len(X), dtype=np.int32) + elif self.strategy == "unique": + self.labels_ = np.arange(len(X), dtype=np.int32) + elif self.strategy == "random": + rng = check_random_state(self.random_state) + self.labels_ = rng.randint(self.n_clusters, size=len(X), dtype=np.int32) + else: + raise ValueError(f"Unknown strategy {self.strategy}") + + return self + + def predict(self, X): + """""" + check_is_fitted(self) + + X = self._validate_data(X=X, reset=False) + + if self.strategy == "single": + return np.zeros(len(X), dtype=np.int32) + elif self.strategy == "unique": + return np.arange(len(X), dtype=np.int32) + elif self.strategy == "random": + rng = check_random_state(self.random_state) + return rng.randint(self.n_clusters, size=len(X), dtype=np.int32) + else: + raise ValueError(f"Unknown strategy {self.strategy}") + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + For classifiers, a "default" set of parameters should be provided for + general testing, and a "results_comparison" set for comparing against + previously recorded results if the general set does not produce suitable + probabilities to compare against. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params`. 
+ """ + return {} diff --git a/tsml/feature_based/_catch22_classifier.py b/tsml/feature_based/_catch22_classifier.py index eb3ec00..eb7dfc4 100644 --- a/tsml/feature_based/_catch22_classifier.py +++ b/tsml/feature_based/_catch22_classifier.py @@ -10,6 +10,7 @@ import numpy as np from sklearn.base import ClassifierMixin, RegressorMixin from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import check_is_fitted from tsml.base import BaseTimeSeriesEstimator, _clone_estimator @@ -113,6 +114,8 @@ def fit(self, X, y): X=X, y=y, ensure_min_samples=2, ensure_min_series_length=3 ) + check_classification_targets(y) + self.n_instances_, self.n_dims_, self.series_length_ = X.shape self.classes_ = np.unique(y) self.n_classes_ = self.classes_.shape[0] diff --git a/tsml/interval_based/_base.py b/tsml/interval_based/_base.py index cfba9f8..52914f5 100644 --- a/tsml/interval_based/_base.py +++ b/tsml/interval_based/_base.py @@ -9,20 +9,21 @@ import warnings import numpy as np +from joblib import Parallel from sklearn.base import BaseEstimator, is_classifier, is_regressor from sklearn.tree import BaseDecisionTree, DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils import check_random_state -from sklearn.utils.parallel import Parallel, delayed +from sklearn.utils.fixes import delayed from sklearn.utils.validation import check_is_fitted from tsml.base import BaseTimeSeriesEstimator, _clone_estimator -from tsml.sklearn import CITClassifier from tsml.transformations.interval_extraction import ( RandomIntervalTransformer, SupervisedIntervalTransformer, ) from tsml.utils.numba_functions.stats import row_mean, row_slope, row_std from tsml.utils.validation import check_n_jobs, is_transformer +from tsml.vector import CITClassifier class BaseIntervalForest(BaseTimeSeriesEstimator): diff --git a/tsml/interval_based/_cif.py b/tsml/interval_based/_cif.py index f23de4b..524feb3 100644 --- a/tsml/interval_based/_cif.py +++ b/tsml/interval_based/_cif.py @@ -8,9 +8,9 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from tsml.interval_based._base import BaseIntervalForest -from tsml.sklearn import CITClassifier from tsml.transformations.catch22 import Catch22Transformer from tsml.utils.numba_functions.stats import row_mean, row_slope, row_std +from tsml.vector import CITClassifier class CIFClassifier(ClassifierMixin, BaseIntervalForest): @@ -200,6 +200,8 @@ def __init__( # interval_features = [Catch22(outlier_norm=True), None, None, None] + # check_classification_targets(y) + super(DrCIFClassifier, self).__init__( base_estimator=base_estimator, n_estimators=n_estimators, diff --git a/tsml/interval_based/_interval_pipelines.py b/tsml/interval_based/_interval_pipelines.py index 82979e1..83e899e 100644 --- a/tsml/interval_based/_interval_pipelines.py +++ b/tsml/interval_based/_interval_pipelines.py @@ -18,10 +18,10 @@ from sklearn.utils.validation import check_is_fitted from tsml.base import BaseTimeSeriesEstimator, _clone_estimator -from tsml.sklearn import RotationForestClassifier from tsml.transformations.catch22 import Catch22Transformer from tsml.transformations.interval_extraction import RandomIntervalTransformer from tsml.utils.validation import check_n_jobs +from tsml.vector import RotationForestClassifier class RandomIntervalClassifier(ClassifierMixin, BaseTimeSeriesEstimator): diff --git a/tsml/interval_based/_rise.py b/tsml/interval_based/_rise.py index 
a657c25..ddfdc68 100644 --- a/tsml/interval_based/_rise.py +++ b/tsml/interval_based/_rise.py @@ -8,7 +8,7 @@ from sklearn.tree import DecisionTreeClassifier from tsml.interval_based._base import BaseIntervalForest -from tsml.sklearn import CITClassifier +from tsml.vector import CITClassifier class RISEClassifier(ClassifierMixin, BaseIntervalForest): diff --git a/tsml/interval_based/_tsf.py b/tsml/interval_based/_tsf.py index 55fbddd..6ecbabc 100644 --- a/tsml/interval_based/_tsf.py +++ b/tsml/interval_based/_tsf.py @@ -5,11 +5,9 @@ import numpy as np from sklearn.base import ClassifierMixin, RegressorMixin -from sklearn.tree import DecisionTreeClassifier -from sklearn.utils.parallel import Parallel, delayed from tsml.interval_based._base import BaseIntervalForest -from tsml.sklearn import CITClassifier +from tsml.vector import CITClassifier class TSFClassifier(ClassifierMixin, BaseIntervalForest): diff --git a/tsml/shapelet_based/_stc.py b/tsml/shapelet_based/_stc.py index 8c19bfb..469823d 100644 --- a/tsml/shapelet_based/_stc.py +++ b/tsml/shapelet_based/_stc.py @@ -10,12 +10,13 @@ import numpy as np from sklearn.base import ClassifierMixin +from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import check_is_fitted from tsml.base import BaseTimeSeriesEstimator, _clone_estimator -from tsml.sklearn import RotationForestClassifier from tsml.transformations.shapelet_transform import RandomShapeletTransform from tsml.utils.validation import check_n_jobs +from tsml.vector import RotationForestClassifier class ShapeletTransformClassifier(ClassifierMixin, BaseTimeSeriesEstimator): @@ -162,6 +163,8 @@ def fit(self, X, y): """ X, y = self._validate_data(X=X, y=y, ensure_min_samples=2) + check_classification_targets(y) + self.n_instances_, self.n_dims_, self.series_length_ = X.shape self.classes_ = np.unique(y) self.n_classes_ = self.classes_.shape[0] diff --git a/tsml/tests/_sklearn_checks.py b/tsml/tests/_sklearn_checks.py index cb8ef75..9c5116e 100644 --- a/tsml/tests/_sklearn_checks.py +++ b/tsml/tests/_sklearn_checks.py @@ -1101,14 +1101,15 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False): X, y = test_utils.generate_test_data() clusterer = clone(clusterer_orig) - X = StandardScaler().fit_transform(X) rng = np.random.RandomState(7) - X_noise = np.concatenate([X, rng.uniform(low=-3, high=3, size=(5, 2))]) + X_noise = np.concatenate( + [X, rng.uniform(low=-3, high=3, size=(5, X.shape[1], X.shape[2]))] + ) if readonly_memmap: X, y, X_noise = create_memmap_backed_data([X, y, X_noise]) - n_samples, n_features = X.shape + n_samples, n_dims, series_length = X.shape # catch deprecation and neighbors warnings if hasattr(clusterer, "n_clusters"): clusterer.set_params(n_clusters=3) @@ -1121,7 +1122,6 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False): pred = clusterer.labels_ assert pred.shape == (n_samples,) - assert adjusted_rand_score(pred, y) > 0.4 if _safe_tags(clusterer, key="non_deterministic"): return set_random_state(clusterer) diff --git a/tsml/transformations/catch22.py b/tsml/transformations/catch22.py index 092ffe2..c6fbc4e 100644 --- a/tsml/transformations/catch22.py +++ b/tsml/transformations/catch22.py @@ -10,9 +10,10 @@ import math import numpy as np -from joblib import Parallel, delayed +from joblib import Parallel from numba import njit from sklearn.base import TransformerMixin +from sklearn.utils.fixes import delayed from sklearn.utils.validation import check_is_fitted from tsml.base import 
BaseTimeSeriesEstimator diff --git a/tsml/transformations/interval_extraction.py b/tsml/transformations/interval_extraction.py index 343539c..b8992ba 100644 --- a/tsml/transformations/interval_extraction.py +++ b/tsml/transformations/interval_extraction.py @@ -8,10 +8,11 @@ __all__ = ["RandomIntervalTransformer", "SupervisedIntervalTransformer"] import numpy as np -from joblib import Parallel, delayed +from joblib import Parallel from sklearn import preprocessing from sklearn.base import TransformerMixin from sklearn.utils import check_random_state +from sklearn.utils.fixes import delayed from sklearn.utils.validation import check_is_fitted from tsml.base import BaseTimeSeriesEstimator, _clone_estimator diff --git a/tsml/transformations/shapelet_transform.py b/tsml/transformations/shapelet_transform.py index 98ef08c..ac32db0 100644 --- a/tsml/transformations/shapelet_transform.py +++ b/tsml/transformations/shapelet_transform.py @@ -12,12 +12,13 @@ import time import numpy as np -from joblib import Parallel, delayed +from joblib import Parallel from numba import njit from numba.typed.typedlist import List from sklearn import preprocessing from sklearn.base import TransformerMixin from sklearn.utils import check_random_state +from sklearn.utils.fixes import delayed from sklearn.utils.validation import check_is_fitted from tsml.base import BaseTimeSeriesEstimator diff --git a/tsml/utils/testing.py b/tsml/utils/testing.py index 62d5925..aab0216 100644 --- a/tsml/utils/testing.py +++ b/tsml/utils/testing.py @@ -82,7 +82,7 @@ def parametrize_with_checks(estimators: List[BaseEstimator]) -> Callable: -------- >>> from tsml.utils.testing import parametrize_with_checks >>> from tsml.interval_based import TSFRegressor - >>> from tsml.sklearn import RotationForestClassifier + >>> from tsml.vector import RotationForestClassifier >>> @parametrize_with_checks( ... [TSFRegressor(), RotationForestClassifier()] ... 
) diff --git a/tsml/sklearn/__init__.py b/tsml/vector/__init__.py similarity index 51% rename from tsml/sklearn/__init__.py rename to tsml/vector/__init__.py index a6fb66b..1fc0a4b 100644 --- a/tsml/sklearn/__init__.py +++ b/tsml/vector/__init__.py @@ -6,5 +6,5 @@ "CITClassifier", ] -from tsml.sklearn._cit import CITClassifier -from tsml.sklearn._rotation_forest import RotationForestClassifier +from tsml.vector._cit import CITClassifier +from tsml.vector._rotation_forest import RotationForestClassifier diff --git a/tsml/sklearn/_cit.py b/tsml/vector/_cit.py similarity index 99% rename from tsml/sklearn/_cit.py rename to tsml/vector/_cit.py index 8ed0ae2..eeb75df 100644 --- a/tsml/sklearn/_cit.py +++ b/tsml/vector/_cit.py @@ -72,7 +72,7 @@ class CITClassifier(ClassifierMixin, BaseEstimator): Examples -------- - >>> from tsml.sklearn import CITClassifier + >>> from tsml.vector import CITClassifier >>> from tsml.datasets import load_minimal_chinatown >>> X_train, y_train = load_minimal_chinatown(split="train") >>> X_test, y_test = load_minimal_chinatown(split="test") diff --git a/tsml/sklearn/_rotation_forest.py b/tsml/vector/_rotation_forest.py similarity index 99% rename from tsml/sklearn/_rotation_forest.py rename to tsml/vector/_rotation_forest.py index 55493b9..e2488ba 100644 --- a/tsml/sklearn/_rotation_forest.py +++ b/tsml/vector/_rotation_forest.py @@ -11,11 +11,12 @@ import time import numpy as np -from joblib import Parallel, delayed +from joblib import Parallel from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.decomposition import PCA from sklearn.tree import DecisionTreeClassifier from sklearn.utils import check_random_state +from sklearn.utils.fixes import delayed from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import check_is_fitted @@ -101,7 +102,7 @@ class RotationForestClassifier(ClassifierMixin, BaseEstimator): Examples -------- - >>> from tsml.sklearn import RotationForestClassifier + >>> from tsml.vector import RotationForestClassifier >>> from tsml.datasets import load_minimal_chinatown >>> X_train, y_train = load_minimal_chinatown(split="train") >>> X_test, y_test = load_minimal_chinatown(split="test") diff --git a/tsml/sklearn/tests/__init__.py b/tsml/vector/tests/__init__.py similarity index 100% rename from tsml/sklearn/tests/__init__.py rename to tsml/vector/tests/__init__.py diff --git a/tsml/sklearn/tests/test_rotation_forest.py b/tsml/vector/tests/test_rotation_forest.py similarity index 94% rename from tsml/sklearn/tests/test_rotation_forest.py rename to tsml/vector/tests/test_rotation_forest.py index cec6b82..b9da88c 100644 --- a/tsml/sklearn/tests/test_rotation_forest.py +++ b/tsml/vector/tests/test_rotation_forest.py @@ -6,7 +6,7 @@ import numpy as np from tsml.datasets import load_minimal_chinatown -from tsml.sklearn import RotationForestClassifier +from tsml.vector import RotationForestClassifier def test_contracted_rotf(): From 1afddd490a5817bd463d59d6c7b66d55df7f7318 Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Sun, 5 Mar 2023 00:56:17 +0000 Subject: [PATCH 3/4] test fix --- tsml/dummy/_dummy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsml/dummy/_dummy.py b/tsml/dummy/_dummy.py index 8e349ab..a6382ac 100644 --- a/tsml/dummy/_dummy.py +++ b/tsml/dummy/_dummy.py @@ -238,9 +238,9 @@ class DummyClusterer(ClusterMixin, BaseTimeSeriesEstimator): >>> from tsml.datasets import load_minimal_chinatown >>> X_train, _ = load_minimal_chinatown(split="train") >>> 
X_test, _ = load_minimal_chinatown(split="test") - >>> clu = DummyClusterer(random_state=0) + >>> clu = DummyClusterer(strategy="random", random_state=0) >>> clu.fit(X_train) - DummyClusterer(random_state=0) + DummyClusterer(random_state=0, strategy='random') >>> clu.labels_ array([0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1]) >>> clu.predict(X_test) From 45b3d7eee7e45ef3ef2ab37be0bf576fb87f61d5 Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Sun, 5 Mar 2023 01:00:53 +0000 Subject: [PATCH 4/4] test fix --- tsml/dummy/_dummy.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tsml/dummy/_dummy.py b/tsml/dummy/_dummy.py index a6382ac..1523915 100644 --- a/tsml/dummy/_dummy.py +++ b/tsml/dummy/_dummy.py @@ -236,15 +236,16 @@ class DummyClusterer(ClusterMixin, BaseTimeSeriesEstimator): -------- >>> from tsml.dummy import DummyClusterer >>> from tsml.datasets import load_minimal_chinatown - >>> X_train, _ = load_minimal_chinatown(split="train") - >>> X_test, _ = load_minimal_chinatown(split="test") + >>> from sklearn.metrics import adjusted_rand_score + >>> X_train, y_train = load_minimal_chinatown(split="train") + >>> X_test, y_test = load_minimal_chinatown(split="test") >>> clu = DummyClusterer(strategy="random", random_state=0) >>> clu.fit(X_train) DummyClusterer(random_state=0, strategy='random') - >>> clu.labels_ - array([0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1]) - >>> clu.predict(X_test) - array([0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1]) + >>> adjusted_rand_score(clu.labels_, y_train) + 0.2087729039422543 + >>> adjusted_rand_score(clu.predict(X_test), y_test) + 0.2087729039422543 """ def __init__(self, strategy="single", n_clusters=2, random_state=None):
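A note on the parallelism imports changed across _base.py, catch22.py, interval_extraction.py, shapelet_transform.py and _rotation_forest.py: with the scikit-learn lower bound relaxed to 1.0.2, `Parallel` is now taken from joblib and `delayed` from `sklearn.utils.fixes`, since the previously used `sklearn.utils.parallel` module is not available in the older scikit-learn releases the new bound permits. The snippet below is a minimal sketch of that usage pattern under those assumptions, not code from the patches; the `_square` helper, the `n_jobs=2` value and the toy input are made up for illustration.

from joblib import Parallel
from sklearn.utils.fixes import delayed


def _square(x):
    """Toy per-item work standing in for a per-series computation."""
    return x * x


if __name__ == "__main__":
    # `delayed` wraps the callable so `Parallel` can dispatch each call to a worker,
    # the same fan-out shape the interval and shapelet transformers use per series.
    results = Parallel(n_jobs=2)(delayed(_square)(i) for i in range(8))
    print(results)  # [0, 1, 4, 9, 16, 25, 36, 49]

Used this way, `sklearn.utils.fixes.delayed` is intended to behave like `joblib.delayed` while also carrying scikit-learn's global configuration over to the workers, which is why the patches pair it with joblib's `Parallel` rather than dropping down to plain joblib for both names.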