sktime · TonyBagnall · Oct 19, 2021 · Oct 13, 2021 · Oct 13, 2021 · Oct 13, 2021
@@ -160,3 +160,4 @@ sktime/contrib/nothing_to_see_here.py
 sktime/distances/elastic_cython.html
 *.html
 sktime/contrib/distance_based/old_time_series_neighbors.py
+sktime/contrib/debug.py
@@ -49,7 +49,6 @@ class name: BaseEstimator
 __all__ = ["BaseEstimator", "BaseObject"]
 
 import inspect
-
 from copy import deepcopy
 
 from sklearn import clone
@@ -142,17 +141,19 @@ def get_tag(self, tag_name, tag_value_default=None, raise_error=True):
         tag_value_default : any type, optional; default=None
             Default/fallback value if tag is not found
         raise_error : bool
-            whether a ValueError is raised when the tag is not found
+            whether a ValueError is raised when the tag is not found and
+            tag_value_default is None
 
         Returns
         -------
         tag_value :
             Value of the `tag_name` tag in self. If not found, returns
-            `tag_value_default`.
+            `tag_value_default`, or raises a ValueError if `tag_value_default` is None.
 
         Raises
         ------
-        ValueError if raise_error is True and tag_name does not exist
+        ValueError if raise_error is True, tag_name does not exist and
+        `tag_value_default` is None.
             i.e., if tag_name is not in self.get_tags().keys()
         """
         collected_tags = self.get_tags()

@@ -8,6 +8,7 @@ class name: BaseClassifier
 Scitype defining methods:
     fitting         - fit(self, X, y)
     predicting      - predict(self, X)
+                    - predict_proba(self, X)
 
 State:
     fitted model/strategy   - by convention, any attributes ending in "_"
@@ -26,60 +27,39 @@ class name: BaseClassifier
 
 __all__ = [
     "BaseClassifier",
-    "classifier_list",
 ]
-__author__ = ["mloning", "fkiraly"]
+__author__ = ["mloning", "fkiraly", "TonyBagnall"]
 
 import numpy as np
 
 from sktime.base import BaseEstimator
+from sktime.utils.validation import check_n_jobs
 from sktime.utils.validation.panel import check_X, check_X_y
 
-"""
-Main list of classifiers extending this class. For clarity, some utility classifiers,
-such as Proximity Stump, are not listed.
-"""
-classifier_list = [
-    # in classification/distance_based
-    "ProximityForest",
-    # "KNeighborsTimeSeriesClassifier",
-    # "ElasticEnsemble",
-    # "ShapeDTW",
-    # in classification/dictionary_based
-    "BOSS",
-    "ContractableBOSS",
-    "TemporalDictionaryEnsemble",
-    "WEASEL",
-    "MUSE",
-    # in classification/interval_based
-    "RandomIntervalSpectralForest",
-    "TimeSeriesForest",
-    "CanonicalIntervalForest",
-    # in classification/shapelet_based
-    "ShapeletTransformClassifier",
-    "ROCKET",
-    "MrSEQLClassifier",
-]
-
 
 class BaseClassifier(BaseEstimator):
     """Base time series classifier template class.
 
-    The base classifier specifies the methods and method
-    signatures that all forecasters have to implement.
-
-    Specific implementations of these methods is deferred to concrete
-    forecasters.
+    The base classifier specifies the methods and method signatures that all
+    classifiers have to implement.
     """
 
     _tags = {
         "coerce-X-to-numpy": True,
         "coerce-X-to-pandas": False,
+        "capability:multivariate": False,
+        "capability:unequal_length": False,
+        "capability:missing_values": False,
+        "capability:train_estimate": False,
+        "capability:contractable": False,
+        "capability:multithreading": False,
     }
 
     def __init__(self):
-        self._is_fitted = False
-
+        self.classes_ = []
+        self.n_classes_ = 0
+        self._class_dictionary = {}
+        self._threads_to_use = 1
         super(BaseClassifier, self).__init__()
 
     def fit(self, X, y):
@@ -89,8 +69,7 @@ def fit(self, X, y):
         ----------
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
-                or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
         y : array-like, shape =  [n_instances] - the class labels.
 
         Returns
@@ -103,12 +82,31 @@ def fit(self, X, y):
         Changes state by creating a fitted model that updates attributes
         ending in "_" and sets is_fitted flag to True.
         """
-        coerce_to_numpy = self.get_tag("coerce-X-to-numpy", False)
-        coerce_to_pandas = self.get_tag("coerce-X-to-pandas", False)
+        coerce_to_numpy = self.get_tag("coerce-X-to-numpy")
+        coerce_to_pandas = self.get_tag("coerce-X-to-pandas")
+        allow_multivariate = self.get_tag("capability:multivariate")
         X, y = check_X_y(
-            X, y, coerce_to_numpy=coerce_to_numpy, coerce_to_pandas=coerce_to_pandas
+            X,
+            y,
+            coerce_to_numpy=coerce_to_numpy,
+            coerce_to_pandas=coerce_to_pandas,
+            enforce_univariate=not allow_multivariate,
         )
 
+        if self.get_tag("capability:multithreading"):
+            # a missing self.n_jobs surfaces as AttributeError, never NameError
+            try:
+                self._threads_to_use = check_n_jobs(self.n_jobs)
+            except AttributeError as err:
+                raise AttributeError(
+                    "self.n_jobs must be set if capability:multithreading is True"
+                ) from err
+
+        # rebuild the label -> index map so a refit never keeps stale classes
+        self.classes_ = np.unique(y)
+        self.n_classes_ = self.classes_.shape[0]
+        self._class_dictionary = {c: i for i, c in enumerate(self.classes_)}
+
         self._fit(X, y)
 
         # this should happen last
@@ -124,22 +122,25 @@ def predict(self, X):
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
                 or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
 
         Returns
         -------
         y : array-like, shape =  [n_instances] - predicted class labels
         """
-        coerce_to_numpy = self.get_tag("coerce-X-to-numpy", False)
-        coerce_to_pandas = self.get_tag("coerce-X-to-pandas", False)
-        X = check_X(
-            X, coerce_to_numpy=coerce_to_numpy, coerce_to_pandas=coerce_to_pandas
-        )
         self.check_is_fitted()
 
-        y = self._predict(X)
+        coerce_to_numpy = self.get_tag("coerce-X-to-numpy")
+        coerce_to_pandas = self.get_tag("coerce-X-to-pandas")
+        allow_multivariate = self.get_tag("capability:multivariate")
+        X = check_X(
+            X,
+            coerce_to_numpy=coerce_to_numpy,
+            coerce_to_pandas=coerce_to_pandas,
+            enforce_univariate=not allow_multivariate,
+        )
 
-        return y
+        return self._predict(X)
 
     def predict_proba(self, X):
         """Predicts labels probabilities for sequences in X.
@@ -149,18 +150,25 @@ def predict_proba(self, X):
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
                 or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
 
         Returns
         -------
-        y : array-like, shape =  [n_instances, n_classes] - predictive pmf
+        y : array-like, shape =  [n_instances, n_classes] - estimated class
+            probabilities
         """
-        coerce_to_numpy = self.get_tag("coerce-X-to-numpy", False)
-        coerce_to_pandas = self.get_tag("coerce-X-to-pandas", False)
+        self.check_is_fitted()
+
+        coerce_to_numpy = self.get_tag("coerce-X-to-numpy")
+        coerce_to_pandas = self.get_tag("coerce-X-to-pandas")
+        allow_multivariate = self.get_tag("capability:multivariate")
         X = check_X(
-            X, coerce_to_numpy=coerce_to_numpy, coerce_to_pandas=coerce_to_pandas
+            X,
+            coerce_to_numpy=coerce_to_numpy,
+            coerce_to_pandas=coerce_to_pandas,
+            enforce_univariate=not allow_multivariate,
         )
-        self.check_is_fitted()
+
         return self._predict_proba(X)
 
     def score(self, X, y):
@@ -171,7 +179,7 @@ def score(self, X, y):
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
                 or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
         y : array-like, shape =  [n_instances] - predicted class labels
 
         Returns
@@ -185,14 +193,14 @@ def score(self, X, y):
     def _fit(self, X, y):
         """Fit time series classifier to training data.
 
-        core logic
+        Abstract method
 
         Parameters
         ----------
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
                 or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
         y : array-like, shape = [n_instances] - the class labels
 
         Returns
@@ -205,46 +213,52 @@ def _fit(self, X, y):
         Changes state by creating a fitted model that updates attributes
         ending in "_" and sets is_fitted flag to True.
         """
-        raise NotImplementedError("abstract method")
+        raise NotImplementedError(
+            "_fit is a protected abstract method, it must be implemented."
+        )
 
     def _predict(self, X):
         """Predicts labels for sequences in X.
 
-        core logic
+        Abstract method, must be implemented.
 
         Parameters
         ----------
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
                 or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
 
         Returns
         -------
         y : array-like, shape =  [n_instances] - predicted class labels
         """
-        distributions = self.predict_proba(X)
-        predictions = []
-        for instance_index in range(0, X.shape[0]):
-            distribution = distributions[instance_index]
-            prediction = np.argmax(distribution)
-            predictions.append(prediction)
-        y = self.label_encoder.inverse_transform(predictions)
-
-        return y
+        raise NotImplementedError(
+            "_predict is a protected abstract method, it must be implemented."
+        )
 
     def _predict_proba(self, X):
         """Predicts labels probabilities for sequences in X.
 
+        Default behaviour is to call _predict and set the predicted class probability
+        to 1, other class probabilities to 0. Override if better estimates are
+        obtainable.
+
         Parameters
         ----------
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
                 or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
 
         Returns
         -------
-        y : array-like, shape =  [n_instances, n_classes] - predictive pmf
+        y : array-like, shape =  [n_instances, n_classes] - estimated probabilities
+            of class membership.
         """
-        raise NotImplementedError("abstract method")
+        dists = np.zeros((X.shape[0], self.n_classes_))
+        preds = self._predict(X)
+        for i in range(0, X.shape[0]):
+            dists[i, np.where(self.classes_ == preds[i])] = 1
+
+        return dists
@@ -43,7 +43,6 @@
 
 import pandas as pd
 
-
 ESTIMATOR_TAG_REGISTER = [
     (
         "ignores-exogeneous-X",
@@ -89,13 +88,13 @@
     ),
     (
         "X-y-must-have-same-index",
-        ["forecaster", "classifier", "regressor"],
+        ["forecaster", "regressor"],
         "bool",
         "do X/y in fit/update and X/fh in predict have to be same indices?",
     ),
     (
         "enforce_index_type",
-        ["forecaster", "classifier", "regressor"],
+        ["forecaster", "regressor"],
         "type",
         "passed to input checks, input conversion index type to enforce",
     ),
@@ -139,13 +138,13 @@
         "capability:multivariate",
         "classifier",
         "bool",
-        "can classifier classify time series with 2 or more variables?",
+        "can the classifier classify time series with 2 or more variables?",
     ),
     (
         "capability:unequal_length",
         "classifier",
         "bool",
-        "can classifier handle unequal length time series?",
+        "can the classifier handle unequal length time series?",
     ),
     # "capability:missing_values" is same as "handles-missing-data" tag.
     # They are kept distinct intentionally for easier TSC refactoring.
@@ -168,6 +167,12 @@
         "bool",
         "contract time setting, i.e. does the estimator support limiting max fit time?",
     ),
+    (
+        "capability:multithreading",
+        "classifier",
+        "bool",
+        "can the classifier set n_jobs to use multiple threads?",
+    ),
     (
         "coerce-X-to-pandas",
         ["classifier", "transformer"],