Merge branch 'corr_plot' of https://github.com/RNKuhns/sktime into corr_plot
sktime · May 14, 2021 · 85bd24b · 85bd24b
2 parents 05bb227 + f9c0815
commit 85bd24b
Show file tree

Hide file tree

Showing 8 changed files with 120 additions and 40 deletions.
diff --git a/.all-contributorsrc b/.all-contributorsrc
@@ -927,6 +927,16 @@
         "infra"
       ]
     },
+    {
+      "login": "kachayev",
+      "name": "Oleksii Kachaiev",
+      "avatar_url": "https://avatars.githubusercontent.com/u/485647?v=4",
+      "profile": "https://github.com/kachayev",
+      "contributions": [
+        "code",
+        "test"
+      ]
+    },
     {
       "login": "Ifeanyi30",
       "name": "Ifeanyi30",

diff --git a/README.rst b/README.rst
@@ -36,7 +36,7 @@
 .. |twitter| image:: https://img.shields.io/twitter/follow/sktime_toolbox?label=%20Twitter&style=social
 .. _twitter: https://twitter.com/sktime_toolbox
 
-.. |python| image:: https://img.shields.io/badge/python-3.6+-blue?logo=python
+.. |python| image:: https://img.shields.io/pypi/pyversions/sktime
 .. _python: https://www.python.org/
 
 .. |codestyle| image:: https://img.shields.io/badge/code%20style-black-000000.svg

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -6,7 +6,7 @@
 
 variables:
   REQUIREMENTS: build_tools/requirements.txt
-  EXCLUDE_PYTHON_VERSIONS: "2.7, 3.5, 3.9"  # comma-separate string
+  EXCLUDE_PYTHON_VERSIONS: "2.7, 3.5, 3.9, 3.10"  # comma-separated string
 
 trigger:
   branches:

diff --git a/sktime/classification/shapelet_based/_rocket_classifier.py b/sktime/classification/shapelet_based/_rocket_classifier.py
@@ -2,20 +2,26 @@
 """ RandOm Convolutional KErnel Transform (ROCKET)
 """
 
-__author__ = "Matthew Middlehurst"
+__author__ = ["Matthew Middlehurst", "Oleksii Kachaiev"]
 __all__ = ["ROCKETClassifier"]
 
 import numpy as np
+from joblib import delayed, Parallel
+from sklearn.base import clone
+from sklearn.ensemble._base import _set_random_states
 from sklearn.linear_model import RidgeClassifierCV
 from sklearn.pipeline import make_pipeline
 from sklearn.utils import check_random_state
 from sklearn.utils.multiclass import class_distribution
 
 from sktime.classification.base import BaseClassifier
 from sktime.transformations.panel.rocket import Rocket
+from sktime.utils.validation import check_n_jobs
 from sktime.utils.validation.panel import check_X
 from sktime.utils.validation.panel import check_X_y
 
+import warnings
+
 
 class ROCKETClassifier(BaseClassifier):
     """
@@ -28,14 +34,16 @@ class ROCKETClassifier(BaseClassifier):
     ----------
     num_kernels             : int, number of kernels for ROCKET transform
     (default=10,000)
-    ensemble                : boolean, create ensemble of ROCKET's (default=False)
-    ensemble_size           : int, size of the ensemble (default=25)
+    n_estimators            : int, ensemble size, optional (default=None). When set
+    to None (default) or 1, a single estimator is used rather than an ensemble
     random_state            : int or None, seed for random, integer,
     optional (default to no seed)
+    n_jobs                  : int, the number of jobs to run in parallel for `fit`,
+    optional (default=1)
 
     Attributes
     ----------
-    classifiers             : array of IndividualTDE classifiers
+    estimators_             : array of individual classifiers
     weights                 : weight of each classifier in the ensemble
     weight_sum              : sum of all weights
     n_classes               : extracted from the data
@@ -67,16 +75,30 @@ class ROCKETClassifier(BaseClassifier):
     def __init__(
         self,
         num_kernels=10000,
-        ensemble=False,
+        ensemble=None,
         ensemble_size=25,
         random_state=None,
+        n_estimators=None,
+        n_jobs=1,
     ):
         self.num_kernels = num_kernels
+        self.random_state = random_state
+        self.n_jobs = n_jobs
+        self.n_estimators = n_estimators
+        # for compatibility only
         self.ensemble = ensemble
         self.ensemble_size = ensemble_size
-        self.random_state = random_state
 
-        self.classifiers = []
+        # for compatibility only
+        if ensemble is not None and n_estimators is None:
+            self.n_estimators = ensemble_size
+            warnings.warn(
+                "ensemble and ensemble_size params are deprecated and will be "
+                "removed in future releases, use n_estimators instead",
+                PendingDeprecationWarning,
+            )
+
+        self.estimators_ = []
         self.weights = []
         self.weight_sum = 0
 
@@ -102,37 +124,34 @@ def fit(self, X, y):
         self : object
         """
         X, y = check_X_y(X, y)
+        n_jobs = check_n_jobs(self.n_jobs)
 
         self.n_classes = np.unique(y).shape[0]
         self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
-        for index, classVal in enumerate(self.classes_):
-            self.class_dictionary[classVal] = index
-
-        if self.ensemble:
-            for i in range(self.ensemble_size):
-                rocket_pipeline = make_pipeline(
-                    Rocket(
-                        num_kernels=self.num_kernels, random_state=self.random_state
-                    ),
-                    RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
+        for index, class_val in enumerate(self.classes_):
+            self.class_dictionary[class_val] = index
+
+        if self.n_estimators is not None and self.n_estimators > 1:
+            base_estimator = _make_estimator(self.num_kernels, self.random_state)
+            self.estimators_ = Parallel(n_jobs=n_jobs)(
+                delayed(_fit_estimator)(
+                    _clone_estimator(base_estimator, self.random_state), X, y
                 )
-                rocket_pipeline.fit(X, y)
-                self.classifiers.append(rocket_pipeline)
-                self.weights.append(rocket_pipeline.steps[1][1].best_score_)
-                self.weight_sum = self.weight_sum + self.weights[i]
-        else:
-            rocket_pipeline = make_pipeline(
-                Rocket(num_kernels=self.num_kernels, random_state=self.random_state),
-                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
+                for _ in range(self.n_estimators)
             )
-            rocket_pipeline.fit(X, y)
-            self.classifiers.append(rocket_pipeline)
+            for rocket_pipeline in self.estimators_:
+                weight = rocket_pipeline.steps[1][1].best_score_
+                self.weights.append(weight)
+                self.weight_sum += weight
+        else:
+            base_estimator = _make_estimator(self.num_kernels, self.random_state)
+            self.estimators_ = [_fit_estimator(base_estimator, X, y)]
 
         self._is_fitted = True
         return self
 
     def predict(self, X):
-        if self.ensemble:
+        if self.n_estimators is not None:
             rng = check_random_state(self.random_state)
             return np.array(
                 [
@@ -142,25 +161,55 @@ def predict(self, X):
             )
         else:
             self.check_is_fitted()
-            return self.classifiers[0].predict(X)
+            return self.estimators_[0].predict(X)
 
     def predict_proba(self, X):
         self.check_is_fitted()
         X = check_X(X)
 
-        if self.ensemble:
+        if self.n_estimators is not None:
             sums = np.zeros((X.shape[0], self.n_classes))
 
-            for n, clf in enumerate(self.classifiers):
+            for n, clf in enumerate(self.estimators_):
                 preds = clf.predict(X)
                 for i in range(0, X.shape[0]):
                     sums[i, self.class_dictionary[preds[i]]] += self.weights[n]
 
             dists = sums / (np.ones(self.n_classes) * self.weight_sum)
         else:
             dists = np.zeros((X.shape[0], self.n_classes))
-            preds = self.classifiers[0].predict(X)
+            preds = self.estimators_[0].predict(X)
             for i in range(0, X.shape[0]):
                 dists[i, np.where(self.classes_ == preds[i])] = 1
 
         return dists
+
+    # for compatibility
+    @property
+    def classifiers(self):
+        warnings.warn(
+            "classifiers attribute is deprecated and will be removed "
+            "in future releases, use estimators_ instead",
+            PendingDeprecationWarning,
+        )
+        return self.estimators_
+
+
+def _fit_estimator(estimator, X, y):
+    return estimator.fit(X, y)
+
+
+def _make_estimator(num_kernels, random_state):
+    return make_pipeline(
+        Rocket(num_kernels=num_kernels, random_state=random_state),
+        RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
+    )
+
+
+def _clone_estimator(base_estimator, random_state=None):
+    estimator = clone(base_estimator)
+
+    if random_state is not None:
+        _set_random_states(estimator, random_state)
+
+    return estimator
diff --git a/sktime/classification/shapelet_based/tests/test_rocket_classifier.py b/sktime/classification/shapelet_based/tests/test_rocket_classifier.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 import numpy as np
 from numpy import testing
+import pytest
 
 from sktime.classification.shapelet_based import ROCKETClassifier
 from sktime.datasets import load_gunpoint, load_italy_power_demand, load_basic_motions
@@ -21,7 +22,11 @@ def test_rocket_on_gunpoint():
     testing.assert_array_equal(probas, rocket_gunpoint_probas)
 
 
-def test_rocket_ensemble_on_gunpoint():
+@pytest.mark.parametrize("n_jobs", [1, 8])
+@pytest.mark.parametrize("ensemble_config", [(10, True, None), (None, None, 10)])
+def test_rocket_ensemble_on_gunpoint(n_jobs, ensemble_config):
+    ensemble_size, ensemble, n_estimators = ensemble_config
+
     # load gunpoint data
     X_train, y_train = load_gunpoint(split="train", return_X_y=True)
     X_test, y_test = load_gunpoint(split="test", return_X_y=True)
@@ -30,9 +35,11 @@ def test_rocket_ensemble_on_gunpoint():
     # train ROCKET ensemble
     rocket_e = ROCKETClassifier(
         num_kernels=1000,
-        ensemble_size=10,
-        ensemble=True,
+        ensemble_size=ensemble_size,
+        ensemble=ensemble,
+        n_estimators=n_estimators,
         random_state=0,
+        n_jobs=n_jobs,
     )
     rocket_e.fit(X_train.iloc[indices], y_train[indices])
 

diff --git a/sktime/forecasting/compose/tests/test_reduce.py b/sktime/forecasting/compose/tests/test_reduce.py
@@ -33,7 +33,7 @@
 from sktime.forecasting.compose import DirRecTimeSeriesRegressionForecaster
 from sktime.forecasting.compose import RecursiveTimeSeriesRegressionForecaster
 from sktime.forecasting.compose import DirectTimeSeriesRegressionForecaster
-from sktime.performance_metrics.forecasting import smape_loss
+from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
 from sktime.utils._testing.forecasting import make_forecasting_problem
 from sktime.utils.validation.forecasting import check_fh
 
@@ -518,4 +518,6 @@ def test_dirrec_against_recursive_accumulated_error():
     preds_recursive = recursive.fit(y_train, fh=fh).predict(fh)
     preds_dirrec = dirrec.fit(y_train, fh=fh).predict(fh)
 
-    assert smape_loss(y_test, preds_dirrec) < smape_loss(y_test, preds_recursive)
+    assert mean_absolute_percentage_error(
+        y_test, preds_dirrec
+    ) < mean_absolute_percentage_error(y_test, preds_recursive)
diff --git a/sktime/forecasting/tests/test_theta.py b/sktime/forecasting/tests/test_theta.py
@@ -43,3 +43,15 @@ def test_pred_errors_against_y_test(fh):
     for ints in intervals:
         assert np.all(y_test > ints["lower"])
         assert np.all(y_test < ints["upper"])
+
+
+def test_forecaster_with_initial_level():
+    y = np.log1p(load_airline())
+    y_train, y_test = temporal_train_test_split(y)
+    fh = np.arange(len(y_test)) + 1
+
+    f = ThetaForecaster(initial_level=0.1, sp=12)
+    f.fit(y_train)
+    y_pred = f.predict(fh=fh)
+
+    np.testing.assert_allclose(y_pred, y_test, rtol=0.05)
diff --git a/sktime/forecasting/theta.py b/sktime/forecasting/theta.py
@@ -104,7 +104,6 @@ def __init__(self, initial_level=None, deseasonalize=True, sp=1):
 
         self.sp = sp
         self.deseasonalize = deseasonalize
-
         self.deseasonalizer_ = None
         self.trend_ = None
         self.initial_level_ = None
@@ -136,6 +135,7 @@ def fit(self, y, X=None, fh=None):
             self.deseasonalizer_ = Deseasonalizer(sp=self.sp, model="multiplicative")
             y = self.deseasonalizer_.fit_transform(y)
 
+        self.initialization_method = "known" if self.initial_level else "estimated"
         # fit exponential smoothing forecaster
         # find theta lines: Theta lines are just SES + drift
         super(ThetaForecaster, self).fit(y, fh=fh)