Skip to content

Commit

Permalink
Merge branch 'corr_plot' of https://github.com/RNKuhns/sktime into co…
Browse files Browse the repository at this point in the history
…rr_plot
  • Loading branch information
RNKuhns committed May 14, 2021
2 parents 05bb227 + f9c0815 commit 85bd24b
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 40 deletions.
10 changes: 10 additions & 0 deletions .all-contributorsrc
Expand Up @@ -927,6 +927,16 @@
"infra"
]
},
{
"login": "kachayev",
"name": "Oleksii Kachaiev"
"avatar_url": "https://avatars.githubusercontent.com/u/485647?v=4",
"profile": "https://github.com/kachayev",
"contributions": [
"code",
"test"
]
},
{
"login": "Ifeanyi30",
"name": "Ifeanyi30",
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Expand Up @@ -36,7 +36,7 @@
.. |twitter| image:: https://img.shields.io/twitter/follow/sktime_toolbox?label=%20Twitter&style=social
.. _twitter: https://twitter.com/sktime_toolbox

.. |python| image:: https://img.shields.io/badge/python-3.6+-blue?logo=python
.. |python| image:: https://img.shields.io/pypi/pyversions/sktime
.. _python: https://www.python.org/

.. |codestyle| image:: https://img.shields.io/badge/code%20style-black-000000.svg
Expand Down
2 changes: 1 addition & 1 deletion azure-pipelines.yml
Expand Up @@ -6,7 +6,7 @@

variables:
REQUIREMENTS: build_tools/requirements.txt
EXCLUDE_PYTHON_VERSIONS: "2.7, 3.5, 3.9" # comma-separate string
EXCLUDE_PYTHON_VERSIONS: "2.7, 3.5, 3.9, 3.10" # comma-separate string

trigger:
branches:
Expand Down
113 changes: 81 additions & 32 deletions sktime/classification/shapelet_based/_rocket_classifier.py
Expand Up @@ -2,20 +2,26 @@
""" RandOm Convolutional KErnel Transform (ROCKET)
"""

__author__ = "Matthew Middlehurst"
__author__ = ["Matthew Middlehurst", "Oleksii Kachaiev"]
__all__ = ["ROCKETClassifier"]

import numpy as np
from joblib import delayed, Parallel
from sklearn.base import clone
from sklearn.ensemble._base import _set_random_states
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sklearn.utils import check_random_state
from sklearn.utils.multiclass import class_distribution

from sktime.classification.base import BaseClassifier
from sktime.transformations.panel.rocket import Rocket
from sktime.utils.validation import check_n_jobs
from sktime.utils.validation.panel import check_X
from sktime.utils.validation.panel import check_X_y

import warnings


class ROCKETClassifier(BaseClassifier):
"""
Expand All @@ -28,14 +34,16 @@ class ROCKETClassifier(BaseClassifier):
----------
num_kernels : int, number of kernels for ROCKET transform
(default=10,000)
ensemble : boolean, create ensemble of ROCKET's (default=False)
ensemble_size : int, size of the ensemble (default=25)
n_estimators : int, ensemble size, optional (default=None). When set
to None (default) or 1, the classifier uses a single estimator rather than ensemble
random_state : int or None, seed for random, integer,
optional (default to no seed)
n_jobs : int, the number of jobs to run in parallel for `fit`,
optional (default=1)
Attributes
----------
classifiers : array of IndividualTDE classifiers
estimators_ : array of individual classifiers
weights : weight of each classifier in the ensemble
weight_sum : sum of all weights
n_classes : extracted from the data
Expand Down Expand Up @@ -67,16 +75,30 @@ class ROCKETClassifier(BaseClassifier):
def __init__(
self,
num_kernels=10000,
ensemble=False,
ensemble=None,
ensemble_size=25,
random_state=None,
n_estimators=None,
n_jobs=1,
):
self.num_kernels = num_kernels
self.random_state = random_state
self.n_jobs = n_jobs
self.n_estimators = n_estimators
# for compatibility only
self.ensemble = ensemble
self.ensemble_size = ensemble_size
self.random_state = random_state

self.classifiers = []
# for compatibility only
if ensemble is not None and n_estimators is None:
self.n_estimators = ensemble_size
warnings.warn(
"ensemble and ensemble_size params are deprecated and will be "
"removed in future releases, use n_estimators instead",
PendingDeprecationWarning,
)

self.estimators_ = []
self.weights = []
self.weight_sum = 0

Expand All @@ -102,37 +124,34 @@ def fit(self, X, y):
self : object
"""
X, y = check_X_y(X, y)
n_jobs = check_n_jobs(self.n_jobs)

self.n_classes = np.unique(y).shape[0]
self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
for index, classVal in enumerate(self.classes_):
self.class_dictionary[classVal] = index

if self.ensemble:
for i in range(self.ensemble_size):
rocket_pipeline = make_pipeline(
Rocket(
num_kernels=self.num_kernels, random_state=self.random_state
),
RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
for index, class_val in enumerate(self.classes_):
self.class_dictionary[class_val] = index

if self.n_estimators is not None and self.n_estimators > 1:
base_estimator = _make_estimator(self.num_kernels, self.random_state)
self.estimators_ = Parallel(n_jobs=n_jobs)(
delayed(_fit_estimator)(
_clone_estimator(base_estimator, self.random_state), X, y
)
rocket_pipeline.fit(X, y)
self.classifiers.append(rocket_pipeline)
self.weights.append(rocket_pipeline.steps[1][1].best_score_)
self.weight_sum = self.weight_sum + self.weights[i]
else:
rocket_pipeline = make_pipeline(
Rocket(num_kernels=self.num_kernels, random_state=self.random_state),
RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
for _ in range(self.n_estimators)
)
rocket_pipeline.fit(X, y)
self.classifiers.append(rocket_pipeline)
for rocket_pipeline in self.estimators_:
weight = rocket_pipeline.steps[1][1].best_score_
self.weights.append(weight)
self.weight_sum += weight
else:
base_estimator = _make_estimator(self.num_kernels, self.random_state)
self.estimators_ = [_fit_estimator(base_estimator, X, y)]

self._is_fitted = True
return self

def predict(self, X):
if self.ensemble:
if self.n_estimators is not None:
rng = check_random_state(self.random_state)
return np.array(
[
Expand All @@ -142,25 +161,55 @@ def predict(self, X):
)
else:
self.check_is_fitted()
return self.classifiers[0].predict(X)
return self.estimators_[0].predict(X)

def predict_proba(self, X):
self.check_is_fitted()
X = check_X(X)

if self.ensemble:
if self.n_estimators is not None:
sums = np.zeros((X.shape[0], self.n_classes))

for n, clf in enumerate(self.classifiers):
for n, clf in enumerate(self.estimators_):
preds = clf.predict(X)
for i in range(0, X.shape[0]):
sums[i, self.class_dictionary[preds[i]]] += self.weights[n]

dists = sums / (np.ones(self.n_classes) * self.weight_sum)
else:
dists = np.zeros((X.shape[0], self.n_classes))
preds = self.classifiers[0].predict(X)
preds = self.estimators_[0].predict(X)
for i in range(0, X.shape[0]):
dists[i, np.where(self.classes_ == preds[i])] = 1

return dists

# for compatibility
@property
def classifiers(self):
    """Deprecated alias for ``estimators_``.

    Kept only for backward compatibility. Emits a
    ``PendingDeprecationWarning`` on every access and returns the same
    object as ``self.estimators_``.
    """
    warnings.warn(
        "classifiers attribute is deprecated and will be removed "
        "in future releases, use estimators_ instead",
        PendingDeprecationWarning,
        # attribute the warning to the code that accessed the property
        stacklevel=2,
    )
    return self.estimators_


def _fit_estimator(estimator, X, y):
    """Fit ``estimator`` on ``(X, y)`` and return whatever ``fit`` returns.

    Defined at module level so the call can be wrapped with
    ``joblib.delayed`` when the ensemble members are fitted in parallel.

    Parameters
    ----------
    estimator : object exposing ``fit(X, y)``
    X : training data, passed through unchanged
    y : training labels, passed through unchanged

    Returns
    -------
    The return value of ``estimator.fit(X, y)``.
    """
    return estimator.fit(X, y)


def _make_estimator(num_kernels, random_state):
    """Build an unfitted ROCKET classification pipeline.

    The pipeline chains a ``Rocket`` transform with a ``RidgeClassifierCV``
    whose ``alphas`` grid is ``np.logspace(-3, 3, 10)``.

    Parameters
    ----------
    num_kernels : forwarded to ``Rocket(num_kernels=...)``
    random_state : forwarded to ``Rocket(random_state=...)``

    Returns
    -------
    The pipeline object returned by ``make_pipeline``.
    """
    # NOTE(review): ``normalize`` was deprecated in scikit-learn 1.0 and
    # removed in 1.2 -- confirm the supported scikit-learn range.
    return make_pipeline(
        Rocket(num_kernels=num_kernels, random_state=random_state),
        RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
    )


def _clone_estimator(base_estimator, random_state=None):
    """Clone ``base_estimator`` and optionally re-seed the clone.

    Parameters
    ----------
    base_estimator : estimator passed to ``sklearn.base.clone``
    random_state : optional (default=None); when not None it is passed to
        ``_set_random_states`` together with the clone

    Returns
    -------
    The cloned estimator.
    """
    estimator = clone(base_estimator)

    if random_state is not None:
        # NOTE(review): callers that pass the same integer seed for every
        # clone presumably end up with identically seeded estimators --
        # confirm whether ensemble members are meant to differ.
        _set_random_states(estimator, random_state)

    return estimator
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import numpy as np
from numpy import testing
import pytest

from sktime.classification.shapelet_based import ROCKETClassifier
from sktime.datasets import load_gunpoint, load_italy_power_demand, load_basic_motions
Expand All @@ -21,7 +22,11 @@ def test_rocket_on_gunpoint():
testing.assert_array_equal(probas, rocket_gunpoint_probas)


def test_rocket_ensemble_on_gunpoint():
@pytest.mark.parametrize("n_jobs", [1, 8])
@pytest.mark.parametrize("ensemble_config", [(10, True, None), (None, None, 10)])
def test_rocket_ensemble_on_gunpoint(n_jobs, ensemble_config):
ensemble_size, ensemble, n_estimators = ensemble_config

# load gunpoint data
X_train, y_train = load_gunpoint(split="train", return_X_y=True)
X_test, y_test = load_gunpoint(split="test", return_X_y=True)
Expand All @@ -30,9 +35,11 @@ def test_rocket_ensemble_on_gunpoint():
# train ROCKET ensemble
rocket_e = ROCKETClassifier(
num_kernels=1000,
ensemble_size=10,
ensemble=True,
ensemble_size=ensemble_size,
ensemble=ensemble,
n_estimators=n_estimators,
random_state=0,
n_jobs=n_jobs,
)
rocket_e.fit(X_train.iloc[indices], y_train[indices])

Expand Down
6 changes: 4 additions & 2 deletions sktime/forecasting/compose/tests/test_reduce.py
Expand Up @@ -33,7 +33,7 @@
from sktime.forecasting.compose import DirRecTimeSeriesRegressionForecaster
from sktime.forecasting.compose import RecursiveTimeSeriesRegressionForecaster
from sktime.forecasting.compose import DirectTimeSeriesRegressionForecaster
from sktime.performance_metrics.forecasting import smape_loss
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from sktime.utils._testing.forecasting import make_forecasting_problem
from sktime.utils.validation.forecasting import check_fh

Expand Down Expand Up @@ -518,4 +518,6 @@ def test_dirrec_against_recursive_accumulated_error():
preds_recursive = recursive.fit(y_train, fh=fh).predict(fh)
preds_dirrec = dirrec.fit(y_train, fh=fh).predict(fh)

assert smape_loss(y_test, preds_dirrec) < smape_loss(y_test, preds_recursive)
assert mean_absolute_percentage_error(
y_test, preds_dirrec
) < mean_absolute_percentage_error(y_test, preds_recursive)
12 changes: 12 additions & 0 deletions sktime/forecasting/tests/test_theta.py
Expand Up @@ -43,3 +43,15 @@ def test_pred_errors_against_y_test(fh):
for ints in intervals:
assert np.all(y_test > ints["lower"])
assert np.all(y_test < ints["upper"])


def test_forecaster_with_initial_level():
    """Check ThetaForecaster forecasts when ``initial_level`` is supplied."""
    # log1p-transformed airline series, split into train and test parts
    y = np.log1p(load_airline())
    y_train, y_test = temporal_train_test_split(y)
    # forecast every step of the test window: 1, 2, ..., len(y_test)
    fh = np.arange(len(y_test)) + 1

    f = ThetaForecaster(initial_level=0.1, sp=12)
    f.fit(y_train)
    y_pred = f.predict(fh=fh)

    # predictions must be within 5% relative tolerance of the held-out values
    np.testing.assert_allclose(y_pred, y_test, rtol=0.05)
2 changes: 1 addition & 1 deletion sktime/forecasting/theta.py
Expand Up @@ -104,7 +104,6 @@ def __init__(self, initial_level=None, deseasonalize=True, sp=1):

self.sp = sp
self.deseasonalize = deseasonalize

self.deseasonalizer_ = None
self.trend_ = None
self.initial_level_ = None
Expand Down Expand Up @@ -136,6 +135,7 @@ def fit(self, y, X=None, fh=None):
self.deseasonalizer_ = Deseasonalizer(sp=self.sp, model="multiplicative")
y = self.deseasonalizer_.fit_transform(y)

self.initialization_method = "known" if self.initial_level else "estimated"
# fit exponential smoothing forecaster
# find theta lines: Theta lines are just SES + drift
super(ThetaForecaster, self).fit(y, fh=fh)
Expand Down

0 comments on commit 85bd24b

Please sign in to comment.