Skip to content

Commit

Permalink
[ENH] DummyRegressor for time series regression (#3968)
Browse files Browse the repository at this point in the history
Fixes #3899.

Adds a `DummyRegressor` for time series regression that wraps the `sklearn` `DummyRegressor` and inherits from the `sktime` `BaseRegressor`.
  • Loading branch information
Badr-Eddine Marani committed Dec 22, 2022
1 parent ca971a2 commit 397f4ee
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 1 deletion.
9 changes: 9 additions & 0 deletions .all-contributorsrc
Expand Up @@ -40,6 +40,15 @@
"video"
]
},
{
"login": "badrmarani",
"name": "Badr-Eddine Marani",
"avatar_url": "https://avatars.githubusercontent.com/badrmarani",
"profile": "https://github.com/badrmarani",
"contributions": [
"code"
]
},
{
"login": "sajaysurya",
"name": "Sajaysurya Ganesh",
Expand Down
3 changes: 3 additions & 0 deletions CODEOWNERS
Validating CODEOWNERS rules …
Expand Up @@ -3,6 +3,9 @@

* @fkiraly @aiwalter @guzalbulatova


sktime/regression/dummy/ @badrmarani

sktime/classification/dictionary_based/_boss.py @patrickzib @MatthewMiddlehurst @TonyBagnall
sktime/classification/dictionary_based/_cboss.py @patrickzib @MatthewMiddlehurst @TonyBagnall
sktime/classification/dictionary_based/_muse.py @patrickzib @MatthewMiddlehurst @TonyBagnall
Expand Down
23 changes: 22 additions & 1 deletion docs/source/api_reference/regression.rst
Expand Up @@ -8,7 +8,6 @@ The :mod:`sktime.regression` module contains algorithms and composition tools fo
All current sktime Regressors can be listed using the
``sktime.registry.all_estimators`` function.


Composition
-----------

Expand All @@ -32,6 +31,28 @@ Deep learning
CNNRegressor
TapNetRegressor

Distance-based
--------------

.. currentmodule:: sktime.regression.distance_based

.. autosummary::
:toctree: auto_generated/
:template: class.rst

KNeighborsTimeSeriesRegressor

Dummy
-----

.. currentmodule:: sktime.regression.dummy

.. autosummary::
:toctree: auto_generated/
:template: class.rst

DummyRegressor

Interval-based
--------------

Expand Down
7 changes: 7 additions & 0 deletions sktime/regression/dummy/__init__.py
@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""Dummy regressor."""
# Names exported by ``from sktime.regression.dummy import *``.
__all__ = [
"DummyRegressor",
]

from sktime.regression.dummy._dummy import DummyRegressor
93 changes: 93 additions & 0 deletions sktime/regression/dummy/_dummy.py
@@ -0,0 +1,93 @@
# -*- coding: utf-8 -*-
"""Dummy time series regressor."""

# Author credited for this module.
__author__ = ["Badr-Eddine Marani"]
# Names exported by ``from ... import *`` on this module.
__all__ = ["DummyRegressor"]

import numpy as np
from sklearn.dummy import DummyRegressor as SklearnDummyRegressor

from sktime.regression.base import BaseRegressor


class DummyRegressor(BaseRegressor):
    """DummyRegressor makes predictions that ignore the input features.

    This regressor serves as a simple baseline to compare against other more
    complex regressors.
    The specific behavior of the baseline is selected with the `strategy`
    parameter.

    All strategies make predictions that ignore the input feature values passed
    as the `X` argument to `fit` and `predict`. The predictions, however,
    typically depend on values observed in the `y` parameter passed to `fit`.

    Function-identical to `sklearn.dummy.DummyRegressor`, which is called
    inside.

    Parameters
    ----------
    strategy : {"mean", "median", "quantile", "constant"}, default="mean"
        Strategy to use to generate predictions.

        * "mean": always predicts the mean of the training set
        * "median": always predicts the median of the training set
        * "quantile": always predicts a specified quantile of the training set,
          provided with the quantile parameter.
        * "constant": always predicts a constant value that is provided by
          the user.
    constant : int or float or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.
    quantile : float in [0.0, 1.0], default=None
        The quantile to predict using the "quantile" strategy. A quantile of
        0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the
        maximum.
    """

    _tags = {
        "X_inner_mtype": "nested_univ",
        "capability:missing_values": True,
        "capability:unequal_length": True,
        "capability:multivariate": True,
    }

    def __init__(self, strategy="mean", constant=None, quantile=None):
        self.strategy = strategy
        self.constant = constant
        self.quantile = quantile
        # The wrapped sklearn estimator does the actual work; it is configured
        # with exactly the hyper-parameters stored above.
        self.sklearn_dummy_regressor = SklearnDummyRegressor(
            strategy=strategy, constant=constant, quantile=quantile
        )
        super().__init__()

    def _fit(self, X, y) -> "DummyRegressor":
        """Fit the dummy regressor.

        Parameters
        ----------
        X : sktime-format pandas dataframe with shape (n, d)
            Training series. Only ``X.shape`` is read; the values are ignored.
        y : array-like, shape = [n_instances]
            The target values.

        Returns
        -------
        self : reference to self.
        """
        # sklearn's fit expects a feature array; an all-zeros placeholder with
        # the shape of X is passed because the values are never used.
        self.sklearn_dummy_regressor.fit(np.zeros(X.shape), y)
        return self

    def _predict(self, X) -> np.ndarray:
        """Perform regression on test vectors X.

        Parameters
        ----------
        X : sktime-format pandas dataframe with shape (n, d)
            Series to predict for. Only ``X.shape`` is read; the values are
            ignored.

        Returns
        -------
        y : np.ndarray
            Predictions of target values for X.
        """
        # Same all-zeros placeholder as in ``_fit``.
        return self.sklearn_dummy_regressor.predict(np.zeros(X.shape))
2 changes: 2 additions & 0 deletions sktime/regression/dummy/tests/__init__.py
@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
"""Dummy regressor test code."""
15 changes: 15 additions & 0 deletions sktime/regression/dummy/tests/test_dummy.py
@@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
"""Test function of DummyRegressor."""

from sktime.datasets import load_unit_test
from sktime.regression.dummy import DummyRegressor


def test_dummy_regressor():
    """Check that a default DummyRegressor predicts 1.5 on the unit-test data."""
    train_X, train_y = load_unit_test(split="train", return_type="numpy3D")
    # Only the test features are needed; the test targets are discarded.
    test_X, _ = load_unit_test(split="test", return_type="numpy3D")

    regressor = DummyRegressor()
    regressor.fit(train_X, train_y)
    predictions = regressor.predict(test_X)

    # Every prediction must equal the same constant value.
    assert (predictions == 1.5).all()

0 comments on commit 397f4ee

Please sign in to comment.