diff --git a/sktime/transformations/series/fourier.py b/sktime/transformations/series/fourier.py index 8bfa17d625a..ce235c2297e 100644 --- a/sktime/transformations/series/fourier.py +++ b/sktime/transformations/series/fourier.py @@ -6,6 +6,7 @@ import warnings from copy import deepcopy +from distutils.log import warn from typing import List, Union import numpy as np @@ -45,6 +46,11 @@ class FourierFeatures(BaseTransformer): For example, if sp_list = [7, 365] and fourier_terms_list = [3, 9], the seasonal frequency of 7 will have 3 fourier terms and the seasonal frequency of 365 will have 9 fourier terms. + freq : str, optional, default = None + Only used when X has a pd.DatetimeIndex without a specified frequency. + Specifies the frequency of the index of your data. The string should + match a pandas offset alias: + https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases References ---------- @@ -78,7 +84,10 @@ class FourierFeatures(BaseTransformer): # this can be a Panel mtype even if transform-input is Series, vectorized "y_inner_mtype": "None", # which mtypes do _fit/_predict support for y? "requires_y": False, # does y need to be passed in fit? - "enforce_index_type": [pd.PeriodIndex], # index type that needs to be enforced + "enforce_index_type": [ + pd.PeriodIndex, + pd.DatetimeIndex, + ], # index type that needs to be enforced # in X/y "fit_is_empty": False, # is fit empty and can be skipped? Yes = True "X-y-must-have-same-index": False, # can estimator handle different X/y index? @@ -97,9 +106,12 @@ class FourierFeatures(BaseTransformer): "python_version": None, # PEP 440 python version specifier to limit versions } - def __init__(self, sp_list: List[Union[int, float]], fourier_terms_list: List[int]): + def __init__( + self, sp_list: List[Union[int, float]], fourier_terms_list: List[int], freq=None + ): self.sp_list = sp_list self.fourier_terms_list = fourier_terms_list + self.freq = freq if len(self.sp_list) != len(self.fourier_terms_list): raise ValueError( @@ -127,6 +139,11 @@ def _fit(self, X, y=None): Data to fit transform to y : Series or Panel of mtype y_inner_mtype, default=None Additional data, e.g., labels for transformation + freq : str, optional, default = None + Only used when X has a pd.DatetimeIndex without a specified frequency. + Specifies the frequency of the index of your data. The string should + match a pandas offset alias: + https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases Returns ------- @@ -149,9 +166,22 @@ def _fit(self, X, y=None): "exists from other seasonal period, fourier term pairs." ) - # store the integer form of the minimum date in the prediod index + X = deepcopy(X) + + if isinstance(X.index, pd.DatetimeIndex): + # Chooses first non None value + self.freq_ = X.index.freq or self.freq or pd.infer_freq(X.index) + if self.freq_ is None: + ValueError("X has no known frequency and none is supplied") + if self.freq_ == X.index.freq and self.freq_ != self.freq: + warn( + f"Using frequency from index: {X.index.freq}, which \ + does not match the frequency given:{self.freq}." + ) + X.index = X.index.to_period(self.freq_) # this is used to make sure that time t is calculated with reference to # the data passed on fit + # store the integer form of the minimum date in the prediod index self.min_t_ = np.min(X.index.astype(int)) return self @@ -174,6 +204,10 @@ def _transform(self, X, y=None): transformed version of X """ X_transformed = deepcopy(X) + + if isinstance(X.index, pd.DatetimeIndex): + X_transformed.index = X_transformed.index.to_period(self.freq_) + # get the integer form of the PeriodIndex int_index = X_transformed.index.astype(int) - self.min_t_ @@ -184,6 +218,8 @@ def _transform(self, X, y=None): X_transformed[f"sin_{sp}_{k}"] = np.sin(int_index * 2 * k * np.pi / sp) X_transformed[f"cos_{sp}_{k}"] = np.cos(int_index * 2 * k * np.pi / sp) + # Ensure transformed X has same index + X_transformed.index = deepcopy(X.index) return X_transformed @classmethod diff --git a/sktime/transformations/series/tests/test_fourier.py b/sktime/transformations/series/tests/test_fourier.py index cbca0520bd5..45241d91ed6 100644 --- a/sktime/transformations/series/tests/test_fourier.py +++ b/sktime/transformations/series/tests/test_fourier.py @@ -1,15 +1,20 @@ # -*- coding: utf-8 -*- """Tests for the FourierFeatures transformer.""" +from copy import deepcopy + import numpy as np import pandas as pd import pytest +from pandas.testing import assert_index_equal from pandas.util.testing import assert_frame_equal from sktime.datasets import load_airline from sktime.transformations.series.fourier import FourierFeatures Y = load_airline() +Y_datetime = deepcopy(Y) +Y_datetime.index = Y_datetime.index.to_timestamp(freq="M") def test_fourier_list_length_missmatch(): @@ -62,6 +67,23 @@ def test_fit_transform_outputs(): assert_frame_equal(y_transformed, expected) +def test_fit_transform_datetime_outputs(): + """Tests that we get expected outputs when the input has a pd.DatetimeIndex.""" + y = Y_datetime.iloc[:3] + y_transformed = FourierFeatures(sp_list=[12], fourier_terms_list=[2]).fit_transform( + y + ) + expected = ( + y.to_frame() + .assign(sin_12_1=[np.sin(2 * np.pi * i / 12) for i in range(3)]) + .assign(cos_12_1=[np.cos(2 * np.pi * i / 12) for i in range(3)]) + .assign(sin_12_2=[np.sin(4 * np.pi * i / 12) for i in range(3)]) + .assign(cos_12_2=[np.cos(4 * np.pi * i / 12) for i in range(3)]) + ) + assert_frame_equal(y_transformed, expected) + assert_index_equal(y_transformed.index, y.index) + + def test_fit_transform_behaviour(): """Tests that the transform method evaluates time steps passed based on X in fit.""" transformer = FourierFeatures(sp_list=[12], fourier_terms_list=[2])