Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG] fix behaviour of FourierFeatures with pd.DatetimeIndex #3606

Merged
merged 9 commits into from Nov 10, 2022
46 changes: 42 additions & 4 deletions sktime/transformations/series/fourier.py
Expand Up @@ -6,6 +6,7 @@

import warnings
from copy import deepcopy
from distutils.log import warn
from typing import List, Union

import numpy as np
Expand Down Expand Up @@ -45,6 +46,11 @@ class FourierFeatures(BaseTransformer):
For example, if sp_list = [7, 365] and fourier_terms_list = [3, 9], the seasonal
frequency of 7 will have 3 fourier terms and the seasonal frequency of 365
will have 9 fourier terms.
freq : str, optional, default = None
Only used when X has a pd.DatetimeIndex without a specified frequency.
Specifies the frequency of the index of your data. The string should
match a pandas offset alias:
https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases

References
----------
Expand Down Expand Up @@ -78,7 +84,10 @@ class FourierFeatures(BaseTransformer):
# this can be a Panel mtype even if transform-input is Series, vectorized
"y_inner_mtype": "None", # which mtypes do _fit/_predict support for y?
"requires_y": False, # does y need to be passed in fit?
"enforce_index_type": [pd.PeriodIndex], # index type that needs to be enforced
"enforce_index_type": [
pd.PeriodIndex,
pd.DatetimeIndex,
], # index type that needs to be enforced
# in X/y
"fit_is_empty": False, # is fit empty and can be skipped? Yes = True
"X-y-must-have-same-index": False, # can estimator handle different X/y index?
Expand All @@ -97,9 +106,12 @@ class FourierFeatures(BaseTransformer):
"python_version": None, # PEP 440 python version specifier to limit versions
}

def __init__(self, sp_list: List[Union[int, float]], fourier_terms_list: List[int]):
def __init__(
self, sp_list: List[Union[int, float]], fourier_terms_list: List[int], freq=None
):
self.sp_list = sp_list
self.fourier_terms_list = fourier_terms_list
self.freq = freq

if len(self.sp_list) != len(self.fourier_terms_list):
raise ValueError(
Expand Down Expand Up @@ -127,6 +139,11 @@ def _fit(self, X, y=None):
Data to fit transform to
y : Series or Panel of mtype y_inner_mtype, default=None
Additional data, e.g., labels for transformation
freq : str, optional, default = None
Not an argument of ``_fit``; this is the constructor parameter ``self.freq``.
Only used when X has a pd.DatetimeIndex without a specified frequency.
Specifies the frequency of the index of your data. The string should
match a pandas offset alias:
https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases

Returns
-------
Expand All @@ -149,11 +166,26 @@ def _fit(self, X, y=None):
"exists from other seasonal period, fourier term pairs."
)

# store the integer form of the minimum date in the period index
# Copy X to avoid global side effects
temp_idx = X.index
eenticott-shell marked this conversation as resolved.
Show resolved Hide resolved
X = deepcopy(X)
ltsaprounis marked this conversation as resolved.
Show resolved Hide resolved

if isinstance(X.index, pd.DatetimeIndex):
    # Resolve the frequency by priority: the index's own freq, then the
    # user-supplied ``self.freq``, then whatever pandas can infer.
    self.freq_ = X.index.freq or self.freq or pd.infer_freq(X.index)
    if self.freq_ is None:
        # Without a frequency the index cannot be converted to periods, so
        # fail loudly here (the exception must be raised, not just built).
        raise ValueError("X has no known frequency and none is supplied")
    # Only warn when the user actually supplied a frequency and the one
    # stored on the index is being used instead of it.
    if (
        self.freq is not None
        and self.freq_ == X.index.freq
        and self.freq_ != self.freq
    ):
        # warnings.warn emits a real Python warning; distutils.log.warn
        # would only write a log line.
        warnings.warn(
            f"Using frequency from index: {X.index.freq}, which "
            f"does not match the frequency given: {self.freq}."
        )
    X.index = X.index.to_period(self.freq_)
# this is used to make sure that time t is calculated with reference to
# the data passed on fit
# store the integer form of the minimum date in the period index
self.min_t_ = np.min(X.index.astype(int))

X.index = temp_idx
return self

def _transform(self, X, y=None):
Expand All @@ -174,6 +206,10 @@ def _transform(self, X, y=None):
transformed version of X
"""
X_transformed = deepcopy(X)

if isinstance(X.index, pd.DatetimeIndex):
X_transformed.index = X_transformed.index.to_period(self.freq_)

# get the integer form of the PeriodIndex
int_index = X_transformed.index.astype(int) - self.min_t_

Expand All @@ -184,6 +220,8 @@ def _transform(self, X, y=None):
X_transformed[f"sin_{sp}_{k}"] = np.sin(int_index * 2 * k * np.pi / sp)
X_transformed[f"cos_{sp}_{k}"] = np.cos(int_index * 2 * k * np.pi / sp)

# Ensure transformed X has same index
X_transformed.index = X.index
eenticott-shell marked this conversation as resolved.
Show resolved Hide resolved
return X_transformed

@classmethod
Expand Down
22 changes: 22 additions & 0 deletions sktime/transformations/series/tests/test_fourier.py
@@ -1,15 +1,20 @@
# -*- coding: utf-8 -*-
"""Tests for the FourierFeatures transformer."""

from copy import deepcopy

import numpy as np
import pandas as pd
import pytest
from pandas.testing import assert_index_equal
from pandas.util.testing import assert_frame_equal

from sktime.datasets import load_airline
from sktime.transformations.series.fourier import FourierFeatures

Y = load_airline()
Y_datetime = deepcopy(Y)
Y_datetime.index = Y_datetime.index.to_timestamp(freq="M")


def test_fourier_list_length_missmatch():
Expand Down Expand Up @@ -62,6 +67,23 @@ def test_fit_transform_outputs():
assert_frame_equal(y_transformed, expected)


def test_fit_transform_datetime_outputs():
    """Check fit_transform output for input indexed by a pd.DatetimeIndex.

    The first three observations are transformed with a single seasonal
    period of 12 and two Fourier terms; the result must carry the expected
    sin/cos columns and keep the original DatetimeIndex.
    """
    y = Y_datetime.iloc[:3]
    transformer = FourierFeatures(sp_list=[12], fourier_terms_list=[2])
    y_transformed = transformer.fit_transform(y)

    # Build the expected columns in the order the transformer emits them:
    # sin then cos for each Fourier term k.
    expected_columns = {}
    for k in (1, 2):
        expected_columns[f"sin_12_{k}"] = [
            np.sin(2 * k * np.pi * i / 12) for i in range(3)
        ]
        expected_columns[f"cos_12_{k}"] = [
            np.cos(2 * k * np.pi * i / 12) for i in range(3)
        ]
    expected = y.to_frame().assign(**expected_columns)

    assert_frame_equal(y_transformed, expected)
    assert_index_equal(y_transformed.index, y.index)


def test_fit_transform_behaviour():
"""Tests that the transform method evaluates time steps passed based on X in fit."""
transformer = FourierFeatures(sp_list=[12], fourier_terms_list=[2])
Expand Down