In [2]:
from sktime.datasets import load_airline
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.pipeline import Pipeline
from sktime.transformations.series.difference import Differencer
from sktime.transformations.series.summarize import WindowSummarizer
from sktime.transformations.series.date import DateTimeFeatures
from sktime.transformations.series.dropna import DropNA
from sktime.transformations.series.subset import IndexSubset
from sktime.transformations.compose import FeatureUnion
from sklearn.linear_model import Ridge
from unittest.mock import MagicMock, patch
import numpy as np
import pandas as pd

$$y_t = \sum_{i=1}^n w_{i} y_{t-i} + \sum_j^m w_j D_{j,t} + \sum_{k}^l w_{k} S_{k,t-1}$$

autoregressive terms starting from $t-1$  
$D$ datetime features, for the day to be predicted $t$  
$S$ window summarizer, last included value at $t-1$ with some window-length and lag,

In [3]:
n = 20
y = load_airline()
y_train, y_test = temporal_train_test_split(y)
y_train = pd.Series(np.arange(n))
y_train.index = y.index[:n]

ridge = Ridge()
mock = MagicMock()
forecaster = make_reduction(ridge, window_length=12, strategy="recursive")

kwargs = {
    "lag_feature": {
        "sum": [[0, 6]],  # [lag,window_length], if lag=0 includes most current
    }
}

p = Pipeline()
p = p.add_step(WindowSummarizer(**kwargs), "window", edges={"X": "y"})

p = p.add_step(
    forecaster,
    name="forecaster",
    edges={
        "X": ["window"],
        "y": ["y"],
    },
)

with patch.object(Ridge, "fit", mock):
    p.fit(y=y_train)

fit_args = mock.call_args[0]
print(fit_args)



(array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., nan,
        nan, nan, nan, nan, 15., 21., 27., 33., 39., 45., 51.],
       [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., nan,
        nan, nan, nan, 15., 21., 27., 33., 39., 45., 51., 57.],
       [ 2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., nan,
        nan, nan, 15., 21., 27., 33., 39., 45., 51., 57., 63.],
       [ 3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14., nan,
        nan, 15., 21., 27., 33., 39., 45., 51., 57., 63., 69.],
       [ 4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14., 15., nan,
        15., 21., 27., 33., 39., 45., 51., 57., 63., 69., 75.],
       [ 5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14., 15., 16., 15.,
        21., 27., 33., 39., 45., 51., 57., 63., 69., 75., 81.],
       [ 6.,  7.,  8.,  9., 10., 11., 12., 13., 14., 15., 16., 17., 21.,
        27., 33., 39., 45., 51., 57., 63., 69., 75., 81., 87.],
       [ 7.,  8.,  9., 10., 11., 12., 13

In [1]:
n = 20
y = load_airline()
y_train, y_test = temporal_train_test_split(y)
y_train = pd.Series(np.arange(n))
y_train.index = y.index[:n]

ridge = Ridge()
mock = MagicMock()
forecaster = make_reduction(ridge, window_length=12, strategy="recursive")

kwargs = {
    "lag_feature": {
        "sum": [[0, 6]],  # [lag,window_length], if lag=0 includes most current
    }
}

p = Pipeline()
p = p.add_step(WindowSummarizer(**kwargs), "window", edges={"X": "y"})
p = p.add_step(DropNA(), "dropnan", edges={"X": "window"})
p = p.add_step(
    IndexSubset("remove"), "subset", edges={"X": "y", "y": "dropna"}
)  # Subsets X from y.index
p = p.add_step(
    forecaster,
    name="forecaster",
    edges={
        "X": ["subset"],
        "y": ["subset"],
    },
)

with patch.object(Ridge, "fit", mock):
    p.fit(y=y_train)

fit_args = mock.call_args[0]
print(fit_args)

# p.fit(y=y_train)



Exception: Required Input does not exist

In [2]:
y_train

# p = p.add_step(DateTimeFeatures(ts_freq="M"),"datetime", edges={"X": "y"})
# p = p.add_step(FeatureUnion([WindowSummarizer(**kwargs),DateTimeFeatures(ts_freq="M")]), "union", edges={"X": "y"})
# p = p.add_step(DropNA(), "dropna", edges={"X": "union"})

Period
1949-01     0
1949-02     1
1949-03     2
1949-04     3
1949-05     4
1949-06     5
1949-07     6
1949-08     7
1949-09     8
1949-10     9
1949-11    10
1949-12    11
1950-01    12
1950-02    13
1950-03    14
1950-04    15
1950-05    16
1950-06    17
1950-07    18
1950-08    19
Freq: M, dtype: int32

In [None]:
# p = p.add_step(FeatureUnion([WindowSummarizer(**kwargs),DateTimeFeatures(ts_freq="M")]), "union", edges={"X": "y"})

In [None]:
fit_args[0]

array([0.000e+00, 1.000e+00, 2.000e+00, 3.000e+00, 4.000e+00, 5.000e+00,
       6.000e+00, 7.000e+00, 8.000e+00, 9.000e+00, 1.000e+01, 1.100e+01,
       1.949e+03, 1.949e+03, 1.949e+03, 1.949e+03, 1.949e+03, 1.949e+03,
       1.949e+03, 1.949e+03, 1.949e+03, 1.949e+03, 1.949e+03, 1.949e+03,
       1.000e+00, 2.000e+00, 3.000e+00, 4.000e+00, 5.000e+00, 6.000e+00,
       7.000e+00, 8.000e+00, 9.000e+00, 1.000e+01, 1.100e+01, 1.200e+01,
             nan,       nan,       nan,       nan,       nan, 1.500e+01,
       2.100e+01, 2.700e+01, 3.300e+01, 3.900e+01, 4.500e+01, 5.100e+01])

In [None]:
idx = pd.period_range(start=y_test.index[0], end=y_test.index[-1], freq="M")
horizon = ForecastingHorizon(idx, is_relative=False)
y_pred = p.predict(fh=horizon)

y_pred.plot()
y_test.plot()
y_train.plot()

NotFittedError: This Ridge instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [None]:
from sktime.datasets import load_airline
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.pipeline import Pipeline
from sktime.transformations.series.summarize import WindowSummarizer
from sktime.transformations.series.subset import IndexSubset

from sklearn.linear_model import Ridge
from unittest.mock import MagicMock, patch
import numpy as np
import pandas as pd
from sktime.transformations.series.dropna import DropNA

y = load_airline()
y_train, y_test = temporal_train_test_split(y)

y_train = pd.Series(np.arange(20))
y_train.index = y.index[:20]

forecaster = make_reduction(Ridge(), window_length=5, strategy="recursive")

pipe = Pipeline()
kwargs = {"lag_feature": {"sum": [[0, 6]]}}
pipe = pipe.add_step(WindowSummarizer(**kwargs), "window", edges={"X": "y"})
pipe = pipe.add_step(DropNA(), "nan-dropped", edges={"X": "window"})
pipe = pipe.add_step(IndexSubset("remove"), "subset", edges={"X": "y", "y":"nan-dropped"})
pipe = pipe.add_step(forecaster,name="forecaster",edges={"X": ["subset"],"y": ["subset"]})


mock = MagicMock()
with patch.object(Ridge, "fit", mock):
    pipe.fit(y=y_train)

print(mock.call_args[0])



(array([[ 5.,  6.,  7.,  8.,  9.,  5.,  6.,  7.,  8.,  9.],
       [ 6.,  7.,  8.,  9., 10.,  6.,  7.,  8.,  9., 10.],
       [ 7.,  8.,  9., 10., 11.,  7.,  8.,  9., 10., 11.],
       [ 8.,  9., 10., 11., 12.,  8.,  9., 10., 11., 12.],
       [ 9., 10., 11., 12., 13.,  9., 10., 11., 12., 13.],
       [10., 11., 12., 13., 14., 10., 11., 12., 13., 14.],
       [11., 12., 13., 14., 15., 11., 12., 13., 14., 15.],
       [12., 13., 14., 15., 16., 12., 13., 14., 15., 16.],
       [13., 14., 15., 16., 17., 13., 14., 15., 16., 17.],
       [14., 15., 16., 17., 18., 14., 15., 16., 17., 18.]]), array([10., 11., 12., 13., 14., 15., 16., 17., 18., 19.]))
