In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn                import preprocessing 
from sklearn.metrics        import mean_squared_error,mean_absolute_percentage_error,mean_absolute_error
from sklearn.ensemble       import RandomForestRegressor
from sklearn.linear_model   import Lasso,Ridge

from sktime.forecasting.theta               import ThetaForecaster
from sktime.forecasting.fbprophet           import Prophet
from sktime.forecasting.arima               import ARIMA
from sktime.forecasting.base                import ForecastingHorizon
from sktime.forecasting.trend               import PolynomialTrendForecaster,TrendForecaster
from sktime.forecasting.exp_smoothing       import ExponentialSmoothing
from sktime.forecasting.statsforecast       import StatsForecastAutoARIMA
from sktime.forecasting.compose             import TransformedTargetForecaster,ForecastingPipeline
from sktime.transformations.series.detrend  import Deseasonalizer,Detrender
from sktime.forecasting.model_selection     import temporal_train_test_split
from sktime.forecasting.trend               import PiecewiseLinearTrendForecaster
from sktime.forecasting.compose             import make_reduction

from sktime.split import ExpandingGreedySplitter
from sktime.forecasting.compose import make_reduction
from sklearn.ensemble import HistGradientBoostingRegressor
from sktime.transformations.compose import Id, YtoX
from sktime.transformations.series.lag import Lag
from sktime.transformations.series.impute import Imputer
from sktime.datasets import load_airline
from sktime.split import temporal_train_test_split
from sktime.pipeline import Pipeline


from sktime.transformations.series.difference import Differencer

import warnings

In [14]:
# Load the monthly electricity-delivery table; the target is the NE7 grid-level column.
data = pd.read_csv("../../data/monthly/ewz_stromabgabe_netzebenen_stadt_zuerich.csv", index_col=None)

# Parse the timestamps as UTC and drop the timezone so they can be turned into periods.
# NOTE(review): if the raw timestamps carry a local UTC offset, the UTC conversion can
# move a month-start stamp into the previous month -- confirm against the CSV.
data.index = pd.to_datetime(data["Timestamp"], utc=True)
data.index = data.index.tz_localize(None)
data["y"] = data["NE7_GWh"].values
data = data.drop(columns=["Timestamp", "NE5_GWh", "NE7_GWh"])

# Derive the monthly PeriodIndex from the timestamps themselves instead of overwriting
# the index with a synthetic range: a synthetic range silently mislabels rows when the
# file is unsorted, or when gaps and duplicates happen to cancel out in the row count.
month_index = data.index.to_period("M").rename(None)
expected_index = pd.period_range(start=month_index[0], end=month_index[-1], freq="M")
if not month_index.equals(expected_index):
    raise ValueError(
        "Timestamps do not form a sorted, gap-free, duplicate-free monthly series: "
        f"found {len(month_index)} rows but {month_index[0]}..{month_index[-1]} "
        f"spans {len(expected_index)} months."
    )
data.index = month_index

# Split the data.
# No stride is used (step_length=5 stays disabled) so the last split remains completely
# unseen; test_size is fixed by the task.
splitter = ExpandingGreedySplitter(test_size=12, folds=4)  # step_length=5,
splits = list(splitter.split(data))

In [15]:
# Fold 0 of the splitter output is a (train positions, test positions) pair;
# materialise both parts as positional slices of `data` in one step.
y_train, y_test = (data.iloc[positions] for positions in (splits[0][0], splits[0][1]))

In [26]:
# Recursive reduction forecaster: a Ridge regressor wrapped with a 12-step window.
regressor = Ridge()
forecaster = make_reduction(regressor, window_length=12, strategy="recursive")

# Graph pipeline wiring (step name -> inputs):
#   ytox        <- pipeline input "y" (passed in as the step's X)
#   differencer <- output of "ytox"
#   lag         <- output of "ytox"
#   forecaster  <- X: outputs of "lag" and "differencer"; y: pipeline input "y"
pipe = Pipeline()
# NOTE(review): only the step's X edge is wired here; confirm that YtoX yields the
# intended features when it receives no y of its own.
pipe = pipe.add_step(YtoX(), "ytox", edges={"X": "y"})
pipe = pipe.add_step(Differencer(lags=[1,3,4,12]), "differencer", edges={"X": "ytox"})
# NOTE(review): a lag with index_out="original" presumably leaves a missing value at
# the start of the series; Ridge may not accept that -- TODO confirm / add an imputer.
pipe = pipe.add_step(Lag([1], index_out="original"),name="lag",edges={"X": "ytox"})
pipe = pipe.add_step(forecaster,
                  name="forecaster",
                  edges={
                      "X": ["lag","differencer"],
                      "y": "y"
                  })

# FIXME: this call fails with
#   TypeError: Pipeline.fit() missing 1 required positional argument: 'X'
# because X is not given. Author's concern: passing X=y_train here would mean passing
# the target itself (X=X_test=y_test) at predict time.
# NOTE(review): the airline reference cell in this notebook passes both X and y to
# fit and to predict; predict below presumably needs X as well -- TODO confirm.
pipe.fit(y=y_train) #<--- can't pass X=y_train since at predict time i would pass the target X=X_test=y_test
y_pred = pipe.predict(fh=[1, 2, 3])



TypeError: Pipeline.fit() missing 1 required positional argument: 'X'

In [None]:
from sktime.forecasting.compose import make_reduction
from sklearn.ensemble import HistGradientBoostingRegressor
from sktime.transformations.compose import Id, YtoX
from sktime.transformations.series.lag import Lag
from sktime.transformations.series.impute import Imputer
from sktime.datasets import load_airline
from sktime.split import temporal_train_test_split
from sktime.pipeline import Pipeline


# Reference example on the airline dataset: a graph pipeline whose forecaster receives
# features derived from the target series itself.
y = load_airline()
y_train, y_test = temporal_train_test_split(y)
# X is just an alias of the target here; every step below is wired to the "y" input.
X_train = y_train
X_test = y_test

pipeline = Pipeline()

# Pass the target through unchanged so it can be used as a feature column.
pipeline = pipeline.add_step(
    Id(),
    name="identity",
    edges={"X": "y"},
)

# Lag-1 copy of the target, kept on the original index.
pipeline = pipeline.add_step(
    Lag([1], index_out="original"),
    name="lag",
    edges={"X": "y"},
)

# NOTE(review): this step consumes "lag", but no later step consumes "imputer" --
# the forecaster below reads "lag" directly. Either wire it in or drop it.
pipeline = pipeline.add_step(
    Imputer("nearest"),
    name="imputer",
    edges={"X": "lag"},
)

regressor = HistGradientBoostingRegressor()
forecaster = make_reduction(regressor, window_length=12, strategy="recursive")

pipeline = pipeline.add_step(
    forecaster,
    name="forecaster",
    edges={
        "X": ["lag", "identity"],
        "y": "y",
    },
)

# Fit on the training window only. Fitting on the full series `y` would train on the
# hold-out period that is passed to predict() below and would put the forecaster's
# cutoff at the end of the full series instead of the end of the training window.
pipeline.fit(y=y_train, X=X_train)
# NOTE(review): the features at predict time are computed from y_test, i.e. the
# forecaster sees the values it is asked to predict -- fine as an API demo only.
y_pred = pipeline.predict(fh=[1, 2, 3], X=X_test, y=y_test)
