In [5]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
c:\Users\Joaquín Amat\Documents\GitHub\skforecast


In [6]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import TargetEncoder
from sklearn.preprocessing import FunctionTransformer
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn.pipeline import make_pipeline
from feature_engine.datetime import DatetimeFeatures
from feature_engine.timeseries.forecasting import WindowFeatures
from feature_engine.timeseries.forecasting import LagFeatures

from skforecast.datasets import fetch_dataset
from skforecast.recursive import ForecasterRecursive

# Global plot appearance: apply the style sheet first, then override the line
# width (order matters, since the style sheet sets its own defaults).
plt.style.use('fivethirtyeight')
plt.rcParams.update({'lines.linewidth': 1.5})

# ANSI escape prefix: bold (SGR 1) + 256-colour foreground index 208.
color = '\033[1m\033[38;5;208m'

In [7]:
# Download data
# ==============================================================================
# Fetch the bike sharing dataset (raw=False) and keep only the four columns
# used below.
data = (
    fetch_dataset(name='bike_sharing', raw=False)
    .loc[:, ['users', 'holiday', 'temp', 'windspeed']]
)
data.head(3)

bike_sharing
------------
Hourly usage of the bike share system in the city of Washington D.C. during the
years 2011 and 2012. In addition to the number of users per hour, information
about weather conditions and holidays is available.
Fanaee-T,Hadi. (2013). Bike Sharing Dataset. UCI Machine Learning Repository.
https://doi.org/10.24432/C5W894.
Shape of the dataset: (17544, 11)


Unnamed: 0_level_0,users,holiday,temp,windspeed
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2011-01-01 00:00:00,16.0,0.0,9.84,0.0
2011-01-01 01:00:00,40.0,0.0,9.02,0.0
2011-01-01 02:00:00,32.0,0.0,9.02,0.0


In [8]:
# Build calendar, rolling-window and lag features for the exogenous variables
# ==============================================================================

# Calendar features (day of week, hour) extracted from the DataFrame index.
calendar_transformer = DatetimeFeatures(
    variables='index',
    features_to_extract=['day_of_week', 'hour'],
    drop_original=False,
)

# 24-hour rolling mean of temperature and wind speed.
wf_transformer = WindowFeatures(
    variables=["temp", "windspeed"],
    window=["24h"],
    functions=["mean"],
    freq="h",
    missing_values="ignore",
    drop_na=False,
)

# Lags 1 through 5 of temperature and wind speed.
lag_transformer = LagFeatures(
    variables=["temp", "windspeed"],
    periods=list(range(1, 6)),
)

# Chain the steps in order: calendar -> rolling windows -> lags.
exog_transformer = make_pipeline(
    calendar_transformer, wf_transformer, lag_transformer
)

exog_transformer


In [9]:
# Fit the pipeline on `data` and preview the first five transformed rows.
# The earliest rows contain NaN in the window/lag columns because there is
# not yet enough history to fill them.
exog_transformer.fit_transform(X=data).head(n=5)

Unnamed: 0_level_0,users,holiday,temp,windspeed,day_of_week,hour,temp_window_24h_mean,windspeed_window_24h_mean,temp_lag_1,windspeed_lag_1,temp_lag_2,windspeed_lag_2,temp_lag_3,windspeed_lag_3,temp_lag_4,windspeed_lag_4,temp_lag_5,windspeed_lag_5
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2011-01-01 00:00:00,16.0,0.0,9.84,0.0,5,0,,,,,,,,,,,,
2011-01-01 01:00:00,40.0,0.0,9.02,0.0,5,1,9.84,0.0,9.84,0.0,,,,,,,,
2011-01-01 02:00:00,32.0,0.0,9.02,0.0,5,2,9.43,0.0,9.02,0.0,9.84,0.0,,,,,,
2011-01-01 03:00:00,13.0,0.0,9.84,0.0,5,3,9.293333,0.0,9.02,0.0,9.02,0.0,9.84,0.0,,,,
2011-01-01 04:00:00,1.0,0.0,9.84,0.0,5,4,9.43,0.0,9.84,0.0,9.02,0.0,9.02,0.0,9.84,0.0,,
