In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scalecast.Forecaster import Forecaster
from scalecast.MVForecaster import MVForecaster
from scalecast import GridGenerator
from scalecast.util import plot_reduction_errors
from statsmodels.tsa.stattools import adfuller

In [None]:
# Default every figure in the notebook to 12x8 inches.
# `sns.set` is only an alias of `sns.set_theme`; seaborn documents
# `set_theme` as the preferred interface, so call it directly.
sns.set_theme(rc={"figure.figsize": (12, 8)})

# Generate scalecast's example hyperparameter grids for the univariate and
# multivariate forecasters.
# NOTE(review): these presumably write Grids.py / MVGrids.py into the working
# directory, which the tuning calls later pick up — confirm.
GridGenerator.get_example_grids()
GridGenerator.get_mv_grids()

In [None]:
# Load the dataset; the "Date" column is parsed into datetimes on read.
df = pd.read_csv("IAH-Dom-Multivariate.csv", parse_dates=["Date"])

# Univariate Forecaster on the PASSENGERS series. No future dates are
# demanded at construction time (require_future_dates=False).
f = Forecaster(
    current_dates=df["Date"],
    y=df["PASSENGERS"],
    require_future_dates=False,
)
f.set_test_length(24)

# Seasonal regressors for "month": sine/cosine encoding only, no raw value.
f.add_seasonal_regressors("month", sincos=True, raw=False)

# NOTE(review): integrate() presumably differences y when a stationarity test
# fails, judged on the training slice only (train_only=True) — confirm against
# the installed scalecast version.
f.integrate(train_only=True)

# Time trend plus polynomial terms of it up to power 3.
# NOTE(review): 't' is presumably the default name add_time_trend() gives the
# trend regressor — confirm.
f.add_time_trend()
f.add_poly_terms("t", pwr=3)

# Display the Forecaster's repr as the cell output.
f

In [None]:
# Candidate external regressors: every column except the target, keyed by date.
df_stationary = df.set_index('Date').drop(columns='PASSENGERS')

# Columns that are never differenced.
exclude = ('Recession', 'post_covid', 'post_vaccine', 'post_delta')

# Number of differences applied to each remaining column, starting at zero.
diffs = dict.fromkeys(
    (col for col in df_stationary if col not in exclude),
    0,
)

for c in diffs:
    series = df_stationary[c]
    while True:
        # Element [1] of adfuller's result is the test's p-value.
        p_value = adfuller(series)[1]
        # Stop as soon as the p-value is no longer >= 0.05. Written as a
        # negation so a NaN p-value also ends the loop.
        if not p_value >= 0.05:
            break
        # Difference the stored column once more; the test is re-run on the
        # differenced values with the leading NaNs removed.
        df_stationary[c] = df_stationary[c].diff()
        series = df_stationary[c].dropna()
        diffs[c] += 1

for k, v in diffs.items():
    print(f'column: {k} differenced: {v} time(s)')

# Differencing leaves NaNs at the start of the frame; drop those rows.
df_stationary = df_stationary.dropna()

In [None]:
# Bring "Date" back as a regular column so it can be passed as date_col.
xvars_df = df_stationary.reset_index()

# Trim the Forecaster's history to start at the earliest date still present
# in the stationary regressors, then ingest those regressors.
f.keep_smaller_history(xvars_df['Date'].min())
f.ingest_Xvars_df(xvars_df, date_col='Date')

In [None]:
# Display the Forecaster's repr to inspect its current state.
f

In [None]:
# Plot the Forecaster object and render the figure.
f.plot()
plt.show()

In [None]:
# Reduce the regressor set held by `f` using the 'shap' method with the 'gbt'
# estimator. No grid search is run (grid_search=False); cross-validation is
# requested with k=3 and dynamic_tuning=6.
# NOTE(review): overwrite=True presumably replaces the Forecaster's regressors
# with the reduced set — confirm against the scalecast docs.
f.reduce_Xvars(
    method='shap',
    estimator='gbt',
    grid_search=False,
    cross_validate=True,
    cvkwargs=dict(k=3),
    dynamic_tuning=6,
    overwrite=True,
)

# Plot the errors recorded during the reduction and render the figure.
plot_reduction_errors(f)
plt.show()

In [None]:
# Keep a handle on the names stored in the Forecaster's `reduced_Xvars`
# attribute, then display them.
selected_vars = f.reduced_Xvars
selected_vars

In [None]:
# Of the selected regressors, keep only those that are columns of the raw
# frame, preserving the order given by selected_vars.
raw_columns = [name for name in selected_vars if name in df.columns]
df_selected = df.set_index('Date')[raw_columns]
df_selected.head()

In [None]:
# Target series: PASSENGERS with a 24-period horizon, sine/cosine month
# regressors (no raw month value) and a diff() applied.
f = Forecaster(
    current_dates=df['Date'],
    y=df['PASSENGERS'],
)
f.generate_future_dates(24)
f.add_seasonal_regressors('month', raw=False, sincos=True)
f.diff()

# Series name -> Forecaster; the target goes in first so it leads the
# positional arguments handed to MVForecaster.
forecasters = {'PASSENGERS': f}

# One Forecaster per selected raw column, each passed through integrate().
for c in df_selected:
    f = Forecaster(
        current_dates=df_selected.index,
        y=df_selected[c],
    )
    f.integrate()
    forecasters[c] = f

# Combine every series into one multivariate forecaster, named by dict key.
mvf = MVForecaster(
    *forecasters.values(),
    names=forecasters.keys(),
)
mvf.set_optimize_on('PASSENGERS')
mvf.set_test_length(24)

# Display the MVForecaster's repr as the cell output.
mvf

In [None]:
# Estimators to run through the multivariate tune/test/forecast pipeline.
models = ('xgboost', 'mlr', 'lightgbm', 'sgd', 'elasticnet', 'svr', 'mlp')

# Tune with cross-validation (k=3, dynamic_tuning=6) and request
# probabilistic forecasts.
# NOTE(review): limit_grid_size=0.25 presumably samples a quarter of each
# hyperparameter grid — confirm against the scalecast docs.
mvf.tune_test_forecast(
    models,
    limit_grid_size=0.25,
    cross_validate=True,
    k=3,
    dynamic_tuning=6,
    probabilistic=True,
)

In [None]:
# Choose the best model by the 'LevelTestSetRMSE' metric.
mvf.set_best_model(determine_best_by='LevelTestSetRMSE')

# Test-set plot for the PASSENGERS series at level, with confidence
# intervals, drawing the best model on top.
mvf.plot_test_set(
    series='PASSENGERS',
    level=True,
    ci=True,
    put_best_on_top=True,
    # Optional: add models=mvf.best_model to pass only the best model.
)
plt.show()

In [None]:
# Forecast plot for the PASSENGERS series at level, with confidence
# intervals, drawing the best model on top.
mvf.plot(
    series='PASSENGERS',
    level=True,
    ci=True,
    put_best_on_top=True,
    # Optional: add models=mvf.best_model to pass only the best model.
)
plt.show()

In [None]:
# Export the 'model_summaries' table from the multivariate forecaster.
results = mvf.export('model_summaries')

# Columns to show: model identity, its settings, and its validation and
# level test-set scores.
summary_columns = [
    'ModelNickname',
    'Series',
    'HyperParams',
    'Lags',
    'ValidationMetricValue',
    'LevelTestSetRMSE',
]
results[summary_columns]

In [None]:
# Preview the first rows of the validation grid exported for the 'xgboost' model.
mvf.export_validation_grid('xgboost').head()