In [1]:
import time
import numpy as np
import pandas as pd
# from sklearn import metrics

from pycaret.datasets import get_data
from pycaret.internal.pycaret_experiment import TimeSeriesExperiment

from sktime.utils.plotting import plot_series

In [2]:
y = get_data('airline', verbose=False)

In [3]:
fh = 12 # or alternately fh = np.arange(1,13)
fold = 3

# Available Models

In [4]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh)
exp.models()

Unnamed: 0,Description,Value
0,session_id,3102
1,Original Data,"(144, 1)"
2,Missing Values,False
3,Transformed Train Set,"(132,)"
4,Transformed Test Set,"(12,)"
5,Fold Generator,ExpandingWindowSplitter
6,Fold Number,3
7,CPU Jobs,-1
8,Use GPU,False
9,Log Experiment,False


Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
naive,Naive Forecaster,sktime.forecasting.naive.NaiveForecaster,True
snaive,Seasonal Naive Forecaster,sktime.forecasting.naive.NaiveForecaster,True
polytrend,Polynomial Trend Forecaster,sktime.forecasting.trend.PolynomialTrendForeca...,True
arima,ARIMA,sktime.forecasting.arima.ARIMA,True
auto_arima,Auto ARIMA,sktime.forecasting.arima.AutoARIMA,True
exp_smooth,Exponential Smoothing,sktime.forecasting.exp_smoothing.ExponentialSm...,True
ets,ETS,sktime.forecasting.ets.AutoETS,True
theta,Theta Forecaster,sktime.forecasting.theta.ThetaForecaster,True
tbats,TBATS,sktime.forecasting.tbats.TBATS,False
bats,BATS,sktime.forecasting.bats.BATS,False


# Plotting Data

In [5]:
# Without any argument, this will plot the original dataset
exp.plot_model()

In [6]:
# Without an estimator argument, this will plot the original dataset
exp.plot_model(plot="ts")

In [7]:
# ACF and PACF for the original dataset
exp.plot_model(plot="acf")

# NOTE: you can customize the plots with kwargs - e.g. number of lags, figure size (width, height), etc
# data_kwargs such as `nlags` are passed to the underlying functon that gets the ACF values
# figure kwargs such as `fig_size` & `fig_template` are passed to plotly and can have any value that plotly accepts
exp.plot_model(plot="pacf", data_kwargs={'nlags':36, }, fig_kwargs={'fig_size': [800, 500], 'fig_template': 'simple_white'})

In [8]:
exp.plot_model(plot="decomp_classical")
exp.plot_model(plot="decomp_classical", data_kwargs={'type': 'multiplicative'})
exp.plot_model(plot="decomp_stl")

In [9]:
# Show the train-test splits on the dataset
# Internally split - len(fh) as test set, remaining used as test set
exp.plot_model(plot="train_test_split")

# Show the Cross Validation splits inside the train set
exp.plot_model(plot="cv")

In [10]:
# Plot diagnostics
exp.plot_model(plot="diagnostics")

# Tests

In [11]:
exp.check_stats(alpha = 0.2)

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Summary,Statistics,Length,,144.0
1,Summary,Statistics,Mean,,280.298611
2,Summary,Statistics,Median,,265.5
3,Summary,Statistics,Standard Deviation,,119.966317
4,Summary,Statistics,Variance,,14391.917201
5,Summary,Statistics,Kurtosis,,-0.364942
6,Summary,Statistics,Skewness,,0.58316
7,Summary,Statistics,# Distinct Values,,118.0
8,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.2, 'K': 24}",1606.083817
9,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.2, 'K': 48}",1933.155822


In [12]:
# Options are: 'all', 'summary', 'white_noise' 'stationarity', 'adf', 'kpss', 'normality'
exp.check_stats(test="summary")

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Summary,Statistics,Length,,144.0
1,Summary,Statistics,Mean,,280.298611
2,Summary,Statistics,Median,,265.5
3,Summary,Statistics,Standard Deviation,,119.966317
4,Summary,Statistics,Variance,,14391.917201
5,Summary,Statistics,Kurtosis,,-0.364942
6,Summary,Statistics,Skewness,,0.58316
7,Summary,Statistics,# Distinct Values,,118.0


In [13]:
# Setting denotes alpha value used (for most tests). For white noise, it denotes the lags used to test
exp.check_stats(test='stationarity')

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Stationarity,ADF,Stationarity,{'alpha': 0.05},False
1,Stationarity,ADF,p-value,{'alpha': 0.05},0.99188
2,Stationarity,ADF,Test Statistic,{'alpha': 0.05},0.815369
3,Stationarity,ADF,Critical Value 1%,{'alpha': 0.05},-3.481682
4,Stationarity,ADF,Critical Value 5%,{'alpha': 0.05},-2.884042
5,Stationarity,ADF,Critical Value 10%,{'alpha': 0.05},-2.57877
6,Stationarity,KPSS,Trend Stationarity,{'alpha': 0.05},True
7,Stationarity,KPSS,p-value,{'alpha': 0.05},0.1
8,Stationarity,KPSS,Test Statistic,{'alpha': 0.05},0.09615
9,Stationarity,KPSS,Critical Value 10%,{'alpha': 0.05},0.119


In [14]:
# For white noise, Setting it denotes the lags used to test
exp.check_stats(test='white_noise')

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.05, 'K': 24}",1606.083817
1,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.05, 'K': 48}",1933.155822
2,White Noise,Ljung-Box,p-value,"{'alpha': 0.05, 'K': 24}",0.0
3,White Noise,Ljung-Box,p-value,"{'alpha': 0.05, 'K': 48}",0.0
4,White Noise,Ljung-Box,White Noise,"{'alpha': 0.05, 'K': 24}",False
5,White Noise,Ljung-Box,White Noise,"{'alpha': 0.05, 'K': 48}",False


In [15]:
# You can change alpha if needed (would not recommend though)
exp.check_stats(test='stationarity', alpha = 0.2)

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Stationarity,ADF,Stationarity,{'alpha': 0.2},False
1,Stationarity,ADF,p-value,{'alpha': 0.2},0.99188
2,Stationarity,ADF,Test Statistic,{'alpha': 0.2},0.815369
3,Stationarity,ADF,Critical Value 1%,{'alpha': 0.2},-3.481682
4,Stationarity,ADF,Critical Value 5%,{'alpha': 0.2},-2.884042
5,Stationarity,ADF,Critical Value 10%,{'alpha': 0.2},-2.57877
6,Stationarity,KPSS,Trend Stationarity,{'alpha': 0.2},False
7,Stationarity,KPSS,p-value,{'alpha': 0.2},0.1
8,Stationarity,KPSS,Test Statistic,{'alpha': 0.2},0.09615
9,Stationarity,KPSS,Critical Value 10%,{'alpha': 0.2},0.119


# Flow example

## Common Setup

In [16]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, session_id=42)

Unnamed: 0,Description,Value
0,session_id,42
1,Original Data,"(144, 1)"
2,Missing Values,False
3,Transformed Train Set,"(132,)"
4,Transformed Test Set,"(12,)"
5,Fold Generator,ExpandingWindowSplitter
6,Fold Number,3
7,CPU Jobs,-1
8,Use GPU,False
9,Log Experiment,False


<pycaret.internal.pycaret_experiment.time_series_experiment.TimeSeriesExperiment at 0x2559c578948>

In [17]:
y_train = exp.get_config("y_train")
y_test = exp.get_config("y_test")

## Manual Create

### Classical Statistical Models

In [18]:
model = exp.create_model("exp_smooth")
y_predict = exp.predict_model(model)
exp.plot_model(estimator=model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.7395,17.8049,0.0347,0.0355,0.8968
1,1957-12,24.6241,27.0346,0.0684,0.0656,0.8085
2,1958-12,20.819,23.4547,0.048,0.0494,0.8769
Mean,NaT,19.7275,22.7648,0.0503,0.0502,0.8608
SD,NaT,4.5101,3.7994,0.0138,0.0123,0.0378


Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,Exponential Smoothing,13.3808,16.9801,0.028,0.028,0.948


In [19]:
## NOTE: Model that supports prediction intervals will plot them by default
model_pi = exp.create_model("arima")
exp.plot_model(estimator=model_pi)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.0286,16.1485,0.0327,0.0334,0.9151
1,1957-12,18.292,20.3442,0.0506,0.0491,0.8916
2,1958-12,28.6999,30.1669,0.0671,0.0697,0.7964
Mean,NaT,20.0069,22.2199,0.0501,0.0507,0.8677
SD,NaT,6.5117,5.8746,0.0141,0.0148,0.0513


In [20]:
# Check Goodness of Fit
exp.check_stats(model)

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Summary,Statistics,Length,,132.0
1,Summary,Statistics,Mean,,-0.000034
2,Summary,Statistics,Median,,-1.769647
3,Summary,Statistics,Standard Deviation,,11.668967
4,Summary,Statistics,Variance,,136.164793
5,Summary,Statistics,Kurtosis,,0.419007
6,Summary,Statistics,Skewness,,0.385646
7,Summary,Statistics,# Distinct Values,,132.0
8,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.05, 'K': 24}",146.290907
9,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.05, 'K': 48}",243.168347


In [21]:
# Plot Goodness of Fit (Plots based on model in-sample residuals)
exp.plot_model(model, plot='residuals')
exp.plot_model(model, plot='diagnostics')


In [22]:
# Compare Model residual ACF/PACF to original Time Series ACF/PACF
# 1. Do you see any visible trend or seasonality component that has not been captured in the model (i.e. still visible in the residual ACF/PACF)?
exp.plot_model(plot='acf')
exp.plot_model(model, plot='acf')

exp.plot_model(plot='pacf')
exp.plot_model(model, plot='pacf')

In [23]:
# Check the decomposition of the residuals
# 1. Is the residual in the decomposition the largest component?
# 2. Do you see any visible trend or seasonality component that has not been captured in the model?
exp.plot_model(model, plot="decomp_classical")
exp.plot_model(model, plot="decomp_stl")

In [24]:
# Fixed Grid Search
tuned_model = exp.tune_model(model)
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.7395,17.8049,0.0347,0.0355,0.8968
1,1957-12,24.6241,27.0346,0.0684,0.0656,0.8085
2,1958-12,20.819,23.4547,0.048,0.0494,0.8769
Mean,NaT,19.7275,22.7648,0.0503,0.0502,0.8608
SD,NaT,4.5101,3.7994,0.0138,0.0123,0.0378


ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=None)
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=False)


In [25]:
# Random Grid Search
tuned_model = exp.tune_model(model, search_algorithm="random")
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.7395,17.8049,0.0347,0.0355,0.8968
1,1957-12,24.6241,27.0346,0.0684,0.0656,0.8085
2,1958-12,20.819,23.4547,0.048,0.0494,0.8769
Mean,NaT,19.7275,22.7648,0.0503,0.0502,0.8608
SD,NaT,4.5101,3.7994,0.0138,0.0123,0.0378


ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=None)
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=False)


In [26]:
y_predict = exp.predict_model(tuned_model)
exp.plot_model(estimator=tuned_model)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,Exponential Smoothing,13.3808,16.9801,0.028,0.028,0.948


In [27]:
# Random Grid Search with different number of iterations
tuned_model = exp.tune_model(model, search_algorithm="random", n_iter=5)
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.7395,17.8049,0.0347,0.0355,0.8968
1,1957-12,24.6241,27.0346,0.0684,0.0656,0.8085
2,1958-12,20.819,23.4547,0.048,0.0494,0.8769
Mean,NaT,19.7275,22.7648,0.0503,0.0502,0.8608
SD,NaT,4.5101,3.7994,0.0138,0.0123,0.0378


ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=None)
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=False)


### Reduced Regressors: Random Forest (with internal conditional deseasonalize and detrending)

In [28]:
model = exp.create_model("rf_cds_dt")
y_predict = exp.predict_model(model)
exp.plot_model(estimator=model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,28.0638,40.1439,0.0678,0.0721,0.4754
1,1957-12,29.7063,38.0812,0.0738,0.0748,0.6201
2,1958-12,22.8056,35.9245,0.047,0.0493,0.7113
Mean,NaT,26.8586,38.0499,0.0629,0.0654,0.6023
SD,NaT,2.9433,1.7227,0.0115,0.0115,0.0971


Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,RandomForestRegressor,32.1796,46.0571,0.0606,0.0637,0.6171


In [29]:
# Fixed Grid Search
tuned_model = exp.tune_model(model)
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,28.7546,37.5161,0.0714,0.075,0.5419
1,1957-12,26.7678,32.3833,0.069,0.0682,0.7253
2,1958-12,29.3616,39.1808,0.0637,0.0657,0.6566
Mean,NaT,28.2947,36.3601,0.0681,0.0696,0.6412
SD,NaT,1.1077,2.8929,0.0032,0.0039,0.0757


BaseCdsDt(degree=1, deseasonal_model='additive',
          regressor=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                          criterion='mse', max_depth=None,
                                          max_features='auto',
                                          max_leaf_nodes=None, max_samples=None,
                                          min_impurity_decrease=0.0,
                                          min_impurity_split=None,
                                          min_samples_leaf=1,
                                          min_samples_split=2,
                                          min_weight_fraction_leaf=0.0,
                                          n_estimators=100, n_jobs=-1,
                                          oob_score=False, random_state=42,
                                          verbose=0, warm_start=False),
          sp=1, window_length=10)
BaseCdsDt(degree=1, deseasonal_model='additive',
          regressor=Rand

In [30]:
# Random Grid Search
tuned_model = exp.tune_model(model, search_algorithm="random")
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,28.7546,37.5161,0.0714,0.075,0.5419
1,1957-12,26.7678,32.3833,0.069,0.0682,0.7253
2,1958-12,29.3616,39.1808,0.0637,0.0657,0.6566
Mean,NaT,28.2947,36.3601,0.0681,0.0696,0.6412
SD,NaT,1.1077,2.8929,0.0032,0.0039,0.0757


BaseCdsDt(degree=1, deseasonal_model='additive',
          regressor=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                          criterion='mse', max_depth=None,
                                          max_features='auto',
                                          max_leaf_nodes=None, max_samples=None,
                                          min_impurity_decrease=0.0,
                                          min_impurity_split=None,
                                          min_samples_leaf=1,
                                          min_samples_split=2,
                                          min_weight_fraction_leaf=0.0,
                                          n_estimators=100, n_jobs=-1,
                                          oob_score=False, random_state=42,
                                          verbose=0, warm_start=False),
          sp=1, window_length=10)
BaseCdsDt(degree=1, deseasonal_model='additive',
          regressor=Rand

In [31]:
y_predict = exp.predict_model(tuned_model)
exp.plot_model(estimator=tuned_model)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,RandomForestRegressor,34.5229,46.002,0.0668,0.0696,0.618


## Auto Create

### Compare Models

In [32]:
best_baseline_models = exp.compare_models(fold=fold, sort='smape', n_select=3)
best_baseline_models

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
ets,ETS,19.5889,22.7315,0.0498,0.0498,0.8624,0.1167
exp_smooth,Exponential Smoothing,19.7275,22.7648,0.0503,0.0502,0.8608,0.09
arima,ARIMA,20.0069,22.2199,0.0501,0.0507,0.8677,0.04
auto_arima,Auto ARIMA,21.0297,23.4661,0.0525,0.0531,0.8509,2.9867
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,24.4233,31.4395,0.0584,0.0601,0.7169,0.5167
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,24.8102,31.3995,0.0613,0.0631,0.6888,0.4667
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,25.8293,34.9633,0.0617,0.0641,0.626,0.4567
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,26.8586,38.0499,0.0629,0.0654,0.6023,0.5633
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,27.82,37.791,0.0661,0.0686,0.6015,0.0733
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,29.1314,38.308,0.0685,0.0715,0.5855,0.03


[AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False,
         bounds=None, callback=None, damped_trend=False, dates=None, disp=False,
         error='add', freq=None, full_output=True, ignore_inf_ic=True,
         information_criterion='aic', initial_level=None, initial_seasonal=None,
         initial_trend=None, initialization_method='estimated', maxiter=1000,
         missing='none', n_jobs=None, restrict=True, return_params=False,
         seasonal='add', sp=12, start_params=None, trend='add'),
 ExponentialSmoothing(damped_trend=False, initial_level=None,
                      initial_seasonal=None, initial_trend=None,
                      initialization_method='estimated', seasonal='add', sp=12,
                      trend='add', use_boxcox=None),
 ARIMA(maxiter=50, method='lbfgs', order=(1, 0, 0), out_of_sample_size=0,
       scoring='mse', scoring_args=None, seasonal_order=(0, 1, 0, 12),
       with_intercept=True)]

In [33]:
compare_metrics = exp.pull()
compare_metrics

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
ets,ETS,19.5889,22.7315,0.0498,0.0498,0.8624,0.1167
exp_smooth,Exponential Smoothing,19.7275,22.7648,0.0503,0.0502,0.8608,0.09
arima,ARIMA,20.0069,22.2199,0.0501,0.0507,0.8677,0.04
auto_arima,Auto ARIMA,21.0297,23.4661,0.0525,0.0531,0.8509,2.9867
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,24.4233,31.4395,0.0584,0.0601,0.7169,0.5167
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonali...,24.8102,31.3995,0.0613,0.0631,0.6888,0.4667
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,25.8293,34.9633,0.0617,0.0641,0.626,0.4567
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,26.8586,38.0499,0.0629,0.0654,0.6023,0.5633
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,27.82,37.791,0.0661,0.0686,0.6015,0.0733
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Det...,29.1314,38.308,0.0685,0.0715,0.5855,0.03


* Note that some models like BATS and TBATS are disabled by default. 
* You can enable them by setting `turbo = False`

In [34]:
# _ = exp.compare_models(fold=fold, sort='smape', n_select=3, turbo=False)

### Tune Best Models

In [35]:
best_tuned_models = [exp.tune_model(model) for model in best_baseline_models]
best_tuned_models

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.2626,16.6689,0.0331,0.0339,0.9096
1,1957-12,19.1686,21.3384,0.053,0.0513,0.8807
2,1958-12,21.1925,23.4747,0.0491,0.0506,0.8767
Mean,NaT,17.8746,20.494,0.0451,0.0453,0.889
SD,NaT,3.3642,2.8419,0.0086,0.008,0.0146


[AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False,
         bounds=None, callback=None, damped_trend=False, dates=None, disp=False,
         error='add', freq=None, full_output=True, ignore_inf_ic=True,
         information_criterion='aic', initial_level=None, initial_seasonal=None,
         initial_trend=None, initialization_method='estimated', maxiter=1000,
         missing='none', n_jobs=None, restrict=True, return_params=False,
         seasonal='add', sp=12, start_params=None, trend='add'),
 ExponentialSmoothing(damped_trend=False, initial_level=None,
                      initial_seasonal=None, initial_trend=None,
                      initialization_method='estimated', seasonal='add', sp=12,
                      trend='add', use_boxcox=False),
 ARIMA(maxiter=50, method='lbfgs', order=(0, 0, 0), out_of_sample_size=0,
       scoring='mse', scoring_args=None, seasonal_order=(1, 1, 0, 12),
       with_intercept=True)]

### Blend Best Models

#### Mean Blender

In [36]:
mean_blender = exp.blend_models(best_tuned_models, method='mean')

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.1354,17.2182,0.0328,0.0336,0.9035
1,1957-12,22.4964,24.4307,0.0623,0.0601,0.8436
2,1958-12,20.507,23.6402,0.047,0.0485,0.875
Mean,NaT,18.7129,21.7631,0.0474,0.0474,0.874
SD,NaT,4.0266,3.2298,0.0121,0.0108,0.0244


In [37]:
y_predict = exp.predict_model(mean_blender)
exp.plot_model(estimator=mean_blender)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,EnsembleForecaster,15.5018,18.7176,0.0316,0.0319,0.9368


#### Median Blender

In [38]:
median_blender = exp.blend_models(best_tuned_models, method='median')

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.4074,17.7466,0.0337,0.0345,0.8975
1,1957-12,23.7237,25.8581,0.0657,0.0632,0.8248
2,1958-12,21.5181,24.4933,0.0497,0.0513,0.8658
Mean,NaT,19.5497,22.6993,0.0497,0.0497,0.8627
SD,NaT,4.4356,3.5461,0.013,0.0118,0.0297


In [39]:
y_predict = exp.predict_model(median_blender)
exp.plot_model(estimator=median_blender)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,EnsembleForecaster,12.9901,16.8566,0.0271,0.027,0.9487


#### Voting Blender

In [40]:
top_model_metrics = compare_metrics.iloc[0:3]['SMAPE']
display(top_model_metrics)

top_model_weights = 1 - top_model_metrics/top_model_metrics.sum()
display(top_model_weights)

ets           0.0498
exp_smooth    0.0502
arima         0.0507
Name: SMAPE, dtype: object

ets           0.669542
exp_smooth    0.666888
arima          0.66357
Name: SMAPE, dtype: object

In [41]:
voting_blender = exp.blend_models(best_tuned_models, method='voting', weights=top_model_weights.values)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.1365,17.2203,0.0328,0.0336,0.9035
1,1957-12,22.5035,24.4384,0.0624,0.0601,0.8435
2,1958-12,20.5107,23.6423,0.047,0.0485,0.875
Mean,NaT,18.7169,21.767,0.0474,0.0474,0.874
SD,NaT,4.0289,3.2314,0.0121,0.0108,0.0245


In [42]:
y_predict = exp.predict_model(voting_blender)
print(y_predict)
exp.plot_model(estimator=voting_blender)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,EnsembleForecaster,15.4844,18.705,0.0316,0.0319,0.9368


1960-01    407.6144
1960-02    389.1529
1960-03    449.6514
1960-04    438.0303
1960-05    459.4166
1960-06    514.2010
1960-07    586.2378
1960-08    594.3397
1960-09    495.3860
1960-10    439.6841
1960-11    393.8875
1960-12    429.6637
Freq: M, Name: Number of airline passengers, dtype: float64


## Save and Load Model

In [43]:
_ = exp.save_model(voting_blender, "my_blender")

Transformation Pipeline and Model Successfully Saved


In [44]:
loaded_exp = TimeSeriesExperiment()
m = loaded_exp.load_model("my_blender")
# Predictions should be same as before the model was saved and loaded
loaded_exp.predict_model(m)

Transformation Pipeline and Model Successfully Loaded


1960-01    407.6144
1960-02    389.1529
1960-03    449.6514
1960-04    438.0303
1960-05    459.4166
1960-06    514.2010
1960-07    586.2378
1960-08    594.3397
1960-09    495.3860
1960-10    439.6841
1960-11    393.8875
1960-12    429.6637
Freq: M, dtype: float64

## Prediction Customization

In [45]:
model = exp.create_model("auto_arima")

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,12.0769,15.8014,0.0298,0.0305,0.9187
1,1957-12,19.4102,21.4989,0.0538,0.052,0.8789
2,1958-12,31.602,33.0981,0.0738,0.0769,0.7549
Mean,NaT,21.0297,23.4661,0.0525,0.0531,0.8509
SD,NaT,8.0529,7.197,0.018,0.019,0.0698


In [46]:
# Default prediction
exp.predict_model(model)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,Auto ARIMA,14.8982,18.5365,0.031,0.0309,0.938


1960-01    419.9672
1960-02    399.8499
1960-03    457.9944
1960-04    444.4742
1960-05    464.7892
1960-06    514.1254
1960-07    587.8042
1960-08    597.0108
1960-09    499.5313
1960-10    442.3597
1960-11    396.4102
1960-12    438.6516
Freq: M, Name: Number of airline passengers, dtype: float64

In [47]:
# With Prediction Interval (default alpha = 0.05)
exp.predict_model(model, return_pred_int=True)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,Auto ARIMA,14.8982,18.5365,0.031,0.0309,0.938


Unnamed: 0,y_pred,lower,upper
1960-01,419.9672,400.2603,439.674
1960-02,399.8499,375.7392,423.9605
1960-03,457.9944,429.6697,486.319
1960-04,444.4742,414.0056,474.9427
1960-05,464.7892,432.7993,496.779
1960-06,514.1254,481.2385,547.0123
1960-07,587.8042,554.3237,621.2846
1960-08,597.0108,563.1606,630.861
1960-09,499.5313,465.441,533.6215
1960-10,442.3597,408.1167,476.6027


In [48]:
# With Prediction Interval (custom alpha = 0.2)
exp.predict_model(model, return_pred_int=True, alpha=0.2)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,Auto ARIMA,14.8982,18.5365,0.031,0.0309,0.938


Unnamed: 0,y_pred,lower,upper
1960-01,419.9672,407.0816,432.8528
1960-02,399.8499,384.0847,415.615
1960-03,457.9944,439.4739,476.5149
1960-04,444.4742,424.5519,464.3965
1960-05,464.7892,443.8722,485.7062
1960-06,514.1254,492.6219,535.629
1960-07,587.8042,565.9125,609.6959
1960-08,597.0108,574.8774,619.1443
1960-09,499.5313,477.2408,521.8217
1960-10,442.3597,419.9694,464.75


In [49]:
# Increased forecast horizon to 2 years instead of the original 1 year
exp.predict_model(model, fh = np.arange(1, 25))

predict_model >> Forecast Horizon does not match the horizon length used during training. Metrics displayed will be using indices that match only


Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,Auto ARIMA,14.8982,18.5365,0.031,0.0309,0.938


1960-01    419.9672
1960-02    399.8499
1960-03    457.9944
1960-04    444.4742
1960-05    464.7892
1960-06    514.1254
1960-07    587.8042
1960-08    597.0108
1960-09    499.5313
1960-10    442.3597
1960-11    396.4102
1960-12    438.6516
1961-01    453.0077
1961-02    432.4005
1961-03    490.1513
1961-04    476.3150
1961-05    496.3762
1961-06    545.5087
1961-07    619.0237
1961-08    628.0990
1961-09    530.5139
1961-10    473.2576
1961-11    427.2400
1961-12    469.4268
Freq: M, Name: Number of airline passengers, dtype: float64

In [50]:
# For models that do not produce a prediction interval --> returns NA values
model = exp.create_model("lr_cds_dt")
exp.predict_model(model, return_pred_int=True)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,38.6824,45.082,0.0998,0.1051,0.3384
1,1957-12,28.0608,34.6867,0.0751,0.0734,0.6848
2,1958-12,32.1693,38.2681,0.0737,0.0753,0.6724
Mean,NaT,32.9708,39.3456,0.0828,0.0846,0.5652
SD,NaT,4.3731,4.3117,0.012,0.0145,0.1604


Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,R2
0,LinearRegression,47.7429,55.0154,0.0965,0.1005,0.4536


Unnamed: 0,y_pred,lower,upper
1960-01,433.1925,,
1960-02,414.247,,
1960-03,394.9755,,
1960-04,374.2084,,
1960-05,431.222,,
1960-06,493.9331,,
1960-07,527.8974,,
1960-08,512.4796,,
1960-09,456.4409,,
1960-10,436.8202,,


## Types of Window Splitters

### Sliding Window Splitter

In [51]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, fold_strategy='sliding')
model = exp.create_model("ets")

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.4802,17.755,0.034,0.0348,0.8974
1,1957-12,25.485,27.5563,0.0707,0.0678,0.8011
2,1958-12,20.1357,23.0437,0.0465,0.0479,0.8812
Mean,NaT,19.7003,22.785,0.0504,0.0502,0.8599
SD,NaT,4.9106,4.0055,0.0153,0.0136,0.0421


### Expanding/Rolling Window

* They are identical

In [52]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, fold_strategy='expanding')
model = exp.create_model("ets")

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.4802,17.755,0.034,0.0348,0.8974
1,1957-12,23.6964,25.8274,0.0656,0.0631,0.8252
2,1958-12,21.5902,24.6119,0.0499,0.0515,0.8645
Mean,NaT,19.5889,22.7315,0.0498,0.0498,0.8624
SD,NaT,4.4043,3.5537,0.0129,0.0116,0.0295


In [53]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, fold_strategy='rolling')
model = exp.create_model("ets")

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,13.4802,17.755,0.034,0.0348,0.8974
1,1957-12,23.6964,25.8274,0.0656,0.0631,0.8252
2,1958-12,21.5902,24.6119,0.0499,0.0515,0.8645
Mean,NaT,19.5889,22.7315,0.0498,0.0498,0.8624
SD,NaT,4.4043,3.5537,0.0129,0.0116,0.0295


## Error Handling

In [54]:
try:
    exp = TimeSeriesExperiment()
    exp.setup(data=y, fh=17, fold=76, fold_strategy='expanding')
except ValueError as error:
    print(error)

IntProgress(value=0, description='Processing: ', max=3)

Not Enough Data Points, set a lower number of folds or fh
