In [1]:
import time
import numpy as np
import pandas as pd

from pycaret.datasets import get_data
from pycaret.internal.pycaret_experiment import TimeSeriesExperiment

In [2]:
# Fetch the 'airline' sample dataset through pycaret's get_data helper
# (verbose=False presumably suppresses the preview printout - TODO confirm)
y = get_data('airline', verbose=False)

In [3]:
# Forecast horizon: the setup summary reports a 12-point test set for this value
fh = 12 # or alternately fh = np.arange(1,13)
# Number of cross-validation folds, passed as `fold=` to setup/compare_models further down
fold = 3

# Available Models

In [4]:
# Create an experiment and initialise it on the airline series with the chosen horizon.
# NOTE: no session_id is passed here, so the one shown in the setup summary is not fixed by this cell.
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh)
# Last expression: tabulates the available model IDs with their name, reference class and Turbo flag
exp.models()

Unnamed: 0,Description,Value
0,session_id,6327
1,Original Data,"(144, 1)"
2,Missing Values,False
3,Transformed Train Set,"(132,)"
4,Transformed Test Set,"(12,)"
5,Fold Generator,ExpandingWindowSplitter
6,Fold Number,3
7,Enforce Prediction Interval,False
8,Seasonal Period Tested,12
9,Seasonality Detected,True


Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
naive,Naive Forecaster,sktime.forecasting.naive.NaiveForecaster,True
grand_means,Grand Means Forecaster,sktime.forecasting.naive.NaiveForecaster,True
snaive,Seasonal Naive Forecaster,sktime.forecasting.naive.NaiveForecaster,True
polytrend,Polynomial Trend Forecaster,sktime.forecasting.trend.PolynomialTrendForeca...,True
arima,ARIMA,sktime.forecasting.arima.ARIMA,True
auto_arima,Auto ARIMA,sktime.forecasting.arima.AutoARIMA,True
exp_smooth,Exponential Smoothing,sktime.forecasting.exp_smoothing.ExponentialSm...,True
croston,Croston,sktime.forecasting.croston.Croston,True
ets,ETS,sktime.forecasting.ets.AutoETS,True
theta,Theta Forecaster,sktime.forecasting.theta.ThetaForecaster,True


# Plotting Data

In [5]:
# With no arguments at all, plot_model plots the original dataset
# (presumably the same as plot="ts", which the following cell passes explicitly - TODO confirm)
exp.plot_model()

In [6]:
# plot="ts" with no estimator argument: plots the original dataset
exp.plot_model(plot="ts")

In [7]:
# ACF and PACF for the original dataset
exp.plot_model(plot="acf")

# NOTE: you can customize the plots with kwargs - e.g. number of lags, figure size (width, height), etc.
# data_kwargs such as `nlags` are passed to the underlying function that computes the ACF/PACF values
# fig_kwargs such as `fig_size` & `fig_template` are passed to plotly and can have any value that plotly accepts
exp.plot_model(plot="pacf", data_kwargs={'nlags':36, }, fig_kwargs={'fig_size': [800, 500], 'fig_template': 'simple_white'})

In [8]:
# Classical decomposition with the default type (presumably additive - TODO confirm)
exp.plot_model(plot="decomp_classical")
# Classical decomposition again, explicitly requesting the multiplicative type via data_kwargs
exp.plot_model(plot="decomp_classical", data_kwargs={'type': 'multiplicative'})
# STL decomposition
exp.plot_model(plot="decomp_stl")

In [9]:
# Show the train-test splits on the dataset
# Internally split - the last len(fh) points are the test set, the remaining points are the train set
# (132 train / 12 test points for this dataset, per the setup summary)
exp.plot_model(plot="train_test_split")

# Show the Cross Validation splits inside the train set
exp.plot_model(plot="cv")

In [10]:
# Plot diagnostics for the original dataset (no estimator is passed)
exp.plot_model(plot="diagnostics")

In [11]:
# Plot differences along with diagnostics such as ACF and PACF

# `order_list` gives the differencing orders to show; "pacf": True adds the PACF diagnostic.
# Row 1: Original
# Row 2: d = 1
# Row 3: d = 2
exp.plot_model(plot="diff", data_kwargs={"order_list": [1, 2], "pacf": True})

# `lags_list` gives, per row, the lags to difference at; here both ACF and PACF are requested.
# Row 1: Original
# Row 2: d = 1
# Row 3: First (d = 1) then (D = 1, s = 12)
#   - Corresponds to applying a standard first difference to handle trend,
#     followed by a seasonal difference (at lag 12) to attempt to account for
#     seasonal dependence.
# Ref: https://www.sktime.org/en/v0.8.0/api_reference/modules/auto_generated/sktime.transformations.series.difference.Differencer.html
exp.plot_model(plot="diff", data_kwargs={"lags_list": [[1], [1, 12]], "acf": True, "pacf": True})

# Tests

In [12]:
# Run the statistical tests on the original dataset with a custom significance level;
# the Setting column of the result echoes the alpha that was used (0.2 here)
exp.check_stats(alpha = 0.2)

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Summary,Statistics,Length,,144.0
1,Summary,Statistics,Mean,,280.298611
2,Summary,Statistics,Median,,265.5
3,Summary,Statistics,Standard Deviation,,119.966317
4,Summary,Statistics,Variance,,14391.917201
5,Summary,Statistics,Kurtosis,,-0.364942
6,Summary,Statistics,Skewness,,0.58316
7,Summary,Statistics,# Distinct Values,,118.0
8,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.2, 'K': 24}",1606.083817
9,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.2, 'K': 48}",1933.155822


In [13]:
# Options are: 'all', 'summary', 'white_noise', 'stationarity', 'adf', 'kpss', 'normality'
exp.check_stats(test="summary")

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Summary,Statistics,Length,,144.0
1,Summary,Statistics,Mean,,280.298611
2,Summary,Statistics,Median,,265.5
3,Summary,Statistics,Standard Deviation,,119.966317
4,Summary,Statistics,Variance,,14391.917201
5,Summary,Statistics,Kurtosis,,-0.364942
6,Summary,Statistics,Skewness,,0.58316
7,Summary,Statistics,# Distinct Values,,118.0


In [14]:
# The Setting column records the alpha value used (for most tests).
# For the white noise test it additionally records the number of lags (K) tested.
exp.check_stats(test='stationarity')

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Stationarity,ADF,Stationarity,{'alpha': 0.05},False
1,Stationarity,ADF,p-value,{'alpha': 0.05},0.99188
2,Stationarity,ADF,Test Statistic,{'alpha': 0.05},0.815369
3,Stationarity,ADF,Critical Value 1%,{'alpha': 0.05},-3.481682
4,Stationarity,ADF,Critical Value 5%,{'alpha': 0.05},-2.884042
5,Stationarity,ADF,Critical Value 10%,{'alpha': 0.05},-2.57877
6,Stationarity,KPSS,Trend Stationarity,{'alpha': 0.05},True
7,Stationarity,KPSS,p-value,{'alpha': 0.05},0.1
8,Stationarity,KPSS,Test Statistic,{'alpha': 0.05},0.09615
9,Stationarity,KPSS,Critical Value 10%,{'alpha': 0.05},0.119


In [15]:
# For white noise, the Setting column also shows the number of lags (K) used in the test
exp.check_stats(test='white_noise')

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.05, 'K': 24}",1606.083817
1,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.05, 'K': 48}",1933.155822
2,White Noise,Ljung-Box,p-value,"{'alpha': 0.05, 'K': 24}",0.0
3,White Noise,Ljung-Box,p-value,"{'alpha': 0.05, 'K': 48}",0.0
4,White Noise,Ljung-Box,White Noise,"{'alpha': 0.05, 'K': 24}",False
5,White Noise,Ljung-Box,White Noise,"{'alpha': 0.05, 'K': 48}",False


In [16]:
# You can change alpha if needed (would not recommend though).
# In the saved output the KPSS p-value is 0.1, so raising alpha from 0.05 to 0.2
# flips the reported "Trend Stationarity" from True to False.
exp.check_stats(test='stationarity', alpha = 0.2)

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Stationarity,ADF,Stationarity,{'alpha': 0.2},False
1,Stationarity,ADF,p-value,{'alpha': 0.2},0.99188
2,Stationarity,ADF,Test Statistic,{'alpha': 0.2},0.815369
3,Stationarity,ADF,Critical Value 1%,{'alpha': 0.2},-3.481682
4,Stationarity,ADF,Critical Value 5%,{'alpha': 0.2},-2.884042
5,Stationarity,ADF,Critical Value 10%,{'alpha': 0.2},-2.57877
6,Stationarity,KPSS,Trend Stationarity,{'alpha': 0.2},False
7,Stationarity,KPSS,p-value,{'alpha': 0.2},0.1
8,Stationarity,KPSS,Test Statistic,{'alpha': 0.2},0.09615
9,Stationarity,KPSS,Critical Value 10%,{'alpha': 0.2},0.119


# Flow example

## Common Setup

In [17]:
# Fresh experiment shared by the rest of the flow example: same data and horizon,
# an explicit number of CV folds, and a fixed session_id (42) so the run can be repeated
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, session_id=42)

Unnamed: 0,Description,Value
0,session_id,42
1,Original Data,"(144, 1)"
2,Missing Values,False
3,Transformed Train Set,"(132,)"
4,Transformed Test Set,"(12,)"
5,Fold Generator,ExpandingWindowSplitter
6,Fold Number,3
7,Enforce Prediction Interval,False
8,Seasonal Period Tested,12
9,Seasonality Detected,True


<pycaret.internal.pycaret_experiment.time_series_experiment.TimeSeriesExperiment at 0x21a60f62788>

In [18]:
# Retrieve the train and test series that setup created
# (the setup summary reports 132 train points and 12 test points)
y_train = exp.get_config("y_train")
y_test = exp.get_config("y_test")

## Manual Create

### Classical Statistical Models

In [19]:
# Fit an exponential smoothing model; create_model displays the per-fold CV metrics
model = exp.create_model("exp_smooth")
# Forecast with the fitted model; predict_model displays a one-row metrics grid for the forecast
y_predict = exp.predict_model(model)
# Plot out-of-sample forecasts
exp.plot_model(estimator=model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,14.5613,18.7758,0.0366,0.0376,0.4986,0.5736,0.8852
1,1957-12,16.8009,19.3269,0.0458,0.0447,0.5495,0.5693,0.9021
2,1958-12,20.2155,22.3873,0.0479,0.0494,0.7075,0.6887,0.8879
Mean,NaT,17.1926,20.1633,0.0435,0.0439,0.5852,0.6105,0.8918
SD,NaT,2.3249,1.5886,0.0049,0.0048,0.0889,0.0553,0.0074


Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,Exponential Smoothing,10.2997,15.8074,0.0221,0.0216,0.3382,0.4575,0.9549


In [20]:
# NOTE: Models that support prediction intervals will plot them by default
# The ARIMA model is kept in `model_pi`, so `model` still refers to the exponential smoothing model
model_pi = exp.create_model("arima")
exp.plot_model(estimator=model_pi)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,13.0286,16.1485,0.0327,0.0334,0.4462,0.4933,0.9151
1,1957-12,18.292,20.3442,0.0506,0.0491,0.5983,0.5993,0.8916
2,1958-12,28.6999,30.1669,0.0671,0.0697,1.0044,0.928,0.7964
Mean,NaT,20.0069,22.2199,0.0501,0.0507,0.683,0.6735,0.8677
SD,NaT,6.5117,5.8746,0.0141,0.0148,0.2356,0.1851,0.0513


In [21]:
# Check Goodness of Fit
# NOTE: `model` is the exponential smoothing model created earlier (the ARIMA model is `model_pi`).
# With an estimator passed, the summary reports 132 values (the train length) -
# presumably the model's in-sample residuals; TODO confirm.
exp.check_stats(model)

Unnamed: 0,Test,Test Name,Property,Setting,Value
0,Summary,Statistics,Length,,132.0
1,Summary,Statistics,Mean,,-0.079834
2,Summary,Statistics,Median,,-1.383823
3,Summary,Statistics,Standard Deviation,,9.804906
4,Summary,Statistics,Variance,,96.136182
5,Summary,Statistics,Kurtosis,,0.485739
6,Summary,Statistics,Skewness,,0.098919
7,Summary,Statistics,# Distinct Values,,132.0
8,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.05, 'K': 24}",41.981068
9,White Noise,Ljung-Box,Test Statictic,"{'alpha': 0.05, 'K': 48}",67.64294


In [22]:
# Plot Goodness of Fit (plots based on the in-sample residuals of `model`,
# which is still the exponential smoothing model created above)
exp.plot_model(model, plot='residuals')
exp.plot_model(model, plot='diagnostics')
exp.plot_model(model, plot='insample')

In [23]:
# Compare the model residual ACF/PACF to the original time series ACF/PACF
# (calls without an estimator plot the original data; calls with `model` plot its residuals)
# 1. Do you see any visible trend or seasonality component that has not been captured in the model (i.e. still visible in the residual ACF/PACF)?
exp.plot_model(plot='acf')
exp.plot_model(model, plot='acf')

exp.plot_model(plot='pacf')
exp.plot_model(model, plot='pacf')

In [24]:
# Check the decomposition of the residuals (classical first, then STL)
# 1. Is the residual in the decomposition the largest component?
# 2. Do you see any visible trend or seasonality component that has not been captured in the model?
exp.plot_model(model, plot="decomp_classical")
exp.plot_model(model, plot="decomp_stl")

In [25]:
# Fixed Grid Search (tune_model called without a search_algorithm argument)
tuned_model = exp.tune_model(model)
# Print both estimators to compare hyperparameters; in the saved output the tuned
# model switches seasonal 'mul' -> 'add' and use_boxcox None -> True
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,10.562,13.4978,0.0272,0.0273,0.3617,0.4124,0.9407
1,1957-12,26.2572,30.0651,0.0738,0.0703,0.8588,0.8856,0.7632
2,1958-12,11.2644,13.4112,0.0261,0.0265,0.3942,0.4126,0.9598
Mean,NaT,16.0278,18.9914,0.0424,0.0414,0.5382,0.5702,0.8879
SD,NaT,7.2389,7.8304,0.0222,0.0205,0.2271,0.223,0.0885


ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='mul', sp=12,
                     trend='add', use_boxcox=None)
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=True)


In [26]:
# Random Grid Search
# (in the saved output it lands on the same hyperparameters and CV metrics as the fixed grid search)
tuned_model = exp.tune_model(model, search_algorithm="random")
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,10.562,13.4978,0.0272,0.0273,0.3617,0.4124,0.9407
1,1957-12,26.2572,30.0651,0.0738,0.0703,0.8588,0.8856,0.7632
2,1958-12,11.2644,13.4112,0.0261,0.0265,0.3942,0.4126,0.9598
Mean,NaT,16.0278,18.9914,0.0424,0.0414,0.5382,0.5702,0.8879
SD,NaT,7.2389,7.8304,0.0222,0.0205,0.2271,0.223,0.0885


ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='mul', sp=12,
                     trend='add', use_boxcox=None)
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=True)


In [27]:
# Forecast with the tuned model and plot it.
# NOTE: in the saved output the tuned model has a better mean CV SMAPE (0.0414 vs 0.0439)
# but a worse forecast SMAPE than the untuned model (0.0364 vs 0.0216).
y_predict = exp.predict_model(tuned_model)
exp.plot_model(estimator=tuned_model)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,Exponential Smoothing,17.8363,22.7139,0.0375,0.0364,0.5858,0.6575,0.9069


In [28]:
# Random Grid Search with a different number of iterations (n_iter=5)
# (the saved output again shows the same tuned hyperparameters as the earlier searches)
tuned_model = exp.tune_model(model, search_algorithm="random", n_iter=5)
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,10.562,13.4978,0.0272,0.0273,0.3617,0.4124,0.9407
1,1957-12,26.2572,30.0651,0.0738,0.0703,0.8588,0.8856,0.7632
2,1958-12,11.2644,13.4112,0.0261,0.0265,0.3942,0.4126,0.9598
Mean,NaT,16.0278,18.9914,0.0424,0.0414,0.5382,0.5702,0.8879
SD,NaT,7.2389,7.8304,0.0222,0.0205,0.2271,0.223,0.0885


ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='mul', sp=12,
                     trend='add', use_boxcox=None)
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=True)


### Reduced Regressors: Random Forest (with internal conditional deseasonalize and detrending)

In [29]:
# Random forest reduced-regression forecaster ("rf_cds_dt"); rebinds `model`,
# so later cells in this section refer to the random forest, not exponential smoothing
model = exp.create_model("rf_cds_dt")
# Forecast with it (metrics grid is displayed) and plot the forecast
y_predict = exp.predict_model(model)
exp.plot_model(estimator=model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,22.5879,30.6931,0.0551,0.0577,0.7735,0.9377,0.6933
1,1957-12,21.9259,24.7972,0.0583,0.0571,0.7172,0.7304,0.8389
2,1958-12,23.7786,30.8886,0.0509,0.0528,0.8322,0.9502,0.7865
Mean,NaT,22.7641,28.793,0.0548,0.0559,0.7743,0.8728,0.7729
SD,NaT,0.7666,2.8266,0.003,0.0022,0.047,0.1008,0.0602


Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,RandomForestRegressor,24.0379,29.6366,0.0473,0.0486,0.7894,0.8578,0.8414


In [30]:
# Fixed Grid Search (tune_model called without a search_algorithm argument)
tuned_model = exp.tune_model(model)
# Print the untuned and tuned forecasters to compare their hyperparameters
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,20.1918,26.1257,0.0502,0.0521,0.6914,0.7981,0.7778
1,1957-12,18.5111,22.5684,0.0496,0.0482,0.6055,0.6648,0.8666
2,1958-12,18.1008,22.755,0.0396,0.0405,0.6335,0.7,0.8842
Mean,NaT,18.9346,23.8164,0.0465,0.047,0.6435,0.721,0.8428
SD,NaT,0.9046,1.6347,0.0049,0.0048,0.0358,0.0564,0.0465


BaseCdsDtForecaster(degree=1, deseasonal_model='additive',
                    regressor=RandomForestRegressor(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    criterion='mse',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    n_estimators=100, 

In [31]:
# Random Grid Search
# (the CV metrics in the saved output are identical to those of the fixed grid search)
tuned_model = exp.tune_model(model, search_algorithm="random")
print(model)
print(tuned_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,20.1918,26.1257,0.0502,0.0521,0.6914,0.7981,0.7778
1,1957-12,18.5111,22.5684,0.0496,0.0482,0.6055,0.6648,0.8666
2,1958-12,18.1008,22.755,0.0396,0.0405,0.6335,0.7,0.8842
Mean,NaT,18.9346,23.8164,0.0465,0.047,0.6435,0.721,0.8428
SD,NaT,0.9046,1.6347,0.0049,0.0048,0.0358,0.0564,0.0465


BaseCdsDtForecaster(degree=1, deseasonal_model='additive',
                    regressor=RandomForestRegressor(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    criterion='mse',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    n_estimators=100, 

In [32]:
# Forecast with the tuned random forest and plot it.
# NOTE: as with exponential smoothing, the saved output shows a worse forecast SMAPE
# after tuning (0.0532 vs 0.0486) despite the better mean CV SMAPE (0.047 vs 0.0559).
y_predict = exp.predict_model(tuned_model)
exp.plot_model(estimator=tuned_model)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,RandomForestRegressor,26.7246,34.0891,0.0513,0.0532,0.8777,0.9867,0.7902


## Getting Ready for Productionization 

### Finalizing Models

In [33]:
# Create an ETS model and tune it, requesting the grid search explicitly
model = exp.create_model("ets")
tuned_model = exp.tune_model(model, search_algorithm='grid')

# Trains the model with the best hyperparameters on the entire dataset now
final_model = exp.finalize_model(tuned_model)
exp.plot_model(final_model)
# The finalized model forecasts the 12 months after the end of the data
# (1961-01 to 1961-12 in the saved output), so no metrics grid is shown for it
exp.predict_model(final_model)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,14.5582,18.7725,0.0366,0.0376,0.4985,0.5735,0.8853
1,1957-12,16.8005,19.3265,0.0458,0.0447,0.5495,0.5693,0.9021
2,1958-12,20.8908,23.4318,0.0495,0.0512,0.7311,0.7208,0.8772
Mean,NaT,17.4165,20.5103,0.044,0.0445,0.5931,0.6212,0.8882
SD,NaT,2.6217,2.0782,0.0054,0.0055,0.0998,0.0705,0.0104


1961-01    445.4229
1961-02    418.3921
1961-03    464.7036
1961-04    494.5817
1961-05    505.5179
1961-06    573.3778
1961-07    663.6585
1961-08    654.8065
1961-09    546.7023
1961-10    488.2774
1961-11    415.7382
1961-12    460.1488
Freq: M, Name: Time Series, dtype: float64

In [34]:
# NOTE: You can choose to predict further out in the horizon as well (if needed)
# Example here shows forecasting out 36 months instead of the default of 12;
# the horizon is passed to the plot through data_kwargs
exp.plot_model(estimator=final_model, data_kwargs={'fh': 36})

### Save model pickle file

In [35]:
# Persist the finalized model; the returned (model, 'my_final_model.pkl') tuple is
# echoed as the cell output because the result is not assigned
exp.save_model(final_model, "my_final_model")

Transformation Pipeline and Model Successfully Saved


(AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False,
         bounds=None, callback=None, damped_trend=False, dates=None, disp=False,
         error='add', freq=None, full_output=True, ignore_inf_ic=True,
         information_criterion='aic', initial_level=None, initial_seasonal=None,
         initial_trend=None, initialization_method='estimated', maxiter=1000,
         missing='none', n_jobs=None, restrict=True, return_params=False,
         seasonal='mul', sp=12, start_params=None, trend='add'),
 'my_final_model.pkl')

## Load Model 

**Usually done in another session**

In [36]:
# A brand-new experiment object (setup is not called on it) is enough to load the saved model;
# the name is given without the .pkl extension, as it was when saving
exp_load = TimeSeriesExperiment()
loaded_model = exp_load.load_model("my_final_model")

Transformation Pipeline and Model Successfully Loaded


In [37]:
# Should match the predictions made before the save and load
# (the saved output shows the same 1961-01 to 1961-12 values as the finalized model produced)
exp_load.predict_model(loaded_model)

1961-01    445.4229
1961-02    418.3921
1961-03    464.7036
1961-04    494.5817
1961-05    505.5179
1961-06    573.3778
1961-07    663.6585
1961-08    654.8065
1961-09    546.7023
1961-10    488.2774
1961-11    415.7382
1961-12    460.1488
Freq: M, dtype: float64

## Auto Create

### Compare Models

In [38]:
# Cross-validate the enabled models, rank them by SMAPE and keep the best three;
# with n_select=3 the result is a list of three fitted models, shown by the last expression
best_baseline_models = exp.compare_models(fold=fold, sort='smape', n_select=3)
best_baseline_models

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2,TT (Sec)
exp_smooth,Exponential Smoothing,17.1926,20.1633,0.0435,0.0439,0.5852,0.6105,0.8918,0.1367
ets,ETS,17.4165,20.5103,0.044,0.0445,0.5931,0.6212,0.8882,0.1833
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,19.662,24.0121,0.049,0.0489,0.6666,0.7255,0.8465,0.56
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,20.0334,25.967,0.0491,0.0499,0.6813,0.7866,0.8113,0.03
arima,ARIMA,20.0069,22.2199,0.0501,0.0507,0.683,0.6735,0.8677,0.0967
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,20.6084,25.4401,0.0509,0.0514,0.7004,0.7702,0.8215,0.0167
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,20.6086,25.4405,0.0509,0.0514,0.7004,0.7703,0.8215,0.0267
lar_cds_dt,Least Angular Regressor w/ Cond. Deseasonalize & Detrending,20.6084,25.4401,0.0509,0.0514,0.7004,0.7702,0.8215,0.06
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,20.6816,25.5362,0.0511,0.0516,0.7029,0.7732,0.8201,0.0267
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,20.7373,25.6005,0.0512,0.0517,0.7048,0.7751,0.8193,0.0233


[ExponentialSmoothing(damped_trend=False, initial_level=None,
                      initial_seasonal=None, initial_trend=None,
                      initialization_method='estimated', seasonal='mul', sp=12,
                      trend='add', use_boxcox=None),
 AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False,
         bounds=None, callback=None, damped_trend=False, dates=None, disp=False,
         error='add', freq=None, full_output=True, ignore_inf_ic=True,
         information_criterion='aic', initial_level=None, initial_seasonal=None,
         initial_trend=None, initialization_method='estimated', maxiter=1000,
         missing='none', n_jobs=None, restrict=True, return_params=False,
         seasonal='mul', sp=12, start_params=None, trend='add'),
 BaseCdsDtForecaster(degree=1, deseasonal_model='additive',
                     regressor=ExtraTreesRegressor(bootstrap=False,
                                                   ccp_alpha=0.0,
                    

In [39]:
# pull() returns the metrics grid as a DataFrame; called right after compare_models,
# it yields the comparison table shown above (reused later to derive blender weights)
compare_metrics = exp.pull()
compare_metrics

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2,TT (Sec)
exp_smooth,Exponential Smoothing,17.1926,20.1633,0.0435,0.0439,0.5852,0.6105,0.8918,0.1367
ets,ETS,17.4165,20.5103,0.044,0.0445,0.5931,0.6212,0.8882,0.1833
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,19.662,24.0121,0.049,0.0489,0.6666,0.7255,0.8465,0.56
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,20.0334,25.967,0.0491,0.0499,0.6813,0.7866,0.8113,0.03
arima,ARIMA,20.0069,22.2199,0.0501,0.0507,0.683,0.6735,0.8677,0.0967
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,20.6084,25.4401,0.0509,0.0514,0.7004,0.7702,0.8215,0.0167
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,20.6086,25.4405,0.0509,0.0514,0.7004,0.7703,0.8215,0.0267
lar_cds_dt,Least Angular Regressor w/ Cond. Deseasonalize...,20.6084,25.4401,0.0509,0.0514,0.7004,0.7702,0.8215,0.06
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,20.6816,25.5362,0.0511,0.0516,0.7029,0.7732,0.8201,0.0267
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,20.7373,25.6005,0.0512,0.0517,0.7048,0.7751,0.8193,0.0233


* Note that some models like BATS and TBATS are disabled by default. 
* You can enable them by setting `turbo = False`

In [40]:
# Uncomment to also include the models that are disabled by default (e.g. BATS and TBATS):
# _ = exp.compare_models(fold=fold, sort='smape', n_select=3, turbo=False)

### Tune Best Models

In [41]:
# Tune each selected baseline model with tune_model's default search;
# the resulting list keeps the same order as best_baseline_models
best_tuned_models = [exp.tune_model(model) for model in best_baseline_models]
best_tuned_models

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,29.6381,40.5265,0.0723,0.0768,1.0149,1.2381,0.4654
1,1957-12,23.7528,27.152,0.0624,0.0611,0.7769,0.7998,0.8069
2,1958-12,18.9035,24.3671,0.0412,0.0421,0.6616,0.7496,0.8672
Mean,NaT,24.0981,30.6819,0.0586,0.06,0.8178,0.9292,0.7131
SD,NaT,4.3892,7.0534,0.013,0.0142,0.1471,0.2194,0.1769


[ExponentialSmoothing(damped_trend=False, initial_level=None,
                      initial_seasonal=None, initial_trend=None,
                      initialization_method='estimated', seasonal='add', sp=12,
                      trend='add', use_boxcox=True),
 AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False,
         bounds=None, callback=None, damped_trend=False, dates=None, disp=False,
         error='add', freq=None, full_output=True, ignore_inf_ic=True,
         information_criterion='aic', initial_level=None, initial_seasonal=None,
         initial_trend=None, initialization_method='estimated', maxiter=1000,
         missing='none', n_jobs=None, restrict=True, return_params=False,
         seasonal='mul', sp=12, start_params=None, trend='add'),
 BaseCdsDtForecaster(degree=1, deseasonal_model='additive',
                     regressor=ExtraTreesRegressor(bootstrap=False,
                                                   ccp_alpha=0.0,
                    

### Blend Best Models

#### Mean Blender

In [42]:
# Combine the tuned models into one ensemble using method='mean'; CV metrics are displayed
mean_blender = exp.blend_models(best_tuned_models, method='mean')

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,10.8673,15.6086,0.0265,0.0271,0.3721,0.4768,0.9207
1,1957-12,21.9544,24.4857,0.0609,0.0587,0.7181,0.7212,0.8429
2,1958-12,15.6429,18.0749,0.0355,0.0364,0.5475,0.556,0.9269
Mean,NaT,16.1549,19.3897,0.041,0.0407,0.5459,0.5847,0.8968
SD,NaT,4.5407,3.7414,0.0146,0.0133,0.1412,0.1018,0.0382


In [43]:
# Forecast with the mean blender (metrics grid is displayed) and plot the forecast
y_predict = exp.predict_model(mean_blender)
exp.plot_model(estimator=mean_blender)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,EnsembleForecaster,10.1564,13.5247,0.022,0.0218,0.3335,0.3915,0.967


#### Median Blender

In [44]:
# Same ensemble members, combined with method='median' instead of the mean
median_blender = exp.blend_models(best_tuned_models, method='median')

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,10.5077,16.1201,0.0252,0.0258,0.3598,0.4925,0.9154
1,1957-12,21.4761,23.952,0.0596,0.0575,0.7025,0.7055,0.8497
2,1958-12,15.4222,17.0721,0.0357,0.0365,0.5397,0.5252,0.9348
Mean,NaT,15.802,19.0481,0.0402,0.0399,0.534,0.5744,0.9
SD,NaT,4.4859,3.4893,0.0144,0.0132,0.1399,0.0937,0.0364


In [45]:
# Forecast with the median blender (metrics grid is displayed) and plot the forecast
y_predict = exp.predict_model(median_blender)
exp.plot_model(estimator=median_blender)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,EnsembleForecaster,10.3849,15.9636,0.0225,0.022,0.341,0.4621,0.954


#### Voting Blender

In [46]:
# Derive voting weights from the cross-validated SMAPE of the selected models.
# The number of rows is taken from best_tuned_models rather than a hard-coded 3, so the
# weights always line up one-to-one with the models handed to blend_models, even if
# n_select is changed in compare_models. This relies on compare_metrics being sorted
# the same way as the selected models (both come from the SMAPE-sorted comparison).
n_top_models = len(best_tuned_models)
top_model_metrics = compare_metrics.iloc[:n_top_models]['SMAPE']
display(top_model_metrics)

# weight_i = 1 - SMAPE_i / sum(SMAPE): the lower the error, the larger the weight.
# NOTE: these weights sum to (n_top_models - 1), not to 1.
top_model_weights = 1 - top_model_metrics/top_model_metrics.sum()
display(top_model_weights)

exp_smooth    0.0439
ets           0.0445
et_cds_dt     0.0489
Name: SMAPE, dtype: object

exp_smooth    0.680262
ets           0.675892
et_cds_dt     0.643846
Name: SMAPE, dtype: object

In [47]:
# Weighted ensemble: one weight per tuned model, in the same order as best_tuned_models
voting_blender = exp.blend_models(best_tuned_models, method='voting', weights=top_model_weights.values.tolist())

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,10.8228,15.5743,0.0264,0.027,0.3706,0.4758,0.921
1,1957-12,21.9399,24.4782,0.0609,0.0587,0.7176,0.721,0.843
2,1958-12,15.5563,18.006,0.0354,0.0362,0.5444,0.5539,0.9275
Mean,NaT,16.1063,19.3528,0.0409,0.0406,0.5442,0.5836,0.8972
SD,NaT,4.5552,3.7577,0.0146,0.0133,0.1417,0.1023,0.0384


In [48]:
# Forecast with the voting blender, print the forecast values (kept for comparison
# with the reloaded model further down) and plot them
y_predict = exp.predict_model(voting_blender)
print(y_predict)
exp.plot_model(estimator=voting_blender)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,EnsembleForecaster,10.0666,13.4752,0.0218,0.0216,0.3306,0.39,0.9672


1960-01    411.7342
1960-02    392.6736
1960-03    453.2320
1960-04    443.1429
1960-05    464.5462
1960-06    530.9557
1960-07    606.4344
1960-08    614.7683
1960-09    508.3600
1960-10    451.2903
1960-11    402.7466
1960-12    435.1228
Freq: M, Name: Time Series, dtype: float64


## Save and Load Model

In [49]:
# Persist the voting blender; assigning to `_` keeps the returned value out of the cell output
_ = exp.save_model(voting_blender, "my_blender")

Transformation Pipeline and Model Successfully Saved


In [50]:
# Load the saved blender through a brand-new experiment object (setup is not called on it)
loaded_exp = TimeSeriesExperiment()
m = loaded_exp.load_model("my_blender")
# Predictions should be the same as before the model was saved and loaded
# (the saved output shows the same 1960-01 to 1960-12 values as printed earlier)
loaded_exp.predict_model(m)

Transformation Pipeline and Model Successfully Loaded


1960-01    411.7342
1960-02    392.6736
1960-03    453.2320
1960-04    443.1429
1960-05    464.5462
1960-06    530.9557
1960-07    606.4344
1960-08    614.7683
1960-09    508.3600
1960-10    451.2903
1960-11    402.7466
1960-12    435.1228
Freq: M, dtype: float64

## Prediction Customization

In [51]:
# Auto ARIMA is used for this section; the later cells request prediction intervals from it.
# This rebinds `model` once more.
model = exp.create_model("auto_arima")

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,12.0769,15.8014,0.0298,0.0305,0.4136,0.4827,0.9187
1,1957-12,19.4102,21.4989,0.0538,0.052,0.6349,0.6333,0.8789
2,1958-12,31.602,33.0981,0.0738,0.0769,1.106,1.0182,0.7549
Mean,NaT,21.0297,23.4661,0.0525,0.0531,0.7181,0.7114,0.8509
SD,NaT,8.0529,7.197,0.018,0.019,0.2887,0.2255,0.0698


In [52]:
# Default prediction: point forecasts only, over the horizon given to setup (12 months)
exp.predict_model(model)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,Auto ARIMA,14.8982,18.5365,0.031,0.0309,0.4893,0.5365,0.938


1960-01    419.9672
1960-02    399.8499
1960-03    457.9944
1960-04    444.4742
1960-05    464.7892
1960-06    514.1254
1960-07    587.8042
1960-08    597.0108
1960-09    499.5313
1960-10    442.3597
1960-11    396.4102
1960-12    438.6516
Freq: M, Name: Time Series, dtype: float64

In [53]:
# With Prediction Interval (default alpha = 0.05)
# The result becomes a table with y_pred, lower and upper columns instead of a single series
exp.predict_model(model, return_pred_int=True)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,Auto ARIMA,14.8982,18.5365,0.031,0.0309,0.4893,0.5365,0.938


Unnamed: 0,y_pred,lower,upper
1960-01,419.9672,400.2603,439.674
1960-02,399.8499,375.7392,423.9605
1960-03,457.9944,429.6697,486.319
1960-04,444.4742,414.0056,474.9427
1960-05,464.7892,432.7993,496.779
1960-06,514.1254,481.2385,547.0123
1960-07,587.8042,554.3237,621.2846
1960-08,597.0108,563.1606,630.861
1960-09,499.5313,465.441,533.6215
1960-10,442.3597,408.1167,476.6027


In [54]:
# With Prediction Interval (custom alpha = 0.2)
# A larger alpha gives a narrower interval: compare the lower/upper bounds with the previous output
exp.predict_model(model, return_pred_int=True, alpha=0.2)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,Auto ARIMA,14.8982,18.5365,0.031,0.0309,0.4893,0.5365,0.938


Unnamed: 0,y_pred,lower,upper
1960-01,419.9672,407.0816,432.8528
1960-02,399.8499,384.0847,415.615
1960-03,457.9944,439.4739,476.5149
1960-04,444.4742,424.5519,464.3965
1960-05,464.7892,443.8722,485.7062
1960-06,514.1254,492.6219,535.629
1960-07,587.8042,565.9125,609.6959
1960-08,597.0108,574.8774,619.1443
1960-09,499.5313,477.2408,521.8217
1960-10,442.3597,419.9694,464.75


In [55]:
# Increased forecast horizon to 2 years (steps 1..24) instead of the original 1 year.
# No metrics grid appears in the saved output for this call - presumably because the
# horizon extends past the held-out test data; TODO confirm.
exp.predict_model(model, fh = np.arange(1, 25))

1960-01    419.9672
1960-02    399.8499
1960-03    457.9944
1960-04    444.4742
1960-05    464.7892
1960-06    514.1254
1960-07    587.8042
1960-08    597.0108
1960-09    499.5313
1960-10    442.3597
1960-11    396.4102
1960-12    438.6516
1961-01    453.0077
1961-02    432.4005
1961-03    490.1513
1961-04    476.3150
1961-05    496.3762
1961-06    545.5087
1961-07    619.0237
1961-08    628.0990
1961-09    530.5139
1961-10    473.2576
1961-11    427.2400
1961-12    469.4268
Freq: M, Name: Time Series, dtype: float64

In [56]:
# For models that do not produce a prediction interval --> returns NA values
# (the lower and upper columns are empty in the saved output for this linear-regression forecaster)
model = exp.create_model("lr_cds_dt")
exp.predict_model(model, return_pred_int=True)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,20.8412,27.6262,0.0513,0.0533,0.7137,0.844,0.7516
1,1957-12,20.4172,23.8918,0.0557,0.0539,0.6678,0.7038,0.8505
2,1958-12,20.5669,24.8024,0.0457,0.0471,0.7198,0.763,0.8624
Mean,NaT,20.6084,25.4401,0.0509,0.0514,0.7004,0.7702,0.8215
SD,NaT,0.1756,1.5898,0.0041,0.0031,0.0232,0.0575,0.0497


Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,LinearRegression,24.3376,32.0418,0.0475,0.0493,0.7993,0.9275,0.8147


Unnamed: 0,y_pred,lower,upper
1960-01,399.574,,
1960-02,384.6911,,
1960-03,420.8922,,
1960-04,412.8696,,
1960-05,438.352,,
1960-06,494.9357,,
1960-07,556.8907,,
1960-08,558.1492,,
1960-09,503.6881,,
1960-10,449.0433,,


## Enforce Prediction Intervals

In [57]:
exp = TimeSeriesExperiment()

# enforce_pi restricts the models to only those that provide a prediction interval.
# This also adds an extra metric called COV_PROB, which gives the fraction
# (a value between 0 and 1) of actuals that fall within the prediction interval.
# session_id is fixed, as in the common setup earlier, so the comparison is reproducible.
exp.setup(data=y, fh=fh, fold=fold, enforce_pi=True, session_id=42)
best_model = exp.compare_models()

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2,COV_PROB,TT (Sec)
ets,ETS,17.4165,20.5103,0.044,0.0445,0.5931,0.6212,0.8882,0.7778,0.1667
arima,ARIMA,20.0069,22.2199,0.0501,0.0507,0.683,0.6735,0.8677,0.7778,0.0533
auto_arima,Auto ARIMA,21.0297,23.4661,0.0525,0.0531,0.7181,0.7114,0.8509,0.8056,4.13
theta,Theta Forecaster,28.3192,33.8639,0.067,0.07,0.9729,1.0306,0.671,0.75,0.0167


## Types of Window Splitters

### Sliding Window Splitter

In [58]:
# Sliding-window cross-validation. session_id is fixed, as in the common setup earlier,
# so the splitter comparison is reproducible and differs only in fold_strategy.
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, fold_strategy='sliding', session_id=42)
model = exp.create_model("ets")

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,14.5582,18.7725,0.0366,0.0376,0.4985,0.5735,0.8853
1,1957-12,16.5552,19.0087,0.0452,0.0441,0.5004,0.5301,0.9053
2,1958-12,35.6971,39.1497,0.0808,0.0847,1.1695,1.1352,0.6571
Mean,NaT,22.2701,25.6436,0.0542,0.0555,0.7228,0.7463,0.8159
SD,NaT,9.5292,9.5507,0.0191,0.0208,0.3158,0.2756,0.1126


### Expanding/Rolling Window

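* The `'expanding'` and `'rolling'` fold strategies are identical: the two runs below produce the same cross-validation metrics.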

In [59]:
# Expanding-window cross-validation. session_id is fixed, as in the common setup earlier,
# so the splitter comparison is reproducible and differs only in fold_strategy.
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, fold_strategy='expanding', session_id=42)
model = exp.create_model("ets")

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,14.5582,18.7725,0.0366,0.0376,0.4985,0.5735,0.8853
1,1957-12,16.8005,19.3265,0.0458,0.0447,0.5495,0.5693,0.9021
2,1958-12,20.8908,23.4318,0.0495,0.0512,0.7311,0.7208,0.8772
Mean,NaT,17.4165,20.5103,0.044,0.0445,0.5931,0.6212,0.8882
SD,NaT,2.6217,2.0782,0.0054,0.0055,0.0998,0.0705,0.0104


In [60]:
# Rolling-window cross-validation. session_id is fixed, as in the common setup earlier,
# so the splitter comparison is reproducible and differs only in fold_strategy.
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, fold_strategy='rolling', session_id=42)
model = exp.create_model("ets")

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1956-12,14.5582,18.7725,0.0366,0.0376,0.4985,0.5735,0.8853
1,1957-12,16.8005,19.3265,0.0458,0.0447,0.5495,0.5693,0.9021
2,1958-12,20.8908,23.4318,0.0495,0.0512,0.7311,0.7208,0.8772
Mean,NaT,17.4165,20.5103,0.044,0.0445,0.5931,0.6212,0.8882
SD,NaT,2.6217,2.0782,0.0054,0.0055,0.0998,0.0705,0.0104


## Error Handling

In [61]:
# Demonstrates the error raised when the requested CV layout does not fit the data:
# a 17-step horizon with 76 folds needs more points than the 144-point series has.
# setup raises a ValueError, which is caught and printed so the notebook keeps running.
try:
    exp = TimeSeriesExperiment()
    exp.setup(data=y, fh=17, fold=76, fold_strategy='expanding')
except ValueError as error:
    print(error)

IntProgress(value=0, description='Processing: ', max=3)

Not Enough Data Points, set a lower number of folds or fh
