In [1]:
# try:
#   import pycaret
# except:
#    !pip install pycaret-ts-alpha

In [2]:
import numpy as np
import pandas as pd
from pycaret.datasets import  get_data
from pycaret.internal.pycaret_experiment import TimeSeriesExperiment

In [3]:
# Load the "airline" example dataset through pycaret's get_data helper.
# The preview printed below shows a monthly (Freq: M) float64 series named
# "Number of airline passengers".
y = get_data("airline")

Period
1949-01    112.0
1949-02    118.0
1949-03    132.0
1949-04    129.0
1949-05    121.0
Freq: M, Name: Number of airline passengers, dtype: float64

In [4]:
# Create the time-series experiment object; every setup / create_model / tune_model
# call in the rest of the notebook goes through this single instance.
exp = TimeSeriesExperiment()

# Understanding Hyperparameters

## Hyper-Parameter: Window Length

In [5]:
#### Setup experiment with MLFlow logging ----
# Collect the options first so the experiment name used for this section is easy to spot.
setup_options = dict(
    data=y,
    fh=12,
    session_id=42,
    log_experiment=True,
    experiment_name="my_exp_hyper_window",
    log_plots=True,
)
exp.setup(**setup_options)

Unnamed: 0,Description,Value
0,session_id,42
1,Original Data,"(144, 1)"
2,Missing Values,False
3,Transformed Train Set,"(132,)"
4,Transformed Test Set,"(12,)"
5,Fold Generator,ExpandingWindowSplitter
6,Fold Number,3
7,Enforce Prediction Interval,False
8,Seasonal Period Tested,12
9,Seasonality Detected,True


<pycaret.internal.pycaret_experiment.time_series_experiment.TimeSeriesExperiment at 0x1f2b984cc70>

In [6]:
# Plot using plot_model's default settings (no arguments given).
exp.plot_model()
# Request the "acf" plot — presumably the autocorrelation function of the series;
# confirm against the pycaret documentation.
exp.plot_model(plot="acf")

In [7]:
## Launch the MLflow UI from a terminal that has this python/anaconda environment available.
## Alternatively, launch it from the notebook by uncommenting the line below.
## A web browser is needed to view the results.
# !mlflow ui

In [8]:
#### Create models with varying window lengths ----
candidate_window_lengths = np.arange(1, 25)  # 1..24 (upper bound is exclusive)
for window_length in candidate_window_lengths:
    # Only window_length is varied; other options (e.g. sp=12,
    # deseasonal_model="multiplicative") are deliberately not passed here.
    model = exp.create_model("lr_cds_dt", window_length=window_length)


Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,28.4175,30.8825,0.0765,0.0801,0.6895
1,1957-12,17.2156,20.0185,0.0455,0.0442,0.895
2,1958-12,25.6147,28.6554,0.0627,0.0654,0.8163
Mean,NaT,23.7493,26.5188,0.0616,0.0632,0.8003
SD,NaT,4.7596,4.6854,0.0127,0.0147,0.0846


## Hyper-Parameter: Seasonal Period

In [9]:
#### Setup experiment with MLFlow logging ----
# Collect the options first so the experiment name used for this section is easy to spot.
setup_options = dict(
    data=y,
    fh=12,
    session_id=42,
    log_experiment=True,
    experiment_name="my_exp_hyper_sp",
    log_plots=True,
)
exp.setup(**setup_options)

Unnamed: 0,Description,Value
0,session_id,42
1,Original Data,"(144, 1)"
2,Missing Values,False
3,Transformed Train Set,"(132,)"
4,Transformed Test Set,"(12,)"
5,Fold Generator,ExpandingWindowSplitter
6,Fold Number,3
7,Enforce Prediction Interval,False
8,Seasonal Period Tested,12
9,Seasonality Detected,True


<pycaret.internal.pycaret_experiment.time_series_experiment.TimeSeriesExperiment at 0x1f2b984cc70>

In [10]:
#### Create a model with varying seasonal periods ----
candidate_sps = np.arange(1, 25)  # 1..24 (upper bound is exclusive)
for sp in candidate_sps:
    # Only the seasonal period is varied; everything else stays at its default.
    model = exp.create_model("lr_cds_dt", sp=sp)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,38.6824,45.082,0.0998,0.1051,0.3384
1,1957-12,28.0608,34.6867,0.0751,0.0734,0.6848
2,1958-12,32.1693,38.2681,0.0737,0.0753,0.6724
Mean,NaT,32.9708,39.3456,0.0828,0.0846,0.5652
SD,NaT,4.3731,4.3117,0.012,0.0145,0.1604


## Hyper-Parameter: Window Length & Seasonal Period

In [11]:
#### Setup experiment with MLFlow logging ----
# Collect the options first so the experiment name used for this section is easy to spot.
setup_options = dict(
    data=y,
    fh=12,
    session_id=42,
    log_experiment=True,
    experiment_name="my_exp_hyper_window_sp",
    log_plots=True,
)
exp.setup(**setup_options)

Unnamed: 0,Description,Value
0,session_id,42
1,Original Data,"(144, 1)"
2,Missing Values,False
3,Transformed Train Set,"(132,)"
4,Transformed Test Set,"(12,)"
5,Fold Generator,ExpandingWindowSplitter
6,Fold Number,3
7,Enforce Prediction Interval,False
8,Seasonal Period Tested,12
9,Seasonality Detected,True


<pycaret.internal.pycaret_experiment.time_series_experiment.TimeSeriesExperiment at 0x1f2b984cc70>

In [12]:
#### Create a model with varying window length & seasonal periods ----
# Random search over (window_length, sp) pairs.
# A dedicated, seeded generator is used instead of NumPy's global random state so
# that re-running this cell always tries the same combinations, no matter what
# has drawn random numbers beforehand.
runs = 50
rng = np.random.default_rng(42)  # same seed value as session_id in the setup calls

# `integers` excludes the upper bound, so both hyperparameters range over 1..24.
window_lengths = rng.integers(1, 25, runs)
sps = rng.integers(1, 25, runs)

for window_length, sp in zip(window_lengths, sps):
    print(window_length, sp)  # show which combination is being fitted
    model = exp.create_model("lr_cds_dt", window_length=window_length, sp=sp)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,25.6712,35.2141,0.0623,0.0657,0.5964
1,1957-12,21.7773,27.1792,0.0571,0.0564,0.8065
2,1958-12,21.8883,31.284,0.0458,0.0477,0.781
Mean,NaT,23.1123,31.2257,0.0551,0.0566,0.728
SD,NaT,1.81,3.2805,0.0069,0.0074,0.0936


# Create and Tune Model

* Now that we have a better understanding of how these hyperparameters affect the model, we can appreciate what pycaret does internally during the tuning process.
* As shown below, the tuning grid automatically takes care of things like harmonics of the seasonal period (e.g. it searches over both 12 and 24).
* Of course, there is flexibility to change the tuning grid if needed. Refer to: https://github.com/pycaret/pycaret/discussions/1795
* Moreover, the default search is limited to only 10 hyperparameter runs (compared to the 50 runs in the MLflow bi-variate setting above), thus saving wall-clock time while still giving close-to-optimal results.

In [13]:
## Create setup, this time without MLFlow
# Same data, horizon and session id as the earlier setups; the logging options are simply omitted.
exp.setup(data=y, fh=12, session_id=42)

Unnamed: 0,Description,Value
0,session_id,42
1,Original Data,"(144, 1)"
2,Missing Values,False
3,Transformed Train Set,"(132,)"
4,Transformed Test Set,"(12,)"
5,Fold Generator,ExpandingWindowSplitter
6,Fold Number,3
7,Enforce Prediction Interval,False
8,Seasonal Period Tested,12
9,Seasonality Detected,True


<pycaret.internal.pycaret_experiment.time_series_experiment.TimeSeriesExperiment at 0x1f2b984cc70>

In [14]:
# Create model ----
# Baseline "lr_cds_dt" model with no hyperparameter overrides; it is the estimator
# handed to tune_model further below.
model = exp.create_model("lr_cds_dt")

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,38.6824,45.082,0.0998,0.1051,0.3384
1,1957-12,28.0608,34.6867,0.0751,0.0734,0.6848
2,1958-12,32.1693,38.2681,0.0737,0.0753,0.6724
Mean,NaT,32.9708,39.3456,0.0828,0.0846,0.5652
SD,NaT,4.3731,4.3117,0.012,0.0145,0.1604


In [15]:
# What does the internal tuning grid look like?
# If you do not like it, you can change it.
# See: https://github.com/pycaret/pycaret/discussions/1795
model_catalog = exp.models(internal=True)
lr_cds_dt_entry = model_catalog.loc["lr_cds_dt"]
lr_cds_dt_entry["Tune Distributions"]

{'sp': CategoricalDistribution(values=[12, 24]),
 'deseasonal_model': CategoricalDistribution(values=['additive', 'multiplicative']),
 'degree': IntUniformDistribution(lower=1, upper=10, log=False),
 'window_length': IntUniformDistribution(lower=12, upper=24, log=False),
 'regressor__fit_intercept': CategoricalDistribution(values=[True, False]),
 'regressor__normalize': CategoricalDistribution(values=[True, False])}

In [16]:
# Defaults to only 10 iterations (i.e. only searches 10 hyperparameter combinations)
# return_tuner=True makes the call return the tuner object as well; its cv_results_
# are inspected in a later cell.
tuned_model, tuner = exp.tune_model(model, return_tuner=True)

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,R2
0,1956-12,9.2184,12.1077,0.0233,0.0235,0.9523
1,1957-12,30.6011,33.3898,0.0834,0.0794,0.7079
2,1958-12,13.6786,15.8682,0.032,0.0325,0.9437
Mean,NaT,17.8327,20.4552,0.0462,0.0452,0.868
SD,NaT,9.2104,9.2741,0.0265,0.0245,0.1132


In [17]:
# Display the tuned estimator; its repr lists the hyperparameters that were selected.
tuned_model

BaseCdsDtForecaster(degree=2, deseasonal_model='multiplicative',
                    regressor=LinearRegression(copy_X=True, fit_intercept=False,
                                               n_jobs=-1, normalize=True,
                                               positive=False),
                    sp=12, window_length=23)

In [18]:
# Tabulate the per-candidate cross-validation results recorded by the tuner.
cv_results = pd.DataFrame(tuner.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_degree,param_deseasonal_model,param_regressor__fit_intercept,param_regressor__normalize,param_sp,param_window_length,params,split0_test_smape,split1_test_smape,split2_test_smape,mean_test_smape,std_test_smape,rank_test_smape
0,0.028259,0.008238,0.000665,0.0004704714,7,multiplicative,True,True,12,19,"{'degree': 7, 'deseasonal_model': 'multiplicat...",0.346732,0.136618,0.058968,0.180773,0.121557,5
1,0.021943,2e-06,0.000665,0.0004700779,5,additive,False,True,12,22,"{'degree': 5, 'deseasonal_model': 'additive', ...",0.128711,0.090898,0.276334,0.165315,0.080006,4
2,0.021943,0.003549,0.000665,0.0004701355,8,additive,False,False,24,14,"{'degree': 8, 'deseasonal_model': 'additive', ...",1.588286,0.28509,0.212746,0.695374,0.632074,10
3,0.020278,0.000469,0.000998,2.247832e-07,6,additive,False,False,24,17,"{'degree': 6, 'deseasonal_model': 'additive', ...",0.878957,0.39539,0.332348,0.535565,0.244175,8
4,0.027924,0.004532,0.000332,0.0004697969,2,multiplicative,False,True,12,23,"{'degree': 2, 'deseasonal_model': 'multiplicat...",0.023537,0.079412,0.032535,0.045161,0.024496,1
5,0.04621,0.000939,0.000665,0.0004700217,6,additive,False,True,12,22,"{'degree': 6, 'deseasonal_model': 'additive', ...",0.465248,0.122866,0.169187,0.252434,0.151666,6
6,0.018949,0.001411,0.000333,0.0004702464,3,multiplicative,True,False,24,20,"{'degree': 3, 'deseasonal_model': 'multiplicat...",0.01397,0.088033,0.058526,0.05351,0.030443,2
7,0.02992,0.009872,0.000664,0.0004694603,3,additive,True,True,12,20,"{'degree': 3, 'deseasonal_model': 'additive', ...",0.025654,0.094477,0.056408,0.058846,0.02815,3
8,0.023935,0.00636,0.000665,0.0004703029,7,multiplicative,False,True,24,13,"{'degree': 7, 'deseasonal_model': 'multiplicat...",1.187042,0.547762,0.345976,0.693593,0.358514,9
9,0.024934,0.004886,0.000665,0.0004702465,9,multiplicative,True,False,24,23,"{'degree': 9, 'deseasonal_model': 'multiplicat...",0.486621,0.356463,0.286245,0.376443,0.083014,7
