<a href="https://colab.research.google.com/github/ngupta23/medium_articles/blob/main/time_series/pycaret/pycaret_ts_diagnostics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
def what_is_installed():
    """Call PyCaret's ``show_versions()`` helper.

    Both imports live inside the function, so a missing or broken PyCaret
    installation raises when the function is *called*, not when it is defined.
    The caller can therefore wrap the call in ``try``/``except``.
    """
    import pycaret
    from pycaret import show_versions as report_versions

    report_versions()

try:
    what_is_installed()
except:
    !pip install pycaret-ts-alpha
    what_is_installed()

In [None]:
from typing import Optional, List, Union
import numpy as np
import pandas as pd
from pycaret.datasets import get_data
from pycaret.time_series import TSForecastingExperiment

# Helper Function(s)

In [75]:
def extract_ts_info(
    data: pd.Series,
    seasonal_period:Optional[int]=None,
    lags_list:Optional[List[Union[int, List[int]]]]=None,
    verbose: bool = False
):
    """Plot diagnostic charts for a time series using a PyCaret experiment.

    A fresh ``TSForecastingExperiment`` is created and set up on ``data``
    (fixed ``session_id=42``, figures rendered with the ``"colab"``
    renderer). The ``"diagnostics"`` plot is always drawn. When ``lags_list``
    is non-empty, a ``"diff"`` plot is drawn as well.

    Parameters
    ----------
    data : pd.Series
        The series to analyse; passed straight to ``setup``.
    seasonal_period : Optional[int]
        Forwarded to ``setup`` as ``seasonal_period``.
    lags_list : Optional[List[Union[int, List[int]]]]
        Forwarded to the ``"diff"`` plot through ``data_kwargs`` together
        with ``acf``, ``pacf`` and ``periodogram`` all switched on. The
        figure height is 600 per entry in the list. ``None`` or an empty
        list skips this plot.
    verbose : bool
        Forwarded to ``setup`` as ``verbose``.

    Returns
    -------
    None
        The function only produces plots.
    """
    experiment = TSForecastingExperiment()
    experiment.setup(
        data=data,
        seasonal_period=seasonal_period,
        fig_kwargs={"renderer": "colab"},
        session_id=42,
        verbose=verbose,
    )

    # Diagnostics are shown for every series.
    experiment.plot_model(
        plot="diagnostics",
        fig_kwargs={"height": 900, "width": 1200},
    )

    # Nothing more to draw unless differencing lags were requested.
    if not lags_list:
        return

    diff_options = {
        "lags_list": lags_list,
        "acf": True,
        "pacf": True,
        "periodogram": True,
    }
    # The figure height grows with the number of requested lag specifications.
    diff_figure = {"height": len(lags_list) * 600, "width": 1600}
    experiment.plot_model(
        plot="diff",
        data_kwargs=diff_options,
        fig_kwargs=diff_figure,
    )

In [None]:
# get_data?

# AR(1)

In [None]:
# Fetch the index table for the "time_series/ar1" folder (no dataset id is
# given). Presumably one row per available AR(1) series -- the cells below
# filter it by its `phi` column.
index = get_data(folder="time_series/ar1")

## AR(1) with positive phi

### phi = 0.99

In [None]:
# Show the AR(1) index rows whose `phi` column equals 0.99.
index.query("phi==0.99")

In [None]:
# Load series 1 from the AR(1) folder and plot its diagnostics.
# seasonal_period=1 is passed explicitly (i.e. no seasonal cycle).
data = get_data(1, folder="time_series/ar1", verbose=False)
extract_ts_info(data, seasonal_period=1)

### phi = 0.5

In [None]:
# Show the AR(1) index rows whose `phi` column equals 0.5.
index.query("phi==0.5")

In [None]:
# Load series 246 from the AR(1) folder and plot its diagnostics.
# seasonal_period=1 is passed explicitly (i.e. no seasonal cycle).
data = get_data(246, folder="time_series/ar1", verbose=False)
extract_ts_info(data, seasonal_period=1)

## AR(1) with negative phi

### phi = -0.99

In [None]:
# Show the AR(1) index rows whose `phi` column equals -0.99.
index.query("phi==-0.99")

In [None]:
# Load series 991 from the AR(1) folder and plot its diagnostics.
# seasonal_period=1 is passed explicitly (i.e. no seasonal cycle).
data = get_data(991, folder="time_series/ar1", verbose=False)
extract_ts_info(data, seasonal_period=1)

### phi = -0.5

In [None]:
# Show the AR(1) index rows whose `phi` column equals -0.5.
index.query("phi==-0.5")

In [None]:
# Load series 746 from the AR(1) folder and plot its diagnostics.
# seasonal_period=1 is passed explicitly (i.e. no seasonal cycle).
data = get_data(746, folder="time_series/ar1", verbose=False)
extract_ts_info(data, seasonal_period=1)

# MA(1)

In [23]:
# Fetch the index table for the "time_series/ma1" folder (no dataset id is
# given). This rebinds `index`, replacing the AR(1) index loaded earlier.
index = get_data(folder="time_series/ma1")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
0,1,0,0,0,1,340,1,176,0.639106,40,0,0.99
1,2,0,0,0,1,1222,2,-865,0.776357,41,0,0.99
2,3,0,0,0,1,1647,3,-355,1.300325,42,0,0.99
3,4,0,0,0,1,220,4,-267,1.510748,43,0,0.99
4,5,0,0,0,1,183,5,447,6.954868,44,0,0.99
...,...,...,...,...,...,...,...,...,...,...,...,...
990,991,0,0,0,1,503,1,-581,6.139167,40,0,-0.99
991,992,0,0,0,1,616,2,-164,0.121958,41,0,-0.99
992,993,0,0,0,1,1064,3,354,11.536447,42,0,-0.99
993,994,0,0,0,1,262,4,-503,4.027265,43,0,-0.99


(995, 12)

## MA(1) with positive theta

### theta = 0.99

In [24]:
# Show the MA(1) index rows whose `theta` column equals 0.99.
index.query("theta==0.99")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
0,1,0,0,0,1,340,1,176,0.639106,40,0,0.99
1,2,0,0,0,1,1222,2,-865,0.776357,41,0,0.99
2,3,0,0,0,1,1647,3,-355,1.300325,42,0,0.99
3,4,0,0,0,1,220,4,-267,1.510748,43,0,0.99
4,5,0,0,0,1,183,5,447,6.954868,44,0,0.99


In [27]:
# Load series 1 from the MA(1) folder (a theta = 0.99 entry in the index
# listing) and plot its diagnostics with seasonal_period=1 (no seasonal cycle).
data = get_data(1, folder="time_series/ma1", verbose=False)
extract_ts_info(data, seasonal_period=1)

### theta = 0.5

In [28]:
# Show the MA(1) index rows whose `theta` column equals 0.5.
index.query("theta==0.5")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
245,246,0,0,0,1,29,1,339,1.022834,40,0,0.5
246,247,0,0,0,1,101,2,570,0.96818,41,0,0.5
247,248,0,0,0,1,231,3,-216,0.791295,42,0,0.5
248,249,0,0,0,1,1076,4,930,1.795469,43,0,0.5
249,250,0,0,0,1,86,5,-725,2.163763,44,0,0.5


In [30]:
# Load series 247 from the MA(1) folder (a theta = 0.5 entry in the index
# listing) and plot its diagnostics with seasonal_period=1 (no seasonal cycle).
data = get_data(247, folder="time_series/ma1", verbose=False)
extract_ts_info(data, seasonal_period=1)

## MA(1) with negative theta

### theta = -0.99

In [31]:
# Show the MA(1) index rows whose `theta` column equals -0.99.
index.query("theta==-0.99")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
990,991,0,0,0,1,503,1,-581,6.139167,40,0,-0.99
991,992,0,0,0,1,616,2,-164,0.121958,41,0,-0.99
992,993,0,0,0,1,1064,3,354,11.536447,42,0,-0.99
993,994,0,0,0,1,262,4,-503,4.027265,43,0,-0.99
994,995,0,0,0,1,45,5,520,0.531432,44,0,-0.99


In [32]:
# Load series 991 from the MA(1) folder (a theta = -0.99 entry in the index
# listing) and plot its diagnostics with seasonal_period=1 (no seasonal cycle).
data = get_data(991, folder="time_series/ma1", verbose=False)
extract_ts_info(data, seasonal_period=1)

### theta = -0.5

In [37]:
# Show the MA(1) index rows whose `theta` column equals -0.5.
index.query("theta==-0.5")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
745,746,0,0,0,1,523,1,47,0.156105,40,0,-0.5
746,747,0,0,0,1,158,2,-321,5.72773,41,0,-0.5
747,748,0,0,0,1,897,3,-825,0.763,42,0,-0.5
748,749,0,0,0,1,302,4,-63,1.464787,43,0,-0.5
749,750,0,0,0,1,75,5,516,9.005816,44,0,-0.5


In [38]:
# Load series 746 from the MA(1) folder (a theta = -0.5 entry in the index
# listing) and plot its diagnostics with seasonal_period=1 (no seasonal cycle).
data = get_data(746, folder="time_series/ma1", verbose=False)
extract_ts_info(data, seasonal_period=1)

# Random Walk 

d = 1

Action for Reader: Compare to AR(1) with phi = 0.99. One should see similar characteristics, except for the fact that AR(1) is stationary and its forecasts will revert to the mean, while a Random Walk is non-stationary and its forecasts will not revert to the mean.

In [77]:
# Fetch the index table for the "time_series/random_walk" folder (no dataset
# id is given). This rebinds `index`, replacing the MA(1) index.
index = get_data(folder="time_series/random_walk")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
0,1,0,1,0,0,340,1,176,0.639106,1,0,0
1,2,0,1,0,0,241,2,596,7.613575,2,0,0
2,3,0,1,0,0,452,3,-675,5.388653,3,0,0
3,4,0,1,0,0,80,4,-737,4.119034,4,0,0
4,5,0,1,0,0,441,5,856,1.825233,5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
495,496,0,1,0,0,416,496,-434,8.222747,496,0,0
496,497,0,1,0,0,606,497,740,9.088138,497,0,0
497,498,0,1,0,0,509,498,-472,0.732094,498,0,0
498,499,0,1,0,0,341,499,-956,0.728765,499,0,0


In [81]:
# Load series 61 from the random-walk folder and plot its diagnostics with
# seasonal_period=1 (no seasonal cycle). No lags_list is passed, so only the
# "diagnostics" plot is drawn (no differenced view).
data = get_data(61, folder="time_series/random_walk", verbose=False)
extract_ts_info(data, seasonal_period=1)

# Seasonal Data

In [60]:
# Fetch the index table for the "time_series/seasonal" folder (no dataset id
# is given). This rebinds `index`; the queries below filter it by the `s` and
# `d` columns.
index = get_data(folder="time_series/seasonal")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
0,1,1,0,0,4,16,1,97,0.046448,40,0,0.631477142440397 -0.163116680352825 0.6648996...
1,2,1,0,6,4,16,2,991,4.294360,41,-0.0422930356855119 0.420200062798738 -0.23174...,-0.0851445749108851 -0.544165855934772 -1.5055...
2,3,1,0,9,10,39,3,176,0.480406,42,-0.075976288978699 0.395648647852772 0.1325090...,0.0358305954484125 -0.0587204627053036 -0.6022...
3,4,1,0,4,9,25,4,929,5.795382,43,-0.075976288978699 0.395648647852772 0.1325090...,0.116786991778112 -0.181070647462236 0.0049714...
4,5,1,0,5,1,32,5,-324,0.342188,44,-0.075976288978699 0.395648647852772 0.1325090...,-0.181070647462236
...,...,...,...,...,...,...,...,...,...,...,...,...
515,516,52,1,6,6,582,1,-285,1.735645,40,0.343249502380434 -0.146881709557787 0.1961214...,-0.0243899410698834 -0.24256076338914 -0.02094...
516,517,52,1,1,8,2080,2,-368,6.242845,41,0.343249502380434,-0.146881709557787 0.196121407672383 -0.237524...
517,518,52,1,11,11,416,3,-52,0.226525,42,0.343249502380434 -0.146881709557787 0.1961214...,0.142776352275337 0.555652872488089 -0.3723392...
518,519,52,1,9,9,266,4,-579,1.545159,43,0.343249502380434 -0.146881709557787 0.1961214...,-0.198055306201126 -0.109942932434522 0.142776...


## Seasonal Period = 7
e.g. Daily Data

In [61]:
# Seasonal period shared by the query and diagnostics cells of this section
# (7 -- e.g. daily data, per the section heading).
seasonal_period = 7

### d = 0

In [62]:
# Show the index rows with s equal to the current `seasonal_period` and d == 0.
# `@seasonal_period` makes pandas read the Python variable of that name.
index.query("s==@seasonal_period and d==0")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
60,61,7,0,7,3,93,1,-490,5.395724,40,0.343249502380434 -0.146881709557787 0.1961214...,-0.24256076338914 -0.0209406738588465 -0.19805...
61,62,7,0,8,12,265,2,-321,4.206335,41,0.343249502380434 -0.146881709557787 0.1961214...,-0.0209406738588465 -0.198055306201126 -0.1099...
62,63,7,0,3,11,43,3,2,0.008998,42,0.343249502380434 -0.146881709557787 0.1961214...,-0.237524268273818 -0.0816369612166716 -0.1221...
63,64,7,0,9,8,122,4,-886,1.341899,43,0.343249502380434 -0.146881709557787 0.1961214...,-0.198055306201126 -0.109942932434522 0.142776...
64,65,7,0,6,7,105,5,470,5.736638,44,0.343249502380434 -0.146881709557787 0.1961214...,-0.0243899410698834 -0.24256076338914 -0.02094...


In [63]:
# Load series 61 from the seasonal folder (an s = 7, d = 0 entry in the index
# listing) and plot its diagnostics using the section's seasonal period.
data = get_data(61, folder="time_series/seasonal", verbose=False)
extract_ts_info(data, seasonal_period=seasonal_period)

### d = 1

In [64]:
# Show the index rows with s equal to the current `seasonal_period` and d == 1.
# `@seasonal_period` makes pandas read the Python variable of that name.
index.query("s==@seasonal_period and d==1")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
65,66,7,1,4,1,127,1,836,2.278801,40,0.343249502380434 -0.146881709557787 0.1961214...,-0.0816369612166716
66,67,7,1,1,11,46,2,-758,9.496216,41,0.343249502380434,-0.146881709557787 0.196121407672383 -0.237524...
67,68,7,1,9,7,55,3,-904,8.997322,42,0.343249502380434 -0.146881709557787 0.1961214...,-0.198055306201126 -0.109942932434522 0.142776...
68,69,7,1,9,0,293,4,619,6.75663,43,0.343249502380434 -0.146881709557787 0.1961214...,0
69,70,7,1,2,9,248,5,690,5.749927,44,0.343249502380434 -0.146881709557787,0.196121407672383 -0.237524268273818 -0.081636...


In [76]:
# Load series 66 from the seasonal folder (an s = 7, d = 1 entry in the index
# listing) and plot its diagnostics. lags_list=[1] additionally requests the
# "diff" plot (with ACF, PACF and periodogram) for lag 1.
data = get_data(66, folder="time_series/seasonal", verbose=False)
extract_ts_info(data, seasonal_period=seasonal_period, lags_list=[1])

## Seasonal Period = 12
e.g. Monthly Data

In [53]:
# Seasonal period shared by the query and diagnostics cells of this section
# (12 -- e.g. monthly data, per the section heading).
seasonal_period = 12

### d = 0

In [46]:
# Show the index rows with s equal to the current `seasonal_period` and d == 0.
# `@seasonal_period` makes pandas read the Python variable of that name.
index.query("s==@seasonal_period and d==0")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
110,111,12,0,4,8,52,1,-731,6.458975,40,0.343249502380434 -0.146881709557787 0.1961214...,-0.0816369612166716 -0.122160276390184 -0.0243...
111,112,12,0,5,9,119,2,236,0.297378,41,0.343249502380434 -0.146881709557787 0.1961214...,-0.122160276390184 -0.0243899410698834 -0.2425...
112,113,12,0,4,8,135,3,391,1.489828,42,0.343249502380434 -0.146881709557787 0.1961214...,-0.0816369612166716 -0.122160276390184 -0.0243...
113,114,12,0,4,5,171,4,191,3.175622,43,0.343249502380434 -0.146881709557787 0.1961214...,-0.0816369612166716 -0.122160276390184 -0.0243...
114,115,12,0,0,4,39,5,-957,5.570359,44,0,0.343249502380434 -0.146881709557787 0.1961214...


In [49]:
# Load series 113 from the seasonal folder (an s = 12, d = 0 entry in the
# index listing) and plot its diagnostics using the section's seasonal period.
data = get_data(113, folder="time_series/seasonal", verbose=False)
extract_ts_info(data, seasonal_period=seasonal_period)

### d = 1

In [54]:
# Show the index rows with s equal to the current `seasonal_period` and d == 1.
# `@seasonal_period` makes pandas read the Python variable of that name.
index.query("s==@seasonal_period and d==1")

Unnamed: 0,index,s,d,p,q,n,rep,mean,vara,seed,phi,theta
115,116,12,1,2,4,223,1,208,0.778268,40,0.343249502380434 -0.146881709557787,0.196121407672383 -0.237524268273818 -0.081636...
116,117,12,1,2,2,72,2,372,0.218849,41,0.343249502380434 -0.146881709557787,0.196121407672383 -0.237524268273818
117,118,12,1,4,3,120,3,770,6.166481,42,0.343249502380434 -0.146881709557787 0.1961214...,-0.0816369612166716 -0.122160276390184 -0.0243...
118,119,12,1,0,8,69,4,45,0.39274,43,0,0.343249502380434 -0.146881709557787 0.1961214...
119,120,12,1,7,4,61,5,-454,2.443383,44,0.343249502380434 -0.146881709557787 0.1961214...,-0.24256076338914 -0.0209406738588465 -0.19805...


In [82]:
# Load series 116 from the seasonal folder (an s = 12, d = 1 entry in the
# index listing) and plot its diagnostics. lags_list=[1] additionally requests
# the "diff" plot (with ACF, PACF and periodogram) for lag 1.
data = get_data(116, folder="time_series/seasonal", verbose=False)
extract_ts_info(data, seasonal_period=seasonal_period, lags_list=[1])