In [None]:
#| default_exp time_series.unit_root_tests

In [None]:
#| export
from typing import Union
from itertools import product as Product
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import national.time_series.process as ts_process
from IPython.display import display, Markdown


Sources:


[Wikipedia: Augmented Dickey–Fuller test](https://en.wikipedia.org/wiki/Augmented_Dickey%E2%80%93Fuller_test)

[statsmodels: `adfuller` API reference](https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html)

In [None]:
#| export
def augmented_dickey_fuller(
    df: pd.DataFrame,
    kpi: str,
    freqs: Union[str, list, None] = None,
    ylog: bool = True,
) -> pd.DataFrame:
    """
    Summarise the Augmented Dickey-Fuller test for one KPI.

    The KPI series is built with ``ts_process.time_series`` and then tested
    with ``statsmodels.tsa.stattools.adfuller`` once per deterministic-term
    specification (``'c'``, ``'ct'``, ``'ctt'`` and ``'n'``). The lag order is
    selected automatically with ``autolag='AIC'``.

    Parameters
    ----------
    df : pd.DataFrame
        Input data. It is handed to ``ts_process.time_series`` with
        ``time_column='date'``, so it is expected to carry a ``'date'`` column
        and a column named ``kpi``.
    kpi : str
        Name of the column to test. It is aggregated with ``np.mean`` when the
        time series is built.
    freqs : str or list, optional
        Frequency specification forwarded as ``freq`` to
        ``ts_process.time_series``. Defaults to ``['W-Fri', 'Q']``.
    ylog : bool, default True
        Currently not used by this function; kept so existing callers keep
        working.

    Returns
    -------
    pd.DataFrame
        One row per regression specification (index level ``'Regression'``)
        and two float columns under the top-level label
        ``'Augmented Dickey-Fuller'``: ``'Test statistic'`` and
        ``'MacKinnon’s p-value'``.
    """
    if freqs is None:
        # Build the default per call instead of sharing one list object
        # between invocations (mutable-default pitfall).
        freqs = ['W-Fri', 'Q']

    series = ts_process.time_series(
        df=df,
        time_column='date',
        freq=freqs,
        agg_method={kpi: np.mean},
    )

    # Human-readable row label -> statsmodels `regression` code.
    regression = {
        'constant': 'c',
        'constant and trend': 'ct',
        'const., and linear and quad. trend': 'ctt',
        'no constant, no trend': 'n',
    }

    # Kept as a single-level MultiIndex so the layout of the returned frame
    # stays the same for code that already indexes into it.
    row_index = pd.MultiIndex.from_product(
        [list(regression)],
        names=['Regression'],
    )
    column_index = pd.MultiIndex.from_product([
        ['Augmented Dickey-Fuller'],
        ['Test statistic', 'MacKinnon’s p-value'],
    ])

    rows = []
    for code in regression.values():
        result = adfuller(
            series[kpi],
            maxlag=None,
            regression=code,
            autolag='AIC',
            store=True,
            regresults=False,
        )
        # adfuller returns the test statistic first and the p-value second.
        rows.append([result[0], result[1]])

    # Build the frame in one go with a numeric dtype rather than filling an
    # empty object-dtype frame cell by cell.
    return pd.DataFrame(rows, index=row_index, columns=column_index, dtype=float)

In [None]:
##%% export
from national.data_preprocessing.date_features import Data
from national.time_series import unit_root_tests


# Demo: run the ADF summary on the 'price' KPI of the historic data, passing
# freqs='M' (pandas month-end alias) through to the time-series helper.
data = Data()
adf_df = unit_root_tests.augmented_dickey_fuller(
    df=data.national_historic,
    kpi='price',
    freqs='M',
)
adf_df

Unnamed: 0_level_0,Augmented Dickey-Fuller,Augmented Dickey-Fuller
Unnamed: 0_level_1,Test statistic,MacKinnon’s p-value
Regression,Unnamed: 1_level_2,Unnamed: 2_level_2
constant,-1.921263,0.322125
constant and trend,-2.852782,0.178234
"const., and linear and quad. trend",-2.926008,0.331608
"no constant, no trend",0.637235,0.854519


In [None]:
#| export
def is_time_series_adf_stationary(
    adf_df: pd.DataFrame,
    alpha: float = 0.05,
) -> None:
    """
    Display a one-line stationarity verdict from an ADF summary table.

    Only the ``'no constant, no trend'`` row is inspected. Its MacKinnon
    p-value is compared with ``alpha`` and the verdict is rendered as
    Markdown through ``IPython.display.display``.

    Parameters
    ----------
    adf_df : pd.DataFrame
        Summary table with a ``'no constant, no trend'`` row and a
        ``('Augmented Dickey-Fuller', 'MacKinnon’s p-value')`` column. The row
        label may live in a plain index or in the first level of a MultiIndex.
    alpha : float, default 0.05
        Significance level. The series is reported as stationary when the
        p-value is less than or equal to ``alpha``.

    Raises
    ------
    KeyError
        If the expected row or column is missing from ``adf_df``.
    """
    idx = 'no constant, no trend'
    col = (
        "Augmented Dickey-Fuller",
        "MacKinnon’s p-value",
    )

    # Match on the first index level so both a flat index and a (single- or
    # multi-level) MultiIndex are handled the same way.
    row_mask = adf_df.index.get_level_values(0) == idx
    if not row_mask.any():
        raise KeyError(idx)
    p_value = adf_df[col].to_numpy()[row_mask][0]

    # `<=` (rather than negating `>`) makes a NaN p-value fall on the
    # conservative "not stationary" side instead of claiming stationarity.
    verdict = 'stationary' if p_value <= alpha else 'not stationary'
    text = f"Under {idx} regression the time series is {verdict}"
    display(Markdown(text))

In [None]:
##%% export 

# Demo: render the stationarity verdict for the summary table built above.
unit_root_tests.is_time_series_adf_stationary(adf_df=adf_df)

Under no constant, no trend regression the time series is  not  stationary