# use ADF to determine whether series is stationary

## get source DataFrame

In [1]:
def get_ytw():
    '''
    Load the data set analysed by the cells in this notebook.

    Instantiates ``nb_credit_spread.creditspread`` and returns whatever its
    ``get_ytw_from_date_delta`` yields for the workbook 'YTW-All-Values.xlsx'
    with ``start='2009-01-31'``.

    NOTE(review): callers index the result by column name and read
    ``.columns``, so it is presumably a pandas DataFrame - confirm against
    nb_credit_spread.
    '''
    # Imported lazily so the project module is only needed when data is requested.
    import nb_credit_spread as cslibrary

    return cslibrary.creditspread().get_ytw_from_date_delta(
        srcfile=r'YTW-All-Values.xlsx',
        start='2009-01-31',
    )

In [12]:

from statsmodels.tsa.stattools import adfuller

# Cross-check: statsmodels and arch should report the same ADF p-value for the
# same series and settings (constant + trend, BIC lag selection).
# The series is loaded once and shared by both calls so the spreadsheet is not
# read twice.
aaa_3mo_values = get_ytw()['CS-Aaa-3MO'].values
print(f"statsmodel adf pvalue: {adfuller(aaa_3mo_values, regression='ct', autolag='BIC')[1]}")

from arch.unitroot import ADF
print(f"arch adf pvalue: {ADF(y=aaa_3mo_values, trend='ct', method='BIC').pvalue}")

statsmodel adf pvalue: 0.05535603133209759
arch adf pvalue: 0.05535603133209759


In [25]:
def stationarity_stats_model(df, column, regression='ct', ic='BIC', signif=0.05):
    '''
    Run the statsmodels ADF test (and KPSS where applicable) on one column.

    Parameters:
        df: table holding the series; indexed as ``df[column].values``.
        column: name of the column to test.
        regression: deterministic-term code passed to both tests.
        ic: lag-selection criterion forwarded to ``adfuller`` as ``autolag``.
        signif: significance level; it is only echoed back in the result and
            does not influence either test.

    Returns:
        dict with keys 'column', 'signif', 'ic', 'regression', 'adf-pvalue'
        and 'kpss-pvalue'. 'kpss-pvalue' is None when ``regression`` is not
        'c' or 'ct', because KPSS is only run for those two codes.
    '''
    from statsmodels.tsa.stattools import adfuller
    from statsmodels.tsa.stattools import kpss

    values = df[column].values
    adf = adfuller(values, regression=regression, autolag=ic)

    # KPSS is skipped for other regression codes. Previously the result was
    # subscripted unconditionally, which raised TypeError on None for e.g.
    # 'ctt' or the no-constant case.
    kpss_pvalue = None
    if regression in ('c', 'ct'):
        kpss_pvalue = kpss(values, regression=regression)[1]

    return {
        'column': column,
        'signif': signif,
        'ic': ic,
        'regression': regression,
        'adf-pvalue': adf[1],
        'kpss-pvalue': kpss_pvalue,
    }
    
#o = stationarity_stats_model(df=get_ytw(), column='CS-Aaa-3MO', regression='ct', signif=0.05)
#print(o)

from statsmodels.tsa.stattools import adfuller
# Ad-hoc check: ADF with constant + trend ('ct') and BIC lag selection on the
# CS-Aaa-3MO column; element 1 of the returned tuple is printed as the p-value.
adf = adfuller(get_ytw()['CS-Aaa-3MO'].values, regression='ct', autolag='BIC')
print(f"pvalue: {adf[1]}")


  for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator():
  for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator():
  for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator():
pvalue: 0.05535603133209759


In [31]:
def run_stationarity_stats_model():
    '''
    Sweep the statsmodels stationarity tests over every 'CS-' column and log the table.

    For each column of ``get_ytw()`` whose name starts with 'CS-', calls
    ``stationarity_stats_model`` once per (regression, significance) pair,
    using BIC lag selection, and logs the collected results as a DataFrame at
    INFO level on the 'nb_ADF_unit_root_statsmodel' logger.

    Returns:
        None; the result table is only logged.
    '''
    import pandas as pd
    cs_df = get_ytw()
    regress = ['ct', 'c']  # 'nc', 'c', 'ct', 'ctt'
    ic = 'BIC'
    signif = [0.01, 0.05]

    result_columns = ['column', 'signif', 'ic', 'regression', 'adf-pvalue', 'kpss-pvalue']
    column_prefix = 'CS-'

    # Rows are gathered in a list and the frame is built once at the end:
    # DataFrame.append no longer exists in pandas 2.x, and appending row by
    # row copied the whole frame on every iteration.
    rows = []
    for col in cs_df.columns:
        if col.startswith(column_prefix):
            for i in regress:
                for j in signif:
                    rows.append(stationarity_stats_model(df=cs_df, column=col, regression=i, ic=ic, signif=j))
    stats_results = pd.DataFrame(rows, columns=result_columns)

    import test_cs_data_analysis as test_cs
    import cs_data_analysis as cs  # only needed for the optional cs.print_full output below
    import logging as logger
    log = test_cs.setLogger('nb_ADF_unit_root_statsmodel', logger.INFO)
    # cs.print_full(stats_results, log)
    log.info(stats_results)

# Execute the statsmodels sweep; the result table is written to the log, not returned.
run_stationarity_stats_model()
# print(stats_results)

  for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator():
  for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator():
  for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator():


In [9]:
def stationarity_arch(df, column, regression='ct', ic='BIC', signif=0.05):
    '''
    Run the ``arch`` ADF and Phillips-Perron unit-root tests on one column.

    API references:
        https://arch.readthedocs.io/en/latest/unitroot/generated/arch.unitroot.ADF.html
        arch.unitroot.ADF(y, lags=None, trend='c', max_lags=None, method='AIC', low_memory=None)

        https://arch.readthedocs.io/en/latest/unitroot/generated/arch.unitroot.KPSS.html
        arch.unitroot.KPSS(y, lags=None, trend='c')

        https://arch.readthedocs.io/en/latest/unitroot/generated/arch.unitroot.PhillipsPerron.html
        arch.unitroot.PhillipsPerron(y, lags=None, trend='c', test_type='tau')

    Parameters:
        df: table holding the series; indexed as ``df[column].values``.
        column: name of the column to test.
        regression: trend code passed to both tests as ``trend``.
        ic: lag-selection method passed to ADF as ``method``.
        signif: significance level; only echoed back in the result.

    Returns:
        dict with keys 'column', 'signif', 'ic', 'regression', 'adf-pvalue',
        'kpss-pvalue' (always None; KPSS is not run here) and 'pp-pvalue'.
    '''
    import warnings  # kept available for temporarily silencing test warnings while debugging

    from arch.unitroot import ADF, KPSS, PhillipsPerron

    # max_lags is left at the library default for ADF.
    adf_test = ADF(y=df[column].values, trend=regression, method=ic)
    pp_test = PhillipsPerron(y=df[column].values, trend=regression)

    summary = dict(column=column, signif=signif, ic=ic, regression=regression)
    summary['adf-pvalue'] = adf_test.pvalue
    summary['kpss-pvalue'] = None  # placeholder so the result keeps a KPSS slot
    summary['pp-pvalue'] = pp_test.pvalue
    return summary

    # print(adf.summary().as_text())

import pandas as pd
# Empty frame with the result-column layout; it is not filled in this cell.
t = pd.DataFrame(columns=['column', 'signif', 'ic', 'regression', 'adf-pvalue', 'kpss-pvalue', 'pp-pvalue'])
# Single-column smoke test of stationarity_arch (constant + trend, BIC).
o = stationarity_arch(df=get_ytw(), column='CS-DCF-Aaa-3MO-diff', regression='ct', ic='BIC', signif=0.05)
print(o)

{'column': 'CS-DCF-Aaa-3MO-diff', 'signif': 0.05, 'ic': 'BIC', 'regression': 'ct', 'adf-pvalue': 1.0488696583805572e-21, 'kpss-pvalue': None, 'pp-pvalue': 9.35087928818092e-22}


In [11]:
import pandas as pd
cs_df = get_ytw()
# Trend codes accepted by each test:
#   pp:   n, c, ct
#   kpss: c, ct
#   adf:  'nc', 'c', 'ct', 'ctt'
# Only the no-deterministic-term case is swept here; switch to ['c', 'ct'] to
# sweep the constant / constant+trend variants instead.
regress = ['n']
ic = 'BIC'
signif = [0.01, 0.05]
column_prefix = 'CS-'

# Collect one result dict per (column, regression, significance) and build the
# frame once: DataFrame.append no longer exists in pandas 2.x and copied the
# whole frame on every call.
arch_rows = []
for col in cs_df.columns:
    if col.startswith(column_prefix):
        for i in regress:
            for j in signif:
                arch_rows.append(stationarity_arch(df=cs_df, column=col, regression=i, ic=ic, signif=j))

arch_results = pd.DataFrame(
    arch_rows,
    columns=['column', 'signif', 'ic', 'regression', 'adf-pvalue', 'kpss-pvalue', 'pp-pvalue'],
)

import test_cs_data_analysis as test_cs
import cs_data_analysis as cs
import logging as logger
log = test_cs.setLogger('nb_ADF_unit_root_arch', logger.INFO)
cs.print_full(arch_results, log)
# print(arch_results)

## KPSS test

In [None]:
from statsmodels.tsa.stattools import kpss
import pandas as pd
cs_df = get_ytw()
# KPSS only accepts 'c' (level stationarity) and 'ct' (trend stationarity);
# 'nc' is an ADF-only code and makes kpss raise.
regress = ['c', 'ct']
auto_lag = 'BIC'  # only relevant to the ADF variant kept as a comment below
signif = [0.05, 0.01]
column_prefix = 'CS-'

# Rows are gathered in a list and turned into a frame once; DataFrame.append
# no longer exists in pandas 2.x.
kpss_rows = []
for col_data in cs_df.columns:
    if col_data.startswith(column_prefix):
        for j in signif:
            for i in regress:
                r = kpss(cs_df[col_data].values, regression=i, nlags="auto")
                # r = adfuller(cs_df[col_data].values, maxlag=40, regression=i, autolag=auto_lag)
                # KPSS null hypothesis is stationarity, so the series counts as
                # stationary when the null is NOT rejected (p-value above the level).
                is_stationary = bool(r[1] > j)
                if is_stationary:
                    statement = f"{col_data} Series Stationary. Null Hypothesis not rejected. {r[1]} > {j}. regression {i}"
                else:
                    statement = f"{col_data} Series Non-stationary. Null Hypothesis rejected. {r[1]} <= {j}. regression {i}"
                # print(statement)
                # r[0] is the test statistic and r[1] the p-value; store the p-value.
                kpss_rows.append({ 'column': col_data, 'p-value': r[1], 'signif': j, 'regression': i, 'stationary': is_stationary })

t = pd.DataFrame(kpss_rows, columns=['column', 'p-value', 'signif', 'regression', 'stationary'])

print(t)

In [None]:
def kpss_test(timeseries):
    '''
    Run a KPSS test on ``timeseries`` and print the statistics and a verdict.

    The test is run with ``regression='c'`` and ``nlags="auto"``.

    Null Hypothesis: the series is stationary around a constant level.
    Alternate Hypothesis: the series has a unit root (series is not stationary).

    Decision rule at the 5% level (the opposite direction to ADF):
      * p-value > 0.05: the null cannot be rejected; treat the series as stationary.
      * p-value <= 0.05: reject the null; treat the series as non-stationary.

    Note: statsmodels interpolates the KPSS p-value from a table, so values
    are reported only within the table's range (see the statsmodels kpss docs).

    Parameters:
        timeseries: the series passed straight to ``statsmodels`` ``kpss``.

    Returns:
        None; the results are printed.
    '''
    import pandas as pd
    from statsmodels.tsa.stattools import kpss

    print('Results of KPSS Test:')
    kpsstest = kpss(timeseries, regression='c', nlags="auto")
    pvalue = kpsstest[1]
    kpss_output = pd.Series(kpsstest[0:3], index=['Test Statistic', 'p-value', 'Lags Used'])
    # Element 3 maps significance labels to critical values.
    for key, value in kpsstest[3].items():
        kpss_output[f'Critical Value ({key})'] = value
    print(kpss_output)
    if pvalue > 0.05:
        # Failing to reject the KPSS null is what indicates stationarity.
        print(f"{'+'*10} {pvalue} > 0.05. cannot reject null. series is stationary {'+'*10}")
    else:
        print(f"{'*'*10} series is non-stationary {'*'*10}")

In [None]:
from statsmodels.tsa.stattools import adfuller
'''
 CS-Aaa-3MO	 CS-Aa-3MO	 CS-A-3MO	 CS-Baa-3MO	 CS-Aaa-1YR	 CS-Aa-1YR	 CS-A-1YR	 CS-Baa-1YR	 CS-Aaa-5YR	 CS-Aa-5YR	 CS-A-5YR	 CS-Baa-5YR		 
 TB-3MO-TY	 TB-1YR-TY	 TB-5YR-TY
'''

import nb_credit_spread as cslibrary
cslib = cslibrary.creditspread()

# NOTE(review): `start_date` is not defined in any visible cell; the notes
# below refer to start='2009-01-31' - confirm the intended value before running.
ytw = cslib.get_ytw_from_date(start=start_date)
print(ytw.shape)

# start='2009-01-31' on level
# TB-3MO-TY stationary at 5% on level. 1 and 5 year not stationary
# CS-3MO : CS-Aa-3MO, CS-A-3MO stationary at 10% on level. other CS not stationary
# CS-1YR : CS-A-1YR stationary at 5% on level. other CS not stationary
# CS-5YR : not stationary

# start='2009-01-31' first diff
# TB - 3 month, 1 and 5 year not stationary
# CS-3MO
#   CS-Aaa-3MO. stationary at 1% lags 3
#   CS-Aa-3MO. stationary at 1% lags 3
#   CS-A-3MO. stationary at 1% lags 3
#   CS-Baa-3MO. stationary at 5% lags 8
# CS-1YR
#   CS-Aaa-1YR. stationary at 1% lags 1
#   CS-Aa-1YR. stationary at 1% lags 0 ??
#   CS-A-1YR. stationary at 1% lags 3
#   CS-Baa-1YR. stationary at 1% lags 3
# CS-5YR
#   CS-Aaa-5YR. stationary at 1% lags 6
#   CS-Aa-5YR. stationary at 1% lags 13
#   CS-A-5YR. stationary at 1% lags 13
#   CS-Baa-5YR. stationary at 1% lags 13

col = 'CS-Aaa-3MO'
# NOTE(review): this rebinds `cs`, which other cells use as the alias for the
# cs_data_analysis module; re-running those cells afterwards needs a re-import.
cs = ytw[col]
# https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html
# regression {"c", "ct", "ctt", "nc"}  # nc = no constant, no trend
# autolag {"AIC", "BIC", "t-stat", None}
# Test the first difference of the series (the leading NaN is dropped).
data = cs.diff(periods=1).dropna()
result = adfuller(x=data, regression="ct", autolag='BIC')

adf_stat = result[0]
print(f"ADF Statistic: {result[0]}. p-value: {result[1]}. lags used {result[2]}. obs {result[3]}")
print('Critical Values:')

# Worked example: a statistic of -4 is less than the 1% critical value of -3.449.
# This suggests that we can reject the null hypothesis with a significance level of less than 1% (i.e. a low probability that the result is a statistical fluke).

# If the ADF statistic is less than the critical value, reject the null at that significance level.
# Rejecting the null hypothesis means that the process has no unit root, and in turn that the time series is stationary or does not have time-dependent structure

# result[4] maps each significance label to its critical value.
for key, value in result[4].items():
    # print('\t%s: %.3f' % (key, value))
    if adf_stat < value:
        print(f"is stationary. reject at {key}. {adf_stat} < {value}. {col} no unit root")
    else:
        # Also reached when the statistic equals the critical value.
        print(f"not stationary. not reject at {key}. {adf_stat} > {value}. {col} has unit root")

print(result)

In [None]:
# Same ADF test on `data`, but with the lag search capped at 40 lags.
result = adfuller(x=data, regression="ct", autolag='BIC', maxlag=40)
print(f"ADF Statistic: {result[0]:f}")
print(f"p-value: {result[1]:f}")
print('Critical Values:')
# result[4] maps each significance label to its critical value.
for key, value in result[4].items():
    print(f"\t{key}: {value:.3f}")
print(result)