# ARIMA lag selection
# Determine optimal lag based on ARIMA
#### total range: 1988-08-31 to 2019-12-31
#### analysis range: 1990-01-31 to 2019-07-31
#### training: 2017-01-01 to 2017-12-31
#### predict: 2018-01-01 to 2018-12-31
## CS-Aaa-3MO CS-Aa-3MO CS-A-3MO CS-Baa-3MO CS-Aaa-1YR CS-Aa-1YR CS-A-1YR CS-Baa-1YR CS-Aaa-5YR CS-Aa-5YR CS-A-5YR CS-Baa-5YR
## TB-3MO-TY TB-1YR-TY TB-5YR-TY

In [2]:
def get_ytw(srcfile=r'YTW-All-Values.xlsx', start='2009-01-31'):
    """Load the YTW data set through the project's credit-spread helper.

    Args:
        srcfile: Path of the workbook handed to
            ``creditspread.get_ytw_from_date_delta``. Defaults to the file
            this notebook has always read.
        start: Start date string forwarded as ``start``. Defaults to the
            date this notebook has always used.

    Returns:
        Whatever ``get_ytw_from_date_delta`` returns. The rest of this file
        indexes it by column name (e.g. ``df['CS-Aaa-3MO']``), so it is
        presumably a pandas DataFrame -- confirm against nb_credit_spread.
    """
    # Imported lazily so the notebook cell is self-contained.
    import nb_credit_spread as cslibrary

    cslib = cslibrary.creditspread()
    return cslib.get_ytw_from_date_delta(srcfile=srcfile, start=start)
    

In [3]:
def arima_model(endog=None):
    """Fit an ARIMA(1, 0, 0) model with trend 'ct' and return its summary.

    Args:
        endog: The series to model. When omitted, the module-level name
            ``endog`` is used, which is what the original zero-argument
            version of this function relied on.

    Returns:
        The object produced by ``model_fit.summary()``.

    Raises:
        NameError: If ``endog`` is not passed and no module-level ``endog``
            exists.
    """
    if endog is None:
        # Backward compatibility with the old signature: fall back to the
        # notebook-global series instead of failing on an unbound name.
        try:
            endog = globals()["endog"]
        except KeyError:
            raise NameError("name 'endog' is not defined") from None

    from statsmodels.tsa.arima.model import ARIMA

    model = ARIMA(endog=endog, order=(1, 0, 0), trend='ct')
    model_fit = model.fit()
    # Return the summary so the caller (or the notebook cell) can display it;
    # previously the value was computed and silently dropped.
    return model_fit.summary()

In [37]:
def lag_ARIMA_selection(column, colname, trend=None, max_lag=10):
    '''
    Pick the AR order p of an ARIMA(p, 1, 0) model by minimum BIC.

    Fits ARIMA(p, 1, 0) for p = 1..max_lag, records each fit's BIC and
    selects the first order that attains the smallest BIC.

    Parameters:
        column:  the series passed to ARIMA as ``endog``.
        colname: label used in progress messages and in the result record.
        trend:   forwarded unchanged to ARIMA's ``trend`` argument.
        max_lag: largest AR order to try (default 10, the previous
                 hard-coded value).

    Returns a tuple ``(record, best_lag, relative_likelihood)``:
        record:  dict with keys 'column', 'trend', 'minlag', 'BIC',
                 'pvalue', 'tstat'; pvalue/tstat belong to the highest AR
                 coefficient (ar.L<best_lag>) of the selected model.
        best_lag: the selected AR order.
        relative_likelihood: array of shape (max_lag, 1) holding
                 exp((BIC_min - BIC_p) / 2) for every candidate order.

    Raises:
        ValueError: if max_lag is smaller than 1.
    '''
    if max_lag < 1:
        raise ValueError(f"max_lag must be at least 1, got {max_lag}")

    from statsmodels.tsa.arima.model import ARIMA
    import numpy as np

    endog = column

    fits = []
    info_crit = np.zeros(shape=(max_lag, 1))
    for i in range(max_lag):
        order = (i + 1, 1, 0)
        print(f"{colname} checking {order} trend {trend}")
        # NOTE: fitting may emit "ConvergenceWarning: Maximum Likelihood
        # optimization failed to converge. Check mle_retvals".
        model_fit = ARIMA(endog=endog, exog=None, order=order, trend=trend).fit()
        fits.append(model_fit)  # keep the fit so the winner need not be refit
        info_crit[i] = model_fit.bic  # update position with info criteria result

    info_crit_min = np.min(info_crit)
    model_min = int(np.argmin(info_crit))  # first position of the minimum value
    best_lag = model_min + 1

    relative_likelihood = np.exp((info_crit_min - info_crit) / 2)

    best_fit = fits[model_min]
    # Locate the highest AR coefficient by name rather than by raw position:
    # trend terms are listed before the AR terms, and integer keys on a
    # label-indexed pandas Series are deprecated.
    pos = list(best_fit.param_names).index(f"ar.L{best_lag}")
    pvalue = np.asarray(best_fit.pvalues)[pos]
    tstat = np.asarray(best_fit.tvalues)[pos]

    r = {
        'column': colname,
        'trend': trend,
        'minlag': best_lag,
        "BIC": info_crit_min,
        "pvalue": pvalue,
        "tstat": tstat,
    }
    return r, best_lag, relative_likelihood

# Driver: run the BIC-based lag selection for every credit-spread (CS-*) and
# treasury (TB-*) column and tabulate the per-column results.
df = get_ytw()

# Debugging aid: inspect a single fit.
# from statsmodels.tsa.arima.model import ARIMA
# print(ARIMA(endog=df['CS-Aaa-3MO'], order=(3, 0, 0), trend=None).fit().summary())

import pandas as pd

rating = ["Aaa", "Aa", "A", "Baa"]
maturity = ["3MO", "1YR", "5YR"]

# Credit-spread columns first (rating-major, maturity-minor), then the
# treasury columns -- the same order the two original loops produced.
colnames = [f"CS-{r}-{m}" for r in rating for m in maturity]
colnames += [f"TB-{m}-TY" for m in maturity]

trend = None  # check for ConvergenceWarning on 'ct'

# Collect the per-column records in a list and build the frame once:
# DataFrame.append was removed in pandas 2.0.
results = []
for colname in colnames:
    result, min_model, relative_likelihood = lag_ARIMA_selection(column=df[colname], colname=colname, trend=trend)
    print(f"{colname} minimum parameters model for AR: {min_model} relative likelihoods {relative_likelihood}")
    results.append(result)

t = pd.DataFrame(results, columns=['column', 'trend', 'minlag', 'BIC', 'pvalue', 'tstat'])

print(t)


CS-Aaa-3MO checking (1, 1, 0) trend None
CS-Aaa-3MO checking (2, 1, 0) trend None
CS-Aaa-3MO checking (3, 1, 0) trend None
CS-Aaa-3MO checking (4, 1, 0) trend None
CS-Aaa-3MO checking (5, 1, 0) trend None
CS-Aaa-3MO checking (6, 1, 0) trend None
CS-Aaa-3MO checking (7, 1, 0) trend None
CS-Aaa-3MO checking (8, 1, 0) trend None
CS-Aaa-3MO checking (9, 1, 0) trend None
CS-Aaa-3MO checking (10, 1, 0) trend None
CS-Aaa-3MO minimum parameters model for AR: 2 relative likelihoods [[1.71201075e-01]
 [1.00000000e+00]
 [6.25121449e-01]
 [6.16999816e-02]
 [8.23205303e-03]
 [9.46670145e-04]
 [1.53117997e-04]
 [1.70051083e-05]
 [3.66038534e-06]
 [1.30476073e-06]]
CS-Aaa-1YR checking (1, 1, 0) trend None
CS-Aaa-1YR checking (2, 1, 0) trend None
CS-Aaa-1YR checking (3, 1, 0) trend None
CS-Aaa-1YR checking (4, 1, 0) trend None
CS-Aaa-1YR checking (5, 1, 0) trend None
CS-Aaa-1YR checking (6, 1, 0) trend None
CS-Aaa-1YR checking (7, 1, 0) trend None
CS-Aaa-1YR checking (8, 1, 0) trend None
CS-Aaa-1YR ch