Import packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.graphics.tsaplots as sgt
import statsmodels.tsa.stattools as sts
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from scipy.stats.distributions import chi2 
from math import sqrt
import seaborn as sns
sns.set()
import warnings
warnings.filterwarnings("ignore")

Pre-processing

In [2]:
csv = pd.read_csv("Index2020.csv") 
dataframe =csv.copy()
dataframe.date = pd.to_datetime(dataframe.date, dayfirst = True) #change to timeserires data
dataframe.set_index("date", inplace=True) #setting timeseries data to index
dataframe = dataframe.asfreq('b') #set frequency to business days
dataframe = dataframe.fillna(method='ffill') #front fill missing data

In [3]:
dataframe['market_value'] = dataframe.spx

In [4]:
size = int(len(dataframe)*0.8) #split data 80%
df_train, df_test = dataframe.iloc[:size], dataframe.iloc[size:]

In [5]:
def LLR_test(mod_1, mod_2, DF = 1):
    L1 = mod_1.llf
    L2 = mod_2.llf
    LR = (2*(L2-L1))    
    p = chi2.sf(LR, DF).round(3)
    return p

In [13]:
df_train['returns'] = df_train.market_value.pct_change(1)*100

ARIMAX

In [7]:
armax = ARIMA(df_train.market_value, exog = df_train.dax, order=(1,1,1))
results_armax = armax.fit()
results_armax.summary()

0,1,2,3
Dep. Variable:,D.market_value,No. Observations:,4191.0
Model:,"ARIMA(1, 1, 1)",Log Likelihood,-17248.042
Method:,css-mle,S.D. of innovations,14.829
Date:,"Fri, 06 Mar 2020",AIC,34506.085
Time:,15:39:39,BIC,34537.788
Sample:,01-04-2000,HQIC,34517.296
,- 01-26-2016,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.3460,0.617,-0.561,0.575,-1.556,0.864
dax,7.046e-05,9.2e-05,0.766,0.444,-0.000,0.000
ar.L1.D.market_value,0.6043,0.121,5.003,0.000,0.368,0.841
ma.L1.D.market_value,-0.6710,0.113,-5.964,0.000,-0.892,-0.450

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,1.6549,+0.0000j,1.6549,0.0000
MA.1,1.4903,+0.0000j,1.4903,0.0000


SARIMAX

In [8]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [10]:
sarimax = SARIMAX(df_train.market_value, exog = df_train.dax, order=(1,0,1), seasonal_order = (2,0,1,5))
results_sarimax = sarimax.fit()
results_sarimax.summary()

0,1,2,3
Dep. Variable:,market_value,No. Observations:,4192.0
Model:,"SARIMAX(1, 0, 1)x(2, 0, 1, 5)",Log Likelihood,-16216.081
Date:,"Fri, 06 Mar 2020",AIC,32446.163
Time:,16:10:12,BIC,32490.549
Sample:,01-03-2000,HQIC,32461.859
,- 01-26-2016,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
dax,0.1052,0.001,91.612,0.000,0.103,0.107
ar.L1,0.9998,0.000,2377.341,0.000,0.999,1.001
ma.L1,-0.2906,0.008,-35.325,0.000,-0.307,-0.274
ar.S.L5,0.9880,0.008,130.249,0.000,0.973,1.003
ar.S.L10,0.0117,0.004,2.701,0.007,0.003,0.020
ma.S.L5,-0.9995,0.007,-139.903,0.000,-1.013,-0.985
sigma2,134.0563,1.617,82.914,0.000,130.887,137.225

0,1,2,3
Ljung-Box (Q):,97.88,Jarque-Bera (JB):,3697.84
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,1.1,Skew:,-0.25
Prob(H) (two-sided):,0.09,Kurtosis:,7.57


In [11]:
sarimax_2 = SARIMAX(df_train.market_value, exog = df_train.dax, order=(2,0,2), seasonal_order = (1,0,1,5))
results_sarimax_2 = sarimax.fit()
results_sarimax_2.summary()

0,1,2,3
Dep. Variable:,market_value,No. Observations:,4192.0
Model:,"SARIMAX(1, 0, 1)x(2, 0, 1, 5)",Log Likelihood,-16216.081
Date:,"Fri, 06 Mar 2020",AIC,32446.163
Time:,16:10:42,BIC,32490.549
Sample:,01-03-2000,HQIC,32461.859
,- 01-26-2016,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
dax,0.1052,0.001,91.612,0.000,0.103,0.107
ar.L1,0.9998,0.000,2377.341,0.000,0.999,1.001
ma.L1,-0.2906,0.008,-35.325,0.000,-0.307,-0.274
ar.S.L5,0.9880,0.008,130.249,0.000,0.973,1.003
ar.S.L10,0.0117,0.004,2.701,0.007,0.003,0.020
ma.S.L5,-0.9995,0.007,-139.903,0.000,-1.013,-0.985
sigma2,134.0563,1.617,82.914,0.000,130.887,137.225

0,1,2,3
Ljung-Box (Q):,97.88,Jarque-Bera (JB):,3697.84
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,1.1,Skew:,-0.25
Prob(H) (two-sided):,0.09,Kurtosis:,7.57


In [12]:
sarimax_3 = SARIMAX(df_train.market_value, exog = df_train.dax, order=(3,0,4), seasonal_order = (3,0,2,5))
results_sarimax_3 = sarimax.fit()
results_sarimax_3.summary()

0,1,2,3
Dep. Variable:,market_value,No. Observations:,4192.0
Model:,"SARIMAX(1, 0, 1)x(2, 0, 1, 5)",Log Likelihood,-16216.081
Date:,"Fri, 06 Mar 2020",AIC,32446.163
Time:,16:11:47,BIC,32490.549
Sample:,01-03-2000,HQIC,32461.859
,- 01-26-2016,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
dax,0.1052,0.001,91.612,0.000,0.103,0.107
ar.L1,0.9998,0.000,2377.341,0.000,0.999,1.001
ma.L1,-0.2906,0.008,-35.325,0.000,-0.307,-0.274
ar.S.L5,0.9880,0.008,130.249,0.000,0.973,1.003
ar.S.L10,0.0117,0.004,2.701,0.007,0.003,0.020
ma.S.L5,-0.9995,0.007,-139.903,0.000,-1.013,-0.985
sigma2,134.0563,1.617,82.914,0.000,130.887,137.225

0,1,2,3
Ljung-Box (Q):,97.88,Jarque-Bera (JB):,3697.84
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,1.1,Skew:,-0.25
Prob(H) (two-sided):,0.09,Kurtosis:,7.57
