In [1]:
# Auto-ARIMA

In [9]:
import numpy as np
import pandas as pd
import sklearn
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance
from statsmodels.tsa.arima_model import ARIMA
from arch import arch_model
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')
sns.set()

In [11]:
# Download daily, dividend/split-adjusted prices for the four indices
# (S&P 500, FTSE 100, Nikkei 225, DAX). group_by='ticker' makes the ticker
# the top level of the resulting column index.
# Fix: the keyword was misspelled `treads`; the yfinance parameter that
# enables multi-threaded downloading is `threads`.
raw_data = yfinance.download(
    tickers="^GSPC ^FTSE ^N225 ^GDAXI",
    start='1994-01-07',
    end='2018-01-29',
    interval="1d",
    group_by='ticker',
    auto_adjust=True,
    threads=True,
)

[*********************100%***********************]  4 of 4 completed


In [23]:
# Work on a copy so the downloaded raw_data frame is not modified by the
# column additions and deletions in the following cells.
df_comp = raw_data.copy()

In [24]:
# Copy each index's closing price out of its ticker group into a
# short-named column (insertion order: spx, dax, ftse, nikkei).
close_columns = {
    'spx': '^GSPC',
    'dax': '^GDAXI',
    'ftse': '^FTSE',
    'nikkei': '^N225',
}
for column, ticker in close_columns.items():
    df_comp[column] = df_comp[ticker]['Close']

In [25]:
# Drop the first row, then remove the raw per-ticker column groups so only
# the derived close-price columns remain.
# NOTE: this cell is not idempotent -- re-running it drops another row and
# the `del` statements fail because the ticker columns are already gone.
df_comp = df_comp.iloc[1:]

for ticker in ('^N225', '^GSPC', '^GDAXI', '^FTSE'):
    del df_comp[ticker]

# Re-index to business-day frequency; dates missing from the download become
# NaN rows, which are forward-filled with the last available value.
# `.ffill()` replaces the deprecated `fillna(method='ffill')`.
df_comp = df_comp.asfreq('b').ffill()

In [28]:
# Creating returns
# One-period percentage returns (scaled to percent) for every index.
# The first row of each ret_* column is NaN because there is no prior price.
for index_name in ('spx', 'dax', 'ftse', 'nikkei'):
    df_comp['ret_' + index_name] = df_comp[index_name].pct_change(1).mul(100)

In [29]:
# Positional 80/20 split: the first 80% of rows form the training frame `df`,
# the remaining rows form `df_test`. No shuffling is applied.
size = int(len(df_comp) * 0.8)
df = df_comp.iloc[:size]
df_test = df_comp.iloc[size:]

In [30]:
from pmdarima import auto_arima

In [31]:
# Baseline: let auto_arima choose a model with its default search settings,
# just to get a first idea. The first row is skipped because the return
# series starts with the NaN produced by pct_change.
ftse_train_returns = df['ret_ftse'].iloc[1:]
model_auto = auto_arima(ftse_train_returns)

In [32]:
# Bare expression: the notebook displays the repr of the selected model.
model_auto

      with_intercept=False)

In [33]:
# Show the summary table (coefficients, information criteria, diagnostics)
# of the model selected by the default auto_arima search.
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,5019.0
Model:,"SARIMAX(3, 0, 5)",Log Likelihood,-7883.727
Date:,"Wed, 07 Apr 2021",AIC,15785.454
Time:,12:11:25,BIC,15844.143
Sample:,0,HQIC,15806.02
,- 5019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,-0.1262,0.078,-1.615,0.106,-0.279,0.027
ar.L2,-0.8762,0.020,-44.932,0.000,-0.914,-0.838
ar.L3,-0.2770,0.071,-3.876,0.000,-0.417,-0.137
ma.L1,0.1014,0.078,1.299,0.194,-0.052,0.254
ma.L2,0.8224,0.022,36.557,0.000,0.778,0.866
ma.L3,0.1640,0.070,2.354,0.019,0.027,0.301
ma.L4,-0.0087,0.009,-0.942,0.346,-0.027,0.009
ma.L5,-0.1117,0.008,-13.479,0.000,-0.128,-0.095
sigma2,1.3549,0.015,91.085,0.000,1.326,1.384

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,6242.08
Prob(Q):,1.0,Prob(JB):,0.0
Heteroskedasticity (H):,1.99,Skew:,-0.17
Prob(H) (two-sided):,0.0,Kurtosis:,8.45


In [35]:
# The rules of model selection are "rules of thumb" rather than fixed laws
# By default, auto ARIMA ranks candidate models by a single criterion, the AIC
# We could easily have overfitted while going through the models in our previous sections
# The default arguments of the method restrict the number of AR and MA components

In [34]:
# Extended search: seasonal ARIMA on FTSE returns with the other three
# indices' returns as exogenous regressors.
# Fix: exogenous regressors are passed as `X=`; the old `exogenous=` keyword
# was deprecated in pmdarima 1.8 and removed in 2.0.
# NOTE(review): this fits on the full df_comp rather than the training frame;
# the last 20% of rows are held out only for out-of-bag scoring during the
# search -- confirm that using the test period here is intended.
model_auto = auto_arima(
    df_comp.ret_ftse[1:],  # skip the leading NaN return
    X=df_comp[['ret_spx', 'ret_dax', 'ret_nikkei']][1:],
    m=5,                   # seasonal period; presumably 5 business days per week
    max_order=None,        # no cap on p + q + P + Q
    max_p=7,
    max_q=7,
    max_d=2,
    max_P=4,
    max_Q=4,
    max_D=2,
    max_iter=50,
    alpha=0.05,            # significance level for the differencing tests
    n_jobs=-1,             # only used for grid search (stepwise=False); ignored by the default stepwise search
    trend='ct',            # constant plus linear time trend
    information_criterion='oob',  # rank candidates by out-of-bag score
    out_of_sample_size=int(len(df_comp) * 0.2),
)

In [36]:
# Show the summary table of the model selected by the extended search
# (seasonal terms, trend terms, and exogenous regressors).
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,6275.0
Model:,"SARIMAX(2, 0, 2)x(1, 0, 2, 5)",Log Likelihood,-6357.988
Date:,"Wed, 07 Apr 2021",AIC,12741.976
Time:,13:07:33,BIC,12829.652
Sample:,0,HQIC,12772.355
,- 6275,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,-0.0088,0.008,-1.082,0.279,-0.025,0.007
drift,6.202e-07,2.39e-06,0.259,0.795,-4.07e-06,5.31e-06
x1,0.0853,0.006,13.582,0.000,0.073,0.098
x2,0.5634,0.005,102.833,0.000,0.553,0.574
x3,0.0738,0.005,15.656,0.000,0.065,0.083
ar.L1,-0.1405,0.092,-1.522,0.128,-0.321,0.040
ar.L2,0.5456,0.054,10.041,0.000,0.439,0.652
ma.L1,0.0193,0.093,0.208,0.835,-0.163,0.201
ma.L2,-0.5749,0.056,-10.322,0.000,-0.684,-0.466

0,1,2,3
Ljung-Box (L1) (Q):,2.85,Jarque-Bera (JB):,12788.07
Prob(Q):,0.09,Prob(JB):,0.0
Heteroskedasticity (H):,0.55,Skew:,0.22
Prob(H) (two-sided):,0.0,Kurtosis:,9.98
