### Packages

In [1]:
import numpy as np
import pandas as pd
import scipy
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from statsmodels.tsa.arima_model import ARIMA
from arch import arch_model
import seaborn as sns
import yfinance
import warnings
# Silence every warning for the rest of the session; note that this also hides
# deprecation notices raised by the libraries imported above.
warnings.filterwarnings("ignore")
# Apply seaborn's default theme to all subsequent matplotlib figures.
sns.set()

### Loading the data

In [2]:
# Download daily, auto-adjusted price history for the four indices
# (^GSPC, ^FTSE, ^N225, ^GDAXI) over the requested date range.
# group_by="ticker" puts the ticker on the top column level, so
# raw_data['^GSPC'] is the price frame of a single index.
# The multithreading keyword is `threads`; the earlier spelling `treads`
# was not a recognised yfinance.download parameter.
raw_data = yfinance.download(
    tickers="^GSPC ^FTSE ^N225 ^GDAXI",
    start="1994-01-07",
    end="2018-01-29",
    interval="1d",
    group_by="ticker",
    auto_adjust=True,
    threads=True,
)

[*********************100%***********************]  4 of 4 completed


In [3]:
# Work on an independent copy so the downloaded frame itself is never modified.
df_comp = raw_data.copy(deep=True)

In [4]:
# Expose each index's closing price under a short column name.
# Insertion order (spx, dax, ftse, nikkei) is kept so the column layout is unchanged.
close_columns = {'spx': '^GSPC', 'dax': '^GDAXI', 'ftse': '^FTSE', 'nikkei': '^N225'}
for short_name, ticker in close_columns.items():
    df_comp[short_name] = df_comp[ticker]['Close']

In [5]:
# Tidy the frame in one pass:
#   1. skip the first downloaded row,
#   2. drop the per-ticker column groups (only spx/dax/ftse/nikkei remain),
#   3. re-index to a business-day frequency,
#   4. forward-fill the gaps that the re-indexing introduces.
# `.ffill()` replaces the deprecated `fillna(method='ffill')`, and 'B' is the
# canonical spelling of the business-day alias.
ticker_groups = ['^N225', '^GSPC', '^GDAXI', '^FTSE']
df_comp = (
    df_comp.iloc[1:]
    .drop(columns=ticker_groups, level=0)
    .asfreq('B')
    .ffill()
)

### Creating Returns

In [6]:
# Day-over-day percentage returns for every index, stored as ret_<name>.
# The order below matches the original column insertion order.
for index_name in ('spx', 'ftse', 'dax', 'nikkei'):
    df_comp[f'ret_{index_name}'] = df_comp[index_name].pct_change(periods=1).mul(100)

### Splitting the Data

In [7]:
# Chronological 80/20 split: the first 80% of rows go to `df` (training),
# the remaining rows to `df_test`.
size = int(df_comp.shape[0] * 0.8)
df = df_comp.iloc[:size]
df_test = df_comp.iloc[size:]

### Fitting a Model

In [8]:
from pmdarima.arima import auto_arima

In [10]:
# Let auto_arima pick the model orders for the FTSE training returns.
# The first observation is skipped because pct_change leaves it as NaN.
ftse_train_returns = df['ret_ftse'].iloc[1:]
model_auto = auto_arima(ftse_train_returns)

In [12]:
# Display the summary table of the model selected by auto_arima.
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,5019.0
Model:,"SARIMAX(4, 0, 5)",Log Likelihood,-7881.995
Date:,"Wed, 23 Mar 2022",AIC,15783.99
Time:,11:51:12,BIC,15849.2
Sample:,0,HQIC,15806.841
,- 5019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,0.0137,0.082,0.168,0.867,-0.146,0.174
ar.L2,-0.6539,0.077,-8.440,0.000,-0.806,-0.502
ar.L3,-0.1617,0.071,-2.271,0.023,-0.301,-0.022
ar.L4,0.2021,0.074,2.717,0.007,0.056,0.348
ma.L1,-0.0373,0.081,-0.460,0.646,-0.196,0.122
ma.L2,0.6061,0.078,7.747,0.000,0.453,0.759
ma.L3,0.0616,0.069,0.899,0.369,-0.073,0.196
ma.L4,-0.1944,0.073,-2.658,0.008,-0.338,-0.051
ma.L5,-0.1051,0.009,-11.064,0.000,-0.124,-0.086

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,6371.0
Prob(Q):,0.95,Prob(JB):,0.0
Heteroskedasticity (H):,1.99,Skew:,-0.2
Prob(H) (two-sided):,0.0,Kurtosis:,8.51


### Important Arguments

In [13]:
# X (named `exogenous` in older pmdarima releases) -> outside factors (e.g. other time series)
# m -> seasonal cycle length
# max_order -> maximum value of p + q + P + Q; only enforced when the search is not stepwise (None = no limit)
# max_p -> maximum AR components
# max_q -> maximum MA components
# max_d -> maximum Integrations
# max_P, max_Q, max_D -> seasonal counterparts of max_p, max_q and max_d
# maxiter -> maximum iterations we're giving the model to converge the coefficients (becomes harder as the order increases)
# return_valid_fits -> whether to return every valid fit as a list instead of only the best model
# alpha -> level of significance, default is 5%, which we should be using most of the time
# n_jobs -> how many models to fit at a time (-1 indicates "as many as possible"); only used for a grid search (stepwise=False)
# trend -> "ct" usually (constant plus linear time trend)
# information_criterion -> 'aic', 'aicc', 'bic', 'hqic', 'oob'
#        (Akaike Information Criterion, Corrected Akaike Information Criterion,
#        Bayesian Information Criterion, Hannan-Quinn Information Criterion, or
#        "out of bag"--for validation scoring--respectively)
# out_of_sample_size -> validates the model selection (pass the entire dataset, and set 20% to be the out_of_sample_size)

# The first row is dropped from both inputs because pct_change leaves it as NaN.
ftse_returns = df_comp['ret_ftse'].iloc[1:]
exog_returns = df_comp[['ret_spx', 'ret_dax', 'ret_nikkei']].iloc[1:]

# `X=` is the current keyword for exogenous regressors; `exogenous=` is deprecated.
model_auto = auto_arima(ftse_returns, X=exog_returns, m=5,
                        max_order=None, max_p=7, max_q=7, max_d=2, max_P=4, max_Q=4, max_D=2, maxiter=50,
                        alpha=0.05, n_jobs=-1, trend='ct',
                        information_criterion='oob', out_of_sample_size=int(len(df_comp)*0.2))