In [6]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import statsmodels.tsa.stattools as sts
import statsmodels.graphics.tsaplots as sgt 
from scipy.stats.distributions import chi2
from statsmodels.tsa.arima_model import ARMA
import seaborn as sns
sns.set()

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

In [12]:
# Load the raw index data; `raw_data` is kept untouched and all cleaning is
# done on a copy.
raw_data = pd.read_csv("../data/Index2018.csv")

df_comp = raw_data.copy()
# Dates in the file are written day-first (dd/mm/yyyy).
df_comp["date"] = pd.to_datetime(df_comp["date"], dayfirst=True)
df_comp = (
    df_comp
    .set_index("date")
    # Conform the index to business-day frequency ('B' is the documented
    # alias); dates missing from the file appear as NaN rows.
    .asfreq("B")
    # Carry the last observed values forward into those gaps. `.ffill()` is
    # the non-deprecated spelling of `fillna(method='ffill')`.
    .ffill()
)

In [13]:
# Keep only the FTSE series, under the generic name `market_value`, and drop
# every original index column.
df_comp['market_value'] = df_comp['ftse']
df_comp = df_comp.drop(columns=['spx', 'dax', 'ftse', 'nikkei'])

# Chronological 80/20 split: the first 80% of rows go to `df`, the remainder
# to `df_test`. Both are explicit copies so they can be modified independently
# of `df_comp`.
size = int(0.8 * len(df_comp))
df = df_comp.iloc[:size].copy()
df_test = df_comp.iloc[size:].copy()

# LLR Test

In [14]:
def LLR_test(mod_1, mod_2, DF=1):
    """Return the p-value of a log-likelihood ratio test between two models.

    Both arguments are *unfitted* model objects: each one is fitted here via
    ``.fit()`` and only the log-likelihood (``.llf``) of the result is used.

    Parameters
    ----------
    mod_1, mod_2 : objects exposing ``fit()`` whose result has ``llf``
        The statistic is ``2 * (llf(mod_2) - llf(mod_1))``, so ``mod_2`` is
        presumably the richer model and ``mod_1`` the simpler one nested in
        it -- the function does not check this.
    DF : int, default 1
        Degrees of freedom of the chi-squared reference distribution.

    Returns
    -------
    The upper-tail chi-squared probability of the statistic, rounded to
    three decimals.
    """
    # Fit in argument order and keep only the log-likelihoods.
    llf_first, llf_second = (model.fit().llf for model in (mod_1, mod_2))

    statistic = 2 * (llf_second - llf_first)
    p_value = chi2.sf(statistic, DF)
    return p_value.round(3)

# The DF-Test

In [15]:
# Augmented Dickey-Fuller test on the price level; the second element of the
# displayed tuple is the p-value.
sts.adfuller(df['market_value'])

(-1.904155141883687,
 0.330108932770283,
 6,
 5014,
 {'1%': -3.4316548765428174,
  '5%': -2.8621166146845334,
  '10%': -2.5670769326348926},
 54854.15304507557)

With a p-value of 0.33 we cannot reject the null hypothesis of a unit root, so there is no evidence that the FTSE prices come from a stationary process. AR models should not be applied to a non-stationary process. Therefore, we introduce a new concept called returns, which are more likely to be stationary.

# Using Returns

In finance, a return is the percentage change in value between two consecutive periods.

$$
\frac{P_t - P_{t-1}}{P_{t-1}} \times 100
$$

We can fit the AR models to this series instead of the raw prices.

In [20]:
# Percentage return between consecutive observations:
# (P_t - P_{t-1}) / P_{t-1} * 100.
df['returns'] = df.market_value.pct_change(1).mul(100)

# The first row has no previous price, so its return is null; drop it.
# .copy() detaches the result from the previous frame so that later column
# assignments on `df` do not raise SettingWithCopyWarning (the train/test
# split copies its slices for the same reason).
# NOTE: this cell rebinds `df` from itself, so running it a second time drops
# one more leading row.
df = df.iloc[1:].copy()

In [22]:
# Augmented Dickey-Fuller test on the returns series; the second element of
# the displayed tuple is the p-value.
sts.adfuller(df['returns'])

(-12.770265719497232,
 7.79805833603833e-24,
 32,
 4987,
 {'1%': -3.431661944885779,
  '5%': -2.8621197374408225,
  '10%': -2.5670785949998973},
 15696.767245697501)

Given the p-value of 7.798e-24, far below any common significance level, we reject the unit-root null hypothesis and can treat the new series 'returns' as a stationary process.

# ACF and PACF of Returns