In [99]:
# Run the project setup script inside this kernel. Its printed output reports
# where DATA_DIR is located.
# NOTE(review): later cells use `DATA_DIR` and `os` without defining them in the
# visible cells — presumably both come from this script; confirm.
%run init_notebookspace.py

DATA_DIR is existant under: C:\Users\LukasGrahl\Documents\GIT\bayes_filter\data


In [100]:
import pandas as pd
import numpy as np

from pandas_datareader import fred
import yfinance as yf
import pandas_datareader as pdread

import matplotlib.pyplot as plt

from statsmodels.tsa.arima.model import ARIMA
from yahoo_fin import stock_info as ysi

from itertools import chain

In [101]:
from src.utils import apply_datetime_format

### Get S&P 500 constituents

In [11]:
# S&P 500 constituents and their market capitalisation.
# One quote request is issued per ticker; only the 'marketCap' field is kept.
sp500_tickers = ysi.tickers_sp500()
market_caps = [
    pdread.get_quote_yahoo(ticker)['marketCap'].values[0]
    for ticker in sp500_tickers
]
df = pd.DataFrame(data=market_caps, index=sp500_tickers, columns=['market_cap'])

# Tickers of the five largest constituents, ordered by descending market cap.
ranked_by_cap = df.sort_values('market_cap', ascending=False)
sp500_largest = ranked_by_cap.index[:5].values

# Ticker of the index itself.
sp500_ticker = ['^GSPC']

In [12]:
# Sample window for the price download, as ISO date strings (YYYY-MM-DD).
start, end = '2020-01-01', '2022-12-31'

In [13]:
# Empty frame on a daily calendar index: one row per day, starting at `start`,
# with as many rows as there are days between `start` and `end`.
n_days = (apply_datetime_format(end, '%Y-%m-%d') - apply_datetime_format(start, '%Y-%m-%d')).days
df_prices = pd.DataFrame(index=pd.date_range(start, periods=n_days))

# Download each series (largest constituents first, then the index), prefix its
# columns with the ticker, and left-join it onto the calendar index.
for ticker in chain(sp500_largest, sp500_ticker):
    quotes = yf.download(ticker, start, end)
    quotes.columns = [f'{ticker}_{field}' for field in quotes.columns]
    df_prices = df_prices.join(quotes)

# get closing price
# Keep the adjusted-close columns, strip the 10-character '_Adj Close' suffix so
# each column is named by its ticker, and drop every date with a missing value.
adj_close_cols = [name for name in df_prices.columns if 'Adj Close' in name]
df_c = (
    df_prices[adj_close_cols]
    .rename(columns=lambda name: name[:-10])
    .dropna()
)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [16]:
# get log returns
df_rets = np.log(df_c / df_c.shift(1)).dropna()
df_rets.sort_index(inplace=True)
df_rets.asfreq = "D"

df_rets['^GSPC_lead'] = df_rets['^GSPC'].shift(1)

df_rets.dropna(inplace=True)
df_rets.index.asfreq = 'd'

In [103]:
# Persist the return series as CSV inside the project data directory.
# NOTE(review): `os` and `DATA_DIR` are not defined in the visible cells —
# presumably provided by the setup script run at the top; confirm.
returns_path = os.path.join(DATA_DIR, 'returns.csv')
df_rets.to_csv(returns_path)

In [24]:
# Show the five tickers selected as the largest constituents by market cap.
sp500_largest

array(['AAPL', 'MSFT', 'GOOGL', 'GOOG', 'AMZN'], dtype=object)

In [38]:
# ARMA(2,2) (no differencing) for the lead index return, with the AAPL return as
# the single exogenous regressor. Plain arrays are passed, so the model carries
# no date index.
arima_order = (2, 0, 2)
lead_index_returns = df_rets['^GSPC_lead'].values
aapl_returns = df_rets[['AAPL']].values

mod = ARIMA(lead_index_returns, exog=aapl_returns, order=arima_order)
res = mod.fit()



In [39]:
# Display the summary table of the fitted model.
res.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,754.0
Model:,"ARIMA(2, 0, 2)",Log Likelihood,2067.748
Date:,"Mon, 13 Mar 2023",AIC,-4121.496
Time:,10:22:52,BIC,-4089.118
Sample:,0,HQIC,-4109.023
,- 754,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0002,0.001,0.390,0.696,-0.001,0.001
x1,-0.0331,0.018,-1.889,0.059,-0.067,0.001
ar.L1,-0.3669,0.240,-1.527,0.127,-0.838,0.104
ar.L2,-0.1338,0.169,-0.790,0.429,-0.465,0.198
ma.L1,0.2173,0.240,0.905,0.365,-0.253,0.688
ma.L2,0.2460,0.136,1.811,0.070,-0.020,0.512
sigma2,0.0002,6.78e-06,35.820,0.000,0.000,0.000

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,2295.23
Prob(Q):,0.96,Prob(JB):,0.0
Heteroskedasticity (H):,0.61,Skew:,-0.68
Prob(H) (two-sided):,0.0,Kurtosis:,11.44


### Designing a Kalman filter

The ARMA(2,2) process below, extended by one lagged exogenous regressor $a$, can be written in state-space form:
$$ x_t = c + \phi_1 x_{t-1} + \phi_2 x_{t-2} + \theta_1 u_{t-1} + \theta_2 u_{t-2} + \beta_1 a_{t-1} + u_t $$

In [40]:
import sympy as sp

In [41]:
# Symbols used to write the model in state-space form.
symbol_names = [
    'x_{t-0}', 'x_{t-1}', 'x_{t-2}',   # endogenous variable: current value and two lags
    'a_{t}', 'a_{t-1}',                # exogenous variable: current value and one lag
    'phi_1', 'phi_2', 'phi_3',         # AR parameters
    'beta_1',                          # parameter on the exogenous variable
    'theta_1', 'theta_2',              # MA parameters
    'u_{t-1}', 'u_{t-2}', 'u_t',       # innovations: two lags and current value
]
syms = sp.symbols(symbol_names)
x0, x1, x2, a, a1, phi1, phi2, phi3, beta1, theta1, theta2, u1, u2, u = syms

In [76]:
# State vector at time t as a 5x1 column: (x_t, x_{t-1}, u_t, u_{t-1}, a_t).
X = sp.Matrix(5, 1, [x0, x1, u, u1, a])

In [98]:
# NOTE(review): this looks like a leftover scratch cell — the 2x11 array is not
# assigned, and nothing in the visible cells uses it.
np.array([[*range(0, 11)], [*range(0, 11)]])

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]])

In [77]:
# Display the state vector at time t.
X

Matrix([
[x_{t-0}],
[x_{t-1}],
[    u_t],
[u_{t-1}],
[  a_{t}]])

In [78]:
# Lagged state vector (time t-1) as a 5x1 column:
# (x_{t-1}, x_{t-2}, u_{t-1}, u_{t-2}, a_{t-1}).
X1 = sp.Matrix(5, 1, [x1, x2, u1, u2, a1])

In [79]:
# Display the lagged state vector (time t-1).
X1

Matrix([
[x_{t-1}],
[x_{t-2}],
[u_{t-1}],
[u_{t-2}],
[a_{t-1}]])

In [80]:
# 5x5 transition matrix applied to the lagged state X1, given as a flat list in
# row-major order.
Z = sp.Matrix(5, 5, [
    phi1, phi2, theta1, theta2, beta1,   # x_t: AR, MA and exogenous terms
    1,    0,    0,      0,      0,       # x_{t-1} is carried over
    0,    0,    0,      0,      0,       # u_t: nothing from the lagged state
    0,    0,    1,      0,      0,       # u_{t-1} is carried over
    0,    0,    0,      0,      1,       # last row copies a_{t-1}
])

In [81]:
# Display the lagged state vector again, next to the transition matrix.
X1

Matrix([
[x_{t-1}],
[x_{t-2}],
[u_{t-1}],
[u_{t-2}],
[a_{t-1}]])

In [82]:
Z @ X1

Matrix([
[a_{t-1}*beta_1 + phi_1*x_{t-1} + phi_2*x_{t-2} + theta_1*u_{t-1} + theta_2*u_{t-2}],
[                                                                           x_{t-1}],
[                                                                                 0],
[                                                                           u_{t-1}],
[                                                                           a_{t-1}]])

In [83]:
# Selection vector R (5x1): the innovation enters the first and third state rows.
R = sp.Matrix(5, 1, [1, 0, 1, 0, 0])
# Innovation as a 1x1 matrix so that R @ U is a 5x1 column.
U = sp.Matrix(1, 1, [u])

In [85]:
# Display the lagged state vector once more before assembling the equation.
X1

Matrix([
[x_{t-1}],
[x_{t-2}],
[u_{t-1}],
[u_{t-2}],
[a_{t-1}]])

In [84]:
# State equation: X = Z * X1 + R * U, with the right-hand side built in two parts.
transition_term = Z * X1
innovation_term = R * U
state_space = sp.Eq(X, transition_term + innovation_term)
state_space

Eq(Matrix([
[x_{t-0}],
[x_{t-1}],
[    u_t],
[u_{t-1}],
[  a_{t}]]), Matrix([
[a_{t-1}*beta_1 + phi_1*x_{t-1} + phi_2*x_{t-2} + theta_1*u_{t-1} + theta_2*u_{t-2} + u_t],
[                                                                                 x_{t-1}],
[                                                                                     u_t],
[                                                                                 u_{t-1}],
[                                                                                 a_{t-1}]]))

In [174]:
# Numeric version of Z. Argument order: phi_1, phi_2, theta_1, theta_2, beta_1, a_{t-1}.
# NOTE(review): `a1` does not appear in Z as defined in this notebook, so the
# last argument has no effect on the result. The recorded output of the call
# below has its top-right entry scaled by that argument, which suggests it was
# produced with a different definition of Z — re-run from a fresh kernel.
Z_lamb_args = (phi1, phi2, theta1, theta2, beta1, a1)
Z_lamb = sp.lambdify(Z_lamb_args, Z)

In [177]:
# Evaluate Z numerically. The first five values are (phi_1, phi_2, theta_1,
# theta_2, beta_1) and match the ARIMA estimates above up to rounding; the last
# value is passed for a_{t-1}.
Z_lamb(-0.36671969, -0.13347194,  0.21725022,  0.24566215, -0.03305849, 2)

array([[-0.36671969, -0.13347194,  0.21725022,  0.24566215, -0.06611698],
       [ 1.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  1.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  1.        ]])