### Mean model with lags = [1,5,22] using pct_high (or pct_low?) gives higher log likelihood than GJR with pct_close

In [1]:
from sqlalchemy import create_engine

engine = create_engine('mysql+pymysql://localhost',
                       connect_args={'read_default_file': '~/.myuseq.cnf'})
conn = engine.connect()

In [2]:
import pandas as pd

tck_qry = pd.read_sql_query('''select distinct us.symbol, avg(us.volume*us.close) as vwp from USEQ_2021 us
join useq.US_syms_with_options USWO on us.symbol = USWO.Symbol
    group by us.symbol having vwp > 50000000. order by vwp desc limit 50''',
                            conn)
tdf = pd.DataFrame(tck_qry, columns=['symbol'])
tickers = tdf['symbol']
prices = ["pct_open", "pct_high", "pct_low", "pct_close"]
# prices = ["pct_open", "pct_high", "pct_low", "pct_close",
#           'pct_hilo', 'pct_opclo']

In [3]:
def query_symbol(sym, conn):
    qry = pd.read_sql_query(
        '''select tradedate, pct_open, pct_high, pct_low, pct_close,
         pct_hilo, pct_opclo from USEQ_HIST
        where symbol="''' + sym + '''"
        and volume>0
         order by tradedate''',
        conn,
        index_col='tradedate'
    )
    return sym, pd.DataFrame(qry, columns=prices)

In [4]:
import math
def ann_var(var):
    return (math.sqrt((var)*252))


In [5]:
from arch.univariate import ARX, GARCH, StudentsT

from arch import arch_model
def gjr(df):
    return "gjr", arch_model(df, rescale=False, p=1, o=1, q=1, dist="StudentsT")
def arx(df):
    am=ARX(df, rescale=False, lags=[1,5,22], constant=True)
    am.volatility = GARCH(1,1,1)
    am.distribution = StudentsT()
    return "arx", am



In [6]:
rows = []
# tickers=pd.Series(data=['dfs'])
tickers=pd.concat([pd.Series(data='DFS'), tickers])
#tickers=pd.Series(data=['AMC']).append(tickers)
# tickers=pd.Series(data=['DFS']).append(tickers)

for tk in tickers:
    ticker, dft = query_symbol(tk, conn)
    for pr in prices:
        if(len(dft.index)<800):
            continue
        df = dft[pr]
        for str, am in (gjr(df), arx(df)):
            res = am.fit(disp='off')
            if res.convergence_flag != 0:
                continue
            forecasts = res.forecast(reindex=False)
            lhood = res.loglikelihood
            if str == 'arx':
                nnobs = res.nobs
                lhood = lhood*(nnobs/(nnobs - 22))
            row = [tk, str, pr, lhood, ann_var(forecasts.variance['h.1'].iloc[0])
               ,forecasts.mean['h.1'].iloc[0], res.nobs]
            rows.append(row)
    cmp = pd.DataFrame(rows, columns=['ticker', 'model', 'price', 'lhood', 'volatilty', 'mean', 'nobs'])

compare = cmp.set_index(['ticker', 'model', 'price'])

In [7]:
import numpy as np
def highlight_max(s, props=''):
    return np.where(s == np.nanmax(s.values), props, '')
s2 = compare.style
s2.apply(highlight_max, props='color:red', axis=0)




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lhood,volatilty,mean,nobs
ticker,model,price,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
DFS,gjr,pct_open,-9399.338106,42.312415,0.054313,4510
DFS,arx,pct_open,-9409.705248,42.631863,0.018429,4488
DFS,gjr,pct_high,-8824.546881,40.327438,0.078234,4510
DFS,arx,pct_high,-8815.018668,37.858606,0.53167,4488
DFS,gjr,pct_low,-9203.280274,42.256482,0.055787,4510
DFS,arx,pct_low,-9190.174836,40.188452,0.279505,4488
DFS,gjr,pct_close,-9472.969273,40.62732,0.073425,4510
DFS,arx,pct_close,-9479.803507,41.502741,0.016281,4488
SPY,gjr,pct_open,-7748.362427,29.599416,0.055104,5913
SPY,arx,pct_open,-7730.104191,31.657073,-0.202198,5891
