### Mean model with lags = [1,5,22] using pct_high gives lower log likelihood than GJR with pct_close

In [98]:
from sqlalchemy import create_engine

engine = create_engine('mysql+pymysql://localhost',
                       connect_args={'read_default_file': '~/.mytest.cnf'})
conn = engine.connect()

In [99]:
import pandas as pd

tck_qry = pd.read_sql_query('''select distinct us.symbol, avg(us.volume*us.close) from USE_2021 us
join US_SymsWithOptions USWO on us.symbol = USWO.Symbol
where USWO.`Symbol Type` = 'Equity'
    group by us.symbol order by avg(us.volume*us.close) desc limit 50''',
                            conn)
tdf = pd.DataFrame(tck_qry, columns=['symbol'])
tickers = tdf['symbol']
prices = ["pct_open", "pct_high", "pct_low", "pct_close"]
# prices = ["pct_open", "pct_high", "pct_low", "pct_close",
#           'pct_hilo', 'pct_opclo']

In [100]:
def query_symbol(sym, conn):
    qry = pd.read_sql_query(
        '''select tradedate, pct_open, pct_high, pct_low, pct_close,
         pct_hilo, pct_opclo from USEQ_HIST
        where symbol="''' + sym + '''"
        and volume>0
         order by tradedate''',
        conn,
        index_col='tradedate'
    )
    return sym, pd.DataFrame(qry, columns=prices)

In [101]:
import math
def ann_var(var):
    return (math.sqrt((var)*252))


In [102]:
from arch.univariate import ARX, GARCH, StudentsT

from arch import arch_model
def gjr(df):
    return "gjr", arch_model(df, rescale=False, p=1, o=1, q=1, dist="StudentsT")
def arx(df):
    am=ARX(df, rescale=False, lags=[1,5,22], constant=True)
    am.volatility = GARCH(1,1,1)
    am.distribution = StudentsT()
    return "arx", am



In [103]:
rows = []

for tk in tickers:
    ticker, dft = query_symbol(tk, conn)
    for pr in prices:
        if(len(dft.index)<800):
            continue
        df = dft[pr]
        for str, am in (gjr(df), arx(df)):
            res = am.fit(disp='off')
            if res.convergence_flag != 0:
                continue
            forecasts = res.forecast(reindex=False)
            lhood = res.loglikelihood
            if str == 'arx':
                nnobs = res.nobs
                lhood = lhood*(nnobs/(nnobs - 22))
            row = [tk, str, pr, lhood, ann_var(forecasts.variance['h.1'].iloc[0])
               ,forecasts.mean['h.1'].iloc[0], res.nobs]
            rows.append(row)
    cmp = pd.DataFrame(rows, columns=['ticker', 'model', 'price', 'lhood', 'volatilty', 'mean', 'nobs'])

compare = cmp.set_index(['ticker', 'model', 'price'])
compare.head(40)



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lhood,volatilty,mean,nobs
ticker,model,price,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
TSLA,gjr,pct_open,-3743.207729,47.734052,0.1616,1468
TSLA,arx,pct_open,-3744.960593,48.101799,0.18011,1446
TSLA,gjr,pct_high,-3591.41998,71.636216,0.168396,1468
TSLA,arx,pct_high,-3580.401205,67.559279,0.887676,1446
TSLA,gjr,pct_low,-3637.192561,42.350703,0.237877,1468
TSLA,arx,pct_low,-3619.478782,39.330864,0.466653,1446
TSLA,gjr,pct_close,-3709.211032,50.450455,0.19067,1468
TSLA,arx,pct_close,-3707.187974,51.618713,0.034125,1446
AAPL,gjr,pct_open,-2700.213162,21.779809,0.162901,1468
AAPL,arx,pct_open,-2686.278407,21.901612,0.257523,1446
