In [124]:
import pandas as pd
import numpy as np
from datetime import datetime
import os   
import sys
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from scipy.optimize import minimize
from scipy import interpolate
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
import warnings
import statsmodels.api as sm
from scipy.optimize import fsolve
from scipy.stats import norm
from sklearn.decomposition import PCA
import warnings
from scipy.optimize import fsolve
import numpy.polynomial.polynomial as poly
warnings.filterwarnings(action='ignore')

In [125]:
# Load the industry table; the following cell relies on its `industry` and
# `top_tickers` columns.
industries = pd.read_csv('./industry.csv')

In [126]:
# Strip the 'CRSP ' prefix and ' Index' suffix from the industry labels, split
# the whitespace-separated ticker string into a list, and emit one row per
# (industry, ticker) pair.
industries = (
    industries
    .assign(
        industry=lambda tbl: (
            tbl['industry']
            .str.replace('CRSP ', '', regex=True)
            .str.replace(' Index', '', regex=True)
        ),
        top_tickers=lambda tbl: tbl['top_tickers'].str.split(),
    )
    .explode('top_tickers')
    .reset_index(drop=True)
)
industries

Unnamed: 0,industry,top_tickers
0,US Consumer Discretionary,AMZN
1,US Consumer Discretionary,TSLA
2,US Consumer Discretionary,HD
3,US Consumer Discretionary,WMT
4,US Consumer Discretionary,MCD
...,...,...
105,US Utilities,AEP
106,US Utilities,VST
107,US Utilities,D
108,US Utilities,PCG


In [127]:
# Read the daily volatility series and keep only the rows whose root symbol is
# identical to the full symbol.
vols = (
    pd.read_csv('../daily_vol_series.csv')
    .loc[lambda raw: raw['SYM_ROOT'] == raw['symbol']]
)

In [128]:
# Attach the industry label to every ticker that appears in the industry table
# (inner join drops all other tickers), then keep only the columns used below.
vols = (
    pd.merge(
        vols,
        industries,
        how='inner',
        left_on='SYM_ROOT',
        right_on='top_tickers',
    )
    .loc[:, ['DATE', 'SYM_ROOT', 'industry', 'ivol_t', 'ivol_q']]
)

In [129]:
# Preview the first rows of the volatility table after the industry merge.
vols.head()

Unnamed: 0,DATE,SYM_ROOT,industry,ivol_t,ivol_q
0,2010-01-04,AAPL,US Technology,6.140084e-09,4.3004e-10
1,2010-01-05,AAPL,US Technology,5.997564e-09,3.934783e-10
2,2010-01-06,AAPL,US Technology,5.785301e-09,5.765893e-10
3,2010-01-07,AAPL,US Technology,9.102045e-09,6.42518e-10
4,2010-01-08,AAPL,US Technology,6.838924e-09,5.694237e-10


In [130]:
# Drop observations dated after 2023-08-31.
# NOTE(review): DATE is compared as-is against a string; presumably it holds
# ISO 'YYYY-MM-DD' strings so lexicographic order equals date order - confirm.
vols = vols.loc[vols['DATE'].le('2023-08-31')]

In [131]:
# Add calendar keys and the log of `ivol_q`.
# The dates are parsed once (the original parsed DATE twice), and the new
# columns are attached with `.assign`, which returns a fresh frame instead of
# writing into the filtered slice produced by the previous cell (the source of
# a SettingWithCopyWarning that the global warnings filter was hiding).
# Non-positive `ivol_q` values yield -inf/NaN here; a later cell replaces
# infinities with NaN and drops them.
trade_dates = pd.to_datetime(vols['DATE'])
vols = vols.assign(
    month=trade_dates.dt.month,
    year=trade_dates.dt.year,
    ivol_q_log=np.log(vols['ivol_q']),
)

In [132]:
def moments(df, max_lag=10):
    """Mean squared lagged increments of `ivol_q_log` per ticker-month.

    For every group defined by (SYM_ROOT, industry, year, month) and every lag
    ``k`` in ``1..max_lag`` this computes the average of
    ``(x_t - x_{t-k}) ** 2`` over all rows of the group for which both
    ``x_t`` and ``x_{t-k}`` are available.

    The average is taken over the pairs that actually exist. Consequences:
      * a group with no valid pair for a given lag (fewer than ``k + 1``
        observations) yields NaN rather than a ``0 / (count - k)`` artefact
        such as ``-0.0``;
      * missing values inside a group no longer inflate the denominator.

    Rows are used in the order they appear in `df`; this function does not
    sort. NOTE(review): assumes the caller supplies rows in chronological
    order within each group - confirm.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'SYM_ROOT', 'industry', 'year', 'month' and
        'ivol_q_log'.
    max_lag : int, default 10
        Largest lag for which a moment column is produced.

    Returns
    -------
    pandas.DataFrame
        Indexed by (SYM_ROOT, industry, year, month) with one column
        ``lag{k}_mom`` per lag.
    """
    grouped = df.groupby(['SYM_ROOT', 'industry', 'year', 'month'])['ivol_q_log']
    per_lag = []
    for lag in range(1, max_lag + 1):
        # Series.mean skips NaN, so the divisor is the number of valid pairs
        # and an empty set of pairs gives NaN.
        lag_moment = grouped.apply(
            lambda x, lag=lag: ((x - x.shift(lag)) ** 2).mean()
        ).to_frame(f'lag{lag}_mom')
        per_lag.append(lag_moment)
    return pd.concat(per_lag, axis=1)

In [133]:
# Compute the lagged moments for every ticker-month, treat infinite values as
# missing, and keep only the rows where every lag column is present.
final = (
    moments(vols)
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
final.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,lag1_mom,lag2_mom,lag3_mom,lag4_mom,lag5_mom,lag6_mom,lag7_mom,lag8_mom,lag9_mom,lag10_mom
SYM_ROOT,industry,year,month,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AAPL,US Technology,2010,1,0.124012,0.171654,0.142317,0.237785,0.328099,0.400266,0.529878,0.649329,0.675741,0.825034
AAPL,US Technology,2010,2,0.067225,0.125021,0.179476,0.24471,0.314723,0.386906,0.497555,0.595319,0.644346,0.776623
AAPL,US Technology,2010,3,0.177133,0.242053,0.198273,0.162382,0.257832,0.319519,0.22656,0.246481,0.26148,0.348716
AAPL,US Technology,2010,4,0.193675,0.272393,0.31374,0.403786,0.41108,0.405068,0.399077,0.55403,0.698151,0.725557
AAPL,US Technology,2010,5,15.892976,25.353659,28.885627,29.800796,15.897322,16.978581,17.95795,19.168883,20.298034,22.192187


In [134]:
# Slope of each row of `final` (one moment per lag) regressed on log(lag)
# through the origin, i.e. without an intercept.
# With a single regressor and no constant the least-squares slope has the
# closed form  beta = (x . y) / (x . x),  so all rows are solved in one matrix
# product rather than one statsmodels fit per row. This also makes 'Betas' a
# plain float64 column; the per-row assignment into an empty frame left it as
# object dtype.
# NOTE(review): the moments enter in levels, not logs, and no intercept is
# fitted. If a log-log scaling fit was intended, both y and the design matrix
# need changing - confirm with the author before relying on Realized_H.
log_lags = np.log(np.arange(1, final.shape[1] + 1))
slopes = final.to_numpy(dtype=float) @ log_lags / (log_lags @ log_lags)
betas = pd.DataFrame({'Betas': slopes}, index=final.index).reset_index()
# Realized_H is defined as half of the fitted slope.
betas['Realized_H'] = betas['Betas'] * 0.5

In [135]:
# Inspect ticker-months whose fitted slope is at least 0.5, i.e. whose
# Realized_H (= 0.5 * Betas) is at least 0.25.
betas[betas['Betas']>=0.5]

Unnamed: 0,SYM_ROOT,industry,year,month,Betas,Realized_H
4,AAPL,US Technology,2010,5,11.469401456045503,5.734701
67,AAPL,US Technology,2015,8,0.728213179339307,0.364107
97,AAPL,US Technology,2018,2,0.6139284992168421,0.306964
121,AAPL,US Technology,2020,2,0.7537411782020454,0.376871
122,AAPL,US Technology,2020,3,0.7505765646106021,0.375288
...,...,...,...,...,...,...
15250,WMT,US Consumer Discretionary,2018,12,0.6244070844211982,0.312204
15265,WMT,US Consumer Discretionary,2020,3,1.0741126779642438,0.537056
15299,WMT,US Consumer Discretionary,2023,1,0.6767466304778018,0.338373
15311,XOM,US Energy,2010,5,0.8256194789592809,0.41281


In [136]:
# Mean and standard deviation of Realized_H within each industry.
industry_H = (
    betas
    .groupby('industry')['Realized_H']
    .agg(['mean', 'std'])
    .reset_index()
)
industry_H

Unnamed: 0,industry,mean,std
0,US Consumer Discretionary,0.084805,0.188473
1,US Consumer Staples,0.096751,0.154232
2,US Energy,0.082062,0.098727
3,US Financials,0.074574,0.083351
4,US Healthcare,0.101257,0.120607
5,US Industrials,0.111189,0.153151
6,US Materials,0.099205,0.113825
7,US Media & Communications,0.086026,0.216226
8,US Real Estate & REITs,0.130725,0.169417
9,US Technology,0.126389,0.684918


In [137]:
# Display the slope table: one row per (SYM_ROOT, industry, year, month).
betas

Unnamed: 0,SYM_ROOT,industry,year,month,Betas,Realized_H
0,AAPL,US Technology,2010,1,0.27544088153898666,0.13772
1,AAPL,US Technology,2010,2,0.26158834513437185,0.130794
2,AAPL,US Technology,2010,3,0.1420987558389767,0.071049
3,AAPL,US Technology,2010,4,0.27536603200850096,0.137683
4,AAPL,US Technology,2010,5,11.469401456045503,5.734701
...,...,...,...,...,...,...
15466,XOM,US Energy,2023,4,0.04711815985121362,0.023559
15467,XOM,US Energy,2023,5,0.03321159773896469,0.016606
15468,XOM,US Energy,2023,6,0.1050287118205106,0.052514
15469,XOM,US Energy,2023,7,0.1370810235268246,0.068541


In [138]:
# Load the monthly price file, keep the date / ticker / price columns, parse
# the dates and derive the year and month keys used for the merge below.
# NOTE(review): PRC is used as-is; if the source encodes special cases in its
# sign, that needs handling before use - confirm.
ticker_prices = (
    pd.read_csv('monthly_ticker_prices.csv')
    .loc[:, ['date', 'TICKER', 'PRC']]
    .assign(date=lambda px: pd.to_datetime(px['date']))
    .assign(
        year=lambda px: px['date'].dt.year,
        month=lambda px: px['date'].dt.month,
    )
)
ticker_prices

Unnamed: 0,date,TICKER,PRC,year,month
0,2010-01-29,ORCL,23.06000,2010,1
1,2010-02-26,ORCL,24.65000,2010,2
2,2010-03-31,ORCL,25.71000,2010,3
3,2010-04-30,ORCL,25.86750,2010,4
4,2010-05-28,ORCL,22.57000,2010,5
...,...,...,...,...,...
17752,2023-04-28,TSLA,164.31000,2023,4
17753,2023-05-31,TSLA,203.92999,2023,5
17754,2023-06-30,TSLA,261.76999,2023,6
17755,2023-07-31,TSLA,267.42999,2023,7


In [140]:
# Pair every ticker-month Realized_H with the price row for the same ticker,
# year and month (inner join), then keep only Realized_H, date, ticker and
# price under their final column names.
Realized_H_Data = (
    betas
    .merge(
        ticker_prices,
        how='inner',
        left_on=['SYM_ROOT', 'year', 'month'],
        right_on=['TICKER', 'year', 'month'],
    )
    .drop(columns=['SYM_ROOT', 'industry', 'year', 'month', 'Betas'])
    .rename(columns={'PRC': 'price', 'TICKER': 'ticker'})
)
Realized_H_Data


Unnamed: 0,Realized_H,date,ticker,price
0,0.13772,2010-01-29,AAPL,192.063
1,0.130794,2010-02-26,AAPL,204.620
2,0.071049,2010-03-31,AAPL,235.000
3,0.137683,2010-04-30,AAPL,261.090
4,5.734701,2010-05-28,AAPL,256.880
...,...,...,...,...
15457,0.023559,2023-04-28,XOM,118.340
15458,0.016606,2023-05-31,XOM,102.180
15459,0.052514,2023-06-30,XOM,107.250
15460,0.068541,2023-07-31,XOM,107.240


In [141]:
# Realized_H_Data.sort_values(['ticker','date']).set_index('date').to_csv('Realized_H_Data.csv')