### CFM 301 Data Assignment 3 Q2
#### Jeongseop Yi (j22yi)

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.regression.rolling import RollingOLS

In [2]:
def _read_sas_table(path):
    """Load a SAS file, upper-case its column names and parse DATE as datetime."""
    table = pd.read_sas(path)
    table.columns = [name.upper() for name in table.columns]
    table["DATE"] = pd.to_datetime(table["DATE"])
    return table


# Daily stock data: tag every row with a running month counter
# (January 2000 -> 1) and discard the rows that have no return.
stock_d = _read_sas_table('../downloads/stock_d.sas7bdat')
stock_d["MONTHID"] = 12 * (stock_d['DATE'].dt.year - 2000) + stock_d['DATE'].dt.month
stock_d = stock_d.dropna(subset=['RET'])

# Daily factor data (ff_factors)
ff_factors = _read_sas_table('../downloads/ff_factors_d.sas7bdat')

# Attach the factor columns to each stock-day; the left join keeps every stock row
q2_data = stock_d.merge(ff_factors, on="DATE", how="left")

In [3]:
# Excess return over RF, plus NaN placeholder columns
# (RESIDUAL and IVOL are created here but not populated in this cell).
q2_data = q2_data.assign(
    XRET=q2_data['RET'] - q2_data['RF'],
    RESIDUAL=np.nan,
    IVOL=np.nan,
)


def _factor_model_residuals(stock_month):
    """Return the OLS residuals of XRET on a constant, MKTRF, SMB and HML."""
    regressors = sm.add_constant(stock_month[['MKTRF', 'SMB', 'HML']])
    fitted = sm.OLS(stock_month['XRET'], regressors).fit(params_only=True)
    return fitted.resid


# Index by DATE so each group's residual series is labelled by its date,
# then run one regression per (PERMNO, MONTHID) group.
q2_by_date = q2_data.set_index(['DATE'])
resid = (
    q2_by_date
    .groupby(['PERMNO', 'MONTHID'])
    .apply(_factor_model_residuals)
    .reset_index()
)

# Restore DATE as an ordinary column (it ends up as the leading column).
q2_data = q2_by_date.reset_index()

In [4]:
# Name the columns of the residual table and make sure residuals are floats.
resid.columns = ['PERMNO', 'MONTHID', 'DATE', 'RESIDUAL']
resid['RESIDUAL'] = resid['RESIDUAL'].astype(float)

# Keep only the stock-months that have at least 10 residual rows.
rows_in_month = resid.groupby(['PERMNO', 'MONTHID'])['RESIDUAL'].transform('size')
resid_filter = resid.loc[rows_in_month >= 10].reset_index(drop=True)

# IVOL = sample standard deviation of the residuals within each stock-month.
IVOL = (
    resid_filter
    .groupby(['PERMNO', 'MONTHID'], as_index=False)['RESIDUAL']
    .std()
    .rename(columns={'RESIDUAL': 'IVOL'})
)

In [5]:
# Summary statistics of IVOL, including tail and quartile percentiles.
ivol_percentiles = [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]
ivol_summary = IVOL[['IVOL']].describe(percentiles=ivol_percentiles)
display(ivol_summary)

Unnamed: 0,IVOL
count,20775.0
mean,0.016686
std,0.012257
min,0.001331
1%,0.004099
5%,0.005521
25%,0.008871
50%,0.013138
75%,0.020306
95%,0.040426


In [6]:
# Save the IVOL table to the "Q2" sheet of the assignment workbook.
# Opening a workbook in append mode fails with FileNotFoundError when the file
# does not exist yet, so fall back to creating it in that case. The
# `if_sheet_exists` option is only accepted by pandas in append mode, hence it
# is passed only on that branch.
workbook_path = Path("../DA3_data.xlsx")
if workbook_path.exists():
    writer_options = {"mode": "a", "if_sheet_exists": "replace"}
else:
    writer_options = {"mode": "w"}

with pd.ExcelWriter(workbook_path, engine="openpyxl", **writer_options) as writer:
    IVOL.to_excel(writer, sheet_name="Q2", index=False)