In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.regression.rolling import RollingOLS

In [None]:
# Daily stock file: upper-case the column names, parse DATE, tag every row
# with a running month counter (January 2000 -> 1, February 2000 -> 2, ...)
# and keep only the rows that carry a return.
stock_d = pd.read_sas('../downloads/stock_d.sas7bdat')
stock_d.columns = [name.upper() for name in stock_d.columns]
stock_d["DATE"] = pd.to_datetime(stock_d["DATE"])
years_since_2000 = stock_d["DATE"].dt.year - 2000
stock_d["MONTHID"] = years_since_2000 * 12 + stock_d["DATE"].dt.month
stock_d = stock_d[stock_d["RET"].notnull()]

# Daily factor file: same column-name and date normalisation.
ff_factors = pd.read_sas('../downloads/ff_factors_d.sas7bdat')
ff_factors.columns = [name.upper() for name in ff_factors.columns]
ff_factors["DATE"] = pd.to_datetime(ff_factors["DATE"])

# Attach the factor columns to each stock row by DATE. A left join keeps
# every stock row even when no factor row exists for that date.
q2_data = stock_d.merge(ff_factors, how="left", on="DATE")

In [None]:
# Residuals and idiosyncratic volatility (IVOL) per stock-month.
#
# For every (PERMNO, MONTHID) group with at least MIN_OBS complete daily
# observations, XRET is regressed on a constant plus MKTRF, SMB and HML.
# The residuals are written to q2_data['RESIDUAL'], and their population
# standard deviation (ddof=0, numpy's default) becomes that stock-month's IVOL.
#
# The groups are visited once via groupby. Filtering the whole frame with a
# boolean mask for every PERMNO x MONTHID pair made the cost grow with
# (#stocks * #months * #rows).
MIN_OBS = 10
FF3_FACTORS = ['MKTRF', 'SMB', 'HML']


def _ff3_residuals(stock_month):
    """Residuals from an OLS fit of XRET on a constant and the three factors.

    Parameters
    ----------
    stock_month : pandas.DataFrame
        Rows of one PERMNO within one MONTHID. Must contain XRET and the
        columns listed in FF3_FACTORS, with no missing values.

    Returns
    -------
    pandas.Series
        The fit's residuals, carrying the index of ``stock_month``.
    """
    exog = sm.add_constant(stock_month[FF3_FACTORS])
    # No params_only here: that is a RollingOLS.fit option, not a documented
    # OLS.fit argument, and the residuals are needed anyway.
    return sm.OLS(stock_month['XRET'], exog).fit().resid


# Rows with a missing regressor or XRET are left out, so each fit only sees
# complete observations and MIN_OBS counts usable rows.
usable = q2_data.dropna(subset=FF3_FACTORS + ['XRET'])
# Months are reported from MONTHID 1 upwards (see the grid below), so earlier
# months are not fitted either.
usable = usable[usable['MONTHID'] >= 1]

residual_pieces = []
ivol_by_key = {}
for key, stock_month in usable.groupby(['PERMNO', 'MONTHID'], sort=False):
    if len(stock_month) < MIN_OBS:
        continue
    resid = _ff3_residuals(stock_month)
    residual_pieces.append(resid)
    ivol_by_key[key] = np.std(resid.to_numpy())

# Always create the column, even when no stock-month qualified; rows without
# a fitted regression stay NaN.
if residual_pieces:
    q2_data['RESIDUAL'] = pd.concat(residual_pieces).reindex(q2_data.index)
else:
    q2_data['RESIDUAL'] = np.nan

# One output row for every PERMNO x MONTHID in 1..max(MONTHID), in order of
# first appearance of the PERMNO; stock-months without an estimate get NaN.
last_month = q2_data['MONTHID'].max()
months = range(1, int(last_month) + 1) if pd.notna(last_month) else range(0)
q2_ivol = pd.DataFrame(
    [
        (month, permno, ivol_by_key.get((permno, month), np.nan))
        for permno in q2_data['PERMNO'].unique()
        for month in months
    ],
    columns=['MONTHID', 'PERMNO', 'IVOL'],
)