In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import linear_model
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS

In [2]:
# Monthly stock file: upper-case the column names, parse DATE, and add MONTHID,
# a month counter relative to 2000 (January 2000 -> 1, December 1999 -> 0).
stock_monthly = pd.read_sas('../downloads/stock_m.sas7bdat')
stock_monthly.columns = [col.upper() for col in stock_monthly.columns]
stock_monthly = stock_monthly.assign(
    DATE=lambda frame: pd.to_datetime(frame["DATE"]),
    # Evaluated after DATE above, so it sees the parsed datetimes.
    MONTHID=lambda frame: (frame["DATE"].dt.year - 2000) * 12 + frame["DATE"].dt.month,
)

# Factor file: same treatment, keyed on its own date column DATEFF.
ff_factors = pd.read_sas('../downloads/ff_factors_m.sas7bdat')
ff_factors.columns = [col.upper() for col in ff_factors.columns]
ff_factors = ff_factors.assign(
    DATEFF=lambda frame: pd.to_datetime(frame["DATEFF"]),
    MONTHID=lambda frame: (frame["DATEFF"].dt.year - 2000) * 12 + frame["DATEFF"].dt.month,
)

# Attach the factor columns to every stock-month row. A left join keeps stock
# rows even when no factor row shares their MONTHID.
q1_data = stock_monthly.merge(ff_factors, how="left", on="MONTHID")

# Excess return = RET minus RF. Rows with a missing RET are kept (no drop is
# applied), so XRET is NaN for them.
q1_data['XRET'] = q1_data['RET'].sub(q1_data['RF'])

In [7]:
# Calculate the beta
def rolling_market_beta(panel, window_months=36, min_obs=12, first_month=1):
    """Estimate a trailing-window market beta for every stock-month row.

    For each PERMNO and each month ``t`` in which that PERMNO has a row, XRET
    is regressed on a constant and MKTRF using that PERMNO's rows with
    ``t - (window_months - 1) <= MONTHID <= t``. The MKTRF slope is the beta
    for the rows at month ``t``.

    Parameters
    ----------
    panel : pandas.DataFrame
        Must contain the columns PERMNO, MONTHID, XRET and MKTRF.
    window_months : int, default 36
        Length of the trailing calendar window, in MONTHID units.
    min_obs : int, default 12
        Minimum number of non-null XRET values required inside the window;
        windows with fewer are left as NaN.
    first_month : int, default 1
        Only months with ``MONTHID >= first_month`` receive a beta.
        NOTE(review): the original loop started at MONTHID 1, so rows with
        MONTHID <= 0 (dates before 2000) never got a beta even though they do
        enter later windows. That behaviour is preserved here; confirm whether
        it is intended.

    Returns
    -------
    numpy.ndarray
        One float per row of ``panel`` (same row order); NaN where no beta
        was estimated.
    """
    # Work on a positionally indexed copy so index labels equal row positions,
    # whatever index the caller's frame carries.
    work = panel[['PERMNO', 'MONTHID', 'XRET', 'MKTRF']].reset_index(drop=True)
    betas = np.full(len(work), np.nan)

    # Split by stock once instead of re-masking the whole frame for every
    # (stock, month) pair. Rows with a missing PERMNO fall out of the groupby,
    # which matches an equality mask that can never select them.
    for _, stock in work.groupby('PERMNO', sort=False):
        months = stock['MONTHID'].to_numpy()
        positions = stock.index.to_numpy()

        # Only months this stock actually has can receive a value, so skip
        # all the others.
        for t in np.unique(months[months >= first_month]):
            in_window = (months <= t) & (months >= t - (window_months - 1))
            window = stock[in_window]
            if window['XRET'].count() < min_obs:
                continue

            design = sm.add_constant(window[['MKTRF']])
            # missing='drop' discards rows where XRET or a regressor is NaN.
            fit = sm.OLS(window['XRET'], design, missing='drop').fit()
            betas[positions[months == t]] = fit.params['MKTRF']

    return betas


q1_data['BETA'] = rolling_market_beta(q1_data)
display(pd.DataFrame(q1_data['BETA'].describe(percentiles=[0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99])))

Unnamed: 0,BETA
count,20308.0
mean,1.257083
std,0.795173
min,-2.228347
1%,-0.116178
5%,0.267463
25%,0.773262
50%,1.111727
75%,1.561689
95%,2.812702
