In [71]:
import pandas as pd
from pandas_datareader import DataReader as pdr
import statsmodels.formula.api as smf
from statsmodels.regression.rolling import RollingOLS

### Example data

A small example data set of monthly stock returns (`ret`) with two firm characteristics: `acc` (accruals) and `agr` (asset growth). The data are monthly, start in 2010, and cover roughly 2,000 stocks per month.

In [65]:
# Read the example panel, cast the stock identifier to int, convert dates to
# monthly periods, and order the rows by stock and then by month.
data = (
    pd.read_csv(
        "https://www.dropbox.com/s/012c6y4gxsxss6y/ghz.csv?dl=1",
        parse_dates=["date"],
    )
    .assign(
        permno=lambda frame: frame["permno"].astype(int),
        date=lambda frame: frame["date"].dt.to_period("M"),
    )
    .sort_values(by=["permno", "date"])
    .reset_index(drop=True)
)
data.head()

Unnamed: 0,permno,date,ret,acc,agr
0,10025,2012-10,0.055125,-0.028995,0.184931
1,10025,2012-11,-0.053652,-0.028995,0.184931
2,10025,2012-12,-0.020992,-0.028995,0.184931
3,10025,2013-01,0.086949,-0.028995,0.184931
4,10025,2013-02,0.095527,-0.028995,0.184931


### Fama-MacBeth regressions

In [66]:
def xreg(df):
    """Run one cross-sectional OLS of ``ret`` on ``acc`` and ``agr``.

    Parameters
    ----------
    df : DataFrame
        Observations for a single cross-section; must contain the columns
        ``ret``, ``acc`` and ``agr``.

    Returns
    -------
    The ``params`` attribute of the fitted regression (intercept and the
    two slope coefficients).
    """
    return smf.ols("ret ~ acc + agr", data=df).fit().params


# One regression per month: each row of ``fm`` holds that month's coefficients.
fm = data.groupby('date').apply(xreg)
fm.head()

Unnamed: 0_level_0,Intercept,acc,agr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01,-0.029367,0.02165,-0.011795
2010-02,0.040089,-0.052454,-0.003666
2010-03,0.081821,0.037491,-0.024873
2010-04,0.042478,-0.090209,-0.009522
2010-05,-0.074971,0.008434,0.005427


### t tests with Newey-West standard errors

In [67]:
def ttest(name, data, maxlags=12):
    """Test whether the mean of one column is zero, using HAC standard errors.

    Regresses the column on a constant only, so the reported ``Intercept``
    is the column's mean, and fits with a HAC covariance using a Bartlett
    kernel (the Newey-West estimator named in this section's heading).

    Parameters
    ----------
    name : str
        Column of ``data`` to test; it is spliced into the formula
        ``"<name> ~ 1"``.
    data : DataFrame
        Frame holding the column, e.g. the monthly Fama-MacBeth coefficients.
    maxlags : int, default 12
        Number of lags passed to the HAC estimator. The default matches the
        value that was previously hard-coded.

    Returns
    -------
    The coefficient table (``tables[1]``) of the regression summary.
    """
    model = smf.ols(name + " ~ 1", data=data)
    result = model.fit(
        cov_type='HAC',
        cov_kwds={"kernel": "bartlett", "maxlags": maxlags},
    )
    return result.summary().tables[1]


print(ttest('acc', data=fm))
print(ttest('agr', data=fm))

                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0041      0.009      0.438      0.661      -0.014       0.022
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0028      0.002     -1.575      0.115      -0.006       0.001


### Fama-French factors

In [68]:
# Download the monthly Fama-French research factors and scale them by 1/100
# (presumably percent -> decimal, to match ``ret`` -- confirm against the source).
ff = pdr("F-F_Research_Data_Factors", "famafrench", start=2000)[0] / 100
# Rename on the factor frame itself so the merged column is formula-friendly
# and the set of factor column names is known before merging.
ff = ff.rename(columns={"Mkt-RF": "Mkt_RF"})
# Drop any factor columns left by a previous execution of this cell; without
# this, re-running the cell would merge the factors a second time and produce
# suffixed duplicates (``SMB_x``/``SMB_y``, ...), breaking ``data.RF`` below.
# On a first run nothing is dropped and the result is unchanged.
data = data.drop(columns=[*ff.columns, "ret_RF"], errors="ignore").merge(
    ff, left_on='date', right_index=True, how="left"
)
# Excess return over the risk-free rate.
data["ret_RF"] = data.ret - data.RF

### Rolling window betas

In [76]:
def rolling_betas(df, window=60, min_nobs=24):
    """Estimate rolling three-factor loadings for a single stock.

    Fits ``ret_RF ~ Mkt_RF + SMB + HML`` with ``RollingOLS`` on the rows of
    ``df`` indexed by ``date``, using an expanding sample until ``window``
    observations are available.

    Parameters
    ----------
    df : DataFrame
        Rows for one stock; must contain ``date``, ``ret_RF``, ``Mkt_RF``,
        ``SMB`` and ``HML``.
    window : int, default 60
        Maximum rolling window length; capped at the number of rows in ``df``.
    min_nobs : int, default 24
        Minimum number of observations required for an estimate. It is also
        the minimum number of rows ``df`` must have for any estimation to be
        attempted, so the two thresholds can no longer drift apart.

    Returns
    -------
    DataFrame of the ``Mkt_RF``, ``SMB`` and ``HML`` coefficients indexed by
    ``date`` with all-missing rows removed, or ``None`` when ``df`` has fewer
    than ``min_nobs`` rows.

    NOTE(review): the estimate indexed at a given month presumably includes
    that month's own return; confirm whether a one-month lag is intended
    before using these betas as regressors for the same month.
    """
    n = df.shape[0]
    if n < min_nobs:
        # Too short a history for even one estimate; signal "no result".
        return None

    # Use a distinct local name so the notebook-level ``data`` is not shadowed.
    panel = df.set_index("date")
    model = RollingOLS.from_formula(
        "ret_RF ~ Mkt_RF + SMB + HML",
        window=min(n, window),
        min_nobs=min_nobs,
        expanding=True,
        data=panel,
    )
    result = model.fit()
    return result.params[['Mkt_RF', 'SMB', 'HML']].dropna()


betas = data.groupby("permno").apply(rolling_betas)
betas.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Mkt_RF,SMB,HML
permno,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10026,2011-12,0.174161,1.804534,-0.241938
10026,2012-01,0.114215,1.73063,-0.122765
10026,2012-02,0.125485,1.700585,-0.119297
10026,2012-03,0.154238,1.604118,-0.050139
10026,2012-04,0.136965,1.589787,-0.049729


### Fama-MacBeth regressions on characteristics and betas

You don't really want to do this: the betas used as regressors are themselves estimated with error, which creates a severe errors-in-variables problem.

In [77]:
# Prefix the factor-loading columns with ``beta_``. The guard leaves already
# prefixed names alone, so re-running this cell does not yield ``beta_beta_*``.
betas = betas.rename(
    columns=lambda col: col if col.startswith("beta_") else "beta_" + col
)
# Remove beta columns left in ``data`` by a previous execution before merging,
# so a re-run does not create suffixed duplicates. On a first run nothing is
# dropped. The inner join keeps only stock-months that have estimated betas.
data = data.drop(columns=betas.columns, errors="ignore").merge(
    betas, left_on=["permno", "date"], right_index=True, how="inner"
)


def xreg(df):
    """Run one cross-sectional OLS of ``ret`` on characteristics and betas.

    This deliberately replaces the earlier two-regressor ``xreg`` defined
    above in the notebook.

    Parameters
    ----------
    df : DataFrame
        Observations for a single cross-section; must contain ``ret``,
        ``acc``, ``agr``, ``beta_Mkt_RF``, ``beta_SMB`` and ``beta_HML``.

    Returns
    -------
    The ``params`` attribute of the fitted regression.
    """
    model = smf.ols("ret ~ acc + agr + beta_Mkt_RF + beta_SMB + beta_HML", data=df)
    result = model.fit()
    return result.params


# One regression per month on the merged characteristics-and-betas panel.
fm = data.groupby('date').apply(xreg)
fm.head()

Unnamed: 0_level_0,Intercept,acc,agr,beta_Mkt_RF,beta_SMB,beta_HML
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-12,0.023291,-0.019703,-0.012003,-0.016053,-0.008991,0.021857
2012-01,-0.021065,0.018477,0.01299,0.070754,0.028548,-0.00471
2012-02,0.013333,-0.065099,-0.002168,0.02994,-0.019291,0.009224
2012-03,0.02648,0.008151,-0.012212,0.002125,-0.010058,0.014605
2012-04,0.026504,0.07771,-0.011132,-0.029257,-0.008745,-0.008338


In [78]:
# Print each coefficient's name followed by its HAC t-test table.
for x in ["acc", "agr", "beta_Mkt_RF", "beta_SMB", "beta_HML"]:
    print(x, ttest(x, fm), sep="\n")

acc
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0257      0.008      3.090      0.002       0.009       0.042
agr
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0004      0.001     -0.302      0.763      -0.003       0.002
beta_Mkt_RF
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0021      0.004      0.581      0.562      -0.005       0.009
beta_SMB
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0027      0.003      0.827      0.408      -0.004       0.009
beta_HML
              