<a href="https://colab.research.google.com/github/kerryback/2022-BUSI520/blob/main/AssetPricingRegressions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [54]:
import numpy as np
import pandas as pd
from pandas_datareader import DataReader as pdr
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
from statsmodels.regression.rolling import RollingOLS

### Example data

A small data set with acc=accruals and agr=asset growth, monthly data since 2010, roughly 2,000 stocks per month.

In [55]:
# Load the example panel and normalise its key columns in one chain:
# integer permno, monthly Period dates, rows ordered by (permno, date)
# with a fresh 0..n-1 index.
data = (
    pd.read_csv(
        "https://www.dropbox.com/s/012c6y4gxsxss6y/ghz.csv?dl=1",
        parse_dates=["date"],
    )
    .assign(
        permno=lambda raw: raw.permno.astype(int),
        date=lambda raw: raw.date.dt.to_period("M"),
    )
    .sort_values(by=["permno", "date"])
    .reset_index(drop=True)
)
# data.head(3)

### View cross-sections

In [56]:
"""
date = input("Input a date: ")
df = data[data.date==date]
fig = plt.boxplot([df.acc, df.agr]) 
"""

'\ndate = input("Input a date: ")\ndf = data[data.date==date]\nfig = plt.boxplot([df.acc, df.agr]) \n'

### Standardize cross-sections

In [57]:
# Replace asset growth with log(1 + agr), overwriting the column in place.
# NOTE(review): this statement is not idempotent -- re-running the cell applies
# the log a second time, so reload `data` before executing it again.
data.agr = np.log(1+data.agr)

def winsorize(ser, lower=0.01, upper=0.99):
    """Clip a Series at two of its own quantiles.

    Parameters
    ----------
    ser : pandas.Series
        Values to winsorize.
    lower, upper : float, optional
        Quantile levels in [0, 1] used as the lower and upper clipping
        bounds. The defaults (0.01 and 0.99) reproduce the original
        hard-coded 1% / 99% winsorization.

    Returns
    -------
    pandas.Series
        A Series with the same index as ``ser`` whose values below the
        ``lower`` quantile or above the ``upper`` quantile are replaced by
        those quantiles.

    Raises
    ------
    ValueError
        If the levels do not satisfy ``0 <= lower <= upper <= 1``.
    """
    if not 0 <= lower <= upper <= 1:
        raise ValueError("quantile levels must satisfy 0 <= lower <= upper <= 1")
    floor = ser.quantile(lower)
    cap = ser.quantile(upper)
    return ser.clip(lower=floor, upper=cap)


# Within each month, winsorize each characteristic and then convert it to a
# z-score (demean, divide by the cross-sectional standard deviation).
# `transform` is used rather than `apply` because it always returns a result
# aligned with the frame's own index; on pandas >= 2.0 `apply` prepends the
# group key to the index, so the result can no longer be assigned back to a
# column of `data`.
for char in ["acc", "agr"]:
    data[char] = data.groupby("date")[char].transform(winsorize)
    # Regroup after the assignment so the z-score uses the winsorized values.
    data[char] = data.groupby("date")[char].transform(
        lambda x: (x - x.mean()) / x.std()
    )

### Fama-MacBeth regressions

In [58]:
def xreg(df):
    """Run the OLS regression ``ret ~ acc + agr`` on one slice of the panel.

    Parameters
    ----------
    df : DataFrame providing the columns ``ret``, ``acc`` and ``agr``.

    Returns
    -------
    The ``params`` attribute of the fitted statsmodels result, i.e. the
    estimated coefficients of the regression.
    """
    fitted = smf.ols("ret ~ acc + agr", data=df).fit()
    return fitted.params

# One cross-sectional regression per month; each row of `fm` holds that
# month's coefficient estimates. Only the columns the regression uses are
# handed to xreg, so the grouping column is never passed to the applied
# function (pandas >= 2.2 deprecates applying over grouping columns).
fm = data.groupby("date")[["ret", "acc", "agr"]].apply(xreg)
# fm.head(3)

### t tests with Newey-West standard errors

In [59]:
def ttest(name, data):
    """Test whether the mean of column ``name`` is zero, using HAC errors.

    Regresses ``name`` on a constant only and fits with a HAC covariance
    (Bartlett kernel, 12 lags) -- the Newey-West standard errors named in
    the section heading.

    Parameters
    ----------
    name : str
        Column of ``data`` to test; used as the left-hand side of the formula.
    data : DataFrame
        Frame containing that column.

    Returns
    -------
    The second table (index 1) of the fitted result's ``summary2()`` output.
    """
    hac_options = {"kernel": "bartlett", "maxlags": 12}
    fitted = smf.ols(name + " ~ 1", data=data).fit(
        cov_type='HAC', cov_kwds=hac_options
    )
    summary_tables = fitted.summary2().tables
    return summary_tables[1]

# ttest("acc", data=fm)

# Disabled LaTeX-table builder, kept as a bare string literal so that nothing
# inside it runs; the cell merely evaluates to the string itself.
# If re-enabled, it would call ttest for "acc" and "agr", relabel each result
# with the characteristic name, stack the results with pd.concat, and print the
# table rounded to three decimals as LaTeX.
"""
table = None
for char in ["acc", "agr"]:
    result = ttest(char, data=fm)
    result.index = [char]
    table = pd.concat((table, result))

print(table.round(3).to_latex())
"""

'\ntable = None\nfor char in ["acc", "agr"]:\n    result = ttest(char, data=fm)\n    result.index = [char]\n    table = pd.concat((table, result))\n\nprint(table.round(3).to_latex())\n'

### Fama-French factors

In [60]:
# Download the Fama-French research factors (first table of the result),
# dividing by 100 as in the original cell.
ff = pdr("F-F_Research_Data_Factors", "famafrench", start=2000)[0] / 100

# Attach the factors to every stock-month by date, give the market factor a
# formula-friendly name, and compute each stock's return in excess of RF.
data = (
    data.merge(ff, left_on='date', right_index=True, how="left")
    .rename(columns={"Mkt-RF": "Mkt_RF"})
    .assign(ret_RF=lambda merged: merged.ret - merged.RF)
)

### Rolling window betas

It is customary to use 60 months as the window but to include all stock/months for which 24 past months were available in the prior 60 months.  We do that with window=60, min_nobs=24, and expanding=True.

The RollingOLS function crashes if you specify a window size that is larger than the number of rows in the data frame.  So, we construct a function to "pass" if the number of rows is less than 24 and specify the window size as the smaller of 60 and the number of rows.

In [61]:
def rolling_betas(df):
    """Estimate rolling three-factor loadings for one stock's history.

    Parameters
    ----------
    df : DataFrame with columns ``date``, ``ret_RF``, ``Mkt_RF``, ``SMB``
        and ``HML``.

    Returns
    -------
    A date-indexed DataFrame holding the ``Mkt_RF``, ``SMB`` and ``HML``
    coefficients from a rolling OLS of ``ret_RF`` on the three factors, with
    rows that have no estimate dropped. Returns ``None`` when ``df`` has
    fewer than 24 rows.
    """
    n_obs = df.shape[0]
    if n_obs < 24:
        # Fewer rows than min_nobs: skip this stock.
        return None

    # RollingOLS fails when the window exceeds the number of rows, so the
    # window is capped at the length of the history.
    fitted = RollingOLS.from_formula(
        "ret_RF ~ Mkt_RF + SMB + HML",
        data=df.set_index("date"),
        window=min(n_obs, 60),
        min_nobs=24,
        expanding=True,
    ).fit()
    loadings = fitted.params[['Mkt_RF', 'SMB', 'HML']]
    return loadings.dropna()
    
    
# Rolling factor loadings stock by stock. Only the columns rolling_betas reads
# are selected, so the grouping column (permno) is never passed to the applied
# function (pandas >= 2.2 deprecates applying over grouping columns).
betas = (
    data.groupby("permno")[["date", "ret_RF", "Mkt_RF", "SMB", "HML"]]
    .apply(rolling_betas)
)
# betas.head(3)

### Quantile sorts

This is not regression but it is a standard thing to do in asset pricing.  We calculate the returns of equally weighted decile portfolios (meaning stocks are equally weighted within each decile).  To compute value-weighted returns, assume there is a column called mve that contains market equity (as of the end of the prior month) and use

    df.groupby(['date', 'decile']).apply(lambda d: (d.ret*d.mve).sum() / d.mve.sum())

In [62]:
def decile_labels(panel, char, n_bins=10):
    """Assign each row to a within-month quantile bin of ``char``.

    Parameters
    ----------
    panel : DataFrame with a ``date`` column and the column named by ``char``.
    char : str
        Name of the characteristic to sort on.
    n_bins : int, optional
        Number of quantile bins per month (10 gives deciles).

    Returns
    -------
    A nullable-integer Series aligned with ``panel``'s index, numbered from 1
    (lowest values of ``char``) to ``n_bins`` (highest). Rows where ``char``
    is missing receive ``<NA>``.
    """
    # transform keeps the result aligned with panel's index, so it can be
    # assigned straight back as a column; labels=False yields 0-based bin
    # codes, hence the +1.
    codes = panel.groupby("date")[char].transform(
        lambda x: pd.qcut(x, n_bins, labels=False) + 1
    )
    return codes.astype("Int64")


# Define `df` explicitly so this cell runs on a fresh kernel, and work on a
# copy so the scratch `decile` column is not added to `data`.
df = data.copy()

# Equal-weighted decile portfolio returns: mean return by (date, decile).
df['decile'] = decile_labels(df, "acc")
rets_acc = df.groupby(['date', 'decile']).ret.mean()

df['decile'] = decile_labels(df, "agr")
rets_agr = df.groupby(['date', 'decile']).ret.mean()

# rets_agr.head(3)