<a href="https://colab.research.google.com/github/kerryback/2022-BUSI520/blob/main/AssetPricingRegressions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# uncomment and execute the following if necessary

!pip install linearmodels

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
python -m pip install --upgrade pip

In [24]:
import numpy as np
import pandas as pd
from pandas_datareader import DataReader as pdr
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
from statsmodels.regression.rolling import RollingOLS
from linearmodels import FamaMacBeth

### Example data

A small data set with two stock characteristics, `acc` (accruals) and `agr` (asset growth), observed monthly since 2010 for roughly 2,000 stocks per month.

In [2]:
# Load the stock-month panel, store permno as int and date as a monthly Period,
# then order the rows by stock and month with a fresh 0..n-1 index.
data = (
    pd.read_csv(
        "https://www.dropbox.com/s/012c6y4gxsxss6y/ghz.csv?dl=1",
        parse_dates=["date"],
    )
    .assign(
        permno=lambda d: d.permno.astype(int),
        date=lambda d: d.date.dt.to_period('M'),
    )
    .sort_values(by=["permno", "date"])
    .reset_index(drop=True)
)
# data.head(3)

### View cross-sections

In [3]:
# Disabled interactive snippet, kept as a bare string literal so it is not executed
# (the cell only echoes the string). If enabled, it would prompt for a date and draw
# box plots of acc and agr for that month's cross-section.
"""
date = input("Input a date: ")
df = data[data.date==date]
fig = plt.boxplot([df.acc, df.agr]) 
"""

'\ndate = input("Input a date: ")\ndf = data[data.date==date]\nfig = plt.boxplot([df.acc, df.agr]) \n'

### Standardize cross-sections

In [4]:
# Replace asset growth with log(1 + agr). The column is overwritten in place, so
# re-running this cell without reloading `data` applies the log a second time.
data["agr"] = np.log(1 + data["agr"])

def winsorize(ser):
    """Clip a Series to its own 1st and 99th percentiles.

    Values below the 1% quantile are raised to it and values above the 99%
    quantile are lowered to it; everything in between is returned unchanged.
    """
    lower_bound = ser.quantile(0.01)
    upper_bound = ser.quantile(0.99)
    return ser.clip(lower=lower_bound, upper=upper_bound)


# Winsorize and then z-score each characteristic within every monthly cross-section.
# transform (rather than apply) is used because it always returns a result aligned
# to the original row index; on pandas >= 2.0, apply prepends the group key to the
# index, which makes the column assignment fail.
for char in ["acc", "agr"]:
    data[char] = data.groupby("date")[char].transform(winsorize)
    data[char] = data.groupby("date")[char].transform(
        lambda x: (x - x.mean()) / x.std()
    )

### Fama-MacBeth regressions

In [5]:
def xreg(df):
    """Fit the OLS regression ``ret ~ acc + agr`` on ``df``.

    Returns the fitted parameter vector (``result.params``) of that regression.
    """
    return smf.ols("ret ~ acc + agr", data=df).fit().params

# Run xreg separately on each date group and collect the per-date parameters.
fm = (
    data
    .groupby("date")
    .apply(xreg)
)
# fm.head(3)

### t tests with Newey-West standard errors

In [6]:
def ttest(name, data):
    """Test whether the mean of column ``name`` in ``data`` is zero.

    Regresses the column on a constant only and fits with a HAC covariance
    (Bartlett kernel, 12 lags). Returns the coefficient table
    (``summary2().tables[1]``) of that fit.
    """
    formula = name + " ~ 1"
    fitted = smf.ols(formula, data=data).fit(
        cov_type="HAC",
        cov_kwds={"kernel": "bartlett", "maxlags": 12},
    )
    coef_table = fitted.summary2().tables[1]
    return coef_table

# ttest("acc", data=fm)

# Disabled code kept as a bare string literal (not executed; the cell only echoes it).
# If enabled, it would run ttest for acc and agr, stack the two one-row tables,
# and print the rounded result as LaTeX.
"""
table = None
for char in ["acc", "agr"]:
    result = ttest(char, data=fm)
    result.index = [char]
    table = pd.concat((table, result))

print(table.round(3).to_latex())
"""

'\ntable = None\nfor char in ["acc", "agr"]:\n    result = ttest(char, data=fm)\n    result.index = [char]\n    table = pd.concat((table, result))\n\nprint(table.round(3).to_latex())\n'

### Fama-MacBeth with linearmodels

In [21]:
# NOTE(review): the dtypes displayed for this cell already include Mkt_RF, SMB, HML,
# RF and ret_RF, which are only created by the Fama-French merge cell further down
# (In [7]). The cell was therefore executed after that merge, not in document order.
data.dtypes

permno        int64
date      period[M]
ret         float64
acc         float64
agr         float64
Mkt_RF      float64
SMB         float64
HML         float64
RF          float64
ret_RF      float64
dtype: object

In [25]:
# linearmodels needs an (entity, time) MultiIndex whose time level is numeric or
# datetime-like, so the monthly Period dates are converted to timestamps on a copy.
# data.date itself is left untouched for the other cells that group or merge on it.
panel = (
    data
    .assign(date=data.date.dt.to_timestamp())
    .set_index(["permno", "date"])
)
model = FamaMacBeth.from_formula("ret ~ 1 + acc + agr", data=panel)
# FamaMacBeth.fit does not accept the statsmodels-style cov_type="HAC"/cov_kwds;
# its HAC estimator is cov_type="kernel", configured through kernel and bandwidth.
result = model.fit(cov_type="kernel", kernel="bartlett", bandwidth=12)
# summary is a property on linearmodels results, so it is accessed without parentheses.
result.summary

KeyError: ignored

In [27]:
# Sanity check of FamaMacBeth on linearmodels' own synthetic panel, independent of
# the stock data. Note that this rebinds `result`.
from linearmodels.panel import generate_panel_data
panel_data = generate_panel_data()
mod = FamaMacBeth.from_formula("y ~ 1 + x1", data=panel_data.data)
result = mod.fit()
# summary is a property on linearmodels results, so it is accessed without parentheses.
result.summary

KeyError: ignored

In [26]:
# Preview the frame under the (permno, date) MultiIndex used for the FamaMacBeth call.
# set_index returns a new frame here; `data` itself is not modified.
data.set_index(["permno", "date"])

Unnamed: 0_level_0,Unnamed: 1_level_0,ret,acc,agr,Mkt_RF,SMB,HML,RF,ret_RF
permno,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10025,2012-10-01,0.055125,0.235141,0.323966,-0.0176,-0.0117,0.0356,0.0001,0.055025
10025,2012-11-01,-0.053652,0.230652,0.323870,0.0078,0.0061,-0.0083,0.0001,-0.053752
10025,2012-12-01,-0.020992,0.228728,0.323519,0.0118,0.0152,0.0353,0.0001,-0.021092
10025,2013-01-01,0.086949,0.230190,0.330866,0.0557,0.0031,0.0095,0.0000,0.086949
10025,2013-02-01,0.095527,0.237556,0.337472,0.0129,-0.0033,0.0010,0.0000,0.095527
...,...,...,...,...,...,...,...,...,...
93436,2021-08-01,0.070605,-0.646157,1.247115,0.0291,-0.0046,-0.0015,0.0000,0.070605
93436,2021-09-01,0.054042,-0.647764,1.245101,-0.0437,0.0075,0.0506,0.0000,0.054042
93436,2021-10-01,0.436530,-0.642846,1.262135,0.0665,-0.0230,-0.0048,0.0000,0.436530
93436,2021-11-01,0.027612,-0.645973,1.298456,-0.0155,-0.0136,-0.0042,0.0000,0.027612


### Fama-French factors

In [7]:
# Download the Fama-French research factors (first table of the dataset) and divide
# by 100 -- presumably converting percent to decimal returns; confirm against the source.
ff = pdr("F-F_Research_Data_Factors", "famafrench", start=2000)[0] / 100

# Attach the factors to every stock-month row by date, give the market factor a
# formula-friendly name, and add the stock's excess return over RF.
data = (
    data
    .merge(ff, left_on="date", right_index=True, how="left")
    .rename(columns={"Mkt-RF": "Mkt_RF"})
    .assign(ret_RF=lambda d: d.ret - d.RF)
)

### Rolling window betas

In [8]:
def rolling_betas(df):
    """Estimate rolling factor loadings of ``ret_RF`` on Mkt_RF, SMB and HML.

    ``df`` is one group's rows and must contain a ``date`` column plus the
    regression variables. Groups with fewer than 24 rows are skipped (``None``
    is returned). Otherwise a RollingOLS is fitted with a window of at most
    60 rows, ``min_nobs=24`` and ``expanding=True``, and the three factor
    coefficients are returned indexed by date with all-incomplete rows dropped.
    """
    n_obs = df.shape[0]
    if n_obs < 24:
        # Not enough history to estimate anything for this group.
        return None

    by_date = df.set_index("date")
    fitted = RollingOLS.from_formula(
        "ret_RF ~ Mkt_RF + SMB + HML",
        data=by_date,
        window=min(n_obs, 60),
        min_nobs=24,
        expanding=True,
    ).fit()

    # Keep only the factor loadings; the intercept column is not returned.
    factor_cols = ['Mkt_RF', 'SMB', 'HML']
    return fitted.params[factor_cols].dropna()
    
    
# Apply rolling_betas to each permno group and collect the results.
betas = (
    data
    .groupby("permno")
    .apply(rolling_betas)
)
# betas.head(3)

### Quantile sorts

In [9]:
def decile(df, char):
    """Mean of ``ret`` within each decile of ``df[char]``.

    Rows are split into ten quantile bins of ``df[char]`` labelled 1 through 10
    (via ``pd.qcut``); the result is a Series of mean ``ret`` indexed by bin label.
    """
    bins = pd.qcut(df[char], q=10, labels=range(1, 11))
    return df["ret"].groupby(bins).mean()

# For every date, the mean return of each decile bin, sorting once on accruals
# and once on asset growth. `char` is forwarded to decile by groupby.apply.
rets_acc = data.groupby("date").apply(decile, char="acc")
rets_agr = data.groupby("date").apply(decile, char="agr")

# rets_agr.head(3)