In [1]:
import pandas as pd
import numpy as np
import sqlite3
import statsmodels.formula.api as smf

In [2]:
fintech_research_sql = sqlite3.connect(database="../data/fintech_research.sqlite")

crsp_monthly = pd.read_sql_query(
  sql="SELECT permno, gvkey, month, ret_excess, mktcap FROM crsp_monthly",
  con=fintech_research_sql,
  parse_dates={"month"}
)

compustat = pd.read_sql_query(
  sql="SELECT datadate, gvkey, be FROM compustat",
  con=fintech_research_sql,
  parse_dates={"datadate"}
)

beta = pd.read_sql_query(
  sql="SELECT month, permno, beta_monthly FROM beta",
  con=fintech_research_sql,
  parse_dates={"month"}
)


In [4]:
characteristics = (compustat
  .assign(month=lambda x: x["datadate"].dt.to_period("M").dt.to_timestamp())
  .merge(crsp_monthly, how="left", on=["gvkey", "month"], )
  .merge(beta, how="left", on=["permno", "month"])
  .assign(
    bm=lambda x: x["be"]/x["mktcap"],
    log_mktcap=lambda x: np.log(x["mktcap"]),
    sorting_date=lambda x: x["month"]+pd.DateOffset(months=6)
  )
  .get(["gvkey", "bm", "log_mktcap", "beta_monthly", "sorting_date"])
  .rename(columns={"beta_monthly": "beta"})
)

data_fama_macbeth = (crsp_monthly
  .merge(characteristics, 
         how="left",
         left_on=["gvkey", "month"], right_on=["gvkey", "sorting_date"])
  .sort_values(["month", "permno"])
  .groupby("permno")
  .apply(lambda x: x.assign(
      beta=x["beta"].fillna(method="ffill"),
      bm=x["bm"].fillna(method="ffill"),
      log_mktcap=x["log_mktcap"].fillna(method="ffill")
    )
  )
  .reset_index(drop=True)  
)

data_fama_macbeth_lagged = (data_fama_macbeth
  .assign(month=lambda x: x["month"]-pd.DateOffset(months=1))
  .get(["permno", "month", "ret_excess"])
  .rename(columns={"ret_excess": "ret_excess_lead"})
)

data_fama_macbeth = (data_fama_macbeth
  .merge(data_fama_macbeth_lagged, how="left", on=["permno", "month"])
  .get(["permno", "month", "ret_excess_lead", "beta", "log_mktcap", "bm"])
  .dropna()
)

data_fama_macbeth.reset_index(drop=True,inplace=True)

  beta=x["beta"].fillna(method="ffill"),
  bm=x["bm"].fillna(method="ffill"),
  log_mktcap=x["log_mktcap"].fillna(method="ffill")
  .apply(lambda x: x.assign(


Unnamed: 0,permno,month,ret_excess_lead,beta,log_mktcap,bm
86,10001,1991-12-01,-0.055124,0.071625,2.421834,0.924067
87,10001,1992-01-01,-0.202800,0.071625,2.421834,0.924067
88,10001,1992-02-01,0.078715,0.071625,2.421834,0.924067
89,10001,1992-03-01,0.007438,0.071625,2.421834,0.924067
90,10001,1992-04-01,0.007726,0.071625,2.421834,0.924067
...,...,...,...,...,...,...
3109995,93436,2022-07-01,-0.074389,1.993018,13.903696,0.027663
3109996,93436,2022-08-01,-0.039489,1.993018,13.903696,0.027663
3109997,93436,2022-09-01,-0.144468,1.993018,13.903696,0.027663
3109998,93436,2022-10-01,-0.147226,1.993018,13.903696,0.027663


In [6]:
data_fama_macbeth[data_fama_macbeth.month == '2022-07-01']

Unnamed: 0,permno,month,ret_excess_lead,beta,log_mktcap,bm
3535,10026,2022-07-01,0.097871,0.574225,7.978111,0.311078
4017,10028,2022-07-01,0.089829,0.271522,4.696698,0.252106
4579,10032,2022-07-01,-0.004242,1.126384,7.834326,0.409334
6302,10044,2022-07-01,-0.016693,1.074600,3.478034,0.585505
7167,10051,2022-07-01,0.001331,1.030612,6.553640,0.156985
...,...,...,...,...,...,...
3108350,93397,2022-07-01,-0.034260,0.957573,5.653398,0.698852
3109227,93423,2022-07-01,-0.024838,2.381711,8.205534,0.002317
3109376,93426,2022-07-01,0.098485,1.379503,6.148091,0.604816
3109823,93434,2022-07-01,0.345468,0.742299,4.896067,0.557719


In [7]:
risk_premiums = (data_fama_macbeth
  .groupby("month")
  .apply(lambda x: smf.ols(
      formula="ret_excess_lead ~ beta + log_mktcap + bm", 
      data=x
    ).fit()
    .params
  )
  .reset_index()
)

risk_premiums

  .apply(lambda x: smf.ols(


Unnamed: 0,month,Intercept,beta,log_mktcap,bm
0,1975-07-01,-0.104370,-0.042856,0.022335,0.009851
1,1975-08-01,0.098846,-0.034995,-0.008354,-0.023935
2,1975-09-01,-0.136747,-0.011310,0.036643,0.027918
3,1975-10-01,-0.000994,-0.005435,0.005202,0.014019
4,1975-11-01,-0.015983,0.005502,-0.000847,-0.004475
...,...,...,...,...,...
564,2022-07-01,0.054999,0.010165,-0.010303,-0.003062
565,2022-08-01,-0.145408,-0.020045,0.008190,-0.002178
566,2022-09-01,-0.068514,0.027130,0.013827,0.037009
567,2022-10-01,-0.098123,0.005851,0.015662,0.003702


In [8]:
price_of_risk = (risk_premiums
  .melt(id_vars="month", var_name="factor", value_name="estimate")
  .groupby("factor")["estimate"]
  .apply(lambda x: pd.Series({
      "risk_premium": 100*x.mean(),
      "t_statistic": x.mean()/x.std()*np.sqrt(len(x))
    })
  )
  .reset_index()
  .pivot(index="factor", columns="level_1", values="estimate")
  .reset_index()
)

In [9]:
price_of_risk_newey_west = (risk_premiums
  .melt(id_vars="month", var_name="factor", value_name="estimate")
  .groupby("factor")
  .apply(lambda x: (
      x["estimate"].mean()/ 
        smf.ols("estimate ~ 1", x)
        .fit(cov_type="HAC", cov_kwds={"maxlags": 6}).bse
    )
  )
  .reset_index()
  .rename(columns={"Intercept": "t_statistic_newey_west"})
)

(price_of_risk
  .merge(price_of_risk_newey_west, on="factor")
  .round(3)
)


  .apply(lambda x: (


Unnamed: 0,factor,risk_premium,t_statistic,t_statistic_newey_west
0,Intercept,1.336,4.945,4.301
1,beta,0.005,0.043,0.04
2,bm,0.119,2.353,2.023
3,log_mktcap,-0.094,-2.493,-2.429
