In [55]:
import polars as pl
import datetime as dt
import numpy as np

from finance_byu.regtables import Regtable
import statsmodels.formula.api as smf

from factor_momentum._loaders import _scan_monthly_factor_returns

In [59]:
from dotenv import load_dotenv
import os 

load_dotenv()

tmp = os.getenv('TMP')

deciles = pl.read_parquet(f'{tmp}/deciles.parquet')

In [65]:
f_daily = pl.read_parquet(f'{tmp}/fama_french_factors.parquet').rename({"mkt_minus_rf": "mrk_excess"})

f_monthly = (f_daily.with_columns(
    pl.col('date').dt.truncate("1mo").alias('month')
)
.group_by(['month']).agg(
    [
        (np.log1p(pl.col(col).mul(.01)).sum())
        for col in f_daily.columns[1:]
    ]
)
.sort('month')
)

In [66]:
merged = deciles.join(f_monthly, on="month", how="inner")
merged = merged.with_columns(
    pl.col('spread').sub(pl.col('rf')).alias('spread_excess')
)
merged.head()

month,p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,spread,mrk_excess,smb,hml,rmw,cma,rf,spread_excess
date,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2000-01-01,-0.077941,-0.053759,-0.044435,-0.041373,-0.034395,-0.055832,-0.044371,-0.04678,-0.016186,-0.026718,0.051223,-0.048226,0.040915,-0.014719,-0.065891,0.043973,0.0042,0.047023
2000-02-01,-0.068878,-0.040562,-0.045713,-0.032525,-0.035441,-0.016583,0.026069,0.062049,0.177871,0.224034,0.292912,0.02422,0.167663,-0.086641,-0.172029,-0.02094,0.0044,0.288512
2000-03-01,-0.159115,-0.078571,-0.002715,0.021903,0.048849,0.074039,0.080464,0.092996,0.072201,0.081938,0.241053,0.050417,-0.157378,0.083194,0.11359,-0.017522,0.0046,0.236454
2000-04-01,-0.332546,-0.189313,-0.069091,-0.048914,-0.031352,-0.000109,0.014821,0.008579,0.015332,0.013506,0.346053,-0.065696,-0.049117,0.061543,0.073317,0.057408,0.004559,0.341493
2000-05-01,-0.212628,-0.136876,-0.092194,-0.058831,-0.039797,-0.005392,-0.006733,0.006231,0.012894,0.02639,0.239019,-0.044762,-0.040313,0.0428,0.03868,0.011783,0.005059,0.233959


In [69]:
df = merged.to_pandas()

regformulas =  ['spread_excess ~ mrk_excess',
                'spread_excess ~ mrk_excess + smb + hml',
                'spread_excess ~ mrk_excess + smb + hml + rmw + cma']

reg = [smf.ols(f, df).fit() for f in regformulas]
tbl = Regtable(reg)
tbl.render()

Unnamed: 0,spread_excess,spread_excess.1,spread_excess.2
Intercept,0.080,0.079,0.074
,(15.46),(15.42),(14.29)
mrk_excess,-0.644,-0.614,-0.355
,(-5.50),(-5.11),(-2.60)
smb,,-0.082,0.071
,,(-0.47),(0.37)
hml,,0.462,-0.138
,,(2.84),(-0.62)
rmw,,,0.588
,,,(2.41)


In [70]:
np.exp(0.08)

np.float64(1.0832870676749586)