In [None]:
import datetime as dt
import numpy as np
import polars as pl

from factor_momentum import RollingPCA, FactorMomentumSignal, FACTORS
from factor_momentum._loaders import _scan_monthly_pc_returns
from sf_quant.data import load_factors


In [None]:
# Date window handed to the factor loader below.
start = dt.date(2001, 1, 1)
end = dt.date(2020, 1, 1)
# Load the FACTORS series for the window and keep the frame lazy.
factor_returns = load_factors(start, end, FACTORS).lazy()
# Rolling PCA engine configured with 5 components and a lookback window of 30.
pca_engine = RollingPCA(n_components=5, lookback_window=30)

In [None]:
# Fit and apply the rolling PCA over [start, end] on the lazily loaded factor returns.
# NOTE(review): the following cell unpivots this result on 'date', so it is presumably a
# wide frame with a 'date' column plus one column per principal component — confirm.
pcs = pca_engine.fit_transform_rolling_monthly(start, end, factor_returns)
pcs  # bare last expression so the notebook renders the frame

In [None]:
# Collapse the per-date component returns in `pcs` into one row per
# (factor, calendar month).
#
# Output columns:
#   factor  - name of the unpivoted `pcs` column
#   month   - earliest `date` seen for that factor inside the calendar month
#   ret     - sum over the month of log(1 + ret * 0.01)
#   lag_ret - the same sum taken over the one-row-lagged return
#
# NOTE(review): the 0.01 scaling presumes `pcs` holds returns in percent — confirm.
# NOTE(review): `lag_ret` is lagged by one row (presumably one trading day), not by
# one month, before the monthly sum — confirm that is the intended input.
(
    pcs.unpivot(index='date', variable_name='factor', value_name='ret')
    .lazy()
    # shift(1) is positional: pin the row order explicitly rather than relying on
    # `pcs` happening to arrive sorted by date.
    .sort(['factor', 'date'])
    .with_columns(
        pl.col('date').dt.truncate('1mo').alias('mo'),
        pl.col('ret').shift(1).over('factor').alias('lag_ret'),
    )
    .group_by(['factor', 'mo'])
    .agg(
        # min() instead of first(): identical on sorted data, but does not depend
        # on the row order inside each group.
        pl.col('date').min().alias('month'),
        # Native log1p replaces np.log(1 + x): stays a polars expression and is
        # more accurate when x is close to zero.
        (pl.col('ret') * .01).log1p().sum().alias('ret'),
        (pl.col('lag_ret') * .01).log1p().sum().alias('lag_ret'),
    )
    .drop('mo')
    .sort(['factor', 'month'])
    .collect()
)

In [6]:
# Materialise the lazy result of the package loader for the same date window so it can
# be inspected in the notebook.
# NOTE(review): the positional 5 and 100 are presumably the number of components and a
# lookback window; the engine built earlier in this notebook uses lookback_window=30 —
# confirm the difference is intentional.
_scan_monthly_pc_returns(start, end, 5, 100).collect()

Fitting rolling PCA...


Rolling PCA: 100%|██████████| 227/227 [00:00<00:00, 540.04it/s]


Transforming rolling PCA...


Transforming PCA: 100%|██████████| 227/227 [00:00<00:00, 589.64it/s]


factor,month,ret,lag_ret
str,date,f64,f64
"""pc0""",2001-02-01,-0.087645,-0.075076
"""pc0""",2001-03-01,-0.010698,-0.053183
"""pc0""",2001-04-02,0.141216,0.156321
"""pc0""",2001-05-01,-0.02549,-0.02214
"""pc0""",2001-06-01,0.03978,0.029743
…,…,…,…
"""pc4""",2019-08-01,-0.011225,0.004061
"""pc4""",2019-09-03,0.084635,0.083869
"""pc4""",2019-10-01,-0.014121,0.014998
"""pc4""",2019-11-01,0.05841,0.04814
