In [1]:
import datetime as dt
import numpy as np
import polars as pl

from factor_momentum import RollingPCA, FactorMomentumSignal, FACTORS
from factor_momentum._loaders import _scan_monthly_pc_returns
from sf_quant.data import load_factors


  from .autonotebook import tqdm as notebook_tqdm
2025-11-18 19:43:52,845	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
# Sample window for the study and the factor-return panel loaded for it.
start = dt.date(2001, 1, 1)
end = dt.date(2020, 1, 1)
factor_returns = load_factors(start, end, FACTORS).lazy()

# Rolling PCA engine: 5 components, lookback window of 100.
pca_engine = RollingPCA(n_components=5, lookback_window=100)

In [3]:
# Fit and apply the rolling PCA for the start/end dates on the lazy factor-return
# frame, then display the result. The rendered table has one row per date with
# columns pc0..pc4 plus a date-typed `state` column.
pc_returns = pca_engine.fit_transform_rolling_monthly(start, end, factor_returns)
pc_returns

Fitting rolling PCA...


Rolling PCA: 100%|██████████| 227/227 [00:00<00:00, 517.78it/s]


Transforming rolling PCA...


Rolling PCA: 100%|██████████| 227/227 [00:00<00:00, 577.43it/s]


date,pc0,pc1,pc2,pc3,pc4,state
date,f64,f64,f64,f64,f64,date
2001-02-01,-1.464667,-1.143598,0.180363,0.551172,1.143439,2001-02-01
2001-02-02,-1.481167,-0.150589,0.239467,0.383877,-0.276733,2001-02-01
2001-02-05,-0.997846,-0.57137,-0.960766,0.96356,0.80466,2001-02-01
2001-02-06,0.441192,-0.101307,-0.564052,-1.315537,-1.572022,2001-02-01
2001-02-07,0.071369,0.954979,-0.923436,1.117135,-1.701075,2001-02-01
…,…,…,…,…,…,…
2019-12-24,-0.116428,-1.345718,0.05031,-0.394584,-0.264584,2019-12-02
2019-12-26,1.138469,0.198301,1.053526,1.42063,0.196853,2019-12-02
2019-12-27,1.530432,0.588633,-0.567155,0.043286,-0.482651,2019-12-02
2019-12-30,0.845148,2.256438,-0.654312,-0.804148,1.330215,2019-12-02


In [None]:
# Collapse the PC returns to one log return per (factor, calendar month) and
# attach each factor's previous-month return as `lag_ret`.
pcs = (
    pc_returns
    # `state` is a date column, not a PC return. Without dropping it, unpivot
    # would treat it as one more value column (a bogus sixth "factor", or a
    # dtype error), so remove it first -- as the daily-signal cell already does.
    .drop('state')
    .unpivot(index='date', variable_name='factor', value_name='ret')
    .lazy()
    # Bucket every observation by the first day of its calendar month.
    .with_columns(pl.col('date').dt.truncate('1mo').alias('mo'))
    .group_by(['factor', 'mo'])
    .agg(
        # Label the month by the first date observed in the bucket.
        pl.col('date').first().alias('month'),
        # `ret` is scaled by 0.01 before compounding (presumably it is quoted
        # in percent -- TODO confirm); log returns are then summed per month.
        (pl.col('ret') * .01).log1p().sum().alias('ret'),
    )
    .drop('mo')
    # shift() below works on row order, so sort chronologically per factor.
    .sort(['factor', 'month'])
    .with_columns(pl.col('ret').shift(1).over('factor').alias('lag_ret'))
    .collect()
)
pcs

In [None]:
# Rebind `pcs` to the frame produced by the package's private loader, replacing
# the hand-built frame from the cell above.
# NOTE(review): the positional 5 and 100 match the n_components / lookback_window
# passed to RollingPCA earlier -- presumably the same parameters; confirm against
# the loader's signature.
pcs = _scan_monthly_pc_returns(start, end, 5, 100).collect()
pcs

In [None]:
# Cross-sectional signal: dense-rank the factors within each month on their
# lagged return; ranks above 3 get +1, ranks below 3 get -1, everything else 0.
signal = (
    pcs
    .with_columns(rank=pl.col('lag_ret').rank('dense').over('month'))
    .with_columns(
        signal=pl.when(pl.col('rank') > 3).then(1)
        .when(pl.col('rank') < 3).then(-1)
        .otherwise(0)
    )
)

# Sum the returns of the factors sharing a signal value in each month, then
# spread the signal values out into one column per value, indexed by month.
ports = (
    signal
    .group_by(['month', 'signal'])
    .agg(pl.col('ret').sum())
    .sort('month')
    .pivot(on='signal', index='month')
)

ports

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Cumulative (summed) monthly returns of the +1 and -1 signal portfolios and
# their difference, plotted against the month column.
dates = ports['month']
long = ports['1'].cum_sum()
short = ports['-1'].cum_sum()

fig, ax = plt.subplots()
ax.set_title("Logspace Cumulative PC Factor Returns")
ax.plot(dates, long, label="cross-sectional winners")
ax.plot(dates, short, label="cross-sectional losers")
ax.plot(dates, long - short, c='black', label="long-short")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Monthly spread between the +1 and -1 signal portfolios.
long_short = ports['1'] - ports['-1']

# Scale the monthly mean by 12 and the monthly standard deviation by sqrt(12),
# then display their ratio.
ret = long_short.mean() * 12
risk = long_short.std() * np.sqrt(12)

ret / risk

In [None]:
# Per-month turnover: the mean absolute change in each factor's signal relative
# to that factor's signal in the preceding row (the previous month).
turnover = (
    signal
    # shift() operates on row order within each factor, so order the rows
    # chronologically here instead of relying on however `signal` was sorted
    # upstream.
    .sort(['factor', 'month'])
    .with_columns(pl.col('signal').shift(1).over('factor').alias('lag_signal'))
    .with_columns(pl.col('signal').sub(pl.col('lag_signal')).alias('turnover'))
    .group_by('month')
    .agg(pl.col('turnover').abs().mean())
    .sort('month')
)

turnover

In [None]:
# Scatter of the per-month mean absolute signal change against the month.
dates = turnover['month']
mean_turnover = turnover['turnover']

fig, ax = plt.subplots()
ax.scatter(dates, mean_turnover)
plt.show()

In [4]:
# Re-display the PC return table (date, pc0..pc4, state) for reference.
pc_returns

date,pc0,pc1,pc2,pc3,pc4,state
date,f64,f64,f64,f64,f64,date
2001-02-01,-1.464667,-1.143598,0.180363,0.551172,1.143439,2001-02-01
2001-02-02,-1.481167,-0.150589,0.239467,0.383877,-0.276733,2001-02-01
2001-02-05,-0.997846,-0.57137,-0.960766,0.96356,0.80466,2001-02-01
2001-02-06,0.441192,-0.101307,-0.564052,-1.315537,-1.572022,2001-02-01
2001-02-07,0.071369,0.954979,-0.923436,1.117135,-1.701075,2001-02-01
…,…,…,…,…,…,…
2019-12-24,-0.116428,-1.345718,0.05031,-0.394584,-0.264584,2019-12-02
2019-12-26,1.138469,0.198301,1.053526,1.42063,0.196853,2019-12-02
2019-12-27,1.530432,0.588633,-0.567155,0.043286,-0.482651,2019-12-02
2019-12-30,0.845148,2.256438,-0.654312,-0.804148,1.330215,2019-12-02


In [10]:
# Per-date cross-sectional signal on the PC returns: drop the non-return
# `state` column, melt to long form, dense-rank the PCs within each date
# (rank 1 is the lowest return), then map rank < 3 to -1, rank > 3 to +1 and
# everything else to 0.
(
    pc_returns
    .drop('state')
    .unpivot(index='date', variable_name='factor', value_name='ret')
    .with_columns(rank=pl.col('ret').rank('dense').over('date'))
    .with_columns(
        signal=pl.when(pl.col('rank') < 3).then(-1)
        .when(pl.col('rank') > 3).then(1)
        .otherwise(0)
    )
)

date,factor,ret,rank,signal
date,str,f64,u32,i32
2001-02-01,"""pc0""",-1.464667,1,-1
2001-02-02,"""pc0""",-1.481167,1,-1
2001-02-05,"""pc0""",-0.997846,1,-1
2001-02-06,"""pc0""",0.441192,5,1
2001-02-07,"""pc0""",0.071369,3,0
…,…,…,…,…
2019-12-24,"""pc4""",-0.264584,3,0
2019-12-26,"""pc4""",0.196853,1,-1
2019-12-27,"""pc4""",-0.482651,2,-1
2019-12-30,"""pc4""",1.330215,4,1


In [None]:
# Materialize the lazy factor-return frame and display it.
factor_returns.collect()