In [1]:
import datetime as dt
import numpy as np
import polars as pl

from factor_momentum import PcaEngine, FACTORS, TMP
from sf_quant.data import load_factors

  from .autonotebook import tqdm as notebook_tqdm
2026-02-09 18:00:43,903	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [None]:
from research.factor_momentum_service import FactorMomentumService

In [2]:
# Sample window passed to the factor loader.
start = dt.date(2001, 1, 1)
end = dt.date(2020, 1, 1)

# Load the returns of the FACTORS universe for the window and switch to a lazy frame.
factor_returns = load_factors(start, end, FACTORS).lazy()

# PCA configuration: 5 components with a lookback window of 252
# (presumably ~1 trading year of daily rows -- TODO confirm the unit).
pca_engine = PcaEngine(
    n_components=5,
    lookback_window=252,
)

In [3]:
# Fit the PCA engine on `factor_returns` and project it with the rolling-monthly variant.
# The result is consumed below as a frame with a `date` column, a `state` column
# and one column per principal component.
pc_rolling_returns = pca_engine.fit_transform_rolling_monthly(factor_returns)

Fitting rolling PCA...


Rolling PCA: 100%|██████████| 227/227 [00:00<00:00, 1041.39it/s]


Transforming rolling PCA...


Rolling PCA: 100%|██████████| 227/227 [00:00<00:00, 1674.03it/s]


In [None]:
# Expanding-window counterpart of the rolling fit, anchored at `start`.
# NOTE(review): this cell has no execution count, yet `pc_expanding_returns` is
# read by a later `.describe()` cell -- it must be run for that cell to work.
pc_expanding_returns = pca_engine.fit_transform_expanding_monthly(start, factor_returns)

In [4]:
# Reshape the rolling-PC returns to long form (one row per date and component)
# and aggregate them into one log return per (factor, calendar month).
pcs = (
    pc_rolling_returns
    .drop('state')
    .unpivot(index='date', variable_name='factor', value_name='ret')
    .lazy()
    .with_columns(
        # Calendar-month bucket; used only as a grouping key and dropped afterwards.
        pl.col('date').dt.truncate('1mo').alias('mo'),
    )
    .group_by(['factor', 'mo'])
    .agg(
        # Label each bucket with its earliest observed date. `min()` gives the
        # same label as `first()` on date-sorted input but does not depend on
        # the incoming row order.
        pl.col('date').min().alias('month'),
        # Monthly log return: sum of log(1 + ret * 0.01), computed with the
        # native polars `log1p` rather than a NumPy ufunc on an expression.
        # Assumes `ret` is quoted in percent -- TODO confirm.
        (pl.col('ret') * .01).log1p().sum().alias('ret'),
    )
    .drop('mo')
    .sort(['factor', 'month'])
    .with_columns(
        # Previous month's return of the same factor (null for its first month).
        pl.col('ret').shift(1).over('factor').alias('lag_ret')
    )
    .collect()
)

In [None]:
# Build the project service for the same sample window.
service = FactorMomentumService(start=start, end=end)

# NOTE(review): this rebinds `pcs`, replacing the frame built inline from
# `pc_rolling_returns` above. Presumably the service returns the same
# month/factor/ret/lag_ret layout -- confirm before relying on downstream cells.
pcs = service.get_rolling_pcs(n_components=5, lookback_window=252)

In [None]:
# Display the current `pcs` frame (whichever cell bound it last).
pcs

In [5]:
# Cross-sectional momentum signal: within each month, rank factors by their
# lagged return and go short the bottom half (-1), long the top half (+1) and
# flat on the middle rank (0).
signal = (
    pcs
    .with_columns(
        pl.col('lag_ret').rank('dense').over('month').alias('rank'),
        # Midpoint of the ranks available in the month, derived from the number
        # of non-null lagged returns instead of a hard-coded 3. With five ranked
        # factors this is exactly 3.0, so results for that case are unchanged.
        ((pl.col('lag_ret').count().over('month') + 1) / 2).alias('mid_rank'),
    )
    .with_columns(
        # A null rank (no lagged return yet) makes both comparisons null and
        # falls through to 0.
        pl.when(pl.col('rank') < pl.col('mid_rank'))
        .then(-1)
        .when(pl.col('rank') > pl.col('mid_rank'))
        .then(1)
        .otherwise(0)
        .alias('signal')
    )
    # Helper column only; keep the schema of `signal` as before.
    .drop('mid_rank')
)

# Monthly return of each signal bucket and the long-short spread.
# NOTE(review): bucket returns are the *sum* of member log returns, not an
# equal-weighted average -- confirm this is the intended weighting.
ports = (
    signal
    .group_by(['month', 'signal'])
    .agg(pl.col('ret').sum())
    # Sorting on `signal` as well makes the pivoted column order deterministic;
    # `group_by` alone does not guarantee row order.
    .sort(['month', 'signal'])
    .pivot(on='signal', index='month', values='ret')
    .with_columns(
        (pl.col('1') - pl.col('-1')).alias('ls')
    )
)

In [8]:
# Display the monthly bucket returns (`0`, `-1`, `1`) and the long-short spread `ls`.
ports

month,0,-1,1,ls
date,f64,f64,f64,f64
2001-02-01,-0.200769,,,
2001-03-01,0.059192,0.053941,-0.065376,-0.119316
2001-04-02,0.141216,-0.023559,0.180721,0.20428
2001-05-01,0.045299,-0.080984,-0.047048,0.033935
2001-06-01,0.043115,0.002455,0.165489,0.163034
…,…,…,…,…
2019-08-01,-0.114326,0.007111,-0.022918,-0.030028
2019-09-03,-0.018106,0.081426,0.093327,0.011901
2019-10-01,0.006172,0.042372,0.008778,-0.033594
2019-11-01,-0.001066,0.002433,-0.023621,-0.026054


In [None]:
# Persist the per-factor monthly signal table.
# NOTE(review): the file name says "expanding", but the visible cells build
# `signal` from the rolling PCs -- confirm the intended output name.
signal_path = "PCA_signals_expanding.parquet"
signal.write_parquet(signal_path)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Cumulative (log-space) performance of the winner, loser and long-short legs.
dates = ports['month']

long = ports['1'].cum_sum()
short = ports['-1'].cum_sum()
ls = ports['ls'].cum_sum()

# Explicit Axes interface, with axis labels so the figure stands on its own.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("Logspace Cumulative PC Factor Returns (Rolling Window)")

ax.plot(dates, long, label="cross-sectional winners", c='green', linestyle='--')
ax.plot(dates, short, label="cross-sectional losers", c='red', linestyle='--')
ax.plot(dates, ls, label="long-short", c='black', linewidth=2)

ax.set_xlabel("Month")
ax.set_ylabel("Cumulative log return")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Annualised mean / volatility ratio of the monthly long-short spread
# (12 periods per year).
ls_monthly = ports['ls']

ret = 12 * ls_monthly.mean()
risk = np.sqrt(12) * ls_monthly.std()

ret / risk

In [None]:
# Summary statistics of the expanding-window PC returns.
# NOTE(review): requires the (unexecuted) cell that defines `pc_expanding_returns`.
pc_expanding_returns.describe()

In [None]:
# Inspect the per-date state kept by the engine; the next cell reads it as a
# mapping from date to a record with an "explained_var" entry.
# NOTE(review): this displays the whole mapping -- consider showing a small slice.
pca_engine.states

In [None]:
# Pull the fit dates and the matching "explained_var" entries out of the
# engine's per-date state mapping, in the mapping's own iteration order.
states = pca_engine.states
dates = list(states)
explained_var = [state["explained_var"] for state in states.values()]

In [None]:
# Explained variance of the fitted PCs at each fit date.
fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(dates, explained_var)
ax.set_title("Explained Variance by each PC over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Explained Variance")
ax.grid(True)
plt.show()