In [3]:
import polars as pl
import numpy as np
import sf_quant.optimizer as sfo
import sf_quant.data as sfd
import datetime as dt
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm
2026-02-02 12:05:23,329	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [4]:
# Date range handed to the asset loader.
start, end = dt.date(1995, 6, 30), dt.date(2024, 12, 31)

# Columns requested from the loader.
columns = [
    "date", "barrid", "ticker",
    "price", "return",
    "specific_risk", "predicted_beta",
]

data = sfd.load_assets(start=start, end=end, in_universe=True, columns=columns)

data

date,barrid,ticker,price,return,specific_risk,predicted_beta
date,str,str,f64,f64,f64,f64
2013-07-31,"""USA06Z1""","""MDXG""",6.26,-0.1595,55.056916,0.34349
2013-08-01,"""USA06Z1""","""MDXG""",6.32,0.9585,55.028021,0.353329
2013-08-02,"""USA06Z1""","""MDXG""",6.31,-0.1582,54.807402,0.363624
2013-08-05,"""USA06Z1""","""MDXG""",6.45,2.2187,54.76671,0.356596
2013-08-06,"""USA06Z1""","""MDXG""",6.29,-2.4806,54.692162,0.399196
…,…,…,…,…,…,…
2024-12-24,"""USBQOR1""","""ECG""",70.58,2.5872,26.800417,1.287294
2024-12-26,"""USBQOR1""","""ECG""",73.61,4.293,27.172284,1.288943
2024-12-27,"""USBQOR1""","""ECG""",69.85,-5.108,27.468053,1.294801
2024-12-30,"""USBQOR1""","""ECG""",66.87,-4.2663,27.353407,1.272111


In [5]:
def computeFactors(
    data: pl.DataFrame,
    momentum_window: int = 230,
    momentum_skip: int = 22,
    reversal_window: int = 22,
    reversal_skip: int = 1,
) -> pl.LazyFrame:
    """
    Build volatility-adjusted momentum, mean-reversion and BAB signals.

    The frame is sorted by (barrid, date); `return` and `specific_risk` are
    divided by 100 (converted to fractional space) and overwritten in place,
    so the output carries the rescaled values, not the loader's originals.

    Signals (each divided by the rescaled `specific_risk`):
      * momentum_12m_voladj: sum of log returns over `momentum_window` rows,
        lagged by `momentum_skip` rows.
      * meanrev_1m_voladj: negated sum of log returns over `reversal_window`
        rows, lagged by `reversal_skip` rows.
      * bab_voladj: negated `predicted_beta`.

    Windows and lags count rows within each `barrid`, not calendar days.
    A rolling signal is null until a barrid has `window + skip` rows.

    Args:
        data: Frame with `date`, `barrid`, `return`, `specific_risk` and
            `predicted_beta` columns; any other columns pass through untouched.
        momentum_window: Rows summed for the momentum signal.
        momentum_skip: Rows by which the momentum sum is lagged.
        reversal_window: Rows summed for the mean-reversion signal.
        reversal_skip: Rows by which the mean-reversion sum is lagged.

    Returns:
        A LazyFrame holding the input columns plus the three `*_voladj`
        columns. Nothing is executed until `.collect()` is called.
    """
    risk = pl.col("specific_risk")
    # Evaluated after the rescaling step below, so `return` is already fractional here.
    log_return = pl.col("return").log1p()

    # Rolling sum and lag are chained inside a single per-barrid window.
    momentum = (
        log_return.rolling_sum(window_size=momentum_window)
        .shift(momentum_skip)
        .over("barrid")
    )
    reversal = -(
        log_return.rolling_sum(window_size=reversal_window)
        .shift(reversal_skip)
        .over("barrid")
    )
    bab = -pl.col("predicted_beta")

    def _vol_adjusted(raw: pl.Expr, name: str) -> pl.Expr:
        # A non-positive risk would produce +/-inf; a single inf turns the
        # cross-sectional mean/std of that date into NaN downstream, so emit
        # null for that row instead.
        return pl.when(risk > 0).then(raw / risk).otherwise(None).alias(name)

    return (
        data.lazy()
        .sort(["barrid", "date"])
        # --- Converting return and specific risk into fractional space ---
        .with_columns([
            pl.col("specific_risk") / 100,
            pl.col("return") / 100,
        ])
        # --- Vol adjustment using Barra's specific_risk ---
        .with_columns([
            _vol_adjusted(momentum, "momentum_12m_voladj"),
            _vol_adjusted(reversal, "meanrev_1m_voladj"),
            _vol_adjusted(bab, "bab_voladj"),
        ])
    )

In [6]:

# computeFactors returns a LazyFrame: this only builds the query plan, nothing is computed yet
lazy_factors = computeFactors(data)

# collect() triggers execution and materialises the result as an eager DataFrame
factors = lazy_factors.collect()

# Display the result; `return` and `specific_risk` now hold fractions (computeFactors divides both by 100)
factors

date,barrid,ticker,price,return,specific_risk,predicted_beta,momentum_12m_voladj,meanrev_1m_voladj,bab_voladj
date,str,str,f64,f64,f64,f64,f64,f64,f64
2013-07-31,"""USA06Z1""","""MDXG""",6.26,-0.001595,0.550569,0.34349,,,-0.623882
2013-08-01,"""USA06Z1""","""MDXG""",6.32,0.009585,0.55028,0.353329,,,-0.642089
2013-08-02,"""USA06Z1""","""MDXG""",6.31,-0.001582,0.548074,0.363624,,,-0.663458
2013-08-05,"""USA06Z1""","""MDXG""",6.45,0.022187,0.547667,0.356596,,,-0.651118
2013-08-06,"""USA06Z1""","""MDXG""",6.29,-0.024806,0.546922,0.399196,,,-0.729896
…,…,…,…,…,…,…,…,…,…
2024-12-24,"""USBQOR1""","""ECG""",70.58,0.025872,0.268004,1.287294,,,-4.803261
2024-12-26,"""USBQOR1""","""ECG""",73.61,0.04293,0.271723,1.288943,,,-4.743595
2024-12-27,"""USBQOR1""","""ECG""",69.85,-0.05108,0.274681,1.294801,,,-4.713843
2024-12-30,"""USBQOR1""","""ECG""",66.87,-0.042663,0.273534,1.272111,,,-4.650649


In [7]:
# Columns carried alongside every individual signal.
_BASE_COLUMNS = ["date", "barrid", "ticker", "price", "return", "specific_risk", "predicted_beta"]


def _single_signal(frame: pl.DataFrame, factor_column: str) -> pl.DataFrame:
    """Keep the base columns plus one factor, exposed under the common name `signal_value`."""
    return (
        frame
        .select(_BASE_COLUMNS + [factor_column])
        .rename({factor_column: "signal_value"})
    )


# BAB factor
factors_bab = _single_signal(factors, "bab_voladj")

# Momentum factor
factors_mom = _single_signal(factors, "momentum_12m_voladj")

# Mean reversion factor
factors_meanrev = _single_signal(factors, "meanrev_1m_voladj")

In [8]:
def task_compute_alphas(signal: pl.DataFrame, ic: float = 0.05) -> pl.DataFrame:
    """
    Turn a raw signal into one alpha per security and date.

    Steps:
      1. Sort by (barrid, date) and forward-fill missing `specific_risk`
         within each barrid.
      2. Z-score `signal_value` across all rows sharing the same `date`
         (subtract the cross-sectional mean, divide by the standard deviation).
      3. alpha = ic * score * specific_risk.

    Args:
        signal (pl.DataFrame): Frame containing `date`, `barrid`, `ticker`,
            `price`, `return`, `specific_risk` and `signal_value` columns.
        ic (float): Constant multiplier applied to score * specific_risk.
            Defaults to 0.05, the value previously hard-coded here.
            NOTE(review): presumably an information coefficient -- confirm.

    Returns:
        pl.DataFrame: The input sorted by (barrid, date), with a new `alpha`
        column and without `price`, `return`, `ticker`, `signal_value` and
        `specific_risk`. Any other input columns (e.g. `predicted_beta`) are
        kept. `alpha` is null wherever the signal or the risk is null.
    """
    cross_sectional_score = (
        pl.col('signal_value')
        .sub(pl.col('signal_value').mean())
        .truediv(pl.col('signal_value').std())
        .over('date')
        .alias('score')
    )
    filled_risk = pl.col('specific_risk').fill_null(strategy='forward').over('barrid')

    return (
        signal
        # Sort first so the forward fill runs in date order within each barrid.
        .sort('barrid', 'date')
        # The two expressions are independent, so they share one pass.
        .with_columns(filled_risk, cross_sectional_score)
        .with_columns(
            pl.lit(ic).mul(pl.col('score')).mul(pl.col('specific_risk')).alias('alpha')
        )
        # with_columns never reorders rows, so the frame is still sorted by
        # (barrid, date) here and no second sort is needed.
        .drop(["price", "return", "ticker", "score", "signal_value", "specific_risk"])
    )

# Apply the same alpha construction to each signal frame, in the original order.
mom_alpha, meanrev_alpha, bab_alpha = (
    task_compute_alphas(signal_frame)
    for signal_frame in (factors_mom, factors_meanrev, factors_bab)
)

In [9]:
# Momentum alphas; null wherever the momentum signal is null, e.g. before a barrid has enough rows for the rolling window plus lag
mom_alpha

date,barrid,predicted_beta,alpha
date,str,f64,f64
2013-07-31,"""USA06Z1""",0.34349,
2013-08-01,"""USA06Z1""",0.353329,
2013-08-02,"""USA06Z1""",0.363624,
2013-08-05,"""USA06Z1""",0.356596,
2013-08-06,"""USA06Z1""",0.399196,
…,…,…,…
2024-12-24,"""USBQOR1""",1.287294,
2024-12-26,"""USBQOR1""",1.288943,
2024-12-27,"""USBQOR1""",1.294801,
2024-12-30,"""USBQOR1""",1.272111,


In [10]:
# Mean-reversion alphas; null wherever the reversal signal is null, e.g. before a barrid has enough rows for the rolling window plus lag
meanrev_alpha

date,barrid,predicted_beta,alpha
date,str,f64,f64
2013-07-31,"""USA06Z1""",0.34349,
2013-08-01,"""USA06Z1""",0.353329,
2013-08-02,"""USA06Z1""",0.363624,
2013-08-05,"""USA06Z1""",0.356596,
2013-08-06,"""USA06Z1""",0.399196,
…,…,…,…
2024-12-24,"""USBQOR1""",1.287294,
2024-12-26,"""USBQOR1""",1.288943,
2024-12-27,"""USBQOR1""",1.294801,
2024-12-30,"""USBQOR1""",1.272111,


In [11]:
# Betting-against-beta alphas; the signal uses only same-row predicted_beta and specific_risk, so it needs no history
bab_alpha

date,barrid,predicted_beta,alpha
date,str,f64,f64
2013-07-31,"""USA06Z1""",0.34349,0.055818
2013-08-01,"""USA06Z1""",0.353329,0.054927
2013-08-02,"""USA06Z1""",0.363624,0.054472
2013-08-05,"""USA06Z1""",0.356596,0.054335
2013-08-06,"""USA06Z1""",0.399196,0.053613
…,…,…,…
2024-12-24,"""USBQOR1""",1.287294,-0.007353
2024-12-26,"""USBQOR1""",1.288943,-0.006862
2024-12-27,"""USBQOR1""",1.294801,-0.006638
2024-12-30,"""USBQOR1""",1.272111,-0.006288


In [26]:
# Every alpha frame is keyed by (date, barrid), so those are the only join keys.
# predicted_beta is deliberately NOT a key: it is a float that can be null, and
# null keys never match in a join, so such rows would be emitted twice.
_SIGNAL_KEYS = ["date", "barrid"]


def _named_alpha(frame: pl.DataFrame, name: str) -> pl.DataFrame:
    """Keep the join keys and expose the frame's `alpha` column under `name`."""
    return frame.select(_SIGNAL_KEYS + ["alpha"]).rename({"alpha": name})


signals = (
    _named_alpha(mom_alpha, "momentum")
    # coalesce=True merges the key columns of both sides, so a row present on
    # only one side still carries its date and barrid instead of nulls.
    .join(
        _named_alpha(bab_alpha, "betting_against_beta"),
        on=_SIGNAL_KEYS,
        how="full",
        coalesce=True,
    )
    .join(
        _named_alpha(meanrev_alpha, "reversal"),
        on=_SIGNAL_KEYS,
        how="full",
        coalesce=True,
    )
    .select(["date", "barrid", "momentum", "betting_against_beta", "reversal"])
    # Missing alphas become 0.
    .fill_null(0)
)
signals

date,barrid,momentum,betting_against_beta,reversal
date,str,f64,f64,f64
2013-07-31,"""USA06Z1""",0.0,0.055818,0.0
2013-08-01,"""USA06Z1""",0.0,0.054927,0.0
2013-08-02,"""USA06Z1""",0.0,0.054472,0.0
2013-08-05,"""USA06Z1""",0.0,0.054335,0.0
2013-08-06,"""USA06Z1""",0.0,0.053613,0.0
…,…,…,…,…
2024-12-24,"""USBQOR1""",0.0,-0.007353,0.0
2024-12-26,"""USBQOR1""",0.0,-0.006862,0.0
2024-12-27,"""USBQOR1""",0.0,-0.006638,0.0
2024-12-30,"""USBQOR1""",0.0,-0.006288,0.0


In [27]:
# Persist the combined alpha table; the relative path resolves against the kernel's working directory
signals.write_parquet("signals.parquet")