In [1]:
import sf_quant.data as sfd
import polars as pl
import datetime as dt

# Sample period passed to the loader as its start/end bounds.
start = dt.date(1995, 6, 30)
end = dt.date(2024, 12, 31)

# Fields requested from the asset table: identifiers first, then the
# price/return series, then the two risk-model fields used downstream.
columns = [
    "date", "barrid", "ticker",
    "price", "return",
    "specific_risk", "predicted_beta",
]

# NOTE(review): in_universe=True is forwarded to sf_quant as-is; presumably it
# restricts rows to the tradable universe -- confirm against the sf_quant docs.
data = sfd.load_assets(start=start, end=end, in_universe=True, columns=columns)

data

  from .autonotebook import tqdm as notebook_tqdm
2026-02-02 11:53:14,984	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


date,barrid,ticker,price,return,specific_risk,predicted_beta
date,str,str,f64,f64,f64,f64
2013-07-31,"""USA06Z1""","""MDXG""",6.26,-0.1595,55.056916,0.34349
2013-08-01,"""USA06Z1""","""MDXG""",6.32,0.9585,55.028021,0.353329
2013-08-02,"""USA06Z1""","""MDXG""",6.31,-0.1582,54.807402,0.363624
2013-08-05,"""USA06Z1""","""MDXG""",6.45,2.2187,54.76671,0.356596
2013-08-06,"""USA06Z1""","""MDXG""",6.29,-2.4806,54.692162,0.399196
…,…,…,…,…,…,…
2024-12-24,"""USBQOR1""","""ECG""",70.58,2.5872,26.800417,1.287294
2024-12-26,"""USBQOR1""","""ECG""",73.61,4.293,27.172284,1.288943
2024-12-27,"""USBQOR1""","""ECG""",69.85,-5.108,27.468053,1.294801
2024-12-30,"""USBQOR1""","""ECG""",66.87,-4.2663,27.353407,1.272111


In [2]:
def computeFactors(
    data: pl.DataFrame,
    momentum_window: int = 230,
    momentum_lag: int = 22,
    meanrev_window: int = 22,
    meanrev_lag: int = 1,
) -> pl.LazyFrame:
    """
    Compute factor signals (12m momentum, 1m mean reversion, simple BAB)
    and volatility-adjust them using Barra's specific_risk column.

    The frame is sorted by (barrid, date) and every rolling/shift operation
    is evaluated per ``barrid``. Windows and lags count *rows*, not calendar
    days, so gaps in an asset's history stretch the effective look-back.

    Parameters
    ----------
    data
        Frame with at least the columns ``barrid``, ``date``, ``return``,
        ``specific_risk`` and ``predicted_beta``. ``return`` and
        ``specific_risk`` are divided by 100 here (assumed to arrive in
        percent -- confirm against the data source).
    momentum_window
        Number of rows summed for the momentum signal (default 230).
    momentum_lag
        Rows by which the momentum sum is lagged, i.e. the most recent rows
        skipped (default 22).
    meanrev_window
        Number of rows summed for the mean-reversion signal (default 22).
    meanrev_lag
        Rows by which the mean-reversion sum is lagged (default 1).

    Returns
    -------
    pl.LazyFrame
        The input columns (with ``return`` and ``specific_risk`` overwritten
        by their fractional values) plus ``momentum_12m_voladj``,
        ``meanrev_1m_voladj`` and ``bab_voladj``. Nothing is executed until
        the caller collects the plan.

    Raises
    ------
    ValueError
        If a window is smaller than 1 or a lag is negative (a negative lag
        would pull future rows into the signal).

    Notes
    -----
    Rows without enough history carry nulls in the signal columns. A return
    of -100% or lower, or a zero ``specific_risk``, yields non-finite values;
    neither case is filtered inside this function.
    """
    if momentum_window < 1 or meanrev_window < 1:
        raise ValueError("rolling window sizes must be >= 1")
    if momentum_lag < 0 or meanrev_lag < 0:
        raise ValueError("lags must be >= 0 to avoid look-ahead")

    log_return = pl.col("log_return")
    specific_risk = pl.col("specific_risk")

    # Sum of log returns over the window, lagged so the latest rows are skipped.
    momentum_12m = (
        log_return
        .rolling_sum(window_size=momentum_window)
        .shift(momentum_lag)
        .over("barrid")
    )

    # Short-horizon reversal: negated trailing sum of log returns.
    meanrev_1m = -(
        log_return
        .rolling_sum(window_size=meanrev_window)
        .shift(meanrev_lag)
        .over("barrid")
    )

    # BAB = -predicted_beta
    bab = -pl.col("predicted_beta")

    df = (
        data.lazy()
        .sort(["barrid", "date"])
        # --- Converting return and specific risk into fractional space ---
        .with_columns(
            pl.col("specific_risk").truediv(100),
            pl.col("return").truediv(100),
        )
        # Separate step: log1p must see the already-rescaled return column.
        .with_columns(pl.col("return").log1p().alias("log_return"))
        # --- Vol adjustment using Barra's specific_risk ---
        .with_columns(
            (momentum_12m / specific_risk).alias("momentum_12m_voladj"),
            (meanrev_1m / specific_risk).alias("meanrev_1m_voladj"),
            (bab / specific_risk).alias("bab_voladj"),
        )
        # Keep all original columns and add only the vol-adjusted factors
        .drop("log_return")
    )
    return df

In [4]:
# Build the query plan only; computeFactors hands back a LazyFrame.
lazy_factors = computeFactors(data)

# Execute the plan, then discard every row that still holds a NaN or a null.
factors = (
    lazy_factors
    .collect()
    .drop_nans()
    .drop_nulls()
)

factors

date,barrid,ticker,price,return,specific_risk,predicted_beta,momentum_12m_voladj,meanrev_1m_voladj,bab_voladj
date,str,str,f64,f64,f64,f64,f64,f64,f64
2014-07-30,"""USA06Z1""","""MDXG""",7.1,0.009957,0.489261,1.596398,0.207508,-0.070999,-3.262879
2014-07-31,"""USA06Z1""","""MDXG""",6.91,-0.026761,0.488345,1.567075,0.254954,-0.046676,-3.208952
2014-08-01,"""USA06Z1""","""MDXG""",6.81,-0.014472,0.486591,1.571838,0.343565,0.052848,-3.230305
2014-08-04,"""USA06Z1""","""MDXG""",7.08,0.039648,0.487891,1.601876,0.326598,0.189598,-3.283266
2014-08-05,"""USA06Z1""","""MDXG""",7.05,-0.004237,0.486467,1.59584,0.293525,0.090871,-3.280472
…,…,…,…,…,…,…,…,…,…
2024-12-27,"""USBPJV1""","""NLOP""",30.81,-0.019414,0.5150243,0.910719,1.024101,-0.019879,-1.768303
2024-12-30,"""USBPJV1""","""NLOP""",31.08,0.008763,0.513507,0.87324,1.018123,0.047452,-1.700543
2024-12-31,"""USBPJV1""","""NLOP""",31.21,0.004183,0.512844,0.890855,1.077896,0.016244,-1.737087
2024-12-30,"""USBPM41""","""WS""",31.32,-0.021556,0.348714,1.578052,1.188024,0.967815,-4.525346
