# SIZE & LIQUIDITY Factor Check
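This notebook evaluates the SIZE and LIQUIDITY factors against forward returns, using the information coefficient (IC), the rank IC (RankIC), and mean forward returns by factor quantile group.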

In [None]:
import os
import sys
import numpy as np
import pandas as pd

sys.path.insert(0, '/home/ubuntu/code/cb_cache')
import cb_cache as cbc


In [None]:
# Parameters
# Sample window handed to cbc.EqCache; the values look like YYYYMMDD integers —
# TODO confirm the date format cb_cache expects.
start_date = 20140102
end_date = 20251224
holding_days = 20  # forward return window, counted in rows of the daily close array
quantiles = 10  # number of factor-sorted groups in the quantile return table
root_path = '/home/ubuntu/scripts/cache_all'  # cache directory passed to cbc.EqCache


In [None]:
# Load data with cb_cache
# Build one cache over every symbol cb_cache reports, for the configured date window.
symbols = cbc.get_all_symbols()
cache = cbc.EqCache(symbols, start_date, end_date, root_path=root_path)

# Daily fields used by the rest of the notebook. Later cells index close / SIZE /
# LIQUIDITY as 2-D arrays and index dates with the same row positions —
# presumably rows are dates and columns are symbols; TODO confirm against cb_cache.
dates = cache.daily.dates
close = cache.daily.close
size = cache.daily.SIZE
liquidity = cache.daily.LIQUIDITY


In [None]:
# Compute forward returns
# fwd_ret[t] = close[t + holding_days] / close[t] - 1; rows that lack a full
# look-ahead window are left as NaN.
n_rows = close.shape[0]
fwd_ret = np.full_like(close, np.nan, dtype=np.float64)
if n_rows > holding_days:
    px_entry = close[:-holding_days, :]
    px_exit = close[holding_days:, :]
    fwd_ret[:-holding_days, :] = px_exit / px_entry - 1.0

# Align for valid rows
# Keep only the leading rows for which a complete forward window exists.
valid_idx = np.arange(n_rows - holding_days)
dates_eval = dates[valid_idx]
size_eval, liq_eval, ret_eval = (
    panel[valid_idx, :] for panel in (size, liquidity, fwd_ret)
)


In [None]:
def rank_ic(factor_row, ret_row, min_obs=10):
    """Compute the rank IC and the plain IC for one cross-section.

    Parameters
    ----------
    factor_row : array-like
        Factor values for one cross-section.
    ret_row : array-like
        Returns aligned element-wise with ``factor_row``.
    min_obs : int, default 10
        Minimum number of positions where both inputs are finite. Below this
        threshold no correlation is computed.

    Returns
    -------
    tuple of (float, float)
        ``(rank_ic, ic)`` in that order. ``rank_ic`` is the Pearson correlation
        of the percentile ranks of the two inputs; ``ic`` is the Pearson
        correlation of the raw values. Both are NaN when fewer than
        ``min_obs`` finite pairs are available.
    """
    # Coerce so that lists / Series work with boolean-mask indexing below.
    factor_row = np.asarray(factor_row)
    ret_row = np.asarray(ret_row)

    # Only positions where both the factor and the return are finite take part.
    mask = np.isfinite(factor_row) & np.isfinite(ret_row)
    if mask.sum() < min_obs:
        return np.nan, np.nan

    factor_vals = pd.Series(factor_row[mask])
    ret_vals = pd.Series(ret_row[mask])
    rank_corr = factor_vals.rank(pct=True).corr(ret_vals.rank(pct=True))
    raw_corr = factor_vals.corr(ret_vals)
    return rank_corr, raw_corr

def quantile_returns(factor_row, ret_row, q=10):
    """Mean return of each factor-sorted quantile group for one cross-section.

    Positions where either input is non-finite are ignored. The remaining
    factor values are ranked (ties broken by position), cut into ``q``
    equal-count groups, and the returns are averaged within each group.

    Returns a length-``q`` array ordered from the lowest-factor group to the
    highest; it is all NaN when fewer than ``q`` usable pairs exist.
    """
    usable = np.isfinite(factor_row) & np.isfinite(ret_row)
    if usable.sum() >= q:
        factor_vals = pd.Series(factor_row[usable])
        fwd_vals = pd.Series(ret_row[usable])
        # Ranking with method='first' gives unique ranks, so qcut never sees duplicate edges.
        ordinal = factor_vals.rank(method='first')
        bucket = pd.qcut(ordinal, q, labels=False)
        bucket_means = fwd_vals.groupby(bucket).mean()
        return bucket_means.reindex(range(q)).to_numpy()
    return np.full(q, np.nan)


In [None]:
# IC / RankIC time series
# rank_ic yields a (rank IC, IC) pair per cross-section; evaluate it on every
# evaluation date for each factor, then split the pairs into separate series.
n_eval = len(dates_eval)
size_pairs = [rank_ic(size_eval[t], ret_eval[t]) for t in range(n_eval)]
liq_pairs = [rank_ic(liq_eval[t], ret_eval[t]) for t in range(n_eval)]

size_ric = [pair[0] for pair in size_pairs]
size_ic = [pair[1] for pair in size_pairs]
liq_ric = [pair[0] for pair in liq_pairs]
liq_ic = [pair[1] for pair in liq_pairs]

ic_columns = {
    'date': dates_eval,
    'size_ic': size_ic,
    'size_rank_ic': size_ric,
    'liq_ic': liq_ic,
    'liq_rank_ic': liq_ric,
}
ic_df = pd.DataFrame(ic_columns)

ic_df.head()


In [None]:
# IC summary
def ic_summary(series):
    """Summarise an IC series after dropping missing values.

    Returns a Series with four entries: 'mean', 'std', 'icir' (mean divided by
    std, or NaN when std is not strictly positive) and 'count' (number of
    non-missing observations).
    """
    clean = series.dropna()
    mu = clean.mean()
    sigma = clean.std()
    # A zero or NaN std makes the ratio undefined.
    if sigma > 0:
        ratio = mu / sigma
    else:
        ratio = np.nan
    stats = {
        'mean': mu,
        'std': sigma,
        'icir': ratio,
        'count': clean.size,
    }
    return pd.Series(stats)

# One summary column per IC series, in the same order as the columns of ic_df.
ic_series_names = ['size_ic', 'size_rank_ic', 'liq_ic', 'liq_rank_ic']
summary = pd.DataFrame(
    {name: ic_summary(ic_df[name]) for name in ic_series_names}
)
summary


In [None]:
# Quantile group returns (average across dates)
# Build one (dates x quantiles) matrix per factor, then take the NaN-aware mean
# down the date axis to get one average return per group.
n_eval = len(dates_eval)
size_by_date = np.vstack(
    [quantile_returns(size_eval[t], ret_eval[t], q=quantiles) for t in range(n_eval)]
)
liq_by_date = np.vstack(
    [quantile_returns(liq_eval[t], ret_eval[t], q=quantiles) for t in range(n_eval)]
)

size_groups = np.nanmean(size_by_date, axis=0)
liq_groups = np.nanmean(liq_by_date, axis=0)

# Groups are labelled 1..quantiles, lowest factor value first.
group_labels = np.arange(1, quantiles + 1)
group_df = pd.DataFrame({
    'group': group_labels,
    'size_group_ret': size_groups,
    'liq_group_ret': liq_groups,
})
group_df


In [None]:
# Save outputs
# Write both result tables as CSV (no index column) into root_path. Each path is
# built once so the files written and the paths echoed below cannot drift apart.
ic_csv_path = os.path.join(root_path, 'size_liquidity_ic.csv')
group_csv_path = os.path.join(root_path, 'size_liquidity_group_returns.csv')
ic_df.to_csv(ic_csv_path, index=False)
group_df.to_csv(group_csv_path, index=False)
# Echo the written paths as the cell output.
ic_csv_path, group_csv_path
