# Ridge vs PCR vs PLS vs Robust PCR vs Baseline

Compares pooled, cross-sectional, and per-ticker IC results for each model against the baseline, using the report CSV outputs.

In [None]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Locate the signal-IC report directory: prefer the path one directory up from
# the current working directory, otherwise fall back to the path relative to
# the working directory itself.
_parent_relative_dir = Path('../data/processed/reports/signal_ic')
if _parent_relative_dir.exists():
    REPORT_DIR = _parent_relative_dir
else:
    REPORT_DIR = Path('data/processed/reports/signal_ic')

# Stop early with a readable message when neither location exists.
assert REPORT_DIR.exists(), f'Missing report dir: {REPORT_DIR}'
REPORT_DIR

In [None]:
# File-name stems of the four model reports, in the order they are unpacked.
_REPORT_STEMS = ('ridge', 'pcr', 'pls_regression', 'robust_pcr')

# Pooled IC report for each model.
ridge_pooled, pcr_pooled, pls_pooled, robust_pcr_pooled = (
    pd.read_csv(REPORT_DIR / f'{stem}_pooled_ic.csv') for stem in _REPORT_STEMS
)

# Cross-sectional IC report for each model, same ordering as above.
ridge_cs, pcr_cs, pls_cs, robust_pcr_cs = (
    pd.read_csv(REPORT_DIR / f'{stem}_cross_sectional_ic.csv') for stem in _REPORT_STEMS
)

In [None]:
# Side-by-side pooled IC table keyed on 'Entry Time'.
#
# 'Baseline', 'Ridge' and the Ridge 'Improvement' all live in the ridge report,
# so they are selected in one step rather than merging that frame with itself:
# the self-merge added no information and would multiply rows whenever an
# 'Entry Time' value repeats.
#
# The remaining merges use pandas' default inner join, so only entry times
# present in every report survive. Each report's 'Improvement' column is
# renamed per model before merging to avoid column-name collisions.
pooled = (
    ridge_pooled[['Entry Time', 'Baseline', 'Ridge', 'Improvement']]
    .rename(columns={'Improvement': 'Ridge Improvement'})
    .merge(
        pcr_pooled[['Entry Time', 'PCR', 'Improvement']]
        .rename(columns={'Improvement': 'PCR Improvement'}),
        on='Entry Time',
    )
    .merge(
        pls_pooled[['Entry Time', 'PLS Regression', 'Improvement']]
        .rename(columns={'Improvement': 'PLS Improvement'}),
        on='Entry Time',
    )
    .merge(
        robust_pcr_pooled[['Entry Time', 'Robust PCR', 'Improvement']]
        .rename(columns={'Improvement': 'Robust PCR Improvement'}),
        on='Entry Time',
    )
)
pooled

In [None]:
# Side-by-side cross-sectional IC table keyed on 'Entry Time'.
#
# 'Baseline IC', 'Ridge IC' and the Ridge 'Improvement' all live in the ridge
# report, so they are selected in one step rather than merging that frame with
# itself: the self-merge added no information and would multiply rows whenever
# an 'Entry Time' value repeats.
#
# The remaining merges use pandas' default inner join, so only entry times
# present in every report survive. Each report's 'Improvement' column is
# renamed per model before merging to avoid column-name collisions.
cs = (
    ridge_cs[['Entry Time', 'Baseline IC', 'Ridge IC', 'Improvement']]
    .rename(columns={'Improvement': 'Ridge Improvement'})
    .merge(
        pcr_cs[['Entry Time', 'PCR IC', 'Improvement']]
        .rename(columns={'Improvement': 'PCR Improvement'}),
        on='Entry Time',
    )
    .merge(
        pls_cs[['Entry Time', 'PLS Regression IC', 'Improvement']]
        .rename(columns={'Improvement': 'PLS Improvement'}),
        on='Entry Time',
    )
    .merge(
        robust_pcr_cs[['Entry Time', 'Robust PCR IC', 'Improvement']]
        .rename(columns={'Improvement': 'Robust PCR Improvement'}),
        on='Entry Time',
    )
)
cs

In [None]:
# One row per model: the mean of the 'Improvement' column in that model's
# pooled report and in its cross-sectional report. The means are taken from
# the raw per-model reports, not from the merged tables.
summary = pd.DataFrame({
    'Model': ['Ridge', 'PCR', 'PLS', 'Robust PCR'],
    'Mean Pooled Improvement': [
        report['Improvement'].mean()
        for report in (ridge_pooled, pcr_pooled, pls_pooled, robust_pcr_pooled)
    ],
    'Mean Cross-Sectional Improvement': [
        report['Improvement'].mean()
        for report in (ridge_cs, pcr_cs, pls_cs, robust_pcr_cs)
    ],
})
# Show the models ranked by cross-sectional improvement, best first.
summary.sort_values('Mean Cross-Sectional Improvement', ascending=False)

In [None]:
# Per-ticker IC reports for the 1430 file set; the first CSV column becomes the
# index (presumably the ticker symbol; confirm against the report writer).
ridge_ticker, pcr_ticker, pls_ticker, robust_pcr_ticker = (
    pd.read_csv(REPORT_DIR / f'{stem}_per_ticker_1430.csv', index_col=0)
    for stem in ('ridge', 'pcr', 'pls_regression', 'robust_pcr')
)

# The baseline IC is taken from the ridge report only. Outer joins keep every
# index label that appears in any report, leaving NaN where a model lacks it.
per_ticker = (
    ridge_ticker[['Baseline IC', 'Ridge IC']]
    .copy()
    .join(pcr_ticker[['PCR IC']], how='outer')
    .join(pls_ticker[['PLS Regression IC']], how='outer')
    .join(robust_pcr_ticker[['Robust PCR IC']], how='outer')
)

# Improvement of each model over the baseline, row by row.
for model_label, ic_column in (
    ('Ridge', 'Ridge IC'),
    ('PCR', 'PCR IC'),
    ('PLS', 'PLS Regression IC'),
    ('Robust PCR', 'Robust PCR IC'),
):
    per_ticker[f'{model_label} Improvement'] = per_ticker[ic_column] - per_ticker['Baseline IC']

per_ticker = per_ticker.sort_index()
per_ticker

In [None]:
# Columns holding each model's per-ticker improvement over the baseline.
best_cols = ['Ridge Improvement', 'PCR Improvement', 'PLS Improvement', 'Robust PCR Improvement']
improvement_cols = per_ticker[best_cols]

# Per-model aggregate: mean improvement (NaNs skipped by pandas) and the number
# of rows whose improvement is strictly positive (NaN compares as False).
per_ticker_summary = pd.DataFrame({
    'Model': ['Ridge', 'PCR', 'PLS', 'Robust PCR'],
    'Mean Improvement': [improvement_cols[col].mean() for col in best_cols],
    'Tickers Improved (>0)': [(improvement_cols[col] > 0).sum() for col in best_cols],
})

# Best model per row. A row can be NaN in every improvement column, e.g. when a
# ticker has no baseline IC after the outer joins that built `per_ticker`.
# DataFrame.idxmax on an all-NA row is deprecated and raises in newer pandas,
# so NaNs are replaced with -inf for the arg-max only, and rows that had no
# value at all are masked back to NaN afterwards.
has_improvement = improvement_cols.notna().any(axis=1)
best_model_by_ticker = pd.DataFrame(index=per_ticker.index)
best_model_by_ticker['Best Model'] = (
    improvement_cols.fillna(float('-inf'))
    .idxmax(axis=1)
    .where(has_improvement)
    .str.replace(' Improvement', '', regex=False)
)
# max() skips NaNs and yields NaN for an all-NaN row, matching 'Best Model'.
best_model_by_ticker['Best Improvement'] = improvement_cols.max(axis=1)

display(per_ticker_summary.sort_values('Mean Improvement', ascending=False))
display(best_model_by_ticker.sort_values('Best Improvement', ascending=False))

In [None]:
# Line chart of pooled IC against entry time, one series per model plus the
# baseline. Series are drawn in this order so colours and legend entries
# follow the same sequence.
fig, ax = plt.subplots(figsize=(9, 4))
for column, label in (
    ('Baseline', 'Baseline'),
    ('Ridge', 'Ridge'),
    ('PCR', 'PCR'),
    ('PLS Regression', 'PLS'),
    ('Robust PCR', 'Robust PCR'),
):
    ax.plot(pooled['Entry Time'], pooled[column], marker='o', label=label)

ax.set_title('Pooled IC by Entry Time')
ax.set_ylabel('IC')
ax.legend()
ax.grid(alpha=0.2)
plt.tight_layout()

In [None]:
# Line chart of each model's pooled IC improvement over the baseline against
# entry time, with a horizontal reference line at zero.
fig, ax = plt.subplots(figsize=(9, 4))
for label in ('Ridge', 'PCR', 'PLS', 'Robust PCR'):
    ax.plot(pooled['Entry Time'], pooled[f'{label} Improvement'], marker='o', label=label)

# Zero line separates improvement from degradation relative to the baseline.
ax.axhline(0.0, color='black', linewidth=1)
ax.set_title('Pooled IC Improvement vs Baseline')
ax.set_ylabel('Improvement')
ax.legend()
ax.grid(alpha=0.2)
plt.tight_layout()