In [6]:
#!/usr/bin/env python

import os
import pandas as pd
import numpy as np

# ============================================================
# CONFIG
# ============================================================

# --- Inputs: directory holding the rankings and trades parquet files read below.
OUTPUT_DIR_TRADES = "./13-trading_output_regression_insp500_spyfilter_cap15"
RANKINGS_FILE = os.path.join(OUTPUT_DIR_TRADES, "13-weekly_rankings_pre_filter_cap15.parquet")
TRADES_FILE = os.path.join(OUTPUT_DIR_TRADES, "13-trades_regression_insp500_spyfilter_cap15.parquet")

# --- Outputs: every file this analysis writes goes under this directory.
# exist_ok=True makes re-running the cell safe when the directory is already there.
OUTPUT_DIR_ANALYSIS = "./13d-ranking_analysis"
os.makedirs(OUTPUT_DIR_ANALYSIS, exist_ok=True)

print("=== ANALYZING WEEKLY RANKINGS VS ACTUAL TRADES ===\n")

# ============================================================
# LOAD DATA
# ============================================================

print("Loading data...")

# Read both parquet inputs in a fixed order: rankings first, then trades.
rankings, trades = (pd.read_parquet(src) for src in (RANKINGS_FILE, TRADES_FILE))

# Row counts give a quick sanity check on what was loaded.
print("Loaded {:,} ranking records".format(len(rankings)))
print("Loaded {:,} trade records\n".format(len(trades)))

# ============================================================
# PREPARE TRADES DATA
# ============================================================

# Keep only BUY/SELL rows; any other record types in the trades file are
# ignored. Boolean indexing already returns a new frame, so the full
# `trades` table does not need to be copied first.
trades_summary = trades[trades['type'].isin(['BUY', 'SELL'])]


def _join_unique(values):
    """Join the distinct non-null entries of a Series with ', ' (first-seen order).

    Nulls are dropped and entries are cast to str so that a missing or
    non-string value (e.g. an empty `reason`) cannot make str.join raise
    TypeError. A group with no usable values yields an empty string.
    """
    return ', '.join(values.dropna().astype(str).unique())


# Collapse to one row per (signal_date, ticker). Named aggregation ties each
# output column name to its source column explicitly, instead of renaming the
# result columns by position afterwards.
trades_agg = (
    trades_summary
    .groupby(['signal_date', 'ticker'])
    .agg(
        trade_type=('type', _join_unique),      # 'BUY', 'SELL', or both
        # Plain sums: these are *net* figures only if SELL rows carry negative
        # shares/value in the trades file -- TODO confirm the sign convention.
        trade_shares=('shares', 'sum'),
        trade_value=('value', 'sum'),
        trade_reason=('reason', _join_unique),  # distinct reasons recorded for the group
    )
    .reset_index()
)

# ============================================================
# MERGE RANKINGS WITH TRADES
# ============================================================

print("Merging rankings with trades...")

# Build the combined table in one chain:
#   1. left-join so every ranking row is kept, whether or not it has a trade;
#   2. turn the merge indicator into a boolean and a readable label;
#   3. drop the raw indicator;
#   4. add target-minus-current deltas for weight and shares;
#   5. order rows by week, then by slope rank.
combined = (
    rankings
    .merge(trades_agg, how='left', on=['signal_date', 'ticker'], indicator=True)
    .assign(traded=lambda frame: frame['_merge'] == 'both')
    .assign(traded_flag=lambda frame: frame['traded'].map({True: 'TRADED', False: 'NOT_TRADED'}))
    .drop(columns='_merge')
    .assign(
        weight_change=lambda frame: frame['target_weight'] - frame['current_weight'],
        shares_change=lambda frame: frame['target_shares'] - frame['current_shares'],
    )
    .sort_values(['signal_date', 'slope_rank'])
)

print("Combined dataset: {:,} records\n".format(len(combined)))

# ============================================================
# SUMMARY STATISTICS
# ============================================================

print("=== SUMMARY STATISTICS ===\n")

# Headline counts over the whole combined table.
total_rankings = len(combined)
total_weeks = combined['signal_date'].nunique()
total_traded = combined['traded'].sum()
total_not_traded = (~combined['traded']).sum()

print("Total weeks analyzed:           {:,}".format(total_weeks))
print("Total stock rankings:           {:,}".format(total_rankings))
print("Rankings that traded:           {:,} ({:.1f}%)".format(
    total_traded, 100*total_traded/total_rankings))
print("Rankings that did NOT trade:    {:,} ({:.1f}%)\n".format(
    total_not_traded, 100*total_not_traded/total_rankings))

# Cross-tabulate the SPY regime flag against trade status (row counts).
print("=== BY SPY REGIME ===")
regime_summary = (
    combined
    .groupby(['spy_above_200dma', 'traded_flag'])
    .size()
    .unstack(fill_value=0)
)
print(regime_summary)
print()

# Per-week counts: how many tickers were ranked, and how many of them traded.
weekly_summary = (
    combined
    .groupby('signal_date')
    .agg(total_ranked=('ticker', 'count'), total_traded=('traded', 'sum'))
    .reset_index()
    .assign(
        pct_traded=lambda wk: 100 * wk['total_traded'] / wk['total_ranked'],
        total_not_traded=lambda wk: wk['total_ranked'] - wk['total_traded'],
    )
)

print("=== SAMPLE WEEKLY BREAKDOWN (First 10 weeks) ===")
print(weekly_summary.head(10).to_string(index=False))
print()

# ============================================================
# ANALYZE REASONS FOR NOT TRADING
# ============================================================

print("=== WHY STOCKS DIDN'T TRADE ===\n")

# Independent copy of the ranking rows that have no matching trade, so that
# columns added to it do not write back into `combined`.
not_traded = combined.loc[~combined['traded']].copy()

# Categorize reasons for not trading
def categorize_no_trade(row, drift_threshold=0.05, min_new_position_weight=0.003,
                        min_trade_value=10000):
    """Heuristically label why a ranked stock produced no trade.

    This is a post-hoc reconstruction from the ranking row alone; it does not
    read the trading engine's own decision log. The first matching rule wins.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'weight_change', 'shares_change', 'current_shares',
        'target_weight', 'close_adj' and 'spy_above_200dma'.
    drift_threshold : float, default 0.05
        Absolute weight change below which the position counts as already on
        target. (Previously hard-coded and annotated DRIFT_THRESHOLD;
        presumably mirrors the trading script's setting -- verify.)
    min_new_position_weight : float, default 0.003
        Target weight below which a position held at zero shares is considered
        too small to open (previously annotated MIN_NEW_POSITION_WEIGHT).
    min_trade_value : float, default 10000
        Minimum absolute value of shares_change * close_adj for a trade to be
        considered worthwhile (previously annotated MIN_TRADE_VALUE).

    Returns
    -------
    str
        One of 'new_position_too_small', 'within_drift_threshold',
        'trade_value_too_small', 'spy_regime_prevented_buy',
        'insufficient_cash' or 'other'.
    """
    weight_change = row['weight_change']
    shares_change = row['shares_change']

    # New position but too small. This must be tested before the drift check:
    # a position opened from zero shares with a target below
    # min_new_position_weight also has a weight change far below the default
    # drift threshold, so with the drift check first this label could never
    # be returned.
    if row['current_shares'] == 0 and row['target_weight'] < min_new_position_weight:
        return 'new_position_too_small'

    # Already at target (within drift threshold)
    if abs(weight_change) < drift_threshold:
        return 'within_drift_threshold'

    # Trade value too small
    if abs(shares_change * row['close_adj']) < min_trade_value:
        return 'trade_value_too_small'

    if shares_change > 0:
        # Would buy, but the SPY regime flag is off.
        if not row['spy_above_200dma']:
            return 'spy_regime_prevented_buy'
        # Fallback for an unexplained missing buy: cash shortage is assumed,
        # not verified against account balances.
        return 'insufficient_cash'

    return 'other'

# Classify every untraded row; axis=1 passes each row to the function as a Series.
not_traded['no_trade_reason'] = not_traded.apply(categorize_no_trade, axis=1)

# Frequency of each label, most common first (value_counts default ordering).
reason_counts = not_traded['no_trade_reason'].value_counts()

print("Reasons stocks didn't trade:")
for reason, count in reason_counts.items():
    # Share of all untraded rows carrying this label.
    pct = 100 * count / len(not_traded)
    print("  {:30s}: {:6,} ({:5.1f}%)".format(reason, count, pct))
print()

# ============================================================
# SAVE OUTPUT FILES
# ============================================================

print("=== SAVING OUTPUT FILES ===\n")


def _analysis_path(filename):
    """Return the location of `filename` inside OUTPUT_DIR_ANALYSIS."""
    return os.path.join(OUTPUT_DIR_ANALYSIS, filename)


# Destinations for every artifact written by this section.
combined_file = _analysis_path("13d-rankings_with_trade_status.parquet")
combined_csv = _analysis_path("13d-rankings_with_trade_status.csv")
weekly_file = _analysis_path("13d-weekly_summary.csv")
not_traded_file = _analysis_path("13d-not_traded_with_reasons.csv")
summary_file = _analysis_path("13d-summary_statistics.csv")

# One-row table of the headline numbers.
summary_row = {
    'total_weeks': total_weeks,
    'total_rankings': total_rankings,
    'total_traded': total_traded,
    'total_not_traded': total_not_traded,
    'pct_traded': 100 * total_traded / total_rankings,
    'pct_not_traded': 100 * total_not_traded / total_rankings,
}
summary_stats = pd.DataFrame([summary_row])

# Main combined file with all rankings and trade status (parquet).
combined.to_parquet(combined_file, index=False)
print(f"✓ Saved: {combined_file}")

# CSV outputs, written in order: combined (for easy viewing in Excel),
# weekly summary, untraded rows with reasons, summary statistics.
for frame, csv_path in (
    (combined, combined_csv),
    (weekly_summary, weekly_file),
    (not_traded, not_traded_file),
    (summary_stats, summary_file),
):
    frame.to_csv(csv_path, index=False)
    print(f"✓ Saved: {csv_path}")

# Closing banner plus a short legend for the main output file.
for line in (
    "\n=== COMPLETE ===",
    "\nMain output file columns:",
    "  - traded_flag: 'TRADED' or 'NOT_TRADED'",
    "  - All ranking info (slope, weights, targets)",
    "  - Trade info (if traded): trade_type, trade_shares, trade_value, trade_reason",
    "  - Analysis columns: weight_change, shares_change",
):
    print(line)

=== ANALYZING WEEKLY RANKINGS VS ACTUAL TRADES ===

Loading data...
Loaded 36,057 ranking records
Loaded 3,022 trade records

Merging rankings with trades...
Combined dataset: 36,057 records

=== SUMMARY STATISTICS ===

Total weeks analyzed:           1,392
Total stock rankings:           36,057
Rankings that traded:           1,690 (4.7%)
Rankings that did NOT trade:    34,367 (95.3%)

=== BY SPY REGIME ===
traded_flag       NOT_TRADED  TRADED
spy_above_200dma                    
False                   9008      31
True                   25359    1659

=== SAMPLE WEEKLY BREAKDOWN (First 10 weeks) ===
signal_date  total_ranked  total_traded  pct_traded  total_not_traded
 1999-01-06            25             8        32.0                17
 1999-01-13            25             0         0.0                25
 1999-01-20            25             2         8.0                23
 1999-01-27            25             1         4.0                24
 1999-02-03            25             1 

In [8]:
# Exploratory follow-up: run once the analysis cell has written its outputs.
import pandas as pd

# Reload the main output: every ranking row together with its trade status.
df = pd.read_parquet("./13d-ranking_analysis/13d-rankings_with_trade_status.parquet")

# Inspect a single signal week.
week = df.loc[df['signal_date'] == '2010-11-24']
print(week.loc[:, ['ticker', 'slope_rank', 'traded_flag', 'target_weight']])

# Only the rows that did not trade.
# NOTE: this rebinds `not_traded`; the frame of the same name built in the
# analysis cell additionally carries the no_trade_reason column.
not_traded = df.loc[df['traded_flag'] == 'NOT_TRADED']

# Distribution of target_weight for traded vs. not-traded rows.
print(df.groupby('traded_flag')['target_weight'].describe())

      ticker  slope_rank traded_flag  target_weight
15878    KG1           1  NOT_TRADED       0.119809
15879    KMX           2  NOT_TRADED       0.027537
15880   BKNG           3  NOT_TRADED       0.001933
15881     CF           4  NOT_TRADED       0.035123
15882    NOV           5  NOT_TRADED       0.017310
15883    FCX           6  NOT_TRADED       0.018035
15884  CPPRQ           7  NOT_TRADED       0.023502
15885    TPR           8  NOT_TRADED       0.027990
15886    MEE           9  NOT_TRADED       0.011604
15887   EBAY          10  NOT_TRADED       0.092139
15888   AMZN          11  NOT_TRADED       0.102951
15889    TIF          12  NOT_TRADED       0.018011
15890    EMN          13  NOT_TRADED       0.040486
15891    MWW          14  NOT_TRADED       0.023961
15892    RIG          15  NOT_TRADED       0.015092
15893  BEAM2          16  NOT_TRADED       0.023003
15894    GGP          17  NOT_TRADED       0.053921
15895   NVDA          18      TRADED       0.119809
15896    PXD