# Polymarket Win Rate Analysis

Analyzing calibration and systematic mispricing in Polymarket prediction markets.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from pathlib import Path

# Notebook-wide matplotlib defaults: whitegrid theme, larger figures, 11pt text
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (12, 7),
    'font.size': 11,
})

## Load Data

In [None]:
# Locate the exported trade CSVs and load the one that was modified last
data_dir = Path("polymarket_data")
data_files = list(data_dir.glob("polymarket_trades_*.csv"))

if data_files:
    # Newest export, judged by filesystem modification time
    latest_file = max(data_files, key=lambda path: path.stat().st_mtime)
    print(f"Loading: {latest_file}")

    df = pd.read_csv(latest_file)
    # Parse timestamps once so later cells can compare and aggregate them
    df['trade_timestamp'] = pd.to_datetime(df['trade_timestamp'])
    print(f"Loaded {len(df):,} trades from {df['condition_id'].nunique():,} markets")
else:
    # NOTE: `df` stays undefined in this branch, so the following cells cannot run
    print("No data files found! Run polymarket_data_collector.py first.")

## Data Overview

In [None]:
# Preview the first rows of the loaded trades (displayed as the cell output)
df.head()

In [None]:
# Print the column names, dtypes and non-null counts of the trade table
df.info()

In [None]:
# Headline numbers for the full trade table
first_trade = df['trade_timestamp'].min()
last_trade = df['trade_timestamp'].max()
print(f"Date range: {first_trade} to {last_trade}")

# Row counts for each value of the two categorical columns
print("\nSide distribution:")
print(df['side'].value_counts())
print("\nOutcome distribution:")
print(df['outcome'].value_counts())

# Mean of the `won` column and mean trade price
overall_win_rate = df['won'].mean()
average_price = df['price'].mean()
print(f"\nOverall win rate: {overall_win_rate:.2%}")
print(f"Average price: {average_price:.3f}")

In [None]:
# Category distribution: trade counts for the 15 most frequent categories
df['category'].value_counts().head(15)

In [None]:
# Number of trades recorded for each market (one entry per condition_id)
trades_per_market = df.groupby('condition_id').size()

fewest = trades_per_market.min()
most = trades_per_market.max()
median_count = trades_per_market.median()

# How many markets fall under / over the size thresholds
under_10 = (trades_per_market < 10).sum()
under_50 = (trades_per_market < 50).sum()
at_least_100 = (trades_per_market >= 100).sum()

print("Trades per market:")
print(f"  Min: {fewest}, Max: {most}, Median: {median_count:.0f}")
print(f"  Markets with <10 trades:  {under_10}")
print(f"  Markets with <50 trades:  {under_50}")
print(f"  Markets with 100+ trades: {at_least_100}")

## Helper Functions

In [None]:
def wilson_ci(successes, n, confidence=0.95):
    """Return the Wilson score interval (low, high) for a binomial proportion.

    Args:
        successes: Number of successes observed.
        n: Number of trials.
        confidence: Two-sided confidence level (default 0.95).

    Returns:
        Tuple ``(low, high)`` clamped to [0, 1]; ``(0, 0)`` when ``n`` is 0.
    """
    if n == 0:
        return 0, 0

    # Two-sided normal quantile for the requested confidence level
    z = stats.norm.ppf((1 + confidence) / 2)
    p_hat = successes / n

    scale = 1 + z**2 / n
    midpoint = (p_hat + z**2 / (2*n)) / scale
    half_width = z * np.sqrt(p_hat*(1-p_hat)/n + z**2/(4*n**2)) / scale

    lower = max(0, midpoint - half_width)
    upper = min(1, midpoint + half_width)
    return lower, upper


def calculate_win_rates(data, price_bins=20, min_samples=30):
    """Calculate the unweighted win rate per price bucket.

    Args:
        data: DataFrame of trades with 'price', 'won' and 'condition_id' columns.
        price_bins: Forwarded to ``pd.cut`` as ``bins``. An int yields that many
            equal-width buckets spanning the observed price range; a sequence
            is taken as explicit bucket edges.
        min_samples: Buckets holding fewer trades than this are omitted.

    Returns:
        DataFrame with one row per retained bucket and the columns
        'price_bin', 'price_midpoint', 'win_rate', 'ci_low', 'ci_high',
        'n_trades' and 'n_markets'. The columns are always present, even when
        no bucket qualifies, so callers can index them without a KeyError.
    """
    columns = [
        'price_bin', 'price_midpoint', 'win_rate',
        'ci_low', 'ci_high', 'n_trades', 'n_markets',
    ]

    # pd.cut cannot derive bucket edges from an empty / all-NaN price column
    if not data['price'].notna().any():
        return pd.DataFrame(columns=columns)

    data = data.copy()  # never add the helper column to the caller's frame
    data['price_bin'] = pd.cut(data['price'], bins=price_bins, include_lowest=True)

    results = []
    for price_bin in data['price_bin'].cat.categories:
        bin_data = data[data['price_bin'] == price_bin]
        n = len(bin_data)
        if n < min_samples:
            continue

        wins = bin_data['won'].sum()
        ci_low, ci_high = wilson_ci(wins, n)

        results.append({
            'price_bin': price_bin,
            'price_midpoint': price_bin.mid,
            'win_rate': wins / n,
            'ci_low': ci_low,
            'ci_high': ci_high,
            'n_trades': n,
            'n_markets': bin_data['condition_id'].nunique(),
        })

    # Explicit columns keep the schema stable when `results` is empty
    return pd.DataFrame(results, columns=columns)

## Filter to BUY trades only

For calibration analysis, we focus on BUY trades: "Did what I bought win?"

In [None]:
# Keep only the BUY side; .copy() gives an independent frame for later cells
is_buy = df['side'] == 'BUY'
buys = df[is_buy].copy()

buy_share = len(buys) / len(df)
print(f"BUY trades: {len(buys):,} ({buy_share:.1%} of all trades)")

---
# Weighted Calibration Setup

Weight each trade inversely to its market's trade count so that large markets do not dominate the results.
- Markets with >= 100 trades: contribute equally (total weight = 1.0 each, before normalization)
- Markets with < 100 trades: downweighted proportionally (total weight = n/100, where n is the market's trade count)

In [None]:
# =============================================================================
# MARKET-WEIGHTED CALIBRATION FUNCTIONS
# =============================================================================
# Implements inverse market weighting so that large markets don't dominate.
# Markets with >= weight_cap trades contribute equally; smaller markets are downweighted.

def add_market_weights(data, weight_cap=None, min_trades=None):
    """
    Add inverse market weights to trade data.

    Each trade receives ``1 / max(n_trades, weight_cap)`` (or ``1 / n_trades``
    without a cap), where ``n_trades`` is the number of rows in ``data`` that
    share its ``condition_id``. The weights are then rescaled so that they sum
    to the number of unique markets that remain.

    The function can be re-applied to a frame that already carries
    'market_trade_count' / 'market_weight' (e.g. a filtered slice of an
    earlier result); both columns are recomputed from the rows passed in.

    Args:
        data: DataFrame with trades (must have 'condition_id' column)
        weight_cap: If set, cap the effective trade count for weighting.
                   Markets >= cap contribute equally (weight = 1/n_trades, total = 1.0)
                   Markets < cap are downweighted (total contribution = n_trades/cap)
        min_trades: If set, exclude markets with fewer than this many trades.

    Returns:
        New DataFrame (the input is not modified) with 'market_trade_count'
        and 'market_weight' columns. The index is renumbered from 0 before
        the optional ``min_trades`` filter is applied.
    """
    # reset_index returns a new frame, so the caller's DataFrame is untouched
    data = data.reset_index(drop=True)

    # Count trades per market and attach the count to every row. Assigning the
    # column (instead of merging) overwrites a stale 'market_trade_count'
    # rather than producing suffixed duplicates.
    counts = data['condition_id'].value_counts()
    data['market_trade_count'] = data['condition_id'].map(counts)

    # Optional: filter out small markets
    if min_trades is not None:
        before_filter = len(data)
        # .copy() so the assignments below never write into a slice
        data = data[data['market_trade_count'] >= min_trades].copy()
        print(f"Filtered out markets with <{min_trades} trades: {before_filter - len(data):,} trades removed")

    # Inverse weighting; with a cap, small markets are treated as if they had
    # `weight_cap` trades, which shrinks their total contribution
    effective_count = data['market_trade_count']
    if weight_cap is not None:
        effective_count = effective_count.clip(lower=weight_cap)
    data['market_weight'] = 1 / effective_count

    # Normalize weights to sum to number of unique markets
    n_markets = data['condition_id'].nunique()
    weight_sum = data['market_weight'].sum()
    if weight_sum > 0:  # nothing to rescale when every row was filtered out
        data['market_weight'] = data['market_weight'] * (n_markets / weight_sum)

    return data


def weighted_wilson_ci(weighted_wins, weighted_n, confidence=0.95):
    """Approximate Wilson score interval for a weighted proportion.

    The total weight ``weighted_n`` is used directly as the sample size in the
    Wilson formula, so the interval is only an approximation.

    Args:
        weighted_wins: Sum of the weights of winning observations.
        weighted_n: Sum of all weights.
        confidence: Two-sided confidence level (default 0.95).

    Returns:
        Tuple ``(low, high)`` clamped to [0, 1]; ``(0, 0)`` when
        ``weighted_n`` is 0.
    """
    if weighted_n == 0:
        return 0, 0

    # The weight total stands in for the number of observations
    n_eff = weighted_n
    p_hat = weighted_wins / weighted_n

    z = stats.norm.ppf((1 + confidence) / 2)
    scale = 1 + z**2 / n_eff
    midpoint = (p_hat + z**2 / (2*n_eff)) / scale
    half_width = z * np.sqrt(p_hat*(1-p_hat)/n_eff + z**2/(4*n_eff**2)) / scale

    lower = max(0, midpoint - half_width)
    upper = min(1, midpoint + half_width)
    return lower, upper


def calculate_weighted_win_rates(data, price_bins=20, min_markets=10):
    """
    Calculate market-weighted win rate by price bucket.

    Args:
        data: DataFrame with 'price', 'won', 'condition_id', 'market_weight' columns
        price_bins: Forwarded to ``pd.cut`` as ``bins``. An int yields that many
            equal-width buckets spanning the observed price range; a sequence
            is taken as explicit bucket edges.
        min_markets: Minimum unique markets required in a bin

    Returns:
        DataFrame with one row per retained bucket and the columns
        'price_bin', 'price_midpoint', 'weighted_win_rate',
        'unweighted_win_rate', 'ci_low', 'ci_high', 'n_trades', 'n_markets'
        and 'effective_n' (the sum of weights in the bucket). The columns are
        always present, even when no bucket qualifies, so callers can index
        them without a KeyError.

    Raises:
        ValueError: If ``data`` has no 'market_weight' column.
    """
    if 'market_weight' not in data.columns:
        raise ValueError("Data must have 'market_weight' column. Run add_market_weights() first.")

    columns = [
        'price_bin', 'price_midpoint', 'weighted_win_rate', 'unweighted_win_rate',
        'ci_low', 'ci_high', 'n_trades', 'n_markets', 'effective_n',
    ]

    # pd.cut cannot derive bucket edges from an empty / all-NaN price column
    if not data['price'].notna().any():
        return pd.DataFrame(columns=columns)

    data = data.copy()  # never add the helper column to the caller's frame
    data['price_bin'] = pd.cut(data['price'], bins=price_bins, include_lowest=True)

    results = []
    for price_bin in data['price_bin'].cat.categories:
        bin_data = data[data['price_bin'] == price_bin]

        n_markets = bin_data['condition_id'].nunique()
        if n_markets < min_markets:
            continue

        # Weighted calculations
        weighted_n = bin_data['market_weight'].sum()
        weighted_wins = (bin_data['won'] * bin_data['market_weight']).sum()
        weighted_win_rate = weighted_wins / weighted_n if weighted_n > 0 else 0

        ci_low, ci_high = weighted_wilson_ci(weighted_wins, weighted_n)

        results.append({
            'price_bin': price_bin,
            'price_midpoint': price_bin.mid,
            'weighted_win_rate': weighted_win_rate,
            # Plain per-trade mean, kept for comparison with the weighted rate
            'unweighted_win_rate': bin_data['won'].mean(),
            'ci_low': ci_low,
            'ci_high': ci_high,
            'n_trades': len(bin_data),
            'n_markets': n_markets,
            'effective_n': weighted_n,
        })

    # Explicit columns keep the schema stable when `results` is empty
    return pd.DataFrame(results, columns=columns)


# =============================================================================
# PREPARE WEIGHTED DATA
# =============================================================================

# Weight the BUY trades per market: the trade count is capped at 100 for
# weighting, and markets with fewer than 10 trades are dropped entirely
buys_weighted = add_market_weights(buys, weight_cap=100, min_trades=10)

_n_weighted_trades = len(buys_weighted)
_n_weighted_markets = buys_weighted['condition_id'].nunique()
_weight_min = buys_weighted['market_weight'].min()
_weight_max = buys_weighted['market_weight'].max()

print("\nWeighted data summary:")
print(f"  Total trades: {_n_weighted_trades:,}")
print(f"  Unique markets: {_n_weighted_markets:,}")
print(f"  Weight range: {_weight_min:.4f} - {_weight_max:.4f}")

In [None]:
# =============================================================================
# MAIN CALIBRATION ANALYSIS (Weighted vs Unweighted)
# =============================================================================

# Calibration tables: market-weighted (filtered, weighted BUYs) and plain (all BUYs)
weighted_results = calculate_weighted_win_rates(buys_weighted, price_bins=20, min_markets=15)
unweighted_results = calculate_win_rates(buys, price_bins=20, min_samples=50)


def _draw_main_calibration_panel(ax, results, rate_col, color, title):
    """Plot observed win % with its CI band against the perfect-calibration diagonal."""
    x_cents = results['price_midpoint'] * 100
    ax.plot(x_cents, results[rate_col] * 100,
            'o-', color=color, linewidth=2.5, markersize=6, label='Observed', zorder=3)
    ax.fill_between(x_cents, results['ci_low'] * 100, results['ci_high'] * 100,
                    alpha=0.3, color=color)
    ax.plot([0, 100], [0, 100], '--', color='#2ecc71', linewidth=2, label='Perfect', zorder=2)
    ax.set_xlabel('Price (cents)')
    ax.set_ylabel('Win %')
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)


fig, axes = plt.subplots(1, 3, figsize=(18, 5))
ax1, ax2, ax3 = axes

# Left and middle panels: the two calibration curves
_draw_main_calibration_panel(ax1, weighted_results, 'weighted_win_rate', '#3498db',
                             'WEIGHTED Calibration')
_draw_main_calibration_panel(ax2, unweighted_results, 'win_rate', '#e74c3c',
                             'UNWEIGHTED Calibration')

# Right panel: observed win rate minus bucket midpoint, in cents
weighted_gap = weighted_results['weighted_win_rate'] - weighted_results['price_midpoint']
unweighted_gap = unweighted_results['win_rate'] - unweighted_results['price_midpoint']
weighted_dev = weighted_gap * 100
unweighted_dev = unweighted_gap * 100

ax3.plot(weighted_results['price_midpoint'] * 100, weighted_dev,
         'o-', color='#3498db', linewidth=2.5, markersize=6, label='Weighted')
ax3.plot(unweighted_results['price_midpoint'] * 100, unweighted_dev,
         'o-', color='#e74c3c', linewidth=2.5, markersize=6, label='Unweighted')
ax3.axhline(y=0, color='#2ecc71', linestyle='--', linewidth=2)
# Shaded band marks deviations within +/- 5 cents
ax3.fill_between([0, 100], -5, 5, alpha=0.1, color='gray')
ax3.set_xlabel('Price (cents)')
ax3.set_ylabel('Deviation (cents)')
ax3.set_title('Deviation Comparison', fontsize=13, fontweight='bold')
ax3.set_xlim(0, 100)
ax3.set_ylim(-15, 15)
ax3.legend(loc='upper left')
ax3.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Summary: mean absolute gap between win rate and bucket midpoint
w_cal_error = np.abs(weighted_gap).mean()
u_cal_error = np.abs(unweighted_gap).mean()

print("Mean Absolute Calibration Error:")
print(f"  Weighted:   {w_cal_error*100:.2f} cents")
print(f"  Unweighted: {u_cal_error*100:.2f} cents")

---
# YES vs NO Comparison (Weighted vs Unweighted)

Are YES contracts priced differently from NO contracts?

In [None]:
# Partition the BUY trades by the `outcome` column ('Yes' / 'No'),
# once for the raw frame and once for the market-weighted frame
yes_trades, no_trades = (
    buys[buys['outcome'] == outcome_label].copy() for outcome_label in ('Yes', 'No')
)
yes_weighted, no_weighted = (
    buys_weighted[buys_weighted['outcome'] == outcome_label].copy()
    for outcome_label in ('Yes', 'No')
)

print(f"YES trades: {len(yes_trades):,} ({len(yes_weighted):,} after weighting filter)")
print(f"NO trades:  {len(no_trades):,} ({len(no_weighted):,} after weighting filter)")

In [None]:
# =============================================================================
# YES vs NO CALIBRATION (Weighted vs Unweighted)
# =============================================================================

# Market-weighted calibration tables
yes_w_results = calculate_weighted_win_rates(yes_weighted, price_bins=20, min_markets=10)
no_w_results = calculate_weighted_win_rates(no_weighted, price_bins=20, min_markets=10)

# Plain per-trade calibration tables
yes_u_results = calculate_win_rates(yes_trades, price_bins=20, min_samples=30)
no_u_results = calculate_win_rates(no_trades, price_bins=20, min_samples=30)


def _draw_yes_no_calibration(ax, yes_results, no_results, rate_col, title):
    """Overlay the YES and NO calibration curves (with CI bands) on one axis."""
    for results, color, label in ((yes_results, '#3498db', 'YES'),
                                  (no_results, '#e74c3c', 'NO')):
        x_cents = results['price_midpoint'] * 100
        ax.plot(x_cents, results[rate_col] * 100,
                'o-', color=color, linewidth=2.5, markersize=6, label=label)
        ax.fill_between(x_cents, results['ci_low'] * 100, results['ci_high'] * 100,
                        alpha=0.2, color=color)
    ax.plot([0, 100], [0, 100], '--', color='#2ecc71', linewidth=2, label='Perfect')
    ax.set_xlabel('Price (cents)')
    ax.set_ylabel('Win %')
    ax.set_title(title, fontweight='bold')
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    ax.legend()
    ax.grid(True, alpha=0.3)


def _draw_yes_no_deviation(ax, yes_results, yes_dev, no_results, no_dev, title):
    """Plot YES and NO deviation (win rate minus bucket midpoint, in cents)."""
    ax.plot(yes_results['price_midpoint'] * 100, yes_dev,
            'o-', color='#3498db', linewidth=2.5, label='YES')
    ax.plot(no_results['price_midpoint'] * 100, no_dev,
            'o-', color='#e74c3c', linewidth=2.5, label='NO')
    ax.axhline(y=0, color='#2ecc71', linestyle='--', linewidth=2)
    # Shaded band marks deviations within +/- 5 cents
    ax.fill_between([0, 100], -5, 5, alpha=0.1, color='gray')
    ax.set_xlabel('Price (cents)')
    ax.set_ylabel('Deviation (cents)')
    ax.set_title(title, fontweight='bold')
    ax.set_xlim(0, 100)
    ax.set_ylim(-15, 15)
    ax.legend()
    ax.grid(True, alpha=0.3)


fig, axes = plt.subplots(2, 2, figsize=(14, 12))
(ax1, ax2), (ax3, ax4) = axes

# Top row: calibration curves
_draw_yes_no_calibration(ax1, yes_w_results, no_w_results, 'weighted_win_rate',
                         'WEIGHTED: YES vs NO Calibration')
_draw_yes_no_calibration(ax2, yes_u_results, no_u_results, 'win_rate',
                         'UNWEIGHTED: YES vs NO Calibration')

# Bottom row: deviations from the diagonal, in cents
yes_w_dev = (yes_w_results['weighted_win_rate'] - yes_w_results['price_midpoint']) * 100
no_w_dev = (no_w_results['weighted_win_rate'] - no_w_results['price_midpoint']) * 100
_draw_yes_no_deviation(ax3, yes_w_results, yes_w_dev, no_w_results, no_w_dev,
                       'WEIGHTED: Deviation from Perfect')

yes_u_dev = (yes_u_results['win_rate'] - yes_u_results['price_midpoint']) * 100
no_u_dev = (no_u_results['win_rate'] - no_u_results['price_midpoint']) * 100
_draw_yes_no_deviation(ax4, yes_u_results, yes_u_dev, no_u_results, no_u_dev,
                       'UNWEIGHTED: Deviation from Perfect')

plt.tight_layout()
plt.show()

# Summary: mean absolute deviation per side and weighting scheme
print("\nYES Calibration Error:")
print(f"  Weighted:   {np.abs(yes_w_dev).mean():.2f}c")
print(f"  Unweighted: {np.abs(yes_u_dev).mean():.2f}c")
print("\nNO Calibration Error:")
print(f"  Weighted:   {np.abs(no_w_dev).mean():.2f}c")
print(f"  Unweighted: {np.abs(no_u_dev).mean():.2f}c")

---
# Analysis by Time to Resolution (Weighted vs Unweighted)

A 60c contract three months before resolution is very different from a 60c contract one hour before resolution.

In [None]:
# Keep only trades whose time-to-resolution is known, for both frames
buys_with_time = buys.dropna(subset=['time_to_resolution_hours']).copy()
buys_weighted_with_time = buys_weighted.dropna(subset=['time_to_resolution_hours']).copy()

_n_time_unweighted = len(buys_with_time)
_n_time_weighted = len(buys_weighted_with_time)
print(f"Trades with time data: {_n_time_unweighted:,} (unweighted), {_n_time_weighted:,} (weighted)")

In [None]:
# =============================================================================
# TIME TO RESOLUTION ANALYSIS (Weighted vs Unweighted)
# =============================================================================

# (lower bound in hours, upper bound in hours, panel title); lower bound inclusive
time_buckets = [
    (0, 24, '0-24h'),
    (24, 72, '1-3 days'),
    (72, 168, '3-7 days'),
    (168, 672, '1-4 weeks'),
    (672, 2016, '1-3 months'),
    (2016, float('inf'), '3+ months')
]


def _draw_time_bucket_curve(ax, results, rate_col, color):
    """Plot one small calibration curve with CI band and the perfect diagonal."""
    x_cents = results['price_midpoint'] * 100
    ax.plot(x_cents, results[rate_col] * 100,
            'o-', color=color, linewidth=2, markersize=4)
    ax.fill_between(x_cents, results['ci_low'] * 100, results['ci_high'] * 100,
                    alpha=0.3, color=color)
    ax.plot([0, 100], [0, 100], '--', color='#2ecc71', linewidth=1.5, alpha=0.7)


def _note_insufficient_time_data(ax, n_rows):
    """Replace the curve with a centered note stating how few rows there are."""
    ax.text(0.5, 0.5, f'Insufficient data\n(n={n_rows})',
            ha='center', va='center', transform=ax.transAxes, fontsize=9)


fig, axes = plt.subplots(2, 6, figsize=(20, 8))

hours_u = buys_with_time['time_to_resolution_hours']
hours_w = buys_weighted_with_time['time_to_resolution_hours']

for idx, (low, high, label) in enumerate(time_buckets):
    # Rows whose time-to-resolution lies in [low, high)
    bucket_u = buys_with_time[(hours_u >= low) & (hours_u < high)]
    bucket_w = buys_weighted_with_time[(hours_w >= low) & (hours_w < high)]

    # Top row: Weighted (needs at least 100 trades from at least 10 markets)
    ax_w = axes[0, idx]
    if len(bucket_w) >= 100 and bucket_w['condition_id'].nunique() >= 10:
        results_w = calculate_weighted_win_rates(bucket_w, price_bins=10, min_markets=5)
        _draw_time_bucket_curve(ax_w, results_w, 'weighted_win_rate', '#3498db')
    else:
        _note_insufficient_time_data(ax_w, len(bucket_w))

    ax_w.set_xlim(0, 100)
    ax_w.set_ylim(0, 100)
    ax_w.set_title(f'{label}', fontweight='bold', fontsize=10)
    if idx == 0:
        ax_w.set_ylabel('WEIGHTED\nWin %', fontsize=10)
    ax_w.grid(True, alpha=0.3)
    ax_w.tick_params(labelsize=8)

    # Bottom row: Unweighted (needs at least 100 trades)
    ax_u = axes[1, idx]
    if len(bucket_u) >= 100:
        results_u = calculate_win_rates(bucket_u, price_bins=10, min_samples=20)
        _draw_time_bucket_curve(ax_u, results_u, 'win_rate', '#e74c3c')
    else:
        _note_insufficient_time_data(ax_u, len(bucket_u))

    ax_u.set_xlim(0, 100)
    ax_u.set_ylim(0, 100)
    ax_u.set_xlabel('Price (c)', fontsize=9)
    if idx == 0:
        ax_u.set_ylabel('UNWEIGHTED\nWin %', fontsize=10)
    ax_u.grid(True, alpha=0.3)
    ax_u.tick_params(labelsize=8)

plt.suptitle('Calibration by Time to Resolution: Weighted (top) vs Unweighted (bottom)',
             fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

---
# Analysis by Category (Weighted vs Unweighted)

In [None]:
# The six categories with the most BUY trades, most frequent first
top_categories = buys['category'].value_counts().index[:6].tolist()
print("Top categories:", top_categories)

print("\nTrades per category:")
for cat in top_categories:
    # Row counts in the raw frame and in the market-weighted (filtered) frame
    n_u = int((buys['category'] == cat).sum())
    n_w = int((buys_weighted['category'] == cat).sum())
    print(f"  {cat}: {n_u:,} trades ({n_w:,} weighted)")

In [None]:
# =============================================================================
# CATEGORY ANALYSIS (Weighted vs Unweighted)
# =============================================================================

def _draw_category_curve(ax, results, rate_col, color):
    """Plot one small calibration curve with CI band and the perfect diagonal."""
    x_cents = results['price_midpoint'] * 100
    ax.plot(x_cents, results[rate_col] * 100,
            'o-', color=color, linewidth=2, markersize=4)
    ax.fill_between(x_cents, results['ci_low'] * 100, results['ci_high'] * 100,
                    alpha=0.3, color=color)
    ax.plot([0, 100], [0, 100], '--', color='#2ecc71', linewidth=1.5, alpha=0.7)


def _note_insufficient_category_data(ax, n_rows):
    """Replace the curve with a centered note stating how few rows there are."""
    ax.text(0.5, 0.5, f'Insufficient data\n(n={n_rows})',
            ha='center', va='center', transform=ax.transAxes, fontsize=9)


n_category_cols = 6
fig, axes = plt.subplots(2, n_category_cols, figsize=(20, 8))

# Never index past the grid, even if `top_categories` holds more entries
plotted_categories = top_categories[:n_category_cols]

for idx, category in enumerate(plotted_categories):
    cat_u = buys[buys['category'] == category]
    cat_w = buys_weighted[buys_weighted['category'] == category]

    # Top row: Weighted (needs at least 100 trades from at least 10 markets)
    ax_w = axes[0, idx]
    if len(cat_w) < 100 or cat_w['condition_id'].nunique() < 10:
        _note_insufficient_category_data(ax_w, len(cat_w))
    else:
        results_w = calculate_weighted_win_rates(cat_w, price_bins=10, min_markets=5)
        _draw_category_curve(ax_w, results_w, 'weighted_win_rate', '#3498db')

    ax_w.set_xlim(0, 100)
    ax_w.set_ylim(0, 100)
    # Truncate long category names; str() so non-string labels work too
    cat_label = str(category)
    title = cat_label[:15] + '...' if len(cat_label) > 15 else cat_label
    ax_w.set_title(f'{title}', fontweight='bold', fontsize=10)
    if idx == 0:
        ax_w.set_ylabel('WEIGHTED\nWin %', fontsize=10)
    ax_w.grid(True, alpha=0.3)
    ax_w.tick_params(labelsize=8)

    # Bottom row: Unweighted (needs at least 100 trades)
    ax_u = axes[1, idx]
    if len(cat_u) < 100:
        _note_insufficient_category_data(ax_u, len(cat_u))
    else:
        results_u = calculate_win_rates(cat_u, price_bins=10, min_samples=20)
        _draw_category_curve(ax_u, results_u, 'win_rate', '#e74c3c')

    ax_u.set_xlim(0, 100)
    ax_u.set_ylim(0, 100)
    ax_u.set_xlabel('Price (c)', fontsize=9)
    if idx == 0:
        ax_u.set_ylabel('UNWEIGHTED\nWin %', fontsize=10)
    ax_u.grid(True, alpha=0.3)
    ax_u.tick_params(labelsize=8)

# With fewer categories than columns, hide the spare panels instead of
# leaving empty default axes in the figure
for spare_ax in axes[:, len(plotted_categories):].ravel():
    spare_ax.set_visible(False)

plt.suptitle('Calibration by Category: Weighted (top) vs Unweighted (bottom)',
             fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

---
# Price Distribution

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left: histogram of BUY prices in cents, with a reference line at 50c
prices_cents = buys['price'] * 100
ax1.hist(prices_cents, bins=50, color='#3498db', alpha=0.7, edgecolor='black')
ax1.set(xlabel='Price (cents)',
        ylabel='Number of trades',
        title='Distribution of Trade Prices')
ax1.axvline(x=50, color='red', linestyle='--', alpha=0.5)

# Right: histogram of trade sizes; values above 1000 are clipped to 1000
# so that the largest trades do not stretch the x-axis
trade_sizes = buys['size']
ax2.hist(trade_sizes.clip(upper=1000), bins=50, color='#2ecc71', alpha=0.7, edgecolor='black')
ax2.set(xlabel='Trade Size ($)',
        ylabel='Number of trades',
        title='Distribution of Trade Sizes (capped at $1000)')

plt.tight_layout()
plt.show()

# Summary statistics use the unclipped sizes
print("Trade size stats:")
print(f"  Mean: ${trade_sizes.mean():.2f}")
print(f"  Median: ${trade_sizes.median():.2f}")
print(f"  Max: ${trade_sizes.max():.2f}")

---
# Custom Analysis Space

Use this section to explore specific hypotheses.

In [None]:
# Example: Filter to specific conditions and analyze
# Uncomment and modify as needed

# High volume markets only
# high_vol = buys[buys['volume_total'] > 100000]
# results = calculate_win_rates(high_vol)

# Specific category
# politics = buys[buys['category'] == 'Politics']
# results = calculate_win_rates(politics)

# Recent trades only
# recent = buys[buys['trade_timestamp'] > '2024-06-01']
# results = calculate_win_rates(recent)

---
# Findings & Notes

*Add your observations here as you explore the data.*

- 
- 
- 