# Top Traders Market Analysis

Comprehensive analysis of top 100 traders and their market participation patterns.

## Features:
- Top 100 traders by volume with full market details
- Market participation visualizations
- Trading pattern analysis
- Market concentration charts
- Category distribution (when available)
- Time-based activity heatmaps

In [26]:
from __future__ import annotations

import warnings
from pathlib import Path
import sys

import polars as pl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime

# Suppress every warning category for the rest of the session to keep cell
# output readable.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below — confirm that is intended.
warnings.filterwarnings('ignore')

# Configure plotting
plt.style.use('default')  # Use default instead of seaborn-v0_8 for compatibility
sns.set_palette('husl')

# Add project root to path
# The root is resolved as the parent of the current working directory, so this
# presumably expects the notebook to be run from a subdirectory of the project
# — TODO confirm.
PROJECT_ROOT = Path('..').resolve()
if str(PROJECT_ROOT) not in sys.path:
    # Prepend so modules under the project root take precedence on import.
    sys.path.insert(0, str(PROJECT_ROOT))

# Imported only after the sys.path adjustment above; presumably poly_utils
# lives under PROJECT_ROOT and is not importable otherwise — verify.
from poly_utils import get_markets, PLATFORM_WALLETS

print("✓ Imports loaded")

✓ Imports loaded


## 1. Load Data

In [27]:
# --- Market metadata ---------------------------------------------------------
markets_df = get_markets(
    main_file=str(PROJECT_ROOT / 'markets.csv'),
    missing_file=str(PROJECT_ROOT / 'missing_markets.csv'),
)
print(f"Loaded {len(markets_df):,} markets")
print(f"Market columns: {markets_df.columns}")

# The trades table keys markets by `market_id`; give the metadata the same
# key name so later joins can use a single `on=` column.
markets_df = markets_df.rename({'id': 'market_id'})

# --- Trades ------------------------------------------------------------------
TRADES_FILE = PROJECT_ROOT / 'processed' / 'latest_10000000_trades.parquet'

if not TRADES_FILE.exists():
    # Fallback path: no parquet snapshot, read the full CSV instead.
    print("⚠ Latest trades parquet not found, loading full CSV (this may take time)...")
    trades = pl.scan_csv(PROJECT_ROOT / 'processed' / 'trades.csv').collect(streaming=True)
else:
    trades = pl.read_parquet(TRADES_FILE)
    print(f"\nLoaded {len(trades):,} trades from parquet")

# Parse the timestamp column only when it was loaded as a string column.
if trades.schema['timestamp'] == pl.Utf8:
    trades = trades.with_columns(pl.col('timestamp').str.to_datetime())

# Quick sanity overview of the loaded trades.
print(f"Date range: {trades.get_column('timestamp').min()} to {trades.get_column('timestamp').max()}")
print(f"Total volume: ${trades.get_column('usd_amount').sum():,.2f}")
print(f"Unique traders: {trades.get_column('maker').n_unique():,}")
print(f"Unique markets: {trades.get_column('market_id').n_unique():,}")

Loaded 176351 markets from /home/junel/random-walk-studio/polymarket/polymarket/markets.csv
Combined total: 175830 unique markets (sorted by createdAt)
Loaded 175,830 markets
Market columns: ['createdAt', 'id', 'question', 'answer1', 'answer2', 'neg_risk', 'market_slug', 'token1', 'token2', 'condition_id', 'volume', 'ticker', 'closedTime']

Loaded 10,000,000 trades from parquet
Date range: 2025-11-09 05:16:03 to 2025-11-16 05:23:41
Total volume: $846,886,996.22
Unique traders: 222,946
Unique markets: 18,920


## 2. Get Top 1000 Traders

In [28]:
# Build a per-maker activity profile, drop platform wallets, and keep the
# 1000 makers with the highest traded USD volume.
top_traders = (
    trades
    .group_by('maker')
    .agg(
        num_trades=pl.len(),
        total_volume=pl.col('usd_amount').sum(),
        avg_trade_size=pl.col('usd_amount').mean(),
        median_trade_size=pl.col('usd_amount').median(),
        unique_markets=pl.col('market_id').n_unique(),
        # Share of this maker's trades whose direction is 'BUY'.
        buy_ratio=(pl.col('maker_direction') == 'BUY').mean(),
        first_trade=pl.col('timestamp').min(),
        last_trade=pl.col('timestamp').max(),
    )
    .with_columns(
        active_period=pl.col('last_trade') - pl.col('first_trade'),
        volume_per_trade=pl.col('total_volume') / pl.col('num_trades'),
    )
    # Exclude platform wallets
    .filter(pl.col('maker').is_in(PLATFORM_WALLETS).not_())
    .sort('total_volume', descending=True)
    .head(1000)
)

print("=== Top 1000 Traders Summary ===")
print(
    top_traders
    .select('maker', 'total_volume', 'num_trades', 'unique_markets', 'buy_ratio')
    .head(20)
)

# Aggregate figures across the selected traders.
print("\n=== Aggregate Statistics for Top 1000 ===")
print(f"Total Volume: ${top_traders.get_column('total_volume').sum():,.2f}")
print(f"Total Trades: {top_traders.get_column('num_trades').sum():,}")
print(f"Avg Volume per Trader: ${top_traders.get_column('total_volume').mean():,.2f}")
print(f"Avg Markets per Trader: {top_traders.get_column('unique_markets').mean():.1f}")
print(f"Avg Buy Ratio: {top_traders.get_column('buy_ratio').mean():.2%}")

=== Top 1000 Traders Summary ===
shape: (20, 5)
┌─────────────────────────────────┬──────────────┬────────────┬────────────────┬───────────┐
│ maker                           ┆ total_volume ┆ num_trades ┆ unique_markets ┆ buy_ratio │
│ ---                             ┆ ---          ┆ ---        ┆ ---            ┆ ---       │
│ str                             ┆ f64          ┆ u32        ┆ u32            ┆ f64       │
╞═════════════════════════════════╪══════════════╪════════════╪════════════════╪═══════════╡
│ 0x31519628fb5e5aa559d4ba27aa12… ┆ 1.7906e7     ┆ 9238       ┆ 278            ┆ 0.97467   │
│ 0x3657862e57070b82a289b5887ec9… ┆ 7.2780e6     ┆ 871        ┆ 16             ┆ 0.995408  │
│ 0xed88d69d689f3e2f6d1f77b2e35d… ┆ 7.0181e6     ┆ 2986       ┆ 42             ┆ 0.985934  │
│ 0x0d3b10b8eac8b089c6e4a695e65d… ┆ 6.7161e6     ┆ 7112       ┆ 33             ┆ 0.833521  │
│ 0xd218e474776403a330142299f779… ┆ 5.5820e6     ┆ 50367      ┆ 2141           ┆ 0.99996   │
│ 0xe8dd7741ccb1235095

In [29]:
# Show the selected maker addresses as a plain Python list.
top_traders.get_column('maker').to_list()

['0x31519628fb5e5aa559d4ba27aa1248810b9f0977',
 '0x3657862e57070b82a289b5887ec943a7c2166b14',
 '0xed88d69d689f3e2f6d1f77b2e35d089c581df3c4',
 '0x0d3b10b8eac8b089c6e4a695e65d8e044167c46b',
 '0xd218e474776403a330142299f7796e8ba32eb5c9',
 '0xe8dd7741ccb12350957ec71e9ee332e0d1e6ec86',
 '0x16b29c50f2439faf627209b2ac0c7bbddaa8a881',
 '0xee613b3fc183ee44f9da9c05f53e2da107e3debf',
 '0x2a923d2f6edbc894e76357104e654b27a0d9071e',
 '0x847b8fdacd157191d47c7ccb2aad5c67bbd21027',
 '0xa676582530fb1a63502d5f5f5db9fb8d1449e38b',
 '0x751a2b86cab503496efd325c8344e10159349ea1',
 '0x39871d481a332c4fd8f19e8ae35c9e17438b259e',
 '0x204f72f35326db932158cba6adff0b9a1da95e14',
 '0x507e52ef684ca2dd91f90a9d26d149dd3288beae',
 '0xee00ba338c59557141789b127927a55f5cc5cea1',
 '0xc3c3b3ef304ddbea39fa2246e683a71da5d0eec8',
 '0x2f09642639aedd6ced432519c1a86e7d52034632',
 '0x76062e7bbfc0fb783487ff884960c4bc17962836',
 '0x21504551452f4c4b67a1fbee6ba743a611cdba16',
 '0xb744f56635b537e859152d14b022af5afe485210',
 '0x83b9f9e2d

## 3. Get Markets by Trader (with Full Market Metadata)

In [30]:
def get_trader_markets_with_details(trades_df: pl.DataFrame, markets_df: pl.DataFrame, trader_address: str) -> pl.DataFrame:
    """
    Summarize one trader's activity per market and attach the market metadata.

    Rows of ``trades_df`` whose ``maker`` equals the lower-cased
    ``trader_address`` are aggregated per ``market_id`` and then left-joined
    with ``markets_df``, so markets absent from the metadata are still
    returned (with null metadata columns).

    Args:
        trades_df: Trades table; the columns ``maker``, ``market_id``,
            ``usd_amount``, ``maker_direction``, ``price`` and ``timestamp``
            are read.
        markets_df: Market metadata containing a ``market_id`` join key.
        trader_address: Address of the trader. Only this argument is
            lower-cased before the comparison; the ``maker`` column is
            compared as stored (presumably already lower case — verify).

    Returns:
        DataFrame with one row per traded market holding ``num_trades``,
        ``total_volume``, ``avg_trade_size``, ``buy_ratio``, ``avg_price``,
        ``first_trade`` and ``last_trade`` plus all columns of
        ``markets_df``, sorted by ``total_volume`` in descending order.
    """
    per_market_stats = (
        trades_df
        .filter(pl.col('maker') == trader_address.lower())
        .group_by('market_id')
        .agg(
            pl.len().alias('num_trades'),
            pl.col('usd_amount').sum().alias('total_volume'),
            pl.col('usd_amount').mean().alias('avg_trade_size'),
            pl.col('maker_direction').eq('BUY').mean().alias('buy_ratio'),
            pl.col('price').mean().alias('avg_price'),
            pl.col('timestamp').min().alias('first_trade'),
            pl.col('timestamp').max().alias('last_trade'),
        )
    )

    # Join ALL market metadata
    # markets_df columns: createdAt, market_id, question, answer1, answer2, neg_risk,
    #                     market_slug, token1, token2, condition_id, volume, ticker, closedTime
    #
    # Sort AFTER the join: polars does not guarantee that a join preserves the
    # row order of its left input unless `maintain_order` is set, so sorting
    # beforehand could silently lose the volume ranking.
    return (
        per_market_stats
        .join(markets_df, on='market_id', how='left')
        .sort('total_volume', descending=True)
    )


# Example: drill into the highest-volume trader (first row of top_traders)
top_trader_address = top_traders.get_column('maker')[0]
print(f"\n=== Markets for Top Trader: {top_trader_address[:16]}... ===")

trader_markets = get_trader_markets_with_details(
    trades_df=trades,
    markets_df=markets_df,
    trader_address=top_trader_address,
)

# Only the most informative columns are shown
display_cols = ['question', 'num_trades', 'total_volume', 'buy_ratio', 'ticker', 'volume']
print(trader_markets.select(display_cols).head(20))

print(f"\nTotal markets traded: {trader_markets.height}")
print(f"Total volume in these markets: ${trader_markets.get_column('total_volume').sum():,.2f}")


=== Markets for Top Trader: 0x31519628fb5e5a... ===
shape: (20, 6)
┌─────────────────────────────────┬────────────┬───────────────┬───────────┬──────────────────────────────┬───────────────┐
│ question                        ┆ num_trades ┆ total_volume  ┆ buy_ratio ┆ ticker                       ┆ volume        │
│ ---                             ┆ ---        ┆ ---           ┆ ---       ┆ ---                          ┆ ---           │
│ str                             ┆ u32        ┆ f64           ┆ f64       ┆ str                          ┆ f64           │
╞═════════════════════════════════╪════════════╪═══════════════╪═══════════╪══════════════════════════════╪═══════════════╡
│ Devils vs. Capitals             ┆ 119        ┆ 1.5959e6      ┆ 0.991597  ┆ nhl-nj-wsh-2025-11-15        ┆ 2.4557e6      │
│ LoL: T1 vs KT Rolster (BO5)     ┆ 1887       ┆ 1.4140e6      ┆ 1.0       ┆ lol-t1-kt-2025-11-09         ┆ 2.8233e7      │
│ LoL: T1 vs KT Rolster - Game 3… ┆ 383        ┆ 858754.372994 ┆

## 4. Analyze All Top 1000 Traders' Market Participation

In [31]:
# Get all trades from top 1000 traders with market details
top_1000_addresses = top_traders['maker'].to_list()

top_1000_trades = (
    trades
    .filter(pl.col('maker').is_in(top_1000_addresses))
)

# Aggregate by market for top 1000 traders
top_1000_market_stats = (
    top_1000_trades
    .group_by('market_id')
    .agg(
        pl.col('maker').n_unique().alias('num_traders'),
        pl.len().alias('num_trades'),
        pl.col('usd_amount').sum().alias('total_volume'),
        pl.col('usd_amount').mean().alias('avg_trade_size'),
    )
    .sort('total_volume', descending=True)
)

# Join market metadata, then sort again: polars does not guarantee that a join
# keeps the row order of its left input unless `maintain_order` is set, and the
# "Top 20" listing below relies on the frame being ranked by volume.
top_1000_markets = (
    top_1000_market_stats
    .join(markets_df, on='market_id', how='left')
    .sort('total_volume', descending=True)
)

print("=== Top 20 Markets by Top 1000 Traders ===")
print(top_1000_markets.select(['question', 'num_traders', 'num_trades', 'total_volume', 'ticker']).head(20))

print("\n=== Summary ===")
print(f"Unique markets traded by top 1000: {len(top_1000_markets):,}")
print(f"Total volume from top 1000: ${top_1000_market_stats['total_volume'].sum():,.2f}")
print(f"Avg traders per market: {top_1000_market_stats['num_traders'].mean():.1f}")

=== Top 20 Markets by Top 1000 Traders ===
shape: (20, 5)
┌─────────────────────────────────┬─────────────┬────────────┬──────────────┬─────────────────────────────────┐
│ question                        ┆ num_traders ┆ num_trades ┆ total_volume ┆ ticker                          │
│ ---                             ┆ ---         ┆ ---        ┆ ---          ┆ ---                             │
│ str                             ┆ u32         ┆ u32        ┆ f64          ┆ str                             │
╞═════════════════════════════════╪═════════════╪════════════╪══════════════╪═════════════════════════════════╡
│ LoL: T1 vs KT Rolster (BO5)     ┆ 186         ┆ 20155      ┆ 1.5318e7     ┆ lol-t1-kt-2025-11-09            │
│ Will the government shutdown e… ┆ 219         ┆ 19939      ┆ 8.8430e6     ┆ what-day-will-the-government-s… │
│ Eagles vs. Packers              ┆ 180         ┆ 10125      ┆ 7.0281e6     ┆ nfl-phi-gb-2025-11-10           │
│ Fed increases interest rates b… ┆ 59        