# Parlay User Addresses

This notebook loads parlay markets, filters trades for those markets, and extracts the unique user addresses (maker + taker).

In [25]:
from pathlib import Path
import polars as pl

In [26]:
# Locate parlay markets list
# Note: Running from notebooks/ directory, so parent is project root
project_root = Path.cwd().parent

parlay_candidates = [
    project_root / "processed" / "parlay_markets_api.csv",
    project_root / "processed" / "parlay_markets_manual.csv",
]

parlay_path = next((p for p in parlay_candidates if p.exists()), None)
if parlay_path is None:
    raise FileNotFoundError(
        "No parlay markets CSV found. Run scripts/fetch_parlays_from_api.py first."
    )

print(f"Loading parlay markets from {parlay_path}...")
parlay_df = pl.read_csv(parlay_path, schema_overrides={'token1': pl.Utf8, 'token2': pl.Utf8})
parlay_ids = set(parlay_df.select(pl.col("id")).to_series().to_list())
print(f"✓ Loaded {len(parlay_ids):,} parlay market IDs")
list(parlay_ids)[:10]

Loading parlay markets from /home/junel/random-walk-studio/polymarket/polymarket/processed/parlay_markets_api.csv...
✓ Loaded 135 parlay market IDs


[247814,
 549898,
 531470,
 549902,
 592915,
 525854,
 539681,
 506408,
 587830,
 518211]

In [27]:
# Locate trades dataset (prefer latest 10M parquet, then full parquet, then CSV)
trade_candidates = [
    project_root / "processed" / "latest_10000000_trades.parquet",
    project_root / "processed" / "trades.parquet",
    project_root / "processed" / "trades.csv",
]

trades_path = next((p for p in trade_candidates if p.exists()), None)
if trades_path is None:
    raise FileNotFoundError("No trades dataset found in processed/.")

print(f"Using trades from: {trades_path}")
print(f"File size: {trades_path.stat().st_size / (1024**3):.1f} GB")

Using trades from: /home/junel/random-walk-studio/polymarket/polymarket/processed/latest_10000000_trades.parquet
File size: 0.5 GB


In [28]:
# Filter trades to parlay markets and extract addresses
print("\nFiltering trades for parlay markets...")

if trades_path.suffix == ".parquet":
    trades = pl.scan_parquet(trades_path)
else:
    trades = pl.scan_csv(trades_path)

# Filter to only parlay markets
parlay_trades = trades.filter(pl.col("market_id").is_in(parlay_ids))

# Count parlay trades
num_parlay_trades = parlay_trades.select(pl.len()).collect().item()
print(f"✓ Found {num_parlay_trades:,} parlay trades")

# Extract unique addresses (both makers and takers)
print("\nExtracting unique user addresses...")
addresses = (
    parlay_trades
    .select([pl.col("maker"), pl.col("taker")])
    .unpivot()
    .select(pl.col("value").alias("address"))
    .unique()
    .collect()
)

print(f"✓ Found {len(addresses):,} unique addresses")
addresses.head(10)


Filtering trades for parlay markets...
✓ Found 7,608 parlay trades

Extracting unique user addresses...
✓ Found 1,676 unique addresses


address
str
"""0x77845807428ce854c33d15ff4c4a…"
"""0xfff14760a8ffa9cc9023fa9a2384…"
"""0xdd5db012cb168a94983b6b751f3f…"
"""0x1ecf3c2cc1af8e5c59dba99c15cd…"
"""0xe27814449a8e7eeb34b5c0d6e1f6…"
"""0x18e32f819d7b10db79e9509fc6f9…"
"""0xc534b3ad7e0d06943a86796f13ad…"
"""0x7901190bc328f832d23732a26e50…"
"""0x0a8753176b47c23b700858ebe7cf…"
"""0x59b010534bd9e0e6a9996b66d43a…"


In [29]:
# Save addresses to CSV
addresses_path = project_root / "processed" / "parlay_user_addresses.csv"
addresses.write_csv(addresses_path)

print(f"\n✅ Saved {len(addresses):,} addresses to:")
print(f"   {addresses_path}")


✅ Saved 1,676 addresses to:
   /home/junel/random-walk-studio/polymarket/polymarket/processed/parlay_user_addresses.csv


In [30]:
# Aggregate USD volume per address (maker + taker) and sort descending
print("\nAggregating USD volume per address...")
maker_vol = parlay_trades.select(
    pl.col("maker").alias("address"),
    pl.col("usd_amount")
)
taker_vol = parlay_trades.select(
    pl.col("taker").alias("address"),
    pl.col("usd_amount")
)

volume_df = (
    pl.concat([maker_vol, taker_vol])
    .collect()
    .group_by("address")
    .agg(pl.col("usd_amount").sum().alias("total_usd"))
    .sort("total_usd", descending=True)
)

volume_path = project_root / "processed" / "parlay_user_volume.csv"
volume_df.write_csv(volume_path)

print(f"✅ Aggregated {len(volume_df):,} addresses with volume")
print(f"\nTop 10 by volume:")
print(f"Saved to: {volume_path}")
volume_df.head(10)


Aggregating USD volume per address...
✅ Aggregated 1,676 addresses with volume

Top 10 by volume:
Saved to: /home/junel/random-walk-studio/polymarket/polymarket/processed/parlay_user_volume.csv


address,total_usd
str,f64
"""0x4bfb41d5b3570defd03c39a9a4d8…",252510.107794
"""0xf419573877439e31131f83aba0be…",131021.716419
"""0xfcf2378f20cf408d077c21e73127…",49638.060419
"""0x12d6cccfc7470a3f4bafc53599a4…",40583.191738
"""0x662ce90c51d613a2975a536272e4…",31455.1913
"""0xc5d563a36ae78145c45a50134d48…",25011.101763
"""0x06e8cb40376ff9f06d926e71a074…",24127.636239
"""0x205e652dc6014a63512a1402684b…",19804.092271
"""0x3a55b0c45449b955e43ec10734bd…",19002.334685
"""0xc02147dee42356b7a4edbb1c35ac…",15328.65
