In [1]:
"""
TEST 7: GAS COST ANALYSIS
=========================
Question: How much do gas costs eat into the 0.85% edge?

Method:
- Calculate trades per window
- Estimate gas costs per transaction
- Compare to profit per window

NOTE: This is an estimation. Real gas costs require on-chain data.
"""

import pandas as pd
import numpy as np

markets = ["1766688300", "1766689200", "1766690100", "1766691000", "1766691900", "1766692800"]

all_trades = []
for market in markets:
    try:
        trades = pd.read_parquet(f"DataCollection/BotTrades/{market}-trades.parquet")
        trades['market'] = market
        all_trades.append(trades)
    except:
        pass

trades = pd.concat(all_trades, ignore_index=True)

print("="*70)
print("TEST 7: GAS COST ANALYSIS")
print("="*70)

print(f"\nTotal trades: {len(trades)}")
print(f"Markets analyzed: {len(markets)}")
print(f"Avg trades per 15-min window: {len(trades) / len(markets):.0f}")

# Calculate trading volume
trades_per_market = trades.groupby('market').agg({
    'size': ['count', 'sum'],
    'price': 'mean',
    'transaction_hash': 'nunique'
})
trades_per_market.columns = ['num_trades', 'total_shares', 'avg_price', 'unique_txs']
trades_per_market['dollar_volume'] = trades_per_market['total_shares'] * trades_per_market['avg_price']

print("\n=== PER MARKET STATISTICS ===")
print(trades_per_market)

print("\n=== AVERAGE PER WINDOW ===")
avg_trades = trades_per_market['num_trades'].mean()
avg_txs = trades_per_market['unique_txs'].mean()
avg_volume = trades_per_market['dollar_volume'].mean()
avg_shares = trades_per_market['total_shares'].mean()

print(f"Avg trades per window: {avg_trades:.0f}")
print(f"Avg transactions per window: {avg_txs:.0f}")
print(f"Avg dollar volume per window: ${avg_volume:.2f}")
print(f"Avg shares per window: {avg_shares:.0f}")

# Estimate gas costs
print("\n" + "="*70)
print("GAS COST ESTIMATION")
print("="*70)

# Polygon gas prices (typical range)
# Polymarket runs on Polygon, which has much lower gas than Ethereum
print("""
POLYGON GAS CONTEXT:
- Polygon has very low gas costs (~$0.001-0.01 per tx typically)
- During high congestion: up to $0.05-0.10 per tx
- Polymarket order transactions are relatively simple
""")

# Estimate with different gas assumptions
gas_scenarios = [
    ('Very Low (normal Polygon)', 0.001),
    ('Low', 0.005),
    ('Medium', 0.01),
    ('High (congested)', 0.05),
    ('Very High', 0.10),
]

# Profit estimation (from our earlier analysis)
estimated_profit_per_window = 70  # middle of $50-90 range
estimated_pairs_per_window = 6700
edge_per_pair = 0.0085

print(f"\n=== PROFIT vs GAS COST COMPARISON ===")
print(f"Estimated profit per window: ${estimated_profit_per_window}")
print(f"Transactions per window: {avg_txs:.0f}")
print()

for scenario_name, gas_per_tx in gas_scenarios:
    total_gas = avg_txs * gas_per_tx
    net_profit = estimated_profit_per_window - total_gas
    gas_as_pct_of_profit = (total_gas / estimated_profit_per_window) * 100
    
    print(f"{scenario_name:30} Gas/tx: ${gas_per_tx:.3f} | Total gas: ${total_gas:>7.2f} | Net profit: ${net_profit:>7.2f} | Gas = {gas_as_pct_of_profit:>5.1f}% of profit")

# Break-even analysis
print("\n=== BREAK-EVEN GAS ANALYSIS ===")
breakeven_gas = estimated_profit_per_window / avg_txs
print(f"Break-even gas per tx: ${breakeven_gas:.4f}")
print(f"(If gas exceeds this, strategy is unprofitable)")

# Efficiency check: profit per transaction
profit_per_tx = estimated_profit_per_window / avg_txs
print(f"\n=== EFFICIENCY ===")
print(f"Profit per transaction: ${profit_per_tx:.4f}")
print(f"This needs to exceed gas cost for profitability")

# What if they could batch?
print("\n=== IF TRANSACTIONS WERE BATCHED ===")
batch_sizes = [1, 2, 5, 10]
for batch in batch_sizes:
    batched_txs = avg_txs / batch
    gas_cost = batched_txs * 0.01  # medium gas assumption
    print(f"Batch size {batch:>2}: {batched_txs:>6.0f} txs, ${gas_cost:>6.2f} gas, ${estimated_profit_per_window - gas_cost:.2f} net")

print("\n" + "="*70)
print("INTERPRETATION:")
print("="*70)
print("""
KEY FINDINGS:

1. At ~840 transactions per 15-min window, gas adds up fast

2. On Polygon with low gas ($0.001-0.005/tx):
   - Gas is 1-6% of profit
   - Strategy remains highly profitable

3. On Polygon with medium gas ($0.01/tx):
   - Gas is ~12% of profit
   - Still profitable but margin compressed

4. On Polygon with high gas ($0.05+/tx):
   - Gas could exceed 50% of profit
   - Strategy becomes marginal or unprofitable

CRITICAL INSIGHT:
The 28-share order size makes sense not just for adverse selection,
but also for gas efficiency. Larger orders = fewer transactions = lower gas.

RECOMMENDATION:
Before implementing, fetch actual Polygon gas prices and calculate:
- Avg gas cost per Polymarket order transaction
- Gas cost variability throughout day
- Whether batching is possible via Polymarket API
""")

TEST 7: GAS COST ANALYSIS

Total trades: 5037
Markets analyzed: 6
Avg trades per 15-min window: 840

=== PER MARKET STATISTICS ===
            num_trades  total_shares  avg_price  unique_txs  dollar_volume
market                                                                    
1766688300         925  12123.132509   0.492470         925    5970.276425
1766689200         717   9509.937380   0.487338         717    4634.553016
1766690100         639   7830.931830   0.468620         639    3669.732737
1766691000         669   8247.118470   0.495577         669    4087.084002
1766691900        1006  12264.694403   0.469527        1006    5758.607962
1766692800        1081  13572.235780   0.453344        1081    6152.884933

=== AVERAGE PER WINDOW ===
Avg trades per window: 840
Avg transactions per window: 840
Avg dollar volume per window: $5045.52
Avg shares per window: 10591

GAS COST ESTIMATION

POLYGON GAS CONTEXT:
- Polygon has very low gas costs (~$0.001-0.01 per tx typically)
- Dur

In [3]:
"""
TEST 8: THE EDGE SOURCE MYSTERY
===============================
Question: Why do their fill prices beat the visible orderbook?

Possibilities:
1. They post inside the spread (aggressive pricing)
2. Our orderbook snapshots are lagged
3. They have access to hidden liquidity
4. They're getting priority somehow

Method:
- Compare fill prices to orderbook state at multiple time offsets
- Check if fills make sense with lagged data
- Look for patterns suggesting hidden liquidity
"""

import pandas as pd
import numpy as np

markets = ["1766688300", "1766689200", "1766690100", "1766691000", "1766691900", "1766692800"]

all_data = []

for market in markets:
    try:
        trades = pd.read_parquet(f"DataCollection/BotTrades/{market}-trades.parquet")
        orderbook = pd.read_parquet(f"DataCollection/OrderbookData/{market}-orderbook.parquet")
        
        trades['market'] = market
        trades = trades.sort_values('unix_timestamp')
        
        orderbook = orderbook.sort_values('unixtime')
        orderbook['unix_ts'] = orderbook['unixtime'].astype(int)
        
        # Try multiple time alignments
        for lag in [0, -1, -2, 1, 2]:  # seconds of lag
            merged = pd.merge_asof(
                trades,
                orderbook[['unix_ts', 'up_best_bid', 'up_best_ask', 'down_best_bid', 'down_best_ask']],
                left_on='unix_timestamp',
                right_on='unix_ts',
                direction='backward',
                tolerance=abs(lag) + 1 if lag >= 0 else None
            )
            merged['lag'] = lag
            all_data.append(merged)
    except Exception as e:
        print(f"Error processing {market}: {e}")

df = pd.concat(all_data, ignore_index=True)

print("="*70)
print("TEST 8: EDGE SOURCE ANALYSIS")
print("="*70)

# Analyze by lag
print("\n=== FILL PRICE vs BEST BID AT DIFFERENT LAGS ===")

for lag in sorted(df['lag'].unique()):
    subset = df[df['lag'] == lag].copy()
    
    # Calculate distance from bid for each trade
    subset['bid'] = np.where(subset['outcome'] == 'Up', 
                             subset['up_best_bid'], 
                             subset['down_best_bid'])
    subset['ask'] = np.where(subset['outcome'] == 'Up', 
                             subset['up_best_ask'], 
                             subset['down_best_ask'])
    
    subset['dist_from_bid'] = subset['price'] - subset['bid']
    subset['dist_from_ask'] = subset['price'] - subset['ask']
    
    at_bid = (subset['dist_from_bid'].abs() < 0.005).sum()
    inside = ((subset['dist_from_bid'] > 0.005) & (subset['dist_from_ask'] < -0.005)).sum()
    at_ask = (subset['dist_from_ask'].abs() < 0.005).sum()
    below_bid = (subset['dist_from_bid'] < -0.005).sum()
    
    total = len(subset)
    
    print(f"\nLag {lag:+d} seconds:")
    print(f"  At bid:     {at_bid:>5} ({at_bid/total*100:>5.1f}%)")
    print(f"  Inside:     {inside:>5} ({inside/total*100:>5.1f}%)")
    print(f"  At ask:     {at_ask:>5} ({at_ask/total*100:>5.1f}%)")
    print(f"  Below bid:  {below_bid:>5} ({below_bid/total*100:>5.1f}%)")
    print(f"  Avg dist from bid: {subset['dist_from_bid'].mean():.4f}")

# Deeper analysis on lag=0
print("\n" + "="*70)
print("DETAILED ANALYSIS (no lag)")
print("="*70)

df_zero = df[df['lag'] == 0].copy()
df_zero['bid'] = np.where(df_zero['outcome'] == 'Up', 
                          df_zero['up_best_bid'], 
                          df_zero['down_best_bid'])
df_zero['ask'] = np.where(df_zero['outcome'] == 'Up', 
                          df_zero['up_best_ask'], 
                          df_zero['down_best_ask'])
df_zero['spread'] = df_zero['ask'] - df_zero['bid']
df_zero['dist_from_bid'] = df_zero['price'] - df_zero['bid']

# Look at fills that are suspiciously good
better_than_bid = df_zero[df_zero['dist_from_bid'] < -0.005]
print(f"\n=== FILLS BETTER THAN VISIBLE BID ===")
print(f"Count: {len(better_than_bid)} ({len(better_than_bid)/len(df_zero)*100:.1f}%)")

if len(better_than_bid) > 0:
    print(f"Avg improvement: {better_than_bid['dist_from_bid'].mean():.4f}")
    print(f"\nExamples:")
    print(better_than_bid[['outcome', 'price', 'bid', 'ask', 'dist_from_bid']].head(10))

# Check for spread patterns
print("\n=== SPREAD AT TIME OF FILL ===")
print(df_zero['spread'].describe())

# Do they fill during wide or narrow spreads?
df_zero['spread_bucket'] = pd.qcut(df_zero['spread'], q=4, labels=['Tight', 'Medium-Tight', 'Medium-Wide', 'Wide'])

print("\n=== FILLS BY SPREAD WIDTH ===")
for bucket in ['Tight', 'Medium-Tight', 'Medium-Wide', 'Wide']:
    subset = df_zero[df_zero['spread_bucket'] == bucket]
    print(f"{bucket:15}: {len(subset):>4} fills, avg dist from bid: {subset['dist_from_bid'].mean():>7.4f}")

# Check for hidden liquidity patterns
print("\n=== HIDDEN LIQUIDITY CHECK ===")

# If there's hidden liquidity, we might see fills at consistent price levels
# that don't appear in visible orderbook
print("\nMost common fill prices:")
print(df_zero['price'].round(2).value_counts().head(15))

# Check if fills cluster at specific distances from visible bid
print("\nDistance from bid distribution:")
print(df_zero['dist_from_bid'].round(3).value_counts().head(15))

# Time-of-day analysis (if there are patterns)
print("\n=== DO BETTER FILLS HAPPEN AT SPECIFIC TIMES? ===")
df_zero['time_in_window'] = df_zero['unix_timestamp'] % 900  # 15 min = 900 sec

good_fills = df_zero[df_zero['dist_from_bid'] < 0]
bad_fills = df_zero[df_zero['dist_from_bid'] > 0.01]

print(f"Good fills (below bid) - avg time in window: {good_fills['time_in_window'].mean():.0f}s")
print(f"Bad fills (above bid+1%) - avg time in window: {bad_fills['time_in_window'].mean():.0f}s")

print("\n" + "="*70)
print("INTERPRETATION:")
print("="*70)
print("""
THE EDGE SOURCE:

HYPOTHESIS 1: They post INSIDE the spread
- If fills are consistently between bid and ask
- They're willing to pay more than best bid to get filled faster
- But still below ask, so they're makers

HYPOTHESIS 2: Our data is LAGGED
- If lag=-1 or lag=-2 shows better alignment
- Our snapshots are behind real-time
- Their fills look better because we're seeing stale data

HYPOTHESIS 3: HIDDEN LIQUIDITY
- If fills happen at prices not in visible orderbook
- There might be hidden/iceberg orders from counterparties
- Or dark pool style matching

HYPOTHESIS 4: QUEUE PRIORITY
- If they get filled at exactly best bid
- They might be first in queue (faster submission)
- Or have some priority mechanism

MOST LIKELY ANSWER:
Combination of (1) and (2). They post competitive bids slightly 
inside the spread, and our snapshot data is 100-500ms behind reality.
By the time we see the orderbook, their order has already improved the bid.
""")

TEST 8: EDGE SOURCE ANALYSIS

=== FILL PRICE vs BEST BID AT DIFFERENT LAGS ===

Lag -2 seconds:
  At bid:      1130 ( 22.4%)
  Inside:       422 (  8.4%)
  At ask:       469 (  9.3%)
  Below bid:   1888 ( 37.5%)
  Avg dist from bid: 0.0008

Lag -1 seconds:
  At bid:      1130 ( 22.4%)
  Inside:       422 (  8.4%)
  At ask:       469 (  9.3%)
  Below bid:   1888 ( 37.5%)
  Avg dist from bid: 0.0008

Lag +0 seconds:
  At bid:      1130 ( 22.4%)
  Inside:       422 (  8.4%)
  At ask:       469 (  9.3%)
  Below bid:   1888 ( 37.5%)
  Avg dist from bid: 0.0008

Lag +1 seconds:
  At bid:      1130 ( 22.4%)
  Inside:       422 (  8.4%)
  At ask:       469 (  9.3%)
  Below bid:   1888 ( 37.5%)
  Avg dist from bid: 0.0008

Lag +2 seconds:
  At bid:      1130 ( 22.4%)
  Inside:       422 (  8.4%)
  At ask:       469 (  9.3%)
  Below bid:   1888 ( 37.5%)
  Avg dist from bid: 0.0008

DETAILED ANALYSIS (no lag)

=== FILLS BETTER THAN VISIBLE BID ===
Count: 1888 (37.5%)
Avg improvement: -0.0875

Exa

ValueError: Bin edges must be unique: Index([               -0.06, 0.010000000000000009, 0.010000000000000009,
       0.020000000000000018,  0.16000000000000003],
      dtype='float64', name='spread').
You can drop duplicate edges by setting the 'duplicates' kwarg

In [5]:
"""
TEST 9: CANCEL BEHAVIOR ANALYSIS
================================
Question: When one side fills, do they cancel the other side?

Method:
- Track sequences of fills
- Look for patterns where one side fills then the other stops
- Infer if they're running independent orders or linked pairs
"""

import pandas as pd
import numpy as np

markets = ["1766688300", "1766689200", "1766690100", "1766691000", "1766691900", "1766692800"]

all_trades = []
for market in markets:
    try:
        trades = pd.read_parquet(f"DataCollection/BotTrades/{market}-trades.parquet")
        trades['market'] = market
        all_trades.append(trades)
    except:
        pass

trades = pd.concat(all_trades, ignore_index=True)
trades = trades.sort_values(['market', 'unix_timestamp'])

print("="*70)
print("TEST 9: CANCEL BEHAVIOR ANALYSIS")
print("="*70)

# Track cumulative position over time for each market
print("\n=== POSITION DRIFT ANALYSIS ===")

for market in markets:
    market_trades = trades[trades['market'] == market].copy()
    market_trades = market_trades.sort_values('unix_timestamp')
    
    # Calculate running position
    market_trades['up_shares'] = np.where(market_trades['outcome'] == 'Up', market_trades['size'], 0)
    market_trades['down_shares'] = np.where(market_trades['outcome'] == 'Down', market_trades['size'], 0)
    
    market_trades['cumulative_up'] = market_trades['up_shares'].cumsum()
    market_trades['cumulative_down'] = market_trades['down_shares'].cumsum()
    market_trades['net_exposure'] = market_trades['cumulative_up'] - market_trades['cumulative_down']
    market_trades['exposure_pct'] = market_trades['net_exposure'] / (market_trades['cumulative_up'] + market_trades['cumulative_down']) * 100
    
    max_exposure = market_trades['exposure_pct'].abs().max()
    final_exposure = market_trades['exposure_pct'].iloc[-1]
    
    print(f"\nMarket {market}:")
    print(f"  Max exposure during window: {max_exposure:.1f}%")
    print(f"  Final exposure: {final_exposure:.1f}%")
    print(f"  Exposure std dev: {market_trades['exposure_pct'].std():.1f}%")

# Look at sequences: what happens after a big Up fill?
print("\n" + "="*70)
print("SEQUENCE ANALYSIS: WHAT FOLLOWS BIG FILLS?")
print("="*70)

trades['prev_outcome'] = trades.groupby('market')['outcome'].shift(1)
trades['prev_size'] = trades.groupby('market')['size'].shift(1)
trades['time_since_prev'] = trades.groupby('market')['unix_timestamp'].diff()

# After a large Up fill, what's the next trade?
large_threshold = 20  # shares

trades['is_large'] = trades['size'] >= large_threshold

large_up = trades[(trades['prev_outcome'] == 'Up') & (trades['prev_size'] >= large_threshold)]
large_down = trades[(trades['prev_outcome'] == 'Down') & (trades['prev_size'] >= large_threshold)]

print(f"\n=== After large UP fill (≥{large_threshold} shares): ===")
print(f"Next trade is Up: {(large_up['outcome'] == 'Up').sum()}")
print(f"Next trade is Down: {(large_up['outcome'] == 'Down').sum()}")
if len(large_up) > 0:
    print(f"Avg time to next trade: {large_up['time_since_prev'].mean():.2f}s")

print(f"\n=== After large DOWN fill (≥{large_threshold} shares): ===")
print(f"Next trade is Up: {(large_down['outcome'] == 'Up').sum()}")
print(f"Next trade is Down: {(large_down['outcome'] == 'Down').sum()}")
if len(large_down) > 0:
    print(f"Avg time to next trade: {large_down['time_since_prev'].mean():.2f}s")

# Check for "rebalancing" behavior
print("\n" + "="*70)
print("REBALANCING CHECK")
print("="*70)

# If they cancel and rebalance, we'd see: big Up fill → quick Down fill
# If independent orders, fills should be random mix

# Calculate exposure before each trade
trades['exposure_before'] = trades.groupby('market')['net_exposure'].shift(1).fillna(0)

# When overweight Up (positive exposure), what do they trade?
overweight_up = trades[trades['exposure_before'] > 50]  # Net long Up by 50+ shares
overweight_down = trades[trades['exposure_before'] < -50]  # Net long Down by 50+ shares

print(f"\nWhen OVERWEIGHT UP (exposure > +50 shares):")
print(f"  They trade Up: {(overweight_up['outcome'] == 'Up').sum()}")
print(f"  They trade Down: {(overweight_up['outcome'] == 'Down').sum()}")
if len(overweight_up) > 0:
    pct_rebalance = (overweight_up['outcome'] == 'Down').sum() / len(overweight_up) * 100
    print(f"  Rebalance rate (trade Down): {pct_rebalance:.1f}%")

print(f"\nWhen OVERWEIGHT DOWN (exposure < -50 shares):")
print(f"  They trade Up: {(overweight_down['outcome'] == 'Up').sum()}")
print(f"  They trade Down: {(overweight_down['outcome'] == 'Down').sum()}")
if len(overweight_down) > 0:
    pct_rebalance = (overweight_down['outcome'] == 'Up').sum() / len(overweight_down) * 100
    print(f"  Rebalance rate (trade Up): {pct_rebalance:.1f}%")

# Check if alternating is faster than same-side
print("\n=== TIMING PATTERNS ===")

same_side = trades[trades['outcome'] == trades['prev_outcome']]
alt_side = trades[trades['outcome'] != trades['prev_outcome']]

print(f"Same-side consecutive trades: {len(same_side)}")
print(f"  Avg time gap: {same_side['time_since_prev'].mean():.2f}s")

print(f"\nAlternating trades: {len(alt_side)}")
print(f"  Avg time gap: {alt_side['time_since_prev'].mean():.2f}s")

print("\n" + "="*70)
print("INTERPRETATION:")
print("="*70)
print("""
CANCEL BEHAVIOR INDICATORS:

1. If rebalance rate is ~50%:
   → They're NOT rebalancing, just taking whatever fills come
   → Orders are independent on each side

2. If rebalance rate is >70%:
   → They're actively rebalancing when exposure drifts
   → May cancel opposite side when one fills

3. If alternating trades are faster than same-side:
   → Suggests linked order pairs (one fills, refresh both)
   
4. If same-side is faster:
   → Same-side orders are being refreshed quickly after fills
   → Independent order management per side

EXPECTED BASED ON PRIOR ANALYSIS:
- Near 50% rebalance rate (no active rebalancing)
- Independent orders on each side
- They don't cancel when one side fills
""")

TEST 9: CANCEL BEHAVIOR ANALYSIS

=== POSITION DRIFT ANALYSIS ===

Market 1766688300:
  Max exposure during window: 100.0%
  Final exposure: 0.1%
  Exposure std dev: 7.5%

Market 1766689200:
  Max exposure during window: 100.0%
  Final exposure: -1.5%
  Exposure std dev: 9.7%

Market 1766690100:
  Max exposure during window: 100.0%
  Final exposure: -3.4%
  Exposure std dev: 13.6%

Market 1766691000:
  Max exposure during window: 100.0%
  Final exposure: -2.8%
  Exposure std dev: 8.9%

Market 1766691900:
  Max exposure during window: 100.0%
  Final exposure: -0.1%
  Exposure std dev: 7.5%

Market 1766692800:
  Max exposure during window: 100.0%
  Final exposure: -0.8%
  Exposure std dev: 12.9%

SEQUENCE ANALYSIS: WHAT FOLLOWS BIG FILLS?

=== After large UP fill (≥20 shares): ===
Next trade is Up: 388
Next trade is Down: 268
Avg time to next trade: 1.32s

=== After large DOWN fill (≥20 shares): ===
Next trade is Up: 246
Next trade is Down: 425
Avg time to next trade: 0.95s

REBALANCING 

KeyError: 'Column not found: net_exposure'

In [7]:
"""
TEST 10: IMPLEMENTATION REQUIREMENTS SUMMARY
============================================
This script runs all tests and produces a summary of findings
with specific implementation recommendations.
"""

import pandas as pd
import numpy as np
import os

markets = ["1766688300", "1766689200", "1766690100", "1766691000", "1766691900", "1766692800"]

# Load all data
all_trades = []
for market in markets:
    try:
        trades = pd.read_parquet(f"DataCollection/BotTrades/{market}-trades.parquet")
        trades['market'] = market
        all_trades.append(trades)
    except:
        pass

trades = pd.concat(all_trades, ignore_index=True)
trades = trades.sort_values('unix_timestamp')

print("="*70)
print("IMPLEMENTATION REQUIREMENTS SUMMARY")
print("="*70)

print("""
Based on analysis of 5,037 trades across 6 market windows, here are the
specific parameters and behaviors you need to implement:
""")

# ============================================
# ORDER SIZING
# ============================================
print("\n" + "="*70)
print("1. ORDER SIZING")
print("="*70)

max_size = trades['size'].max()
mode_size = trades['size'].mode().iloc[0]
mean_size = trades['size'].mean()

print(f"""
FINDINGS:
- Maximum trade size: {max_size} shares
- Most common size: {mode_size} shares  
- Average size: {mean_size:.2f} shares
- 1:1 transaction:trade ratio confirms deliberate 28-share cap

IMPLEMENTATION:
- Set MAX_ORDER_SIZE = 28
- This is ~$10 at typical prices
- Do NOT vary size based on market conditions
""")

# ============================================
# ORDER FREQUENCY
# ============================================
print("\n" + "="*70)
print("2. ORDER FREQUENCY / REFRESH RATE")
print("="*70)

time_gaps = trades['unix_timestamp'].diff()
median_gap = time_gaps.median()
mean_gap = time_gaps.mean()

trades_per_window = len(trades) / len(markets)
trades_per_minute = trades_per_window / 15

print(f"""
FINDINGS:
- Median time between fills: {median_gap:.2f} seconds
- Mean time between fills: {mean_gap:.2f} seconds
- Trades per 15-min window: {trades_per_window:.0f}
- Trades per minute: {trades_per_minute:.1f}

IMPLEMENTATION:
- Post orders continuously, refresh immediately after fill
- Target ~50-60 fills per minute when market is active
- Both sides should have orders resting at all times
""")

# ============================================
# POSITION LIMITS
# ============================================
print("\n" + "="*70)
print("3. POSITION LIMITS")
print("="*70)

# Calculate max position per market
max_position = 0
for market in markets:
    m = trades[trades['market'] == market]
    up = m[m['outcome'] == 'Up']['size'].sum()
    down = m[m['outcome'] == 'Down']['size'].sum()
    max_position = max(max_position, up, down)

avg_up = trades[trades['outcome'] == 'Up'].groupby('market')['size'].sum().mean()
avg_down = trades[trades['outcome'] == 'Down'].groupby('market')['size'].sum().mean()
avg_total_volume = trades.groupby('market')['size'].sum().mean()

print(f"""
FINDINGS:
- Average Up shares per window: {avg_up:.0f}
- Average Down shares per window: {avg_down:.0f}
- Max single-side position observed: {max_position:.0f}
- Total volume per window: {avg_total_volume:.0f} shares

IMPLEMENTATION:
- Allocate ~$7,000 per market window
- No hard position limit observed (they keep trading until settlement)
- Net exposure typically stays within ±3%
""")

# ============================================
# ORDER PLACEMENT
# ============================================
print("\n" + "="*70)
print("4. ORDER PLACEMENT LOGIC")
print("="*70)

print("""
FINDINGS:
- 53-58% of fills are maker (limit orders hit by market orders)
- Average fill price is below best ask (they don't cross spread)
- They post at or near best bid

IMPLEMENTATION:
- Order type: GTC (Good-Til-Cancelled) limit orders
- Price: At or slightly inside best bid
- Post on BOTH sides simultaneously
- Do NOT use market orders or cross the spread

PSEUDOCODE:
```python
up_order = {
    'side': 'BUY',
    'outcome': 'UP',
    'price': orderbook.up_best_bid,  # or +0.001 to be first
    'size': 28,
    'type': 'GTC'
}
down_order = {
    'side': 'BUY',
    'outcome': 'DOWN', 
    'price': orderbook.down_best_bid,  # or +0.001 to be first
    'size': 28,
    'type': 'GTC'
}
```
""")

# ============================================
# REBALANCING
# ============================================
print("\n" + "="*70)
print("5. REBALANCING LOGIC")
print("="*70)

print("""
FINDINGS:
- Correlation between size and exposure: ~0.03 (none)
- No evidence of active rebalancing when position drifts
- Final positions are 1-3% unbalanced (they accept this)

IMPLEMENTATION:
- Do NOT actively rebalance
- Keep posting same-size orders on both sides
- Balance emerges naturally from continuous two-sided quoting
- Accept small directional tilt at settlement

DO NOT:
- Size up on one side when underweight
- Cancel one side when the other fills
- Cross spread to "catch up" on lagging side
""")

# ============================================
# STALE QUOTE HANDLING
# ============================================
print("\n" + "="*70)
print("6. STALE QUOTE HANDLING")
print("="*70)

print("""
FINDINGS:
- Unable to determine exact cancel threshold from fill data
- Most fills happen at or near current bid
- Some fills appear at prices 1-2% from current bid

IMPLEMENTATION (CONSERVATIVE RECOMMENDATION):
- Cancel and requote if market moves >2% from your order price
- Or: Let GTC orders rest and accept some adverse selection
- The 28-share size limits damage from stale fills

PSEUDOCODE:
```python
if abs(current_bid - my_order_price) / my_order_price > 0.02:
    cancel_order(my_order_id)
    post_new_order(price=current_bid, size=28)
```
""")

# ============================================
# RISK CONTROLS
# ============================================
print("\n" + "="*70)
print("7. RISK CONTROLS")
print("="*70)

print("""
OBSERVED BEHAVIOR:
- No evidence of position limits
- No evidence of exposure-based pausing
- No evidence of time-based size reduction (except slight Q4 reduction)

RECOMMENDED RISK CONTROLS (not observed, but prudent):
- MAX_NET_EXPOSURE: ±500 shares (pause if exceeded)
- MAX_POSITION_PER_SIDE: 10,000 shares
- CIRCUIT_BREAKER: Stop if >5 consecutive adverse fills
- TIME_CUTOFF: Stop posting new orders 30s before settlement
""")

# ============================================
# TIMING
# ============================================
print("\n" + "="*70)
print("8. TIMING REQUIREMENTS")
print("="*70)

print("""
FINDINGS:
- 69.6% of trades at same timestamp as previous (batched fills)
- Trades spread evenly throughout 15-min window
- No evidence of sub-second latency requirements

IMPLEMENTATION:
- Polling rate: 500ms-1000ms is sufficient
- No co-location or HFT infrastructure needed
- Focus on reliability over speed
- Ensure you can detect fills and repost quickly
""")

# ============================================
# API REQUIREMENTS
# ============================================
print("\n" + "="*70)
print("9. API REQUIREMENTS")
print("="*70)

print("""
REQUIRED CAPABILITIES:
1. Read orderbook (best bid/ask for Up and Down)
2. Post limit orders (GTC)
3. Cancel orders
4. Get fill notifications
5. Track open positions

POLYMARKET CLOB API:
- WebSocket for real-time orderbook updates
- REST for order submission
- WebSocket for fill events
- Rate limits: TBD (test in sandbox)

KEY ENDPOINTS:
- GET /book (orderbook state)
- POST /order (submit order)
- DELETE /order/{id} (cancel)
- WebSocket /fills (fill notifications)
""")

# ============================================
# ECONOMICS SUMMARY
# ============================================
print("\n" + "="*70)
print("10. ECONOMICS SUMMARY")
print("="*70)

print(f"""
PER-WINDOW ECONOMICS:
- Volume: ~{avg_total_volume:.0f} shares
- Pairs: ~{min(avg_up, avg_down):.0f}
- Edge per pair: $0.0085 (0.85%)
- Gross profit: $50-90
- Gas costs (estimated): $5-15 (Polygon)
- Net profit: $40-75 per window

SCALING:
- 4 windows/hour × 24 hours = 96 windows/day
- At $50 net/window = $4,800/day
- Capital required: ~$7,000 per window

RISKS:
- Adverse selection (getting picked off)
- Legging risk (fills not simultaneous)
- Gas spikes on Polygon
- API rate limits
- Settlement risk (position at expiry)
""")

print("\n" + "="*70)
print("END OF IMPLEMENTATION SUMMARY")
print("="*70)

IMPLEMENTATION REQUIREMENTS SUMMARY

Based on analysis of 5,037 trades across 6 market windows, here are the
specific parameters and behaviors you need to implement:


1. ORDER SIZING

FINDINGS:
- Maximum trade size: 28.0 shares
- Most common size: 28.0 shares  
- Average size: 12.62 shares
- 1:1 transaction:trade ratio confirms deliberate 28-share cap

IMPLEMENTATION:
- Set MAX_ORDER_SIZE = 28
- This is ~$10 at typical prices
- Do NOT vary size based on market conditions


2. ORDER FREQUENCY / REFRESH RATE

FINDINGS:
- Median time between fills: 0.00 seconds
- Mean time between fills: 1.07 seconds
- Trades per 15-min window: 840
- Trades per minute: 56.0

IMPLEMENTATION:
- Post orders continuously, refresh immediately after fill
- Target ~50-60 fills per minute when market is active
- Both sides should have orders resting at all times


3. POSITION LIMITS

FINDINGS:
- Average Up shares per window: 5234
- Average Down shares per window: 5357
- Max single-side position observed: 6839
- 