In [186]:
import requests
import time
from datetime import datetime
def get_market_by_slug(slug: str) -> dict:
    """Fetch market details directly by slug (no searching)"""
    response = requests.get(
        f"https://gamma-api.polymarket.com/markets",
        params={"slug": slug}
    )
    markets = response.json()
    return markets[0] if markets else None
def get_btc_15m_market(timestamp: int) -> dict:
    """Get BTC 15-min market for a specific timestamp"""
    slug = f"btc-updown-15m-{timestamp}"
    return get_market_by_slug(slug)
def get_next_market_timestamp(current_ts: int) -> int:
    """Calculate next 15-min market timestamp"""
    return current_ts + 900
# Example: Get current market
current_ts = 1766692800
market = get_btc_15m_market(current_ts)
if market:
    print(f"Question: {market['question']}")
    print(f"End Date: {market['endDate']}")
    print(f"Condition ID: {market['conditionId']}")
    print(f"Token IDs: {market['clobTokenIds']}")  # This is what WebSocket needs

    yes_token_id = market['clobTokenIds'][0]
    no_token_id = market['clobTokenIds'][1]

Question: Bitcoin Up or Down - December 25, 3:00PM-3:15PM ET
End Date: 2025-12-25T20:15:00Z
Condition ID: 0x6e2708933d0dc36b5fffb650ebda1152e43cc41ec99edd089f3acd94eb860025
Token IDs: ["53398642787529207982308002576599779120889686331928151735250039510047392627488", "29822429672172354396578021183134197938504920225831222333842740602723777494319"]


In [168]:
import json
import threading
import pandas as pd
import time
import signal
import sys
import atexit
import requests
from datetime import datetime
from pathlib import Path
from websocket import WebSocketApp

# ============ CONFIGURATION ============
TOKEN_ID_UP = "34677096804281061354510426690791417876020897574627293028025660235001537633441"
TOKEN_ID_DOWN = "105199598457062001504908835303365997420467421354232934994696377997100360894990"
MARKET_SLUG = "btc-updown-15m-1766685600"

DATA_DIR = Path("polymarket_data")
DATA_DIR.mkdir(exist_ok=True)
FILENAME = DATA_DIR / "test2.parquet"

# Sampling and save settings
SAMPLE_INTERVAL = 0.1  # seconds between samples
SAVE_INTERVAL = 60     # seconds between saves
MAX_BUFFER_SIZE = 1000 # force save after this many records

# REST API endpoint (confirmed to return accurate prices)
CLOB_API = "https://clob.polymarket.com"

# ============ SHARED STATE ============
prices = {
    TOKEN_ID_UP: {"best_bid": None, "best_ask": None, "last_trade": None},
    TOKEN_ID_DOWN: {"best_bid": None, "best_ask": None, "last_trade": None},
}
data_lock = threading.Lock()
snapshot_buffer = []
last_save_time = time.time()
running = True
ws_instance = None

# ============ REST API PRICE FETCHER ============
def fetch_price(token_id, side):
    """Fetch price from REST API (confirmed accurate)."""
    try:
        resp = requests.get(
            f"{CLOB_API}/price",
            params={"token_id": token_id, "side": side},
            timeout=5
        )
        if resp.status_code == 200:
            data = resp.json()
            return float(data.get("price", 0))
    except:
        pass
    return None

def price_poller():
    """Poll REST API for accurate prices."""
    while running:
        try:
            for token_id in [TOKEN_ID_UP, TOKEN_ID_DOWN]:
                bid = fetch_price(token_id, "BUY")   # Best bid = what buyers offer (you receive when selling)
                ask = fetch_price(token_id, "SELL")  # Best ask = what sellers want (you pay when buying)
                
                if bid is not None and ask is not None:
                    with data_lock:
                        prices[token_id]["best_bid"] = bid
                        prices[token_id]["best_ask"] = ask
            
            time.sleep(0.5)  # Poll every 500ms
        except:
            time.sleep(1)

# ============ DATA PERSISTENCE ============
def save_buffer_to_disk(final=False):
    """Save buffered snapshots to parquet file."""
    global snapshot_buffer, last_save_time
    
    if not snapshot_buffer:
        return
    
    try:
        new_df = pd.DataFrame(snapshot_buffer)
        
        if FILENAME.exists():
            existing_df = pd.read_parquet(FILENAME)
            final_df = pd.concat([existing_df, new_df], ignore_index=True)
        else:
            final_df = new_df
        
        final_df.to_parquet(FILENAME, index=False)
        
        count = len(snapshot_buffer)
        snapshot_buffer = []
        last_save_time = time.time()
        
        if final:
            print(f"[FINAL SAVE] {count} records saved to {FILENAME}")
        
    except Exception as e:
        print(f"[SAVE ERROR] {e}")

def graceful_shutdown(*args):
    """Handle graceful shutdown on signal or exit."""
    global running, ws_instance
    
    running = False
    
    if ws_instance:
        try:
            ws_instance.close()
        except:
            pass
    
    save_buffer_to_disk(final=True)
    
    total_records = 0
    if FILENAME.exists():
        try:
            df = pd.read_parquet(FILENAME)
            total_records = len(df)
        except:
            pass
    
    print(f"[SHUTDOWN] Total records in file: {total_records}")
    sys.exit(0)

# Register shutdown handlers
signal.signal(signal.SIGINT, graceful_shutdown)
signal.signal(signal.SIGTERM, graceful_shutdown)
atexit.register(lambda: save_buffer_to_disk(final=True))

# ============ WEBSOCKET (for last_trade_price) ============
def on_open(ws):
    global ws_instance
    ws_instance = ws
    
    print(f"[CONNECTED] Subscribing to market: {MARKET_SLUG}")
    
    msg = {
        "assets_ids": [TOKEN_ID_UP, TOKEN_ID_DOWN],
        "type": "market"
    }
    ws.send(json.dumps(msg))
    
    def ping():
        while running:
            try:
                ws.send("PING")
                time.sleep(10)
            except:
                break
    threading.Thread(target=ping, daemon=True).start()

def on_message(ws, message):
    if message == "PONG":
        return

    try:
        data = json.loads(message)
        events = data if isinstance(data, list) else [data]

        for event in events:
            event_type = event.get("event_type")
            
            # Capture last trade price from WebSocket (this is accurate)
            if event_type == "last_trade_price":
                asset_id = event.get("asset_id")
                if asset_id in prices:
                    with data_lock:
                        prices[asset_id]["last_trade"] = float(event.get("price", 0))

    except:
        pass

def on_close(ws, close_status_code, close_msg):
    print(f"[DISCONNECTED] Code: {close_status_code}")
    save_buffer_to_disk(final=True)

def on_error(ws, error):
    pass

def run_websocket():
    while running:
        try:
            ws = WebSocketApp(
                "wss://ws-subscriptions-clob.polymarket.com/ws/market",
                on_open=on_open,
                on_message=on_message,
                on_close=on_close,
                on_error=on_error
            )
            ws.run_forever()
            
            if running:
                save_buffer_to_disk()
                time.sleep(2)
        except:
            if running:
                time.sleep(5)

# ============ SAMPLER & SAVER ============
def sampler_and_saver():
    global snapshot_buffer, last_save_time
    
    print(f"[STARTED] Capturing to {FILENAME}")
    print(f"[CONFIG] Sample interval: {SAMPLE_INTERVAL}s, Save interval: {SAVE_INTERVAL}s")
    
    records_since_last_log = 0
    last_log_time = time.time()

    while running:
        now_ts = time.time()

        with data_lock:
            up_bid = prices[TOKEN_ID_UP]["best_bid"]
            up_ask = prices[TOKEN_ID_UP]["best_ask"]
            up_last = prices[TOKEN_ID_UP]["last_trade"]
            down_bid = prices[TOKEN_ID_DOWN]["best_bid"]
            down_ask = prices[TOKEN_ID_DOWN]["best_ask"]
            down_last = prices[TOKEN_ID_DOWN]["last_trade"]

        if up_bid is not None and up_ask is not None:
            record = {
                "timestamp": datetime.fromtimestamp(now_ts),
                "unixtime": now_ts,
                "market_slug": MARKET_SLUG,
                "up_best_bid": up_bid,
                "up_best_ask": up_ask,
                "up_spread": up_ask - up_bid,
                "up_last_trade": up_last,
                "down_best_bid": down_bid,
                "down_best_ask": down_ask,
                "down_spread": (down_ask - down_bid) if down_bid and down_ask else None,
                "down_last_trade": down_last,
                "implied_sum_bid": up_bid + (down_bid or 0),
                "implied_sum_ask": up_ask + (down_ask or 0),
            }
            snapshot_buffer.append(record)
            records_since_last_log += 1

        # Periodic save
        should_save = (
            len(snapshot_buffer) > 0 and 
            (now_ts - last_save_time > SAVE_INTERVAL or len(snapshot_buffer) >= MAX_BUFFER_SIZE)
        )
        
        if should_save:
            save_buffer_to_disk()

        # Status log every 60 seconds
        if now_ts - last_log_time >= 60:
            total = 0
            if FILENAME.exists():
                try:
                    df = pd.read_parquet(FILENAME)
                    total = len(df)
                except:
                    pass
            
            # Show current prices in status
            with data_lock:
                up_b = prices[TOKEN_ID_UP]["best_bid"]
                up_a = prices[TOKEN_ID_UP]["best_ask"]
            
            print(f"[STATUS] +{records_since_last_log} records | Total: {total} | UP bid/ask: {up_b}/{up_a}")
            records_since_last_log = 0
            last_log_time = now_ts

        time.sleep(SAMPLE_INTERVAL)

# ============ EXECUTION ============
if __name__ == "__main__":
    print("=" * 50)
    print("Polymarket Orderbook Capturer")
    print("=" * 50)
    print(f"Market: {MARKET_SLUG}")
    print(f"Output: {FILENAME}")
    print("Press Ctrl+C to stop and savÃ§e")
    print("=" * 50)
    
    # Start REST API price poller
    t_poller = threading.Thread(target=price_poller, daemon=True)
    t_poller.start()
    
    # Start WebSocket for last_trade_price events
    t_ws = threading.Thread(target=run_websocket, daemon=True)
    t_ws.start()
    
    try:
        sampler_and_saver()
    except KeyboardInterrupt:
        graceful_shutdown()
    except Exception as e:
        print(f"[ERROR] {e}")
        graceful_shutdown()


Polymarket Orderbook Capturer
Market: btc-updown-15m-1766685600
Output: polymarket_data/test2.parquet
Press Ctrl+C to stop and savÃ§e
[STARTED] Capturing to polymarket_data/test2.parquet
[CONFIG] Sample interval: 0.1s, Save interval: 60s
[CONNECTED] Subscribing to market: btc-updown-15m-1766685600
[STATUS] +574 records | Total: 574 | UP bid/ask: 0.93/0.95
[STATUS] +581 records | Total: 1155 | UP bid/ask: 0.94/0.95
[STATUS] +581 records | Total: 1155 | UP bid/ask: 0.93/0.94
[STATUS] +581 records | Total: 1737 | UP bid/ask: 0.95/0.96
[STATUS] +581 records | Total: 2318 | UP bid/ask: 0.97/0.98
[STATUS] +581 records | Total: 2900 | UP bid/ask: 0.98/0.99
[STATUS] +570 records | Total: 3482 | UP bid/ask: 0.98/0.99
[STATUS] +566 records | Total: 4052 | UP bid/ask: 0.99/1.0
[STATUS] +564 records | Total: 4618 | UP bid/ask: 0.99/1.0


  final_df = pd.concat([existing_df, new_df], ignore_index=True)


[STATUS] +570 records | Total: 5182 | UP bid/ask: 0.99/1.0


  final_df = pd.concat([existing_df, new_df], ignore_index=True)


[STATUS] +580 records | Total: 5752 | UP bid/ask: 0.99/1.0


  final_df = pd.concat([existing_df, new_df], ignore_index=True)


[STATUS] +582 records | Total: 6333 | UP bid/ask: 0.99/1.0


  final_df = pd.concat([existing_df, new_df], ignore_index=True)


[STATUS] +581 records | Total: 6915 | UP bid/ask: 0.99/1.0


  final_df = pd.concat([existing_df, new_df], ignore_index=True)


[STATUS] +582 records | Total: 7497 | UP bid/ask: 0.99/1.0


  final_df = pd.concat([existing_df, new_df], ignore_index=True)


[DISCONNECTED] Code: None
[FINAL SAVE] 419 records saved to polymarket_data/test2.parquet
[FINAL SAVE] 0 records saved to polymarket_data/test2.parquet
[SHUTDOWN] Total records in file: 8498


  final_df = pd.concat([existing_df, new_df], ignore_index=True)


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [164]:
import pandas as pd

df = pd.read_parquet("polymarket_data/test.parquet")
df.iloc[900:950]

Unnamed: 0,timestamp,unixtime,market_slug,up_best_bid,up_best_ask,up_spread,up_last_trade,down_best_bid,down_best_ask,down_spread,down_last_trade,implied_sum_bid,implied_sum_ask
900,2025-12-25 10:01:52.717910,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98
901,2025-12-25 10:01:52.819044,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98
902,2025-12-25 10:01:52.924197,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98
903,2025-12-25 10:01:53.027526,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98
904,2025-12-25 10:01:53.127958,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98
905,2025-12-25 10:01:53.230683,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98
906,2025-12-25 10:01:53.335370,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98
907,2025-12-25 10:01:53.435992,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98
908,2025-12-25 10:01:53.540696,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98
909,2025-12-25 10:01:53.641131,1766686000.0,btc-updown-15m-1766685600,0.63,0.61,-0.02,0.63,0.39,0.37,-0.02,0.39,1.02,0.98


In [174]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import argparse
from datetime import datetime
import time
import json
import os
from pathlib import Path

# Your Polygonscan API key
POLYGONSCAN_API_KEY = "4W4KUG6RPKGX2WBPZQXI9WU2XE5H3P2YKJ"  # Replace with your actual key

# Polymarket's CTF Exchange contract address on Polygon
CTF_EXCHANGE_ADDRESS = "0x4bFb41d5B3570DeFd03C39a9A4D8dE6Bd8B8982E"

DEFAULT_WALLET = "0x6031b6e60c50489f1787e2ebafb0c3cd8c03b6fb"  # Replace with wallet you want to track
DEFAULT_MARKET = "btc-updown-15m-1766686500"  # Replace with market slug or set to None
DEFAULT_MONITOR_INTERVAL = 0.5  # seconds between updates

# Data storage directory
DATA_DIR = Path("polymarket_data")
DATA_DIR.mkdir(exist_ok=True)


def extract_market_slug(market_url):
    """
    Extract the market slug from a Polymarket URL
    Examples:
    - https://polymarket.com/event/bitcoin-up-or-down-december-21-3am-et -> bitcoin-up-or-down-december-21-3am-et
    - bitcoin-up-or-down-december-21-3am-et -> bitcoin-up-or-down-december-21-3am-et
    """
    if 'polymarket.com' in market_url:
        # Extract slug from URL
        parts = market_url.rstrip('/').split('/')
        return parts[-1]
    else:
        # Already a slug
        return market_url.strip()


def get_market_info(market_slug):
    """
    Get market information from Polymarket API to find the condition IDs
    """
    print(f"Fetching market info for: {market_slug}")
    
    # Try the Gamma API to get market details
    url = f"https://gamma-api.polymarket.com/events/{market_slug}"
    
    try:
        response = requests.get(url)
        if response.status_code == 200:
            event_data = response.json()
            
            # Extract condition IDs from all markets in this event
            condition_ids = []
            market_info = {}
            
            for market in event_data.get('markets', []):
                condition_id = market.get('conditionId')
                if condition_id:
                    condition_ids.append(condition_id)
                    market_info[condition_id] = {
                        'question': market.get('question'),
                        'outcomes': market.get('outcomes', []),
                        'slug': market_slug
                    }
            
            print(f"Found {len(condition_ids)} markets in event: {event_data.get('title')}")
            return condition_ids, market_info
        else:
            print(f"Error fetching market info: {response.status_code}")
            return [], {}
    except Exception as e:
        print(f"Error: {e}")
        return [], {}


def fetch_user_transactions(wallet_address):
    """
    Fetch ERC-1155 token transfers for a wallet from Polygonscan
    """
    print(f"Fetching transactions for wallet: {wallet_address}")
    
    url = f"https://api.polygonscan.com/api"
    params = {
        'module': 'account',
        'action': 'token1155tx',
        'address': wallet_address,
        'startblock': 0,
        'endblock': 99999999,
        'sort': 'asc',
        'apikey': POLYGONSCAN_API_KEY
    }
    
    response = requests.get(url, params=params)
    data = response.json()
    
    if data['status'] == '1':
        print(f"Found {len(data['result'])} transactions")
        return pd.DataFrame(data['result'])
    else:
        print(f"Error: {data['message']}")
        return pd.DataFrame()


def get_current_positions(wallet_address, condition_ids=None, market_filter=None):
    """
    Get current positions using Polymarket's Data API
    Optionally filter by specific condition IDs or market criteria
    """
    print(f"Fetching current positions from Polymarket API...")
    
    url = f"https://data-api.polymarket.com/positions"
    params = {
        'user': wallet_address,
        'limit': 500
    }
    
    try:
        response = requests.get(url, params=params)
        positions = response.json()
        
        # Filter by condition IDs if provided
        if condition_ids:
            positions = [p for p in positions if p.get('conditionId') in condition_ids]
            print(f"Found {len(positions)} positions matching specified condition IDs")
        
        # Additional filtering by market characteristics
        if market_filter:
            original_count = len(positions)
            
            # Filter by event slug if specified
            if 'event_slugs' in market_filter:
                positions = [p for p in positions if p.get('eventSlug') in market_filter['event_slugs']]
            
            # Filter by active/not redeemable (exclude expired markets)
            if market_filter.get('active_only'):
                positions = [p for p in positions if not p.get('redeemable', False)]
            
            # Filter by end date (only future markets)
            if market_filter.get('future_only'):
                now = datetime.now().isoformat()
                positions = [p for p in positions if p.get('endDate', '') > now]
            
            print(f"Filtered from {original_count} to {len(positions)} positions based on criteria")
        
        if not condition_ids and not market_filter:
            print(f"Found {len(positions)} total positions")
        
        return positions
    except Exception as e:
        print(f"Error fetching positions: {e}")
        return []


def save_position_snapshot(wallet_address, positions, market_slugs=None):
    """
    Save a snapshot of positions to a Parquet file with timestamp
    Parquet is best for time-series data - more efficient than CSV, easier than JSON
    """
    if not positions:
        print("No positions to save")
        return
    
    timestamp = datetime.now()
    
    # Create records with timestamp
    records = []
    for pos in positions:
        record = {
            'timestamp': timestamp,
            'wallet_address': wallet_address,
            'market_title': pos['title'],
            'market_slug': pos['slug'],
            'event_slug': pos['eventSlug'],
            'condition_id': pos['conditionId'],
            'outcome': pos['outcome'],
            'outcome_index': pos['outcomeIndex'],
            'shares': pos['size'],
            'avg_price': pos['avgPrice'],
            'current_price': pos['curPrice'],
            'initial_value': pos['initialValue'],
            'current_value': pos['currentValue'],
            'cash_pnl': pos['cashPnl'],
            'percent_pnl': pos['percentPnl'],
            'total_bought': pos['totalBought'],
            'realized_pnl': pos['realizedPnl'],
            'percent_realized_pnl': pos['percentRealizedPnl'],
            'redeemable': pos['redeemable'],
            'end_date': pos['endDate'],
        }
        records.append(record)
    
    df = pd.DataFrame(records)
    
    # Create filename based on wallet and markets
    if market_slugs:
        markets_str = "_".join([slug[:20] for slug in market_slugs[:2]])  # Use first 2 slugs
        filename = f"positions_{wallet_address[:8]}_{markets_str}.parquet"
    else:
        filename = f"positions_{wallet_address[:8]}_all.parquet"
    
    filepath = DATA_DIR / filename
    
    # Append to existing file or create new one
    if filepath.exists():
        existing_df = pd.read_parquet(filepath)
        df = pd.concat([existing_df, df], ignore_index=True)
    
    df.to_parquet(filepath, index=False)
    print(f"Saved snapshot to: {filepath}")
    print(f"Total records in file: {len(df)}")
    
    return filepath


def load_position_history(filepath):
    """
    Load saved position history into a pandas DataFrame
    """
    if not Path(filepath).exists():
        print(f"File not found: {filepath}")
        return None
    
    df = pd.read_parquet(filepath)
    print(f"Loaded {len(df)} records from {filepath}")
    print(f"Time range: {df['timestamp'].min()} to {df['timestamp'].max()}")
    
    return df


def display_positions(positions, market_info=None):
    """
    Display current positions in a readable format
    """
    if not positions:
        print("No active positions found")
        return
    
    print("\n" + "="*80)
    print("CURRENT POSITIONS")
    print("="*80)
    
    for pos in positions:
        print(f"\nMarket: {pos['title']}")
        
        # Show additional market context if available
        if market_info and pos['conditionId'] in market_info:
            info = market_info[pos['conditionId']]
            print(f"Question: {info['question']}")
        
        print(f"Outcome: {pos['outcome']}")
        print(f"Shares: {pos['size']:.2f}")
        print(f"Avg Price: ${pos['avgPrice']:.4f}")
        print(f"Current Price: ${pos['curPrice']:.4f}")
        print(f"Current Value: ${pos['currentValue']:.2f}")
        print(f"P&L: ${pos['cashPnl']:.2f} ({pos['percentPnl']:.2f}%)")
        print(f"Condition ID: {pos['conditionId']}")
        print("-"*80)


def monitor_wallet(wallet_address, interval=60, condition_ids=None, market_info=None, 
                   market_slugs=None, save_data=True, market_filter=None):
    """
    Continuously monitor a wallet's positions and save data
    """
    print(f"Starting real-time monitoring for {wallet_address}")
    # ... (Keep your existing print statements here) ...
    print(f"Updating every {interval} seconds. Press Ctrl+C to stop.\n")
    
    # CHANGE 1: Create a memory buffer and a timer
    position_buffer = [] 
    last_save_time = time.time()
    
    try:
        while True:
            positions = get_current_positions(wallet_address, condition_ids, market_filter)
            display_positions(positions, market_info)
            
            # CHANGE 2: Process data immediately into RAM, but DO NOT save to disk yet.
            # We must do this here to capture the exact timestamp of the fetch.
            if save_data and positions:
                current_ts = datetime.now()
                for pos in positions:
                    # We manually create the record dict here to ensure accurate timestamps
                    record = {
                        'timestamp': current_ts,
                        'wallet_address': wallet_address,
                        'market_title': pos.get('title'),
                        'market_slug': pos.get('slug'),
                        'event_slug': pos.get('eventSlug'),
                        'condition_id': pos.get('conditionId'),
                        'outcome': pos.get('outcome'),
                        'outcome_index': pos.get('outcomeIndex'),
                        'shares': pos.get('size'),
                        'avg_price': pos.get('avgPrice'),
                        'current_price': pos.get('curPrice'),
                        'initial_value': pos.get('initialValue'),
                        'current_value': pos.get('currentValue'),
                        'cash_pnl': pos.get('cashPnl'),
                        'percent_pnl': pos.get('percentPnl'),
                        'total_bought': pos.get('totalBought'),
                        'realized_pnl': pos.get('realizedPnl'),
                        'percent_realized_pnl': pos.get('percentRealizedPnl'),
                        'redeemable': pos.get('redeemable'),
                        'end_date': pos.get('endDate'),
                    }
                    position_buffer.append(record)

            # CHANGE 3: Flush to disk only once every 60 seconds (or on high buffer size)
            # This turns 300 disk writes into 1 disk write.
            if save_data and (time.time() - last_save_time > 60 or len(position_buffer) > 5000):
                if position_buffer:
                    print(f"\nðŸ’¾ Flushing {len(position_buffer)} records to disk...")
                    
                    # Create DataFrame from buffer
                    new_df = pd.DataFrame(position_buffer)
                    
                    # Generate filename (same logic as before)
                    if market_slugs:
                        markets_str = "_".join([slug[:20] for slug in market_slugs[:2]])
                        filename = f"positions_{wallet_address[:8]}_{markets_str}.parquet"
                    else:
                        filename = f"positions_{wallet_address[:8]}_all.parquet"
                    filepath = DATA_DIR / filename

                    # Read existing -> Concat -> Write (The expensive part, now done rarely)
                    if filepath.exists():
                        existing_df = pd.read_parquet(filepath)
                        final_df = pd.concat([existing_df, new_df], ignore_index=True)
                    else:
                        final_df = new_df
                        
                    final_df.to_parquet(filepath, index=False)
                    
                    # Clear buffer and reset timer
                    position_buffer = []
                    last_save_time = time.time()
                    print(f"âœ… Data saved. Total records in file: {len(final_df)}")
            
            # print(f"\nLast updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            # print(f"Next update in {interval} seconds...")
            time.sleep(interval)

    except KeyboardInterrupt:
        print("\n\nMonitoring stopped by user")
        
        # CHANGE 4: Emergency flush on exit so you don't lose the last minute of data
        if save_data and position_buffer:
            print(f"ðŸ’¾ Saving remaining {len(position_buffer)} records...")
            new_df = pd.DataFrame(position_buffer)
            
            if market_slugs:
                markets_str = "_".join([slug[:20] for slug in market_slugs[:2]])
                filename = f"positions_{wallet_address[:8]}_{markets_str}.parquet"
            else:
                filename = f"positions_{wallet_address[:8]}_all.parquet"
            filepath = DATA_DIR / filename

            if filepath.exists():
                existing_df = pd.read_parquet(filepath)
                final_df = pd.concat([existing_df, new_df], ignore_index=True)
            else:
                final_df = new_df
            
            final_df.to_parquet(filepath, index=False)
            print(f"âœ… Final save complete.")
            
        if save_data:
            print(f"\nData saved to: {DATA_DIR}")

def main(wallet_addresses=None, market_urls=None, skip_leaderboard=False, top_volume=False, 
         top_profit=False, plot=True, latest_price_mode=False, monitor=False,
         interval=60, save_data=True, load_file=None, active_only=False, future_only=False):
    """
    Main function to analyze Polymarket user positions
    """
    
    # Handle loading existing data
    if load_file:
        print(f"Loading data from: {load_file}")
        df = load_position_history(load_file)
        if df is not None:
            print("\nDataFrame Info:")
            print(df.info())
            print("\nFirst few rows:")
            print(df.head())
            print("\nUnique markets:")
            print(df['market_title'].unique())
        return
    
    if not wallet_addresses:
        print("No wallet address provided. Please use --wallets flag.")
        print("\nUsage examples:")
        print("  Monitor active positions in event (filters out expired):")
        print("    python polymarket_tracker.py --wallets 0xADDRESS --markets btc-updown-15m-1766341800 --monitor --active-only")
        print("  Load saved data:")
        print("    python polymarket_tracker.py --load polymarket_data/positions_0x123456_bitcoin.parquet")
        return
    
    # Process market URLs to get condition IDs
    condition_ids = []
    market_info = {}
    market_slugs = []
    
    if market_urls:
        print(f"\nProcessing {len(market_urls)} market URLs...")
        for url in market_urls:
            slug = extract_market_slug(url)
            market_slugs.append(slug)
            cids, minfo = get_market_info(slug)
            condition_ids.extend(cids)
            market_info.update(minfo)
        
        print(f"\nTotal condition IDs found across all events: {len(condition_ids)}")
        if not condition_ids:
            print("Warning: No valid markets found. Will show all positions.")
    
    # Build market filter criteria
    market_filter = None
    if market_slugs or active_only or future_only:
        market_filter = {}
        if market_slugs:
            market_filter['event_slugs'] = market_slugs
        if active_only:
            market_filter['active_only'] = True
        if future_only:
            market_filter['future_only'] = True
    
    for wallet in wallet_addresses:
        print(f"\n{'='*80}")
        print(f"Analyzing wallet: {wallet}")
        if market_slugs:
            print(f"Filtering for event slugs: {market_slugs}")
        if active_only:
            print("Filtering: Active positions only (not expired/redeemed)")
        if future_only:
            print("Filtering: Future markets only")
        print(f"{'='*80}\n")
        
        if monitor:
            # Continuous monitoring mode
            monitor_wallet(wallet, interval, condition_ids, market_info, 
                         market_slugs, save_data, market_filter)
        else:
            # One-time check
            print("Running one-time position check. Use --monitor for continuous updates.\n")
            positions = get_current_positions(wallet, condition_ids, market_filter)
            display_positions(positions, market_info)
            
            # Save snapshot if requested
            if save_data and positions:
                filepath = save_position_snapshot(wallet, positions, market_slugs)
                print(f"\nData saved to: {filepath}")
            
            # Optionally fetch blockchain transaction history
            if not latest_price_mode and POLYGONSCAN_API_KEY != "YOUR_API_KEY_HERE":
                print("\nFetching blockchain transaction history...")
                transactions = fetch_user_transactions(wallet)
                if not transactions.empty:
                    print(f"Total blockchain transactions: {len(transactions)}")
            elif not latest_price_mode:
                print("\nSkipping blockchain history (Polygonscan API key not configured)")
                print("To enable: Set POLYGONSCAN_API_KEY in the script")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Track Polymarket user positions in real-time with data logging',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run with defaults (monitors default wallet/market)
  python polymarket_tracker.py
  
  # Monitor specific Bitcoin markets
  python polymarket_tracker.py --wallets 0x123... --markets https://polymarket.com/event/bitcoin-up-or-down-december-21 --monitor --interval 30
  
  # Load and analyze saved data
  python polymarket_tracker.py --load polymarket_data/positions_0x6031b6e_bitcoin-up-or-down.parquet
        """
    )
    
    parser.add_argument(
        '--wallets',
        nargs='+',
        default=[DEFAULT_WALLET] if DEFAULT_WALLET else None,
        help=f'List of wallet addresses to track (default: {DEFAULT_WALLET})'
    )
    parser.add_argument(
        '--markets',
        nargs='+',
        default=[DEFAULT_MARKET] if DEFAULT_MARKET else None,
        help=f'List of market URLs or slugs to filter (default: {DEFAULT_MARKET})'
    )
    parser.add_argument(
        '--monitor',
        action='store_true',
        default=True,  # Changed: monitor by default
        help='Enable continuous monitoring mode (default: True)'
    )
    parser.add_argument(
        '--interval',
        type=float,
        default=DEFAULT_MONITOR_INTERVAL,
        help=f'Update interval in seconds for monitor mode (default: {DEFAULT_MONITOR_INTERVAL})'
    )
    parser.add_argument(
        '--active-only',
        action='store_true',
        default=True,  # Changed: filter active by default
        help='Only show active positions (default: True)'
    )
    parser.add_argument(
        '--future-only',
        action='store_true',
        help='Only show positions in markets that have not ended yet'
    )
    parser.add_argument(
        '--no-save',
        action='store_true',
        help='Disable automatic data saving'
    )
    parser.add_argument(
        '--load',
        type=str,
        help='Load and display saved data from a Parquet file'
    )
    parser.add_argument(
        '--skip-leaderboard',
        action='store_true',
        help='Skip leaderboard fetching'
    )
    parser.add_argument(
        '--top-volume',
        action='store_true',
        help='Fetch top volume users'
    )
    parser.add_argument(
        '--top-profit',
        action='store_true',
        help='Fetch top profit users'
    )
    parser.add_argument(
        '--no-plot',
        action='store_true',
        help='Disable plot generation'
    )
    parser.add_argument(
        '--latest-price-mode',
        action='store_true',
        help='Only retrieve the latest prices, no plotting'
    )
    
    args = parser.parse_args()
    
    main(
        wallet_addresses=args.wallets,
        market_urls=args.markets,
        skip_leaderboard=args.skip_leaderboard,
        top_volume=args.top_volume,
        top_profit=args.top_profit,
        plot=not args.no_plot,
        latest_price_mode=args.latest_price_mode,
        monitor=args.monitor,
        interval=args.interval,
        save_data=not args.no_save,
        load_file=args.load,
        active_only=args.active_only,
        future_only=args.future_only
    )

usage: ipykernel_launcher.py [-h] [--wallets WALLETS [WALLETS ...]]
                             [--markets MARKETS [MARKETS ...]] [--monitor]
                             [--interval INTERVAL] [--active-only]
                             [--future-only] [--no-save] [--load LOAD]
                             [--skip-leaderboard] [--top-volume]
                             [--top-profit] [--no-plot] [--latest-price-mode]
ipykernel_launcher.py: error: unrecognized arguments: -f /Users/ryanjain/Library/Jupyter/runtime/kernel-d1fe22d9-57bb-4554-a918-e0329cbea370.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
