# Data Exploration

This notebook demonstrates how to use the framework to load and visualize
Polymarket + Binance data for hourly binary option markets.

In [None]:
# Setup
import os
import sys
from datetime import date, datetime, timedelta

# Make the project root importable when the kernel starts in the notebooks
# directory: the parent of the current working directory is where the `src`
# package is expected to live. The membership check keeps repeated runs of
# this cell from stacking duplicate entries onto sys.path.
_project_root = os.path.dirname(os.getcwd())
if _project_root not in sys.path:
    sys.path.insert(0, _project_root)

import pandas as pd
import numpy as np
import plotly.io as pio
# Make plotly's 'notebook' renderer the default, so every fig.show() below uses it.
# NOTE(review): other front ends (JupyterLab, VS Code, ...) may need a different
# renderer name - confirm for your environment.
pio.renderers.default = 'notebook'

## 1. Configure S3 Access

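Set your S3 credentials before loading data. The cell below supplies them through the `S3_ACCESS_KEY` and `S3_SECRET_KEY` environment variables (they can also be passed directly to `S3Config`). Replace the placeholders locally and never commit real keys with the notebook.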

In [None]:
# Provide S3 credentials through environment variables (or pass them directly
# to S3Config). setdefault() only fills in a value when the variable is not
# already set, so real credentials exported in the shell are never overwritten
# by these placeholders. An existing environment value always wins; replace the
# placeholders locally and never commit real keys.
os.environ.setdefault("S3_ACCESS_KEY", "your-access-key")
os.environ.setdefault("S3_SECRET_KEY", "your-secret-key")

# Import after setting env vars
from src.config import get_config, set_config, DataConfig, S3Config

# Optionally customize config
# config = DataConfig(
#     s3=S3Config(
#         bucket="marketdata-archive",
#         endpoint="nbg1.your-objectstorage.com",
#     )
# )
# set_config(config)

## 2. Load a Single Market Session

The `HourlyMarketSession` is the core abstraction. It bundles:
- Polymarket BBO, trades, and order book
- Binance BBO and trades
- Aligned time series
- Market outcome

In [None]:
from src.data import load_session

# One hourly BTC market: 18 January 2026, 09:00 Eastern Time.
# lookback_hours=3 additionally requests three hours of Binance data ahead of
# the market window; the volatility section further down relies on it.
session = load_session(asset="BTC", market_date=date(2026, 1, 18), hour_et=9, lookback_hours=3)

# Summarise what was loaded and the time ranges it covers.
print(session)
print(f"\nUTC window: {session.utc_start} to {session.utc_end}")
print(f"Lookback starts: {session.lookback_start}")

In [None]:
# Access the aligned DataFrame (Polymarket + Binance joined on ts_recv).
# Print its (rows, columns) shape, then preview the first rows as the cell output.
df = session.aligned
print(f"Aligned data shape: {df.shape}")
df.head()

In [None]:
# Check market outcome.
# session.outcome can be missing/falsy (NOTE(review): presumably when the market
# has not resolved or the price data is unavailable - confirm). Report that case
# explicitly instead of leaving the cell silently without output.
outcome = session.outcome
if outcome:
    print(f"Open price: ${outcome.open_price:,.2f}")
    print(f"Close price: ${outcome.close_price:,.2f}")
    print(f"Return: {outcome.return_pct:+.4f}%")
    print(f"Outcome: {outcome.outcome.upper()}")
else:
    print("No outcome available for this session.")

In [None]:
from src.viz import plot_session

# Visualize the session: plot the requested Polymarket quote fields (bid, ask,
# mid, microprice) together with the Binance mid, with show_outcome=True.
# NOTE(review): how the two venues are laid out and how the outcome is drawn is
# decided inside plot_session - confirm there.
fig = plot_session(
    session,
    pm_fields=["pm_bid", "pm_ask", "pm_mid", "pm_microprice"],
    bnc_fields=["bnc_mid"],
    show_outcome=True,
)
fig.show()

## 4. Access Raw Data

You can also access the underlying data directly.

In [None]:
# Polymarket BBO (best bid/offer): report how many updates the session holds,
# then preview the first rows as the cell output.
pm_bbo = session.polymarket_bbo
print(f"Polymarket BBO updates: {len(pm_bbo)}")
pm_bbo.head()

In [None]:
# Binance BBO (much more frequent than the Polymarket feed): report the number
# of updates, then preview the first rows as the cell output.
bnc_bbo = session.binance_bbo
print(f"Binance BBO updates: {len(bnc_bbo)}")
bnc_bbo.head()

In [None]:
# Order book data (L2 snapshots)
book = session.polymarket_book
print(f"Book snapshots: {len(book)}")
# Only index into the table when it has rows. The bid_prices / ask_prices cells
# of a snapshot are sized collections; their lengths are reported as the number
# of price levels on each side of the first snapshot.
if not book.empty:
    print(f"\nFirst snapshot bid depth: {len(book.iloc[0]['bid_prices'])} levels")
    print(f"First snapshot ask depth: {len(book.iloc[0]['ask_prices'])} levels")

## 5. Visualize Order Book

In [None]:
from src.viz.book import plot_book_depth, plot_book_depth_over_time

if not book.empty:
    # Depth chart for a single snapshot: the row halfway through the snapshot
    # table (middle by row count, used here as a stand-in for mid-session).
    row = book.iloc[len(book) // 2]
    fig = plot_book_depth(
        # Forward the four price/size columns of that row under the same names.
        **{col: row[col] for col in ("bid_prices", "bid_sizes", "ask_prices", "ask_sizes")},
        title="Book Depth (Mid-Session)",
    )
    fig.show()

In [None]:
# Book depth over time (skipped when there are no snapshots).
# NOTE(review): depth=5 presumably limits the plot to the top 5 price levels and
# sample_interval=20 presumably keeps every 20th snapshot - confirm against
# plot_book_depth_over_time.
if not book.empty:
    fig = plot_book_depth_over_time(book, depth=5, sample_interval=20)
    fig.show()

## 6. Compute Volatility

Use Binance data (including lookback period) to estimate volatility.

In [None]:
from src.features.volatility import SimpleRealizedVol

# Get Binance trades including lookback (the extra hours requested via
# lookback_hours when the session was loaded).
trades = session.binance_lookback_trades
print(f"Trades in lookback + market period: {len(trades)}")

# Compute realized vol from the "price" column using 1000 ms (1-second) sampling.
# The result is printed as a percentage and labelled as annualized; `vol`,
# `vol_estimator` and `trades` are reused by later cells.
vol_estimator = SimpleRealizedVol(sample_interval_ms=1000)  # 1-second sampling
vol = vol_estimator.compute(trades, price_col="price")
print(f"\nRealized volatility (annualized): {vol:.2%}")

In [None]:
import plotly.express as px

# Rolling volatility over a 5-minute (300-second) window, from the same trades
# and estimator used for the single realized-vol figure.
rolling_vol = vol_estimator.compute_rolling(trades, window_seconds=300, price_col="price")

# The index is converted with unit="ms", i.e. it is interpreted as epoch
# milliseconds, to get a datetime x-axis.
fig = px.line(
    x=pd.to_datetime(rolling_vol.index, unit="ms"),
    y=rolling_vol.values,
    title="5-Minute Rolling Volatility",
    labels={"x": "Time", "y": "Annualized Vol"},
)
fig.show()

## 7. Microstructure Features

In [None]:
from src.features.microstructure import compute_microprice, compute_book_imbalance, compute_spread_bps

# Derive the microstructure columns on a fresh frame: assign() returns a new
# DataFrame, so session.aligned itself is left untouched.
df = session.aligned.assign(
    # Book imbalance from the best bid/ask sizes.
    pm_imbalance=lambda frame: compute_book_imbalance(frame["pm_bid_sz"], frame["pm_ask_sz"]),
    # Quoted spread in basis points from the best bid/ask prices.
    pm_spread_bps=lambda frame: compute_spread_bps(frame["pm_bid"], frame["pm_ask"]),
)

# Summary statistics for the quote columns and the two derived features.
df[["ts_recv", "pm_bid", "pm_ask", "pm_microprice", "pm_imbalance", "pm_spread_bps"]].describe()

## 8. Load Multiple Sessions

In [None]:
from src.data.session import load_sessions_range

# Load all 9am sessions for 3 days (16-18 January 2026).
# NOTE(review): three sessions are only returned if end_date is inclusive - confirm.
# preload=False defers loading the underlying data.
# NOTE(review): presumably the data is then fetched lazily on first access - confirm.
sessions = load_sessions_range(
    asset="BTC",
    start_date=date(2026, 1, 16),
    end_date=date(2026, 1, 18),
    hours_et=[9],  # Only 9am
    preload=False,  # Don't load data yet
)

# Report how many sessions came back and list them one per line.
print(f"Loaded {len(sessions)} sessions")
for s in sessions:
    print(f"  {s}")

In [None]:
# Compute hourly returns for the list of sessions built in the previous cell and
# display the resulting table as the cell output.
# NOTE(review): those sessions were created with preload=False, so this call
# presumably triggers the actual data loading - confirm.
from src.features.historical import compute_hourly_returns

returns_df = compute_hourly_returns(sessions)
returns_df

## 9. Use a Pricer

The pricer interface is a placeholder for your models.

In [None]:
from src.pricing import Pricer, PricerOutput
from src.pricing.base import MoneynessPricer

# The simple moneyness-based pricer.
pricer = MoneynessPricer(sensitivity=100)

# Price one hypothetical moment: 30 minutes before expiry, with the current
# price 0.1% above the strike. The strike is the session's open price, so a
# known outcome is required; `vol` is the realized volatility computed earlier.
if session.outcome:
    strike = session.outcome.open_price
    output = pricer.price(
        time_to_expiry_sec=30 * 60,
        realized_vol=vol,
        current_price=strike * 1.001,
        strike_price=strike,
    )

    print(f"Up probability: {output.up_prob:.2%}")
    print(f"Fair bid: {output.up_fair_bid:.4f}")
    print(f"Fair ask: {output.up_fair_ask:.4f}")

## Next Steps

1. **Build your volatility estimator** - Extend `RealizedVolEstimator` with more sophisticated methods
2. **Implement your pricer** - Create a `Pricer` subclass with your pricing model
3. **Analyze historical patterns** - Use `get_historical_hourly_stats` to find hour-of-day effects
4. **Build a backtester** - Use the aligned data to simulate trading strategies