# FundingRateDetector - Signal Detection from Funding Rate Transitions

This notebook demonstrates:
1. Loading perpetual futures data from **3 exchanges** (Binance, OKX, Bybit) for the last 3 months
2. Computing **Aggregated Open Interest** across all pairs
3. Building a `FundingRateDetector` that generates **RISE** signals when the funding rate turns negative after a series of positive readings
4. Signal metrics visualization

## 1. Setup and Imports

In [None]:
from __future__ import annotations

import asyncio
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, ClassVar

import aiohttp
import polars as pl
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from signalflow.core import RawData, Signals, RawDataView
from signalflow.core.enums import RawDataType, SignalCategory, SfComponentType
from signalflow.core import sf_component
from signalflow.detector.base import SignalDetector
from signalflow.data.source import (
    BinanceFuturesUsdtLoader,
    OkxFuturesLoader,
    BybitFuturesLoader,
)
from signalflow.data.raw_store import DuckDbRawStore, DuckDbSpotStore
from signalflow.feature import AggregatedOpenInterest, AggregatedOpenInterestMultiSource
from signalflow.analytic.signals import (
    SignalDistributionMetric,
    SignalProfileMetric,
)

## 2. Load Perpetual Data from 3 Exchanges

Download perpetual futures data from Binance, OKX, and Bybit for the last 3 months.

**Data availability:**
- **Binance**: OHLCV + `funding_rate` + `open_interest`
- **OKX**: OHLCV only (funding rate via separate API)
- **Bybit**: OHLCV only (funding rate via separate API)

In [None]:
# Notebook-wide configuration: storage location, candle size and date window.
DATA_DIR = Path("..") / "data"
DATA_DIR.mkdir(exist_ok=True)

# One DuckDB file per exchange, all kept inside DATA_DIR.
DB_BINANCE, DB_OKX, DB_BYBIT = (
    DATA_DIR / f"perpetual_{exchange}.duckdb"
    for exchange in ("binance", "okx", "bybit")
)

TIMEFRAME = "8h"  # Funding rate is typically every 8 hours

# Analysis window: the 90 days that end at today's midnight.
# NOTE(review): datetime.now() is naive local time — confirm the stores and
# loaders do not expect UTC-aligned bounds.
END_DATE = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
START_DATE = END_DATE - timedelta(days=90)

print(f"Date range: {START_DATE} to {END_DATE}")
print(f"Timeframe: {TIMEFRAME}")
print(f"Databases: {', '.join(db.name for db in (DB_BINANCE, DB_OKX, DB_BYBIT))}")

In [None]:
async def get_binance_perpetual_pairs() -> list[str]:
    """Fetch all USDT perpetual pairs from Binance.

    Returns:
        Alphabetically sorted symbols of contracts whose ``contractType`` is
        ``PERPETUAL``, whose ``quoteAsset`` is ``USDT`` and whose ``status``
        is ``TRADING``.

    Raises:
        aiohttp.ClientResponseError: If the exchange replies with an HTTP
            error status.
    """
    url = "https://fapi.binance.com/fapi/v1/exchangeInfo"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # Surface HTTP errors directly instead of failing later with an
            # opaque KeyError on a payload that has no "symbols" entry.
            resp.raise_for_status()
            data = await resp.json()
    return sorted(
        s["symbol"] for s in data["symbols"]
        if s["contractType"] == "PERPETUAL"
        and s["quoteAsset"] == "USDT"
        and s["status"] == "TRADING"
    )

async def get_okx_perpetual_pairs() -> list[str]:
    """Fetch all USDT perpetual pairs from OKX.

    Returns:
        Alphabetically sorted symbols in the exchange-neutral form used by the
        rest of the notebook (``BTC-USDT-SWAP`` becomes ``BTCUSDT``), limited
        to instruments settled in USDT whose ``state`` is ``live``.

    Raises:
        aiohttp.ClientResponseError: If the exchange replies with an HTTP
            error status.
    """
    url = "https://www.okx.com/api/v5/public/instruments?instType=SWAP"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # Without this, an HTTP error would be reported as "0 pairs"
            # because a payload lacking "data" falls back to an empty list.
            resp.raise_for_status()
            data = await resp.json()
    # Convert OKX format (BTC-USDT-SWAP) to standard (BTCUSDT): the base asset
    # is the first dash-separated component of instId.
    return sorted(
        f"{inst['instId'].split('-')[0]}USDT"
        for inst in data.get("data", [])
        if inst["settleCcy"] == "USDT" and inst["state"] == "live"
    )

async def get_bybit_perpetual_pairs() -> list[str]:
    """Fetch all USDT perpetual pairs from Bybit.

    Returns:
        Alphabetically sorted symbols of linear instruments quoted in USDT
        whose ``status`` is ``Trading`` and whose ``contractType`` is
        ``LinearPerpetual``.

    Raises:
        aiohttp.ClientResponseError: If the exchange replies with an HTTP
            error status.
    """
    # The endpoint is paginated; request the largest documented page size so
    # the listing is not cut off at the smaller default page.
    # NOTE(review): if the exchange ever lists more instruments than one page
    # holds, follow result["nextPageCursor"] as well — confirm against the
    # current instrument count.
    url = "https://api.bybit.com/v5/market/instruments-info?category=linear&limit=1000"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # Without this, an HTTP error would be reported as "0 pairs"
            # because a payload lacking "result" falls back to an empty list.
            resp.raise_for_status()
            data = await resp.json()
    return sorted(
        s["symbol"] for s in data.get("result", {}).get("list", [])
        if s["quoteCoin"] == "USDT"
        and s["status"] == "Trading"
        and s["contractType"] == "LinearPerpetual"
    )

# Get pairs from all exchanges
binance_pairs, okx_pairs, bybit_pairs = await asyncio.gather(
    get_binance_perpetual_pairs(),
    get_okx_perpetual_pairs(),
    get_bybit_perpetual_pairs(),
)

print(f"Binance: {len(binance_pairs)} pairs")
print(f"OKX: {len(okx_pairs)} pairs")
print(f"Bybit: {len(bybit_pairs)} pairs")

# Find common pairs across all exchanges
common_pairs = sorted(set(binance_pairs) & set(okx_pairs) & set(bybit_pairs))
print(f"\nCommon pairs (all 3 exchanges): {len(common_pairs)}")
print(f"Examples: {common_pairs[:10]}")

In [None]:
# Create one store (DuckDB file) and one loader per exchange.

# Binance - perpetual data with funding_rate and open_interest
store_binance = DuckDbRawStore(
    db_path=DB_BINANCE,
    data_type="perpetual",
    timeframe=TIMEFRAME,
)
loader_binance = BinanceFuturesUsdtLoader(store=store_binance, timeframe=TIMEFRAME)

# OKX - OHLCV only (spot store schema)
store_okx = DuckDbSpotStore(db_path=DB_OKX, timeframe=TIMEFRAME)
loader_okx = OkxFuturesLoader(store=store_okx, timeframe=TIMEFRAME)

# Bybit - OHLCV only (spot store schema)
store_bybit = DuckDbSpotStore(db_path=DB_BYBIT, timeframe=TIMEFRAME)
loader_bybit = BybitFuturesLoader(store=store_bybit, timeframe=TIMEFRAME)

# The header used to be printed with nothing after it; list what was built.
print("Loaders initialized:")
for exchange_label, loader in (
    ("Binance", loader_binance),
    ("OKX", loader_okx),
    ("Bybit", loader_bybit),
):
    print(f"  {exchange_label}: {type(loader).__name__}")

In [None]:
# Download data from all exchanges (skip if already exists)
# Using common_pairs to ensure we have data for same pairs across all exchanges

async def download_all():
    tasks = []
    
    if not DB_BINANCE.exists():
        print("Downloading Binance data...")
        tasks.append(loader_binance.download(
            pairs=common_pairs,
            start=START_DATE,
            end=END_DATE,
            fill_gaps=True,
        ))
    else:
        print(f"Binance: Using existing data from {DB_BINANCE.name}")
    
    if not DB_OKX.exists():
        print("Downloading OKX data...")
        tasks.append(loader_okx.download(
            pairs=common_pairs,
            start=START_DATE,
            end=END_DATE,
            fill_gaps=True,
        ))
    else:
        print(f"OKX: Using existing data from {DB_OKX.name}")
    
    if not DB_BYBIT.exists():
        print("Downloading Bybit data...")
        tasks.append(loader_bybit.download(
            pairs=common_pairs,
            start=START_DATE,
            end=END_DATE,
            fill_gaps=True,
        ))
    else:
        print(f"Bybit: Using existing data from {DB_BYBIT.name}")
    
    if tasks:
        await asyncio.gather(*tasks)
        print("\nDownload complete!")

await download_all()

In [None]:
# Read the stored candles for the common pairs and tag each frame with the
# exchange it came from.
load_window = dict(pairs=common_pairs, start=START_DATE, end=END_DATE)

# Binance has funding_rate and open_interest
df_binance = store_binance.load_many(**load_window).with_columns(
    pl.lit("binance").alias("source")
)

# OKX and Bybit have OHLCV only
df_okx = store_okx.load_many(**load_window).with_columns(
    pl.lit("okx").alias("source")
)
df_bybit = store_bybit.load_many(**load_window).with_columns(
    pl.lit("bybit").alias("source")
)

for exchange_label, frame in (
    ("Binance", df_binance),
    ("OKX", df_okx),
    ("Bybit", df_bybit),
):
    print(f"{exchange_label}: {len(frame):,} rows, columns: {frame.columns}")

# Binance is the primary dataset because only it carries funding_rate and
# open_interest.
raw_df = df_binance
print("\nUsing Binance as primary source (with funding_rate & open_interest)")
print(f"Total pairs: {raw_df['pair'].n_unique()}")
print(f"Date range: {raw_df['timestamp'].min()} to {raw_df['timestamp'].max()}")

raw_df.head(5)

In [None]:
# Summary statistics for funding rate and open interest.
# A notebook cell only auto-displays its final expression, so the funding-rate
# table has to be printed explicitly; as a bare expression in the middle of
# the cell it was computed and then silently discarded.
print("Funding Rate Statistics:")
print(
    raw_df.select(
        pl.col("funding_rate").mean().alias("mean"),
        pl.col("funding_rate").std().alias("std"),
        pl.col("funding_rate").min().alias("min"),
        pl.col("funding_rate").max().alias("max"),
        pl.col("funding_rate").quantile(0.25).alias("q25"),
        pl.col("funding_rate").quantile(0.75).alias("q75"),
    )
)

print("\nOpen Interest Statistics:")
# Last expression of the cell: rendered by the notebook as the cell output.
raw_df.select(
    pl.col("open_interest").mean().alias("mean"),
    pl.col("open_interest").std().alias("std"),
    pl.col("open_interest").min().alias("min"),
    pl.col("open_interest").max().alias("max"),
)

## 2.1 Aggregated Open Interest Feature

Calculate market-wide aggregated open interest from Binance (the only source with OI in klines).

For multi-exchange analysis, we also compute **aggregated volume** across all 3 exchanges.

In [None]:
# Market-wide open interest computed from the Binance frame.
agg_oi_feature = AggregatedOpenInterest(
    zscore_window=21,  # ~7 days at 8h timeframe
    include_pair_count=True,
)

# Enrich the Binance data with the aggregated OI columns.
raw_df_with_agg_oi = agg_oi_feature.compute(raw_df)

added_columns = []
for column_name in raw_df_with_agg_oi.columns:
    if "agg_oi" in column_name or column_name == "n_pairs":
        added_columns.append(column_name)

print("Aggregated OI columns added:")
print(added_columns)

# Preview the per-pair inputs next to the aggregated values.
preview_columns = [
    "pair", "timestamp", "close", "funding_rate", "open_interest",
    "agg_oi", "agg_oi_change", "agg_oi_zscore",
]
raw_df_with_agg_oi.select(preview_columns).head(10)

In [None]:
# Collapse the per-pair frame to one row per timestamp for plotting.
oi_metric_columns = ["agg_oi", "agg_oi_change", "agg_oi_zscore"]
agg_oi_ts = (
    raw_df_with_agg_oi
    .select(["timestamp", *oi_metric_columns])
    .unique(subset=["timestamp"])
    .sort("timestamp")
)
oi_timestamps = agg_oi_ts["timestamp"].to_list()

# One panel per metric: (trace name, series, line colour, line width).
# The change is scaled to percent; nulls in change and z-score are drawn as 0.
oi_panels = [
    ("Agg OI", agg_oi_ts["agg_oi"], "#2171b5", 1.5),
    ("OI Change %", (agg_oi_ts["agg_oi_change"] * 100).fill_null(0), "#41ab5d", 1),
    ("Z-Score", agg_oi_ts["agg_oi_zscore"].fill_null(0), "#d94801", 1),
]

fig = make_subplots(
    rows=3, cols=1,
    subplot_titles=(
        "Aggregated Open Interest (All Pairs)",
        "Aggregated OI Change (%)",
        "Aggregated OI Z-Score"
    ),
    shared_xaxes=True,
    vertical_spacing=0.08,
)

for panel_row, (trace_name, series, line_color, line_width) in enumerate(oi_panels, start=1):
    fig.add_trace(
        go.Scatter(
            x=oi_timestamps,
            y=series.to_list(),
            mode="lines",
            name=trace_name,
            line=dict(color=line_color, width=line_width),
        ),
        row=panel_row, col=1
    )

# Dashed reference lines at z = +2 and z = -2 on the z-score panel.
for level, level_color in ((2, "red"), (-2, "green")):
    fig.add_hline(y=level, line_dash="dash", line_color=level_color, row=3, col=1)

fig.update_layout(
    height=700,
    title="Market-Wide Aggregated Open Interest",
    showlegend=False,
)
for panel_row, axis_title in enumerate(("OI (USD)", "Change %", "Z-Score"), start=1):
    fig.update_yaxes(title_text=axis_title, row=panel_row, col=1)
fig.show()

### Multi-Exchange Aggregated Volume

Since OKX and Bybit don't provide OI in their klines APIs, we compute aggregated volume across all 3 exchanges as an alternative market sentiment indicator.

In [None]:
# Stack the OHLCV columns of all three exchanges into one frame.
# Each frame contributes only the shared columns it actually has; the diagonal
# concat fills any column missing from a frame with nulls.
common_cols = ["pair", "timestamp", "open", "high", "low", "close", "volume", "source"]
exchange_frames = (df_binance, df_okx, df_bybit)

df_all_exchanges = pl.concat(
    [
        frame.select([c for c in common_cols if c in frame.columns])
        for frame in exchange_frames
    ],
    how="diagonal",
)

print(f"Combined data: {len(df_all_exchanges):,} rows")
print(f"Sources: {df_all_exchanges['source'].unique().to_list()}")

# Per timestamp: total volume plus one column per exchange.
per_exchange_volume = [
    pl.col("volume")
    .filter(pl.col("source") == exchange)
    .sum()
    .alias(f"agg_volume_{exchange}")
    for exchange in ("binance", "okx", "bybit")
]
agg_volume = (
    df_all_exchanges
    .group_by("timestamp")
    .agg([pl.col("volume").sum().alias("agg_volume_total"), *per_exchange_volume])
    .sort("timestamp")
)

# Period-over-period change and a 21-row rolling z-score of the total.
total_volume = pl.col("agg_volume_total")
volume_change = total_volume / total_volume.shift(1) - 1
volume_zscore = (total_volume - total_volume.rolling_mean(21)) / total_volume.rolling_std(21)
agg_volume = agg_volume.with_columns([
    volume_change.alias("agg_volume_change"),
    volume_zscore.alias("agg_volume_zscore"),
])

print(f"\nAggregated volume time series: {len(agg_volume)} points")
agg_volume.head(10)

In [None]:
# Two stacked panels: absolute volume per exchange and each exchange's share.
volume_timestamps = agg_volume["timestamp"].to_list()

# Fill colours for the stacked areas and line colours for the share curves.
area_fill_colors = {"binance": "#F0B90B", "okx": "#000000", "bybit": "#F7A600"}
share_line_colors = {"binance": "#F0B90B", "okx": "#121212", "bybit": "#F7A600"}

fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=(
        "Aggregated Volume by Exchange",
        "Volume Market Share (%)"
    ),
    shared_xaxes=True,
    vertical_spacing=0.12,
)

# Top panel: traces in the same stackgroup are drawn as a stacked area chart.
for source, fill in area_fill_colors.items():
    exchange_volume = agg_volume[f"agg_volume_{source}"].fill_null(0)
    fig.add_trace(
        go.Scatter(
            x=volume_timestamps,
            y=exchange_volume.to_list(),
            mode="lines",
            name=source.capitalize(),
            stackgroup="volume",
            line=dict(width=0.5),
            fillcolor=fill,
        ),
        row=1, col=1
    )

# Bottom panel: exchange volume as a percentage of the combined total.
for source, line_color in share_line_colors.items():
    exchange_volume = agg_volume[f"agg_volume_{source}"].fill_null(0)
    share = exchange_volume / agg_volume["agg_volume_total"] * 100
    fig.add_trace(
        go.Scatter(
            x=volume_timestamps,
            y=share.to_list(),
            mode="lines",
            name=f"{source.capitalize()} %",
            line=dict(color=line_color, width=1.5),
        ),
        row=2, col=1
    )

fig.update_layout(
    height=600,
    title="Multi-Exchange Volume Analysis (Binance, OKX, Bybit)",
    showlegend=True,
    legend=dict(x=1.02, y=1),
)
for panel_row, axis_title in enumerate(("Volume (USD)", "Market Share %"), start=1):
    fig.update_yaxes(title_text=axis_title, row=panel_row, col=1)
fig.show()

## 3. FundingRateDetector Implementation

**Logic:**
- Track consecutive positive funding rate periods per pair
- Generate **RISE** signal when:
  - Previous `n` funding rates were all positive (overleveraged longs)
  - Current funding rate turns negative (shorts taking over = potential reversal up)

**Rationale:** When funding switches from positive to negative after a sustained positive streak, it often indicates that overleveraged longs are being squeezed out, creating buying opportunities.

In [None]:
@dataclass
@sf_component(name="funding_rate_detector")
class FundingRateDetector(SignalDetector):
    """Detects potential reversals based on funding rate transitions.

    Generates RISE signal when:
    - Previous `min_positive_streak` funding rates were all positive
    - Current funding rate turns negative

    This pattern suggests overleveraged longs are exiting, potentially
    creating upward price pressure as shorts cover.

    Missing (null) funding values count as neither positive nor negative:
    they end the running streak and never trigger a signal themselves.

    Attributes:
        min_positive_streak: Minimum consecutive positive funding rates
            before transition to negative triggers a signal. Must be >= 1.
        funding_col: Column name for funding rate data.
    """

    component_type: ClassVar[SfComponentType] = SfComponentType.DETECTOR
    signal_category: SignalCategory = SignalCategory.PRICE_DIRECTION
    raw_data_type: RawDataType | str = RawDataType.PERPETUAL

    # Detector parameters
    min_positive_streak: int = 3  # Minimum consecutive positive funding rates
    funding_col: str = "funding_rate"

    # Signal types
    allowed_signal_types: set[str] | None = field(
        default_factory=lambda: {"rise"}
    )

    def preprocess(
        self,
        raw_data_view: RawDataView,
        context: dict[str, Any] | None = None,
    ) -> pl.DataFrame:
        """Extract perpetual data with funding rates.

        Args:
            raw_data_view: View from which the frame is read, using the
                detector's ``raw_data_type`` as the lookup key.
            context: Unused by this detector.

        Returns:
            The frame sorted by pair and then timestamp, which is the row
            order the streak logic in ``detect`` relies on.
        """
        # raw_data_type may be an enum member or a plain string.
        key = (
            self.raw_data_type.value
            if hasattr(self.raw_data_type, "value")
            else str(self.raw_data_type)
        )
        df = raw_data_view.to_polars(key)
        # pair_col / ts_col are not defined in this class — presumably
        # inherited from SignalDetector.
        return df.sort([self.pair_col, self.ts_col])

    def detect(
        self,
        features: pl.DataFrame,
        context: dict[str, Any] | None = None,
    ) -> Signals:
        """Detect funding rate transition signals.

        Signal Logic:
        1. Compute whether funding rate is positive for each row
        2. Count consecutive positive funding rates (streak)
        3. Detect when current funding turns negative after streak >= min_positive_streak

        Args:
            features: Frame holding the pair, timestamp and funding columns,
                ordered by timestamp within each pair.
            context: Unused by this detector.

        Returns:
            Signals wrapping one row per detected transition with the columns
            pair, timestamp, ``signal_type`` ("rise"), ``signal`` (1),
            ``probability``, ``streak_length`` and the funding rate.

        Raises:
            ValueError: If ``min_positive_streak`` is smaller than 1.
        """
        if self.min_positive_streak < 1:
            # A non-positive threshold would flag every negative funding row
            # and divide by zero in the probability formula below.
            raise ValueError(
                f"min_positive_streak must be >= 1, got {self.min_positive_streak}"
            )

        funding = pl.col(self.funding_col)
        pair = pl.col(self.pair_col)
        prev_streak = pl.col("_prev_streak")

        # Step 1: Mark positive / negative funding rates.
        # Nulls are mapped to False on both flags so that a missing value is
        # an explicit streak break; left as null it would leak through the
        # cumulative sums and reset the streak only some of the time.
        df = features.with_columns(
            (funding > 0).fill_null(False).cast(pl.Int32).alias("_is_positive"),
            (funding < 0).fill_null(False).alias("_is_negative"),
        )

        # Step 2: Calculate consecutive positive streak using cumsum trick.
        # Every non-positive row bumps the group id, so each group is one
        # non-positive row followed by the positive run after it.
        df = df.with_columns(
            (pl.col("_is_positive") == 0)
            .cum_sum()
            .over(pair)
            .alias("_streak_group"),
        )

        # Running count of positives inside each group = current streak length.
        df = df.with_columns(
            pl.col("_is_positive")
            .cum_sum()
            .over([pair, "_streak_group"])
            .alias("_positive_streak"),
        )

        # Step 3: Streak length as of the previous row of the same pair
        # (0 for the first row of a pair, which has no predecessor).
        df = df.with_columns(
            pl.col("_positive_streak")
            .shift(1)
            .over(pair)
            .fill_null(0)
            .alias("_prev_streak"),
        )

        # Step 4: Signal when transitioning from positive streak to negative
        signal_condition = (
            pl.col("_is_negative") &  # Current is negative
            (prev_streak >= self.min_positive_streak)  # Had enough positive streak
        )

        # Only signal rows are returned, so filter first and attach the signal
        # columns afterwards instead of masking every row with when/otherwise.
        signals_df = (
            df.filter(signal_condition)
            .with_columns(
                pl.lit("rise").alias("signal_type"),
                # Probability based on streak length (longer streak = stronger
                # signal): streak / (3 * min_positive_streak), kept in [0.5, 1.0].
                (prev_streak / (self.min_positive_streak * 3))
                .clip(0.5, 1.0)
                .alias("probability"),
                # Include streak length for analysis
                prev_streak.alias("streak_length"),
            )
            .select([
                self.pair_col,
                self.ts_col,
                "signal_type",
                pl.lit(1).alias("signal"),
                "probability",
                "streak_length",
                self.funding_col,  # Include funding rate for analysis
            ])
        )

        return Signals(signals_df)

## 4. Run Detection

In [None]:
# Wrap the Binance frame so detectors and metrics can consume it.
raw_data = RawData(perpetual=raw_df)

# Three detectors that differ only in the required positive streak length.
detector_n3, detector_n5, detector_n7 = (
    FundingRateDetector(min_positive_streak=streak) for streak in (3, 5, 7)
)

# Each run gets its own fresh view of the raw data.
signals_n3 = detector_n3.run(raw_data.view())
signals_n5 = detector_n5.run(raw_data.view())
signals_n7 = detector_n7.run(raw_data.view())

for streak, detected in ((3, signals_n3), (5, signals_n5), (7, signals_n7)):
    print(f"Signals (n={streak}): {len(detected.df):,}")

In [None]:
# Examine signals: preview the first 20 rows produced by the n=3 detector.
# The trailing expression is what the notebook renders as the cell output.
print("Sample signals (n=3):")
signals_n3.df.head(20)

In [None]:
# Signal statistics by pair: count signals per pair and show the ten pairs
# with the most signals, largest count first.
print("Top 10 pairs by signal count (n=3):")
signals_n3.df.group_by("pair").len().sort("len", descending=True).head(10)

In [None]:
# Streak length distribution: summary statistics of the positive-streak
# length that preceded each n=3 signal (the detector's streak_length column).
print("Streak length distribution (n=3):")
signals_n3.df.select(
    pl.col("streak_length").mean().alias("mean"),
    pl.col("streak_length").std().alias("std"),
    pl.col("streak_length").min().alias("min"),
    pl.col("streak_length").max().alias("max"),
    pl.col("streak_length").quantile(0.5).alias("median"),
)

## 5. Signal Metrics

In [None]:
# Use the default signals (n=3) for the distribution, profile and timing
# analysis in the cells below.
signals = signals_n3

### 5.1 Signal Distribution Metric

Analyzes how signals are distributed across pairs and over time.

In [None]:
# Configure and compute the signal distribution metric.
dist_metric = SignalDistributionMetric(
    n_bars=15,
    rolling_window_minutes=60 * 24,  # 1 day rolling window
    ma_window_hours=24 * 7,  # 1 week moving average
)

dist_computed, dist_ctx = dist_metric.compute(
    raw_data=raw_data,
    signals=signals,
)

# Floats are shown with two decimals; any other value is printed as-is.
print("Distribution Metrics:")
for metric_name, metric_value in dist_computed["quant"].items():
    if isinstance(metric_value, float):
        print(f"  {metric_name}: {metric_value:.2f}")
    else:
        print(f"  {metric_name}: {metric_value}")

In [None]:
# Plot distribution: build the metric's figure and give it a title.
fig = dist_metric.plot(dist_computed, dist_ctx, raw_data, signals)
# With fig.show() commented out, this call is the cell's last expression, so
# its return value (presumably the figure itself) is what the notebook renders.
fig.update_layout(title="Funding Rate Signal Distribution")
# fig.show()

### 5.2 Signal Profile Metric

Analyzes post-signal price behavior - what happens to price after the signal.

In [None]:
# Profile the price path over the 3 days that follow each signal.
# look_ahead counts rows/candles, so 9 candles of 8h each cover 72 hours.
profile_metric = SignalProfileMetric(
    look_ahead=9,  # 9 x 8h = 72 hours = 3 days
    quantiles=(0.25, 0.75),
)

profile_computed, profile_ctx = profile_metric.compute(
    raw_data=raw_data,
    signals=signals,
)

# Floats are shown with four decimals; any other value is printed as-is.
print("Profile Metrics:")
for metric_name, metric_value in profile_computed["quant"].items():
    print(
        f"  {metric_name}: {metric_value:.4f}"
        if isinstance(metric_value, float)
        else f"  {metric_name}: {metric_value}"
    )

In [None]:
# Plot price profile after signals: build the metric's figure and title it.
fig = profile_metric.plot(profile_computed, profile_ctx, raw_data, signals)
# With fig.show() commented out, this call is the cell's last expression, so
# its return value (presumably the figure itself) is what the notebook renders.
fig.update_layout(title="Price Profile After Funding Rate RISE Signals")
# fig.show()

### 5.3 Custom Visualization: Compare Different Streak Lengths

In [None]:
def compute_profile_stats(signals: Signals, raw_data: RawData, look_ahead: int = 9):
    """Return the quantitative profile statistics for a set of signals.

    Args:
        signals: Signals to profile.
        raw_data: Raw data passed to the metric together with the signals.
        look_ahead: Forwarded to ``SignalProfileMetric`` as its look-ahead.

    Returns:
        The ``"quant"`` entry of the metric's computed result.
    """
    profile = SignalProfileMetric(look_ahead=look_ahead)
    # compute() yields a (result, context) pair; only the result is needed.
    quantified, _context = profile.compute(raw_data, signals)
    return quantified["quant"]

# Build one summary row per streak setting; settings without signals are skipped.
streak_variants = {3: signals_n3, 5: signals_n5, 7: signals_n7}

results = []
for n, sig in streak_variants.items():
    # Explicit length check: the signal frame is tested by row count.
    if len(sig.df) == 0:
        continue
    stats = compute_profile_stats(sig, raw_data)
    # Return-like figures are scaled by 100 for display as percentages;
    # statistics missing from the result default to 0.
    results.append({
        "min_streak": n,
        "n_signals": stats.get("n_signals", 0),
        "final_mean_%": stats.get("final_mean", 0) * 100,
        "final_median_%": stats.get("final_median", 0) * 100,
        "max_uplift_%": stats.get("avg_max_uplift", 0) * 100,
    })

comparison_df = pl.DataFrame(results)
print("Comparison of Different Streak Lengths:")
comparison_df

In [None]:
# Side-by-side bars: signal count and mean post-signal return per streak setting.
streak_labels = [f"n={entry['min_streak']}" for entry in results]

# One bar chart per subplot column: (result key, bar colour, trace name).
bar_specs = (
    ("n_signals", "#2171b5", "Signals"),
    ("final_mean_%", "#41ab5d", "Mean Return"),
)

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=("Number of Signals", "Average Return After Signal (%)")
)

for subplot_col, (result_key, bar_color, trace_name) in enumerate(bar_specs, start=1):
    fig.add_trace(
        go.Bar(
            x=streak_labels,
            y=[entry[result_key] for entry in results],
            marker_color=bar_color,
            name=trace_name
        ),
        row=1, col=subplot_col
    )

fig.update_layout(
    height=400,
    title="FundingRateDetector: Impact of min_positive_streak Parameter",
    showlegend=False,
)
for subplot_col, axis_title in enumerate(("Count", "Return %"), start=1):
    fig.update_yaxes(title_text=axis_title, row=1, col=subplot_col)
# fig.show()

### 5.4 Signal Timing Analysis

In [None]:
# Analyze signal timing by day of week and hour
signals_with_time = signals.df.with_columns(
    pl.col("timestamp").dt.weekday().alias("weekday"),
    pl.col("timestamp").dt.hour().alias("hour"),
)

# Weekday distribution
weekday_dist = signals_with_time.group_by("weekday").len().sort("weekday")
weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
weekday_rows = weekday_dist.to_dicts()

fig = go.Figure()
fig.add_trace(go.Bar(
    # Polars dt.weekday() is ISO-numbered (Monday = 1 ... Sunday = 7), so
    # subtract 1 to index the 0-based name list. Without the offset every bar
    # is labelled one day late and a Sunday signal raises IndexError.
    x=[weekday_names[int(r["weekday"]) - 1] for r in weekday_rows],
    y=[r["len"] for r in weekday_rows],
    marker_color="#2171b5",
))
fig.update_layout(
    title="Signal Distribution by Day of Week",
    xaxis_title="Day",
    yaxis_title="Number of Signals",
    height=400,
)
# fig.show()

## 6. Summary

This notebook demonstrated:

1. **Multi-Exchange Data Loading**: 
   - Fetched perpetual pairs from **Binance, OKX, Bybit**
   - Found common pairs across all 3 exchanges
   - Binance provides `funding_rate` + `open_interest`, OKX/Bybit provide OHLCV

2. **Aggregated Market Features**:
   - **Aggregated Open Interest** from Binance (market-wide positioning)
   - **Aggregated Volume** across all 3 exchanges (market sentiment)

3. **FundingRateDetector**: Generates **RISE** signals when:
   - Token had `n` consecutive positive funding rates (overleveraged longs)
   - Funding rate turns negative (shorts taking control)
   
4. **Signal Metrics**:
   - Distribution analysis: how signals spread across pairs and time
   - Profile analysis: post-signal price behavior
   - Parameter comparison: impact of `min_positive_streak` on signal quality

### Key Insights

- Longer positive funding streaks (higher `n`) produce fewer but potentially higher-quality signals
- Multi-exchange volume analysis reveals market share dynamics
- The transition from positive to negative funding can indicate a shift in market sentiment