# Download Funding Rates from CoinGecko

This notebook collects funding rate data from the CoinGecko API for backtesting the funding rate arbitrage strategy.

**Exchanges**: Lighter DEX, Extended DEX  
**Tokens**: KAITO, IP, GRASS, ZEC, APT, SUI, TRUMP, LDO, OP, SEI (10 tokens available on both)  
**Storage**: `/Users/tdl321/quants-lab/app/data/cache/funding/`

## 1. Setup & Configuration

In [None]:
import sys
import os
import asyncio
import pandas as pd
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv

# Add core module to path
# The repository root is put first on sys.path so the `core.*` imports
# below resolve; this must run before those imports.
sys.path.insert(0, '/Users/tdl321/quants-lab')

from core.data_sources.coingecko_funding import CoinGeckoFundingDataSource
from core.data_sources.funding_rate_collector import FundingRateCollector

# Load environment variables
# Reads the repository's .env file; the configuration cell then looks up
# COINGECKO_API_KEY and COINGECKO_USER_AGENT with os.getenv.
load_dotenv('/Users/tdl321/quants-lab/.env')

print("‚úÖ Imports successful")

In [None]:
# Configuration
def mask_secret(secret):
    """Return a display-safe placeholder for *secret*.

    Notebook output is stored alongside the code, so no character of the
    secret itself is echoed: only whether a value is present and its length.

    Returns "None" for a missing or empty value, otherwise
    "set (<length> chars)".
    """
    if not secret:
        return "None"
    return f"set ({len(secret)} chars)"


# Credentials are read from the environment; USER_AGENT falls back to
# "backtest" when COINGECKO_USER_AGENT is not defined.
API_KEY = os.getenv("COINGECKO_API_KEY")
USER_AGENT = os.getenv("COINGECKO_USER_AGENT", "backtest")

# Exchanges and tokens (based on our validation)
EXCHANGES = ["lighter", "extended"]
TOKENS = ["KAITO", "IP", "GRASS", "ZEC", "APT", "SUI", "TRUMP", "LDO", "OP", "SEI"]

# Print a masked summary instead of a prefix of the key itself.
print(f"API Key: {mask_secret(API_KEY)}")
print(f"Exchanges: {', '.join(EXCHANGES)}")
print(f"Tokens: {', '.join(TOKENS)}")

## 2. Validate Exchange & Token Availability

In [None]:
# Test CoinGecko connection
cg = CoinGeckoFundingDataSource(api_key=API_KEY, user_agent=USER_AGENT)

await cg.start()

# Validate exchanges
available, missing = await cg.validate_exchanges(EXCHANGES)

print(f"‚úÖ Available exchanges: {', '.join(available)}")
if missing:
    print(f"‚ùå Missing exchanges: {', '.join(missing)}")

await cg.stop()

## 3. Collect Single Snapshot (Test)

In [None]:
# Initialize collector
collector = FundingRateCollector(
    api_key=API_KEY,
    user_agent=USER_AGENT,
    exchanges=EXCHANGES,
    tokens=TOKENS
)

# Start session
await collector.cg_source.start()

# Collect single snapshot
snapshot_df = await collector.collect_single_snapshot()

# Display results
if snapshot_df is not None:
    print(f"\n‚úÖ Collected {len(snapshot_df)} funding rates")
    print(f"\nSnapshot preview:")
    display(snapshot_df[['exchange', 'base', 'target', 'funding_rate', 'index']].head(10))
    
    # Calculate spreads
    spreads = collector.calculate_spreads(snapshot_df)
    print(f"\nüìä Spreads:")
    display(spreads)
else:
    print("‚ùå Failed to collect data")

await collector.cg_source.stop()

## 4. Start Historical Data Collection

Choose one of the collection strategies below:

### Option A: Short Test Collection (1 hour, every 10 minutes)

In [None]:
# Test collection: 1 hour with 10-minute intervals
collector = FundingRateCollector(
    api_key=API_KEY,
    user_agent=USER_AGENT,
    exchanges=EXCHANGES,
    tokens=TOKENS
)

await collector.start_collection(
    duration_hours=1,
    interval_minutes=10
)

### Option B: Daily Collection (24 hours, hourly snapshots)

In [None]:
# Daily collection: 24 hours with hourly snapshots
collector = FundingRateCollector(
    api_key=API_KEY,
    user_agent=USER_AGENT,
    exchanges=EXCHANGES,
    tokens=TOKENS
)

await collector.start_collection(
    duration_hours=24,
    interval_minutes=60
)

### Option C: Long-Term Collection (30 days, hourly snapshots)

⚠️ **Note**: This will run continuously for 30 days. Run it in a background process or on a server.

In [None]:
# Long-term collection: 30 days with hourly snapshots
# CAUTION: This will run for 30 days!

collector = FundingRateCollector(
    api_key=API_KEY,
    user_agent=USER_AGENT,
    exchanges=EXCHANGES,
    tokens=TOKENS
)

await collector.start_collection(
    duration_hours=30 * 24,  # 30 days
    interval_minutes=60
)

### Option D: Collect N Snapshots Only

In [None]:
# Collect specific number of snapshots
collector = FundingRateCollector(
    api_key=API_KEY,
    user_agent=USER_AGENT,
    exchanges=EXCHANGES,
    tokens=TOKENS
)

await collector.start_collection(
    max_snapshots=10,  # Collect 10 snapshots only
    interval_minutes=5  # Every 5 minutes
)

## 5. Load & Analyze Collected Data

In [None]:
# Load all historical data
# Builds a collector with the shared configuration, loads whatever has been
# collected so far, and prints a short summary of it.
collector = FundingRateCollector(
    api_key=API_KEY,
    user_agent=USER_AGENT,
    exchanges=EXCHANGES,
    tokens=TOKENS
)

historical_df = collector.load_historical_data()

print(f"üìä Total records: {len(historical_df)}")
if historical_df.empty:
    # With no rows, min()/max() of the timestamp column are NaN (or the
    # column may be absent), so the summary below cannot be computed.
    print("No historical data found - run one of the collection cells first.")
else:
    # Convert epoch seconds with pd.to_datetime(unit='s'), the same
    # conversion the plotting cell uses, so the printed range and the plot
    # axis agree. datetime.fromtimestamp would use the machine's local zone.
    range_start = pd.to_datetime(historical_df['timestamp'].min(), unit='s')
    range_end = pd.to_datetime(historical_df['timestamp'].max(), unit='s')
    print(f"üìÖ Date range: {range_start} to {range_end}")
    print(f"üè¶ Exchanges: {historical_df['exchange'].nunique()}")
    print(f"üí∞ Tokens: {historical_df['base'].nunique()}")

display(historical_df.head(10))

In [None]:
# Run the collector's quality validation over the loaded history and list
# every metric it reports, one per line.
quality_metrics = collector.validate_data_quality(historical_df)

print("üìä Data Quality Metrics:")
for metric_name, metric_value in quality_metrics.items():
    metric_line = f"  {metric_name}: {metric_value}"
    print(metric_line)

In [None]:
# Derive spreads from the full history and show the resulting table.
spreads_df = collector.calculate_spreads(historical_df)

spread_row_count = len(spreads_df)
print(f"\nüìà Spreads calculated for {spread_row_count} tokens")
display(spreads_df)

## 6. Visualize Funding Rates Over Time

In [None]:
import matplotlib.pyplot as plt

# Line chart of one token's funding rate over time, one line per exchange.
token = "KAITO"
is_selected_token = historical_df['base'] == token
token_data = historical_df.loc[is_selected_token].copy()
# 'timestamp' is treated as epoch seconds.
token_data['datetime'] = pd.to_datetime(token_data['timestamp'], unit='s')

fig, ax = plt.subplots(figsize=(14, 6))

for exchange in EXCHANGES:
    on_exchange = token_data['exchange'] == exchange
    ex_data = token_data[on_exchange]
    ax.plot(
        ex_data['datetime'],
        ex_data['funding_rate'],
        marker='o',
        label=exchange,
    )

ax.set(
    title=f'{token} Funding Rates Over Time',
    xlabel='Time',
    ylabel='Funding Rate (%)',
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of the per-token spread between the two exchanges.
# The spread column may be named in either order; prefer
# 'extended_lighter_spread' and fall back to 'lighter_extended_spread'.
spread_col_candidates = ('extended_lighter_spread', 'lighter_extended_spread')
spread_col = next(
    (name for name in spread_col_candidates if name in spreads_df.columns),
    None,
)

if spread_col is not None:
    fig, ax = plt.subplots(figsize=(10, 6))
    spreads_df.plot.bar(x='base', y=spread_col, ax=ax)
    ax.set(
        title='Funding Rate Spreads (Extended vs Lighter)',
        xlabel='Token',
        ylabel='Spread (%)',
    )
    # NOTE(review): the reference line sits at y=0.003 but is labelled
    # "0.3%" on an axis labelled "%"; confirm whether spreads are stored
    # as fractions or as percentages.
    ax.axhline(y=0.003, color='r', linestyle='--', label='0.3% threshold')
    ax.legend()
    plt.tight_layout()
    plt.show()

## 7. Export Data for Backtesting

In [None]:
# Write the spreads table and the full history to parquet files for the
# backtester; file names carry the current year-month.
output_path = Path('/Users/tdl321/quants-lab/app/data/cache/funding/processed')
output_path.mkdir(exist_ok=True, parents=True)

# Spreads table
spreads_stamp = datetime.now().strftime('%Y-%m')
spreads_file = output_path.joinpath(f"spreads_{spreads_stamp}.parquet")
spreads_df.to_parquet(spreads_file, index=False)
print(f"‚úÖ Spreads saved to: {spreads_file}")

# Full historical records
historical_stamp = datetime.now().strftime('%Y-%m')
historical_file = output_path.joinpath(f"historical_{historical_stamp}.parquet")
historical_df.to_parquet(historical_file, index=False)
print(f"‚úÖ Historical data saved to: {historical_file}")

## 8. Collection Metadata

In [None]:
# Fetch the collector's metadata and list every entry, one per line.
metadata = collector.get_metadata()

print("üìã Collection Metadata:")
for field_name, field_value in metadata.items():
    field_line = f"  {field_name}: {field_value}"
    print(field_line)