# Data Sanity Check

This notebook demonstrates basic data loading and validation functionality of the Cross-Asset Alpha Engine.

## Prerequisites

1. Ensure you have set your Polygon API key in the `.env` file
2. Install the package: `pip install -e .`
3. Activate the virtual environment (do this before step 2, so the package installs into it): `source .venv/bin/activate`


In [None]:
# Import required libraries
import time
import warnings
from datetime import date, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# NOTE(review): this suppresses every warning category for the rest of the
# kernel session (including deprecation warnings raised by the libraries
# imported above). Consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Project-local data-access components used by the cells below
from cross_asset_alpha_engine.data import (
    PolygonClient, 
    DataCache, 
    AssetUniverse,
    load_daily_bars
)
from cross_asset_alpha_engine.utils import setup_logger

# Create the logger named "data_check" through the project helper.
# console_output=True presumably echoes log records into the cell output --
# TODO confirm against setup_logger's implementation.
logger = setup_logger("data_check", console_output=True)
# Reaching this line means every import above resolved without raising.
print("✅ All imports successful!")


✅ All imports successful!


## 1. Asset Universe Exploration


In [8]:
# Build the asset universe that the remaining queries in this cell run against
universe = AssetUniverse()

# Summary statistics, printed one "key: value" pair per line
stats = universe.get_universe_stats()
print("Asset Universe Statistics:")
for key, value in stats.items():
    print(f"  {key}: {value}")

# Equity symbols: report the total count, then preview only the first 10
equity_symbols = universe.get_equity_symbols()
print(f"\nAvailable equity symbols ({len(equity_symbols)}):")
equity_preview = equity_symbols[:10]
print(equity_preview)

# Symbols the universe designates for market-regime detection
regime_symbols = universe.get_market_regime_symbols()
print(f"\nRegime detection symbols: {regime_symbols}")

# Symbols grouped by asset class; each class is truncated to its first 5 entries
cross_asset = universe.get_cross_asset_symbols()
print(f"\nCross-asset symbols by class:")
for asset_class, symbols in cross_asset.items():
    print(f"  {asset_class}: {symbols[:5]}")


Asset Universe Statistics:
  total_assets: 32
  active_assets: 32
  asset_class_counts: {'equity': 26, 'bond': 3, 'commodity': 3}
  exchange_counts: {'NYSE': 23, 'NASDAQ': 8, 'CBOE': 1}

Available equity symbols (26):
['AAPL', 'AMZN', 'BRK.B', 'GOOGL', 'IWM', 'JNJ', 'JPM', 'META', 'MSFT', 'NVDA']

Regime detection symbols: ['SPY', 'QQQ', 'IWM', 'VIX', 'TLT', 'GLD']

Cross-asset symbols by class:
  equity: ['AAPL', 'AMZN', 'BRK.B', 'GOOGL', 'IWM']
  commodity: ['GLD', 'SLV', 'USO']
  bond: ['HYG', 'IEF', 'TLT']


## 2. Data Loading Test


In [9]:
# Symbols used for the data-loading smoke test
test_symbols = ["SPY", "QQQ", "AAPL"]

# Fixed window that is known to contain data. Both endpoints fall on a
# Saturday (2025-11-15 and 2025-12-06), so the bars actually returned span
# Monday 2025-11-17 through Friday 2025-12-05.
end_date = date(2025, 12, 6)
start_date = date(2025, 11, 15)  # 3 weeks before end_date

# Pause after each request so consecutive calls stay under the API rate limit
REQUEST_DELAY_SECONDS = 0.5

print(f"Loading data for {test_symbols} from {start_date} to {end_date}")

# Load one symbol at a time to avoid rate limits. The per-symbol frames are
# collected in a list and concatenated once after the loop, rather than
# re-copying a growing DataFrame on every iteration.
symbol_frames = []

for symbol in test_symbols:
    print(f"Loading {symbol}...")
    try:
        symbol_data = load_daily_bars([symbol], start_date, end_date, use_cache=True)
        if not symbol_data.empty:
            symbol_frames.append(symbol_data)
            print(f"✅ Loaded {len(symbol_data)} bars for {symbol}")
        else:
            print(f"⚠️ No data for {symbol}")
    except Exception as e:
        # Best-effort: report the failure and continue with the remaining symbols
        print(f"❌ Error loading {symbol}: {e}")

    # Small delay to avoid rate limits
    time.sleep(REQUEST_DELAY_SECONDS)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
if symbol_frames:
    daily_data = pd.concat(symbol_frames, ignore_index=True)
else:
    daily_data = pd.DataFrame()

if not daily_data.empty:
    print(f"✅ Successfully loaded {len(daily_data)} total bars")
    print(f"Date range: {daily_data['timestamp'].min()} to {daily_data['timestamp'].max()}")
    print(f"Symbols: {daily_data['symbol'].unique()}")

    # Display sample data
    print("\nSample data:")
    print(daily_data.head())
else:
    # NOTE(review): the message below announces a sample-data fallback, but no
    # fallback is implemented in this cell -- daily_data stays empty.
    # TODO: add the sample-data code here or reword the message.
    print("⚠️ No data loaded, using sample data...")

Loading data for ['SPY', 'QQQ', 'AAPL'] from 2025-11-15 to 2025-12-06
Loading SPY...
Fetching SPY daily data from API...
✅ Loaded 14 bars for SPY
Loading QQQ...
Fetching QQQ daily data from API...
✅ Loaded 14 bars for QQQ
Loading AAPL...
Fetching AAPL daily data from API...
✅ Loaded 14 bars for AAPL
✅ Successfully loaded 42 total bars
Date range: 2025-11-17 05:00:00 to 2025-12-05 05:00:00
Symbols: ['SPY' 'QQQ' 'AAPL']

Sample data:
  symbol           timestamp     open    high      low   close       volume  \
0    SPY 2025-11-17 05:00:00  669.700  673.71  662.170  665.67   90398965.0   
1    SPY 2025-11-18 05:00:00  662.095  665.12  655.860  660.08  114410799.0   
2    SPY 2025-11-19 05:00:00  660.780  667.34  658.745  662.63   94650506.0   
3    SPY 2025-11-20 05:00:00  672.910  675.56  651.890  652.53  165142677.0   
4    SPY 2025-11-21 05:00:00  655.050  664.55  650.850  659.03  123955685.0   

       vwap  
0  667.7416  
1  661.2790  
2  662.5886  
3  661.2759  
4  657.6147  
