In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats

In [None]:
def load_minute_data(
    symbol: str,
    year: int = 2025,
    base_dir: Path = Path('/home/klattm/projects/snapper/data/polygon/cache/minute'),
) -> pd.DataFrame:
    """Load every cached minute-bar CSV for one symbol and year into a single frame.

    Files are read from ``<base_dir>/<symbol>/<year>/*.csv`` in sorted filename
    order, concatenated, indexed by their ``timestamp`` column and sorted by it.

    Args:
        symbol: Sub-directory name of the instrument (e.g. ``'SPY'``).
        year: Sub-directory name of the year to load.
        base_dir: Root of the minute-bar cache (``str`` or ``Path``). Defaults
            to the original hard-coded location so existing calls keep working.

    Returns:
        DataFrame indexed by ``timestamp`` (ascending) with the CSV columns.

    Raises:
        FileNotFoundError: If the symbol/year directory contains no CSV files
            (including when the directory itself does not exist).
    """
    symbol_dir = Path(base_dir) / symbol / str(year)
    csv_files = sorted(symbol_dir.glob('*.csv'))
    if not csv_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # fail early with the path that was actually searched.
        raise FileNotFoundError(f'No minute CSV files found in {symbol_dir}')
    frames = [pd.read_csv(csv_file, parse_dates=['timestamp']) for csv_file in csv_files]
    data = pd.concat(frames, ignore_index=True)
    return data.set_index('timestamp').sort_index()

# Load the minute bars for both instruments (default year), then print a
# one-line summary of each: row count and first/last timestamp.
spy = load_minute_data('SPY')
btc = load_minute_data('X_BTCUSD')
for label, frame in (('SPY', spy), ('BTC', btc)):
    first_ts = frame.index.min()
    last_ts = frame.index.max()
    print(f'{label}: {len(frame):,} rows, {first_ts} to {last_ts}')

In [None]:
# Aggregate the minute 'close' series into hourly bars (open/high/low/close of
# the minute closes). Hours with no minute data come out as all-NaN rows.
spy_h = spy['close'].resample('1h').ohlc()
btc_h = btc['close'].resample('1h').ohlc()

# Hourly returns are computed on observed bars only. Calling pct_change() on
# the gappy series relies on an implicit forward-fill that pandas has
# deprecated, so the result for the first bar after a gap would depend on the
# pandas version. Dropping NaNs first makes each return "vs. the previous
# observed close"; index alignment on assignment leaves gap hours as NaN.
spy_h['ret'] = spy_h['close'].dropna().pct_change()
btc_h['ret'] = btc_h['close'].dropna().pct_change()

# Keep only the hours where both instruments have a close and a return.
combined = pd.DataFrame({
    'spy_close': spy_h['close'],
    'btc_close': btc_h['close'],
    'spy_ret': spy_h['ret'],
    'btc_ret': btc_h['ret']
}).dropna()
print(f'Combined hourly: {len(combined):,} rows')
combined.head()

In [None]:
# Same-hour Pearson correlation of hourly returns.
corr_instant = combined['spy_ret'].corr(combined['btc_ret'])
print(f'Instant correlation (SPY vs BTC same hour): {corr_instant:.4f}')
print('\nLag analysis (SPY leads BTC by N hours):')
lag_corrs = {}
for lag in range(0, 25):
    # Shift the timestamps, not the rows: `combined` has had hours dropped
    # (any hour missing a close/return), so a row shift of N is not N hours
    # across those gaps. After the time shift, Series.corr pairs the SPY return
    # at t with the BTC return at t + lag wherever both timestamps exist in
    # `combined`; lags with no overlapping timestamps yield NaN.
    spy_lagged = combined['spy_ret'].shift(periods=lag, freq='h')
    corr = spy_lagged.corr(combined['btc_ret'])
    lag_corrs[lag] = corr
    if lag <= 12 or lag == 24:
        print(f'  Lag {lag:2d}h: {corr:.4f}')

# Pick the lag with the highest correlation, ignoring lags that produced NaN
# (max() over NaN values gives order-dependent results).
valid_corrs = {lag: corr for lag, corr in lag_corrs.items() if pd.notna(corr)}
if valid_corrs:
    best_lag = max(valid_corrs, key=valid_corrs.get)
    print(f'\nBest lag: {best_lag}h with correlation {lag_corrs[best_lag]:.4f}')
else:
    best_lag = None
    print('\nBest lag: n/a (no lag produced a valid correlation)')

In [None]:
# Bar chart of the correlation found at each lag in `lag_corrs`
# (keys are lags on the x-axis, values are correlations on the y-axis).
fig = go.Figure()
fig.add_trace(go.Bar(x=list(lag_corrs.keys()), y=list(lag_corrs.values()), name='Correlation'))
fig.update_layout(
    # The arrow was previously mis-encoded (UTF-8 bytes decoded as cp1252).
    title='SPY → BTC Lag Correlation (hourly returns)',
    xaxis_title='Lag (hours, SPY leads)',
    yaxis_title='Pearson Correlation',
    height=400
)
fig.show()

In [None]:
# Tag each hourly row with its hour of day and build session masks.
# NOTE(review): the windows below are labelled as UTC — confirm the cached
# timestamps are actually UTC; the fixed 14-21 window also ignores DST.
combined['hour'] = combined.index.hour
us_open = (combined['hour'] >= 14) & (combined['hour'] < 21)
us_close = combined['hour'] == 21
asia_session = (combined['hour'] >= 0) & (combined['hour'] < 8)  # kept for later cells; not used below

# Same-hour correlation restricted to the US session rows.
corr_us = combined.loc[us_open, 'spy_ret'].corr(combined.loc[us_open, 'btc_ret'])

# SPY vs BTC during the Asia window (00-08): pair each BTC hourly return in
# that window with the SPY hourly return from 8 hours earlier.
#  * The shift is done on timestamps; a row-based shift(8) inside a filtered
#    subset does not correspond to 8 hours.
#  * BTC returns are taken from the full hourly series (btc_h) because
#    `combined` only keeps hours in which SPY also has data.
btc_ret_all = btc_h['close'].dropna().pct_change()
spy_ret_plus_8h = combined['spy_ret'].shift(periods=8, freq='h')
asia_pairs = pd.concat(
    {'spy_ret': spy_ret_plus_8h, 'btc_ret': btc_ret_all}, axis=1, join='inner'
).dropna()
asia_pairs = asia_pairs[asia_pairs.index.hour < 8]
# Require a minimum sample before reporting a correlation.
corr_asia = asia_pairs['spy_ret'].corr(asia_pairs['btc_ret']) if len(asia_pairs) > 100 else np.nan

print(f'Correlation during US session (14-21 UTC): {corr_us:.4f}')
print(f'SPY (8h earlier) vs BTC Asia session (00-08 UTC): {corr_asia:.4f} (n={len(asia_pairs)})')

In [None]:
# Aggregate the minute 'close' series into daily bars. Days with no minute
# data (e.g. days on which SPY has no bars) come out as all-NaN rows.
spy_d = spy['close'].resample('1D').ohlc()
btc_d = btc['close'].resample('1D').ohlc()

# Compute daily returns on observed closes only. pct_change() on the gappy
# series is version-dependent: with the legacy implicit forward-fill it emits
# spurious 0% returns on the missing days (which then survive dropna() and are
# paired with real BTC moves); without it, the first day after every gap
# becomes NaN and is lost. Dropping NaNs first gives "return vs. previous
# observed close" and leaves missing days as NaN.
spy_d['ret'] = spy_d['close'].dropna().pct_change()
btc_d['ret'] = btc_d['close'].dropna().pct_change()

# Keep only days on which both instruments have a return.
daily = pd.DataFrame({
    'spy_ret': spy_d['ret'],
    'btc_ret': btc_d['ret']
}).dropna()
print(f'Daily correlation: {daily["spy_ret"].corr(daily["btc_ret"]):.4f}')
print('\nDaily lag analysis:')
for lag in range(0, 6):
    # Lag is counted in rows of `daily`, i.e. days where both returns exist.
    corr = daily['spy_ret'].shift(lag).corr(daily['btc_ret'])
    print(f'  SPY D-{lag} vs BTC D0: {corr:.4f}')

In [None]:
# Flag SPY's daily move: any gain, and moves beyond +/-1%.
daily['spy_up'] = daily['spy_ret'] > 0
daily['spy_big_up'] = daily['spy_ret'] > 0.01
daily['spy_big_down'] = daily['spy_ret'] < -0.01

# BTC's return on the next calendar day. Taken from the contiguous daily BTC
# series (btc_d) and re-aligned to `daily`, because a row shift on `daily`
# skips over any days that were dropped from it.
daily['btc_next'] = btc_d['ret'].shift(-1).reindex(daily.index)

# Strictly negative SPY days; `~spy_up` would also count flat (0%) days as down.
spy_down = daily['spy_ret'] < 0

print('BTC next day return when SPY:')
print(f"  Up >1%: {daily.loc[daily['spy_big_up'], 'btc_next'].mean()*100:.2f}% (n={daily['spy_big_up'].sum()})")
print(f"  Down <-1%: {daily.loc[daily['spy_big_down'], 'btc_next'].mean()*100:.2f}% (n={daily['spy_big_down'].sum()})")
print(f"  Any up: {daily.loc[daily['spy_up'], 'btc_next'].mean()*100:.2f}%")
print(f"  Any down: {daily.loc[spy_down, 'btc_next'].mean()*100:.2f}%")

In [None]:
# Two side-by-side scatters of SPY daily return (x, in %) against
# BTC's same-day return (left) and BTC's next-day return (right).
fig = make_subplots(rows=1, cols=2, subplot_titles=['Same Day', 'SPY Today vs BTC Tomorrow'])

# (subplot column, y column in `daily`, trace name, y-axis title)
panels = [
    (1, 'btc_ret', 'Same day', 'BTC Return %'),
    (2, 'btc_next', 'Next day', 'BTC Next Day %'),
]
for panel_col, y_column, trace_name, y_title in panels:
    fig.add_trace(
        go.Scatter(
            x=daily['spy_ret']*100,
            y=daily[y_column]*100,
            mode='markers',
            opacity=0.5,
            name=trace_name,
        ),
        row=1,
        col=panel_col,
    )
    fig.update_xaxes(title_text='SPY Return %', row=1, col=panel_col)
    fig.update_yaxes(title_text=y_title, row=1, col=panel_col)

fig.update_layout(height=400, title='SPY vs BTC Daily Returns')
fig.show()

In [None]:
# Rolling correlation of SPY and BTC daily returns over a 30-row window of
# `daily`, stored as a column and plotted with a dashed zero reference line.
rolling_corr = daily['spy_ret'].rolling(30).corr(daily['btc_ret'])
daily['rolling_corr_30d'] = rolling_corr

fig = go.Figure(
    data=[
        go.Scatter(
            x=daily.index,
            y=daily['rolling_corr_30d'],
            name='30-day rolling correlation',
        )
    ]
)
fig.add_hline(y=0, line_dash='dash', line_color='gray')
fig.update_layout(
    title='SPY-BTC 30-Day Rolling Correlation',
    yaxis_title='Correlation',
    height=400,
)
fig.show()