# Stablecoin Depeg Prediction - Data Visualization

This notebook visualizes the data collected for USDT and USDC stablecoin depeg prediction.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot appearance: apply the whitegrid base style first, then the 'husl' colour cycle
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Directory layout, anchored one level above the current working directory
PROJECT_ROOT = Path('.').resolve().parent
DATA_DIR = PROJECT_ROOT / 'data'
RAW_DIR = DATA_DIR / 'raw'
PROCESSED_DIR = DATA_DIR / 'processed'

## 1. Load Data

In [None]:
# Read the combined daily dataset and convert the 'date' column to datetimes
df = (
    pd.read_csv(PROCESSED_DIR / 'combined_stablecoins_daily.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Independent per-coin copies; the plotting cells below read from these
usdt = df.loc[df['coin'].eq('usdt')].copy()
usdc = df.loc[df['coin'].eq('usdc')].copy()

first_day, last_day = df['date'].min(), df['date'].max()
print(f"Combined data shape: {df.shape}")
print(f"USDT rows: {len(usdt)}, USDC rows: {len(usdc)}")
print(f"Date range: {first_day} to {last_day}")
df.head()

## 2. Price Deviation from Peg

The implied price is calculated from BTC/stablecoin trading pairs. A value of 1.0 means a perfect peg.

In [None]:
# One panel per stablecoin, stacked vertically on a shared date axis
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

panels = [
    (axes[0], usdt, 'green', 'USDT Implied Price Over Time'),
    (axes[1], usdc, 'blue', 'USDC Implied Price Over Time'),
]
for ax, coin_df, line_color, panel_title in panels:
    ax.plot(coin_df['date'], coin_df['implied_price'], color=line_color, alpha=0.7, linewidth=0.8)
    # Reference lines: the $1.00 peg plus the 0.5% band on either side of it
    ax.axhline(y=1.0, color='red', linestyle='--', alpha=0.5, label='Peg ($1.00)')
    ax.axhline(y=1.005, color='orange', linestyle=':', alpha=0.5, label='0.5% threshold')
    ax.axhline(y=0.995, color='orange', linestyle=':', alpha=0.5)
    ax.set_ylabel('Implied Price ($)')
    ax.set_title(panel_title)
    ax.legend(loc='upper right')
    # Fixed window of +/-2% around the peg; anything beyond it is cut off
    ax.set_ylim(0.98, 1.02)

# Only the bottom panel carries the x-axis label
axes[1].set_xlabel('Date')

plt.tight_layout()
plt.show()

In [None]:
# Signed and absolute distance of the implied price from the 1.0 peg,
# stored on the combined frame as two new columns
df['price_deviation'] = df['implied_price'].sub(1.0)
df['abs_deviation'] = df['price_deviation'].abs()

print("=== Price Deviation Statistics ===")
for coin in ('usdt', 'usdc'):
    coin_data = df.loc[df['coin'] == coin]
    signed = coin_data['price_deviation']
    magnitude = coin_data['abs_deviation']
    print(f"\n{coin.upper()}:")
    # Deviations are fractions of a dollar; multiply by 100 to report percent
    print(f"  Mean deviation: {signed.mean()*100:.4f}%")
    print(f"  Std deviation:  {signed.std()*100:.4f}%")
    print(f"  Max positive:   {signed.max()*100:.4f}%")
    print(f"  Max negative:   {signed.min()*100:.4f}%")
    # Row counts beyond each absolute-deviation threshold
    print(f"  Days > 0.5%:    {(magnitude > 0.005).sum()}")
    print(f"  Days > 1.0%:    {(magnitude > 0.01).sum()}")

## 3. Depeg Events Distribution

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Signed deviation from the $1.00 peg, computed once per coin and shared by
# both panels. Missing prices are dropped up front: the box-plot statistics
# are percentile based, so NaN values would otherwise blank out a whole box.
usdt_dev = (usdt['implied_price'] - 1.0).dropna()
usdc_dev = (usdc['implied_price'] - 1.0).dropna()

# Histogram of deviations
ax1 = axes[0]
ax1.hist(usdt_dev, bins=50, alpha=0.6, label='USDT', color='green')
ax1.hist(usdc_dev, bins=50, alpha=0.6, label='USDC', color='blue')
ax1.axvline(x=0.005, color='orange', linestyle='--', label='+0.5% threshold')
ax1.axvline(x=-0.005, color='orange', linestyle='--', label='-0.5% threshold')
ax1.set_xlabel('Price Deviation from $1.00')
ax1.set_ylabel('Frequency')
ax1.set_title('Distribution of Price Deviations')
ax1.legend()

# Box plot comparison
ax2 = axes[1]
deviation_data = [usdt_dev, usdc_dev]
# Tick labels are applied after drawing instead of through boxplot's
# `labels=` keyword, which Matplotlib 3.9 deprecated in favour of
# `tick_labels=`; set_xticklabels works the same on old and new releases.
bp = ax2.boxplot(deviation_data, patch_artist=True)
ax2.set_xticklabels(['USDT', 'USDC'])
# patch_artist=True makes the boxes fillable patches, coloured to match the histogram
bp['boxes'][0].set_facecolor('green')
bp['boxes'][1].set_facecolor('blue')
ax2.axhline(y=0, color='red', linestyle='--', alpha=0.5)
ax2.set_ylabel('Price Deviation from $1.00')
ax2.set_title('Deviation Distribution by Stablecoin')

plt.tight_layout()
plt.show()

## 4. BTC Price and Stablecoin Behavior

In [None]:
# Three stacked panels on a shared date axis: BTC price, volume, spread
fig, axes = plt.subplots(3, 1, figsize=(14, 10), sharex=True)
price_ax, volume_ax, spread_ax = axes

# (frame, colour, legend label) for each stablecoin, in drawing order
coin_styles = [(usdt, 'green', 'USDT'), (usdc, 'blue', 'USDC')]

# Top: BTC close from the USDT rows, on a logarithmic y-axis
price_ax.plot(usdt['date'], usdt['close'], color='orange', linewidth=1)
price_ax.set_ylabel('BTC Price ($)')
price_ax.set_title('BTC/USDT Price')
price_ax.set_yscale('log')

# Middle: quote volume scaled to millions, both coins overlaid as bars
for coin_df, bar_color, coin_label in coin_styles:
    volume_ax.bar(coin_df['date'], coin_df['quote_volume']/1e6, alpha=0.6,
                  color=bar_color, width=1, label=coin_label)
volume_ax.set_ylabel('Volume (millions $)')
volume_ax.set_title('Daily Trading Volume')
volume_ax.legend()

# Bottom: spread proxy multiplied by 100 so the axis reads in percent
for coin_df, line_color, coin_label in coin_styles:
    spread_ax.plot(coin_df['date'], coin_df['spread_proxy']*100, color=line_color,
                   alpha=0.7, linewidth=0.8, label=coin_label)
spread_ax.set_ylabel('Spread (%)')
spread_ax.set_xlabel('Date')
spread_ax.set_title('Daily High-Low Spread (Volatility Proxy)')
spread_ax.legend()

plt.tight_layout()
plt.show()

## 5. Supply Changes Over Time

In [None]:
# Two stacked panels on a shared date axis: supply level, then its daily change
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
level_ax, change_ax = axes

# (frame, colour, legend label) for each stablecoin, in drawing order
supply_series = [(usdt, 'green', 'USDT'), (usdc, 'blue', 'USDC')]

# Top: total circulating supply, scaled to billions
for coin_df, line_color, coin_label in supply_series:
    level_ax.plot(coin_df['date'], coin_df['total_circulating']/1e9,
                  color=line_color, linewidth=1.5, label=coin_label)
level_ax.set_ylabel('Circulating Supply (billions $)')
level_ax.set_title('Total Circulating Supply')
level_ax.legend()

# Bottom: circulating_change_pct multiplied by 100 for a percent axis
for coin_df, line_color, coin_label in supply_series:
    change_ax.plot(coin_df['date'], coin_df['circulating_change_pct']*100,
                   color=line_color, alpha=0.7, linewidth=0.8, label=coin_label)
change_ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
change_ax.set_ylabel('Daily Supply Change (%)')
change_ax.set_xlabel('Date')
change_ax.set_title('Daily Circulating Supply Change')
# The view is limited to +/-10; larger moves run off the panel
change_ax.set_ylim(-10, 10)
change_ax.legend()

plt.tight_layout()
plt.show()

## 6. Buy/Sell Pressure

In [None]:
# 30-observation rolling mean of the buy ratio, indexed by date.
# The integer window counts rows, so it spans 30 calendar days only when
# there is exactly one row per day; the first 29 values are NaN.
usdt_buy_ma = usdt.set_index('date')['buy_ratio'].rolling(30).mean()
usdc_buy_ma = usdc.set_index('date')['buy_ratio'].rolling(30).mean()

fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
raw_ax, smooth_ax = axes

# Top: raw daily buy ratio for both coins
for coin_df, line_color, coin_label in [(usdt, 'green', 'USDT'), (usdc, 'blue', 'USDC')]:
    raw_ax.plot(coin_df['date'], coin_df['buy_ratio'], color=line_color,
                alpha=0.5, linewidth=0.8, label=coin_label)
raw_ax.axhline(y=0.5, color='red', linestyle='--', alpha=0.5, label='Neutral (0.5)')
raw_ax.set_ylabel('Buy Ratio')
raw_ax.set_title('Buy/Sell Pressure (Taker Buy Ratio)')
raw_ax.legend()
raw_ax.set_ylim(0.3, 0.7)

# Bottom: the smoothed series
for smoothed, line_color, ma_label in [(usdt_buy_ma, 'green', 'USDT (30d MA)'),
                                       (usdc_buy_ma, 'blue', 'USDC (30d MA)')]:
    smooth_ax.plot(smoothed.index, smoothed, color=line_color, linewidth=1.5, label=ma_label)
smooth_ax.axhline(y=0.5, color='red', linestyle='--', alpha=0.5, label='Neutral (0.5)')
smooth_ax.set_ylabel('Buy Ratio (30d MA)')
smooth_ax.set_xlabel('Date')
smooth_ax.set_title('Smoothed Buy/Sell Pressure')
smooth_ax.legend()

plt.tight_layout()
plt.show()

## 7. Feature Correlations

In [None]:
# Put the project root at the front of the import path so the local
# `src` package resolves, then import the feature builder from it
import sys
sys.path.insert(0, str(PROJECT_ROOT))
from src.features.engineering import create_features

# Build the feature frame from the combined data.
# NOTE(review): by this point `df` also carries the 'price_deviation' and
# 'abs_deviation' columns added by the deviation-statistics cell; confirm
# whether create_features relies on them or derives its own.
df_features = create_features(df)

# Columns included in the correlation matrix
feature_cols = [
    'btc_return_1d',
    'btc_volatility_7d',
    'btc_drawdown_30d',
    'volume_ratio_7d',
    'spread_proxy',
    'spread_zscore',
    'buy_ratio',
    'supply_change_1d',
    'supply_volatility_7d',
    'price_deviation',
    'stress_indicator',
]

# Pairwise correlations, computed over the rows of both coins together
corr_matrix = df_features.loc[:, feature_cols].corr()

# Annotated heatmap with a diverging palette centred on zero
fig, ax = plt.subplots(figsize=(12, 10))
heatmap_style = dict(annot=True, fmt='.2f', cmap='RdBu_r', center=0,
                     square=True, linewidths=0.5)
sns.heatmap(corr_matrix, ax=ax, **heatmap_style)
ax.set_title('Feature Correlation Matrix')
plt.tight_layout()
plt.show()

## 8. Key Events Timeline

In [None]:
# Significant market events to annotate (ISO date -> label)
events = {
    '2020-03-12': 'COVID Crash',
    '2021-05-19': 'China Mining Ban',
    '2022-05-09': 'UST Collapse',
    '2022-11-08': 'FTX Collapse',
    '2023-03-10': 'SVB Crisis (USDC)',
}

fig, ax = plt.subplots(figsize=(14, 6))

# Deviation from the peg in percent, for both stablecoins
ax.plot(usdt['date'], (usdt['implied_price'] - 1)*100, color='green', alpha=0.7,
        linewidth=0.8, label='USDT')
ax.plot(usdc['date'], (usdc['implied_price'] - 1)*100, color='blue', alpha=0.7,
        linewidth=0.8, label='USDC')

# Fix the y-range BEFORE placing the event labels. The label height is taken
# from the axis limits, so reading them while the axis is still autoscaled to
# the data would position the text against that range instead of the +/-3%
# window that is actually shown, and the labels could land outside the plot.
ax.set_ylim(-3, 3)
label_y = ax.get_ylim()[1] * 0.9

# Mark the events that fall inside the data's date range
colors = plt.cm.tab10.colors
first_date, last_date = df['date'].min(), df['date'].max()
for i, (date, event) in enumerate(events.items()):
    event_date = pd.to_datetime(date)
    if first_date <= event_date <= last_date:
        # Wrap the index so adding more events than palette entries cannot raise
        event_color = colors[i % len(colors)]
        ax.axvline(x=event_date, color=event_color, linestyle='--', alpha=0.7)
        ax.text(event_date, label_y, f' {event}', rotation=90,
                fontsize=8, va='top', color=event_color)

# Reference lines: the peg at 0% and the +/-0.5% band
ax.axhline(y=0, color='red', linestyle='-', alpha=0.3)
ax.axhline(y=0.5, color='orange', linestyle=':', alpha=0.5)
ax.axhline(y=-0.5, color='orange', linestyle=':', alpha=0.5)

ax.set_ylabel('Price Deviation (%)')
ax.set_xlabel('Date')
ax.set_title('Stablecoin Price Deviations with Key Market Events')
ax.legend(loc='lower left')

plt.tight_layout()
plt.show()

## 9. Stress Periods Analysis

In [None]:
# Flag depeg days: rows whose implied price is more than `threshold` away from 1.0
threshold = 0.005  # 0.5% deviation

# Both columns are written onto the feature frame itself
df_features['abs_deviation'] = df_features['implied_price'].sub(1).abs()
df_features['is_depeg'] = df_features['abs_deviation'].gt(threshold)

# Per-coin report
print("=== Depeg Event Summary (>0.5% deviation) ===")
for coin in ('usdt', 'usdc'):
    coin_data = df_features.loc[df_features['coin'] == coin]
    total_days = len(coin_data)
    depeg_days = coin_data['is_depeg'].sum()
    print(f"\n{coin.upper()}:")
    print(f"  Depeg days: {depeg_days} / {total_days} ({depeg_days/total_days*100:.2f}%)")

    # The size statistics are only printed when at least one day was flagged
    if depeg_days <= 0:
        continue
    flagged = coin_data.loc[coin_data['is_depeg'], 'abs_deviation']
    print(f"  Max deviation: {flagged.max()*100:.2f}%")
    print(f"  Mean deviation (during depeg): {flagged.mean()*100:.2f}%")

In [None]:
# Scatter plot: BTC volatility vs stablecoin deviation, coloured by volume ratio.
# Reads the 'abs_deviation' column that the depeg-summary cell adds to
# df_features, so that cell has to run first.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Only these three columns are drawn, so only they decide whether a row is
# usable. A blanket dropna() would also discard rows that are merely missing
# some unrelated feature, and would empty the plot entirely if any other
# column happened to be all-NaN for a coin.
plot_cols = ['btc_volatility_7d', 'abs_deviation', 'volume_ratio_7d']

for ax, coin in zip(axes, ['usdt', 'usdc']):
    coin_data = df_features.loc[df_features['coin'] == coin].dropna(subset=plot_cols)

    # Both axes are multiplied by 100 so they read in percent
    scatter = ax.scatter(coin_data['btc_volatility_7d']*100,
                         coin_data['abs_deviation']*100,
                         c=coin_data['volume_ratio_7d'],
                         cmap='viridis', alpha=0.5, s=10)

    ax.set_xlabel('BTC 7-day Volatility (%)')
    ax.set_ylabel('Stablecoin Deviation (%)')
    ax.set_title(f'{coin.upper()}: BTC Volatility vs Price Deviation')
    plt.colorbar(scatter, ax=ax, label='Volume Ratio')
    ax.axhline(y=0.5, color='red', linestyle='--', alpha=0.5, label='0.5% threshold')
    ax.legend()

plt.tight_layout()
plt.show()

## 10. Summary Statistics

In [None]:
# Final summary table: one record per coin, assembled into a frame in one go.
# Collecting plain dicts and constructing the DataFrame once avoids growing a
# frame with pd.concat inside the loop, which re-copies all earlier rows on
# every pass and, when seeded with an empty DataFrame, triggers the
# empty-entry concatenation FutureWarning on recent pandas releases.
records = []
for coin in ['usdt', 'usdc']:
    coin_data = df[df['coin'] == coin]
    records.append({
        'Coin': coin.upper(),
        'Total Days': len(coin_data),
        'Date Range Start': coin_data['date'].min().strftime('%Y-%m-%d'),
        'Date Range End': coin_data['date'].max().strftime('%Y-%m-%d'),
        'Mean Price': f"${coin_data['implied_price'].mean():.4f}",
        'Std Price': f"${coin_data['implied_price'].std():.4f}",
        'Min Price': f"${coin_data['implied_price'].min():.4f}",
        'Max Price': f"${coin_data['implied_price'].max():.4f}",
        'Mean Supply (B)': f"${coin_data['total_circulating'].mean()/1e9:.2f}B",
        'Mean Daily Volume': f"${coin_data['quote_volume'].mean()/1e6:.1f}M",
    })

# Index by coin without inplace mutation, so re-running the cell is harmless
summary = pd.DataFrame(records).set_index('Coin')

# Transposed for display: metrics as rows, one column per coin
summary.T