# Shannon's Demon with Real Market Data

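This notebook demonstrates Shannon's Demon — a strategy that repeatedly rebalances a portfolio back to a fixed target allocation (50% risky asset / 50% safe asset by default in the code below) — using actual market data from various asset classes.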

**📚 Learning Path**: This is Part 2 of the tutorial series:
- **01_basic_demo.ipynb** - Basic concepts
- **02_real_data.ipynb** ← You are here (Real market data)
- **03_ml_enhanced.ipynb** - ML-enhanced strategies
- **../demon.ipynb** - Comprehensive analysis (must-see!)

**🎯 Quick Start**: For the comprehensive analysis, open `../demon.ipynb`, which includes a detailed market analysis covering 30+ assets.

## 1. Setup and Data Loading

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add parent directory to path so modules one level up can be imported
sys.path.append('..')

# Set style: apply the matplotlib style sheet first, then the seaborn palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Data directory: folder that holds one `<symbol>.csv` file per asset
# (relative path, resolved against the current working directory)
DATA_DIR = Path('../data')

In [None]:
# List available data files
data_files = sorted(DATA_DIR.glob('*.csv'))
print(f"Available assets ({len(data_files)}):")
print("=" * 50)

# Group by type. File stems are the ticker symbols.
symbols = [f.stem for f in data_files]

# Tickers treated as ETFs; only those with a data file are reported.
known_etfs = ['SMH', 'SOXL', 'SOXS', 'TQQQ', 'SQQQ', 'TNA', 'TZA', 'LABU', 'LABD', 'UVXY']
etfs = [s for s in known_etfs if s in symbols]
crypto = [s for s in symbols if 'USD' in s]
indices = [s for s in symbols if s.startswith('^')]

# Stocks are the remaining all-uppercase symbols. Crypto pairs such as
# 'BTC-USD' are uppercase too, so they must be excluded explicitly or they
# would be listed twice (once as stock, once as crypto).
not_stocks = set(etfs) | set(crypto) | set(indices)
stocks = [s for s in symbols if s.upper() == s and s not in not_stocks]

# Show at most ten stocks and only add an ellipsis when some were cut off.
stock_preview = ', '.join(stocks[:10])
ellipsis = '...' if len(stocks) > 10 else ''

print(f"\nStocks: {stock_preview}{ellipsis}")
print(f"\nETFs: {', '.join(etfs)}")
print(f"\nCrypto: {', '.join(crypto)}")
print(f"\nIndices: {', '.join(indices)}")

## 2. Load and Prepare Data

In [None]:
def load_asset_data(symbol, start_date=None, end_date=None):
    """
    Read ``<symbol>.csv`` from DATA_DIR and return it ordered by its index.

    When the file has a ``Date`` column it is parsed with ``pd.to_datetime``
    and becomes the index. ``start_date`` / ``end_date`` are inclusive bounds
    compared against the index; a falsy bound (``None``, ``''``) is skipped.

    Raises FileNotFoundError when no CSV exists for ``symbol``.
    """
    csv_path = DATA_DIR / f"{symbol}.csv"
    if not csv_path.exists():
        raise FileNotFoundError(f"Data file for {symbol} not found")

    frame = pd.read_csv(csv_path)

    # Promote the date column to a parsed index when the file provides one
    if 'Date' in frame.columns:
        frame['Date'] = pd.to_datetime(frame['Date'])
        frame = frame.set_index('Date')

    # Apply the lower bound, then the upper bound
    if start_date:
        frame = frame.loc[frame.index >= start_date]
    if end_date:
        frame = frame.loc[frame.index <= end_date]

    # Rows come back in index order regardless of the file's row order
    return frame.sort_index()

In [None]:
# Example asset: Bitcoin prices from the start of 2020 onwards
btc_data = load_asset_data('BTC-USD', start_date='2020-01-01')

# Summarise what was loaded (one print call, one line per item)
print(
    f"Bitcoin data shape: {btc_data.shape}",
    f"Date range: {btc_data.index[0]} to {btc_data.index[-1]}",
    f"\nColumns: {list(btc_data.columns)}",
    f"\nFirst few rows:",
    sep="\n",
)
btc_data.head()

## 3. Implement Shannon's Demon for Real Data

In [None]:
class ShannonsDemon:
    """
    Shannon's Demon rebalancing strategy implementation.

    The portfolio has two parts: a risky position (shares valued at the
    current price) and a safe part whose value only changes when a rebalance
    moves money in or out of it. Whenever the chosen rule fires, the risky
    position is traded back to ``target_allocation`` of the total value.

    Parameters
    ----------
    initial_capital : starting portfolio value.
    target_allocation : fraction of the total held in the risky asset at the
        start and after every rebalance.
    rebalance_method : ``'time'`` or ``'threshold'``. Any other value means
        the portfolio is never rebalanced.
    rebalance_frequency : for ``'time'``, rebalance whenever the step index is
        a multiple of this number.
    rebalance_threshold : for ``'threshold'``, rebalance when the absolute
        difference between current and target allocation exceeds this value.
    transaction_cost : proportional cost applied to the absolute traded value.
    """

    def __init__(self, initial_capital=10000, target_allocation=0.5,
                 rebalance_method='time', rebalance_frequency=30,
                 rebalance_threshold=0.2, transaction_cost=0.001):

        self.initial_capital = initial_capital
        self.target_allocation = target_allocation
        self.rebalance_method = rebalance_method
        self.rebalance_frequency = rebalance_frequency
        self.rebalance_threshold = rebalance_threshold
        self.transaction_cost = transaction_cost

        # Results storage (rebuilt from scratch by every call to run())
        self.portfolio_values = []
        self.allocations = []
        self.trades = []
        self.dates = []

    def should_rebalance(self, current_allocation, day_count):
        """
        Determine if rebalancing is needed.

        ``day_count`` is the step index within the price series; it is only
        used by the time-based rule. Returns False for unknown methods.
        """
        if self.rebalance_method == 'time':
            return day_count % self.rebalance_frequency == 0

        elif self.rebalance_method == 'threshold':
            deviation = abs(current_allocation - self.target_allocation)
            return deviation > self.rebalance_threshold

        return False

    def run(self, prices, dates):
        """
        Run the strategy on price data.

        ``prices`` and ``dates`` are indexed by position and are expected to
        have the same length. All stored results, including the trade log, are
        reset first, so the instance can be reused for another run.

        Returns ``self`` so calls can be chained.
        Raises ValueError when ``prices`` is empty.
        """
        if len(prices) == 0:
            raise ValueError("prices must contain at least one observation")

        # Initialize portfolio
        risky_value = self.initial_capital * self.target_allocation
        safe_value = self.initial_capital * (1 - self.target_allocation)
        n_shares = risky_value / prices[0]

        # Track initial state. The trade log is reset together with the other
        # result lists; otherwise a second run() would append to the trades
        # of the previous one.
        self.portfolio_values = [self.initial_capital]
        self.allocations = [self.target_allocation]
        self.dates = [dates[0]]
        self.trades = []

        # Run strategy
        for i in range(1, len(prices)):
            # Current values
            risky_value = n_shares * prices[i]
            total_value = risky_value + safe_value
            current_allocation = risky_value / total_value

            # Store values as they stand before any trade on this step
            self.portfolio_values.append(total_value)
            self.allocations.append(current_allocation)
            self.dates.append(dates[i])

            # Check rebalancing
            if self.should_rebalance(current_allocation, i):
                # Calculate rebalancing trade (positive = buy risky asset)
                target_risky_value = total_value * self.target_allocation
                risky_trade_value = target_risky_value - risky_value
                shares_traded = risky_trade_value / prices[i]

                # Apply transaction costs
                cost = abs(risky_trade_value) * self.transaction_cost

                # Update portfolio. The cost is taken out of the safe part,
                # so it first shows up in the next recorded portfolio value.
                n_shares += shares_traded
                risky_value = n_shares * prices[i]
                safe_value = total_value - risky_value - cost

                # Record trade
                self.trades.append({
                    'date': dates[i],
                    'price': prices[i],
                    'shares_traded': shares_traded,
                    'value_traded': risky_trade_value,
                    'cost': cost,
                    'allocation_before': current_allocation,
                    'allocation_after': self.target_allocation
                })

        return self

    def get_results(self):
        """
        Get strategy results as DataFrame.

        The frame is indexed by ``date`` and has the columns
        ``portfolio_value``, ``allocation``, ``returns`` (period-over-period
        change, NaN on the first row) and ``cumulative_returns`` (compounded
        return since the first row, which is therefore 0).
        """
        results = pd.DataFrame({
            'date': self.dates,
            'portfolio_value': self.portfolio_values,
            'allocation': self.allocations
        })
        results.set_index('date', inplace=True)

        # Calculate returns
        results['returns'] = results['portfolio_value'].pct_change()
        # Treat the undefined first return as 0 so the cumulative curve
        # starts at 0 instead of NaN.
        results['cumulative_returns'] = (1 + results['returns'].fillna(0)).cumprod() - 1

        return results

## 4. Test Strategy on Bitcoin

In [None]:
# Price series and matching dates for Bitcoin
btc_prices = btc_data['Close'].values
btc_dates = btc_data.index

# Baseline: put half of the capital into Bitcoin once, keep the other half
# as cash, and never trade again
initial_capital = 10000
btc_shares = (initial_capital * 0.5) / btc_prices[0]
bh_values = btc_shares * btc_prices + (initial_capital * 0.5)

# Shannon's Demon, rebalanced on every 30th step (run() returns the instance)
sd_time = ShannonsDemon(rebalance_method='time', rebalance_frequency=30).run(
    btc_prices, btc_dates
)

# Shannon's Demon, rebalanced when the allocation drifts more than 0.2 from target
sd_threshold = ShannonsDemon(rebalance_method='threshold', rebalance_threshold=0.2).run(
    btc_prices, btc_dates
)

# Report final values and trade counts
print(
    "Strategy Results:",
    "=" * 50,
    f"Buy and Hold final value: ${bh_values[-1]:,.2f}",
    f"SD Time-based final value: ${sd_time.portfolio_values[-1]:,.2f}",
    f"SD Threshold-based final value: ${sd_threshold.portfolio_values[-1]:,.2f}",
    f"\nNumber of trades:",
    f"  Time-based: {len(sd_time.trades)}",
    f"  Threshold-based: {len(sd_threshold.trades)}",
    sep="\n",
)

In [None]:
# Four-panel overview of the Bitcoin experiment
fig, axes = plt.subplots(2, 2, figsize=(16, 12))


def _label_axis(axis, title, xlabel, ylabel, legend=True):
    """Set title and axis labels, optionally draw the legend, add a light grid."""
    axis.set_title(title)
    axis.set_xlabel(xlabel)
    axis.set_ylabel(ylabel)
    if legend:
        axis.legend()
    axis.grid(True, alpha=0.3)


# Top-left: portfolio value of each strategy
ax = axes[0, 0]
value_curves = [
    (btc_dates, bh_values, 'Buy and Hold'),
    (sd_time.dates, sd_time.portfolio_values, 'SD Time-based'),
    (sd_threshold.dates, sd_threshold.portfolio_values, 'SD Threshold-based'),
]
for curve_x, curve_y, curve_label in value_curves:
    ax.plot(curve_x, curve_y, label=curve_label, linewidth=2)
_label_axis(ax, 'Portfolio Value Over Time', 'Date', 'Portfolio Value ($)')

# Top-right: the underlying Bitcoin price (single line, so no legend)
ax = axes[0, 1]
ax.plot(btc_dates, btc_prices, color='orange', linewidth=2)
_label_axis(ax, 'Bitcoin Price', 'Date', 'Price ($)', legend=False)

# Bottom-left: risky-asset allocation in percent, with the 50% target line
ax = axes[1, 0]
allocation_curves = [
    (sd_time, 'Time-based'),
    (sd_threshold, 'Threshold-based'),
]
for strategy, curve_label in allocation_curves:
    ax.plot(strategy.dates, np.array(strategy.allocations) * 100, label=curve_label, alpha=0.7)
ax.axhline(50, color='red', linestyle='--', label='Target')

# Dots mark the allocation just before each rebalancing trade
for trade_log, dot_color in ((sd_time.trades, 'blue'), (sd_threshold.trades, 'green')):
    for trade in trade_log:
        ax.scatter(trade['date'], trade['allocation_before'] * 100, color=dot_color, s=30, alpha=0.5)

_label_axis(ax, 'Portfolio Allocation Over Time', 'Date', 'Bitcoin Allocation (%)')

# Bottom-right: cumulative return of each strategy in percent
ax = axes[1, 1]
bh_returns = pd.Series(bh_values).pct_change().fillna(0)
bh_cumret = (1 + bh_returns).cumprod() - 1

sd_time_results = sd_time.get_results()
sd_threshold_results = sd_threshold.get_results()

ax.plot(btc_dates, bh_cumret * 100, label='Buy and Hold', linewidth=2)
for result_frame, curve_label in (
    (sd_time_results, 'SD Time-based'),
    (sd_threshold_results, 'SD Threshold-based'),
):
    ax.plot(result_frame.index, result_frame['cumulative_returns'] * 100, label=curve_label, linewidth=2)

_label_axis(ax, 'Cumulative Returns', 'Date', 'Cumulative Return (%)')

plt.tight_layout()
plt.show()

## 5. Compare Different Asset Pairs

In [None]:
def test_asset_pair(asset1_symbol, asset2_symbol='CASH', start_date='2020-01-01',
                    initial_capital=10000, rebalance_threshold=0.2):
    """
    Test Shannon's Demon on an asset pair.

    With ``asset2_symbol='CASH'`` the strategy runs on the ``Close`` prices
    of asset 1. Otherwise both series are restricted to their common dates and
    the strategy runs on the ratio ``asset1 Close / asset2 Close``, so values
    and returns are measured relative to asset 2.

    Parameters
    ----------
    asset1_symbol, asset2_symbol : symbols passed to ``load_asset_data``;
        ``'CASH'`` as second symbol means no second asset is loaded.
    start_date : earliest date to load.
    initial_capital : starting value for both the buy-and-hold baseline and
        the rebalanced portfolio.
    rebalance_threshold : allocation drift that triggers a rebalance.

    Returns a dict with the pair label, buy-and-hold and Shannon's Demon
    returns (percent), their difference, the number of trades, and the
    volatility of the price series (percent, scaled by sqrt(252)).

    Raises ValueError when no prices are available (for a pair: when the two
    assets share no dates).
    """
    # Load data
    asset1 = load_asset_data(asset1_symbol, start_date=start_date)

    if asset2_symbol != 'CASH':
        asset2 = load_asset_data(asset2_symbol, start_date=start_date)
        # Align dates
        common_dates = asset1.index.intersection(asset2.index)
        asset1 = asset1.loc[common_dates]
        asset2 = asset2.loc[common_dates]
        prices = asset1['Close'].values / asset2['Close'].values
    else:
        prices = asset1['Close'].values

    # Fail with a clear message instead of an IndexError on prices[0]
    if len(prices) == 0:
        raise ValueError(
            f"No price data available for {asset1_symbol}/{asset2_symbol} "
            f"from {start_date}"
        )

    dates = asset1.index

    # Buy and hold: half the capital in the risky series, half held constant
    safe_half = initial_capital * 0.5
    shares = safe_half / prices[0]
    bh_values = shares * prices + safe_half
    bh_return = (bh_values[-1] / bh_values[0] - 1) * 100

    # Shannon's Demon
    sd = ShannonsDemon(
        initial_capital=initial_capital,
        rebalance_method='threshold',
        rebalance_threshold=rebalance_threshold,
    )
    sd.run(prices, dates)
    sd_return = (sd.portfolio_values[-1] / sd.portfolio_values[0] - 1) * 100

    return {
        'asset_pair': f"{asset1_symbol}/{asset2_symbol}",
        'bh_return': bh_return,
        'sd_return': sd_return,
        'outperformance': sd_return - bh_return,
        'num_trades': len(sd.trades),
        'volatility': np.std(pd.Series(prices).pct_change()) * np.sqrt(252) * 100
    }

In [None]:
# Test various asset pairs
test_pairs = [
    ('BTC-USD', 'CASH'),
    ('ETH-USD', 'CASH'),
    ('AAPL', 'CASH'),
    ('TSLA', 'CASH'),
    ('TQQQ', 'SQQQ'),  # Leveraged ETF pair
    ('LABU', 'LABD'),  # Biotech bull/bear
]

# Pairs that fail (e.g. missing data file) are reported and skipped
results = []
for asset1, asset2 in test_pairs:
    try:
        result = test_asset_pair(asset1, asset2)
    except Exception as e:
        print(f"Error testing {asset1}/{asset2}: {e}")
    else:
        results.append(result)
        print(f"Tested {asset1}/{asset2}: SD return = {result['sd_return']:.2f}%")

# Create results DataFrame. With no successful pair the frame has no
# 'outperformance' column, so sorting is only attempted when there are rows.
results_df = pd.DataFrame(results)

print("\nAsset Pair Comparison:")
print("=" * 80)
if results:
    results_df = results_df.sort_values('outperformance', ascending=False)
    print(results_df.to_string(index=False, float_format='%.2f'))
else:
    print("No asset pair could be tested.")

## 6. Advanced Analysis: Correlation Impact

In [None]:
# Analyze correlation between assets
def analyze_correlation_impact(symbols, start_date='2020-01-01'):
    """
    Compute and plot the return correlation matrix of the given assets.

    Each symbol's ``Close`` series is loaded with ``load_asset_data``; symbols
    that cannot be loaded are reported and skipped. Returns are computed only
    on dates for which every loaded asset has a price, so assets with
    different trading calendars are compared over the same periods.

    Returns ``(corr_matrix, returns_df)``.
    Raises ValueError when none of the symbols could be loaded.
    """
    # Load all data
    data = {}
    for symbol in symbols:
        try:
            data[symbol] = load_asset_data(symbol, start_date=start_date)['Close']
        except Exception as exc:
            # Best effort: keep going with the remaining symbols, but say so
            print(f"Skipping {symbol}: {exc}")

    if not data:
        raise ValueError("None of the requested symbols could be loaded")

    # Create price DataFrame (index is the union of all assets' dates)
    prices_df = pd.DataFrame(data)

    # Keep only dates shared by all assets before taking returns. Calling
    # pct_change() on a frame with gaps either forward-fills the gaps or
    # yields NaN, depending on the pandas version.
    aligned_prices = prices_df.dropna()

    # Calculate returns
    returns_df = aligned_prices.pct_change().dropna()

    # Calculate correlation matrix
    corr_matrix = returns_df.corr()

    # Plot correlation heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
                square=True, linewidths=1, cbar_kws={"shrink": 0.8})
    plt.title('Asset Correlation Matrix')
    plt.tight_layout()
    plt.show()

    return corr_matrix, returns_df

In [None]:
# Analyze correlations
analysis_symbols = ['BTC-USD', 'ETH-USD', 'AAPL', 'TSLA', 'SPY', 'GLD']
corr_matrix, returns_df = analyze_correlation_impact(analysis_symbols)

# Test Shannon's Demon with different correlation levels
correlation_results = []

# Only symbols that made it into the correlation matrix can be paired;
# looking up a symbol that failed to load would raise KeyError.
loaded_symbols = [s for s in analysis_symbols if s in corr_matrix.index]

# Visit every unordered pair once
for i, asset1 in enumerate(loaded_symbols):
    for asset2 in loaded_symbols[i + 1:]:
        try:
            result = test_asset_pair(asset1, asset2)
        except Exception as e:
            # Best effort: report the failing pair and continue
            print(f"Skipping {asset1}/{asset2}: {e}")
            continue

        result['correlation'] = corr_matrix.loc[asset1, asset2]
        correlation_results.append(result)

# Analyze relationship
if correlation_results:
    corr_df = pd.DataFrame(correlation_results)

    plt.figure(figsize=(10, 6))
    plt.scatter(corr_df['correlation'], corr_df['outperformance'], s=100, alpha=0.6)

    # Add labels
    for _, row in corr_df.iterrows():
        plt.annotate(row['asset_pair'], (row['correlation'], row['outperformance']),
                     fontsize=8, alpha=0.7)

    plt.xlabel('Correlation')
    plt.ylabel('Shannon Demon Outperformance (%)')
    plt.title('Impact of Asset Correlation on Shannon Demon Performance')
    plt.grid(True, alpha=0.3)
    plt.axhline(0, color='red', linestyle='--', alpha=0.5)
    plt.show()

## 7. Transaction Cost Analysis

In [None]:
# Test different transaction costs (as fractions of traded value)
transaction_costs = [0, 0.0001, 0.0005, 0.001, 0.002, 0.005, 0.01]
cost_results = []

for cost in transaction_costs:
    sd = ShannonsDemon(
        rebalance_method='threshold',
        rebalance_threshold=0.2,
        transaction_cost=cost
    )
    sd.run(btc_prices, btc_dates)

    final_return = (sd.portfolio_values[-1] / sd.portfolio_values[0] - 1) * 100
    total_costs = sum(t['cost'] for t in sd.trades)

    cost_results.append({
        'transaction_cost_pct': cost * 100,
        'final_return': final_return,
        'total_costs': total_costs,
        'num_trades': len(sd.trades),
        # Guard against a run without any trade
        'avg_cost_per_trade': total_costs / len(sd.trades) if sd.trades else 0
    })

cost_df = pd.DataFrame(cost_results)

# Plot impact
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Returns vs transaction cost
ax = axes[0]
ax.plot(cost_df['transaction_cost_pct'], cost_df['final_return'], 'o-', markersize=8)
ax.set_xlabel('Transaction Cost (%)')
ax.set_ylabel('Final Return (%)')
ax.set_title('Impact of Transaction Costs on Returns')
ax.grid(True, alpha=0.3)

# Total costs. The cost levels are between 0 and 1 and unevenly spaced, so
# bars placed at those x values (default width 0.8) would overlap almost
# completely. Draw one bar per level at evenly spaced positions instead and
# label each position with its cost level.
ax = axes[1]
bar_positions = list(range(len(cost_df)))
ax.bar(bar_positions, cost_df['total_costs'], alpha=0.7, color='red')
ax.set_xticks(bar_positions)
ax.set_xticklabels([f"{pct:g}" for pct in cost_df['transaction_cost_pct']])
ax.set_xlabel('Transaction Cost (%)')
ax.set_ylabel('Total Costs Paid ($)')
ax.set_title('Total Transaction Costs')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("Transaction Cost Analysis:")
print("=" * 70)
print(cost_df.to_string(index=False, float_format='%.2f'))

## 8. Performance Metrics

In [None]:
def calculate_performance_metrics(values, periods_per_year=252, risk_free_rate=0.02):
    """
    Calculate comprehensive performance metrics.

    Parameters
    ----------
    values : sequence of portfolio values in chronological order (list,
        NumPy array or pandas Series); read by position.
    periods_per_year : number of observations per year, used to annualise
        return and volatility.
    risk_free_rate : annual risk-free rate used by the Sharpe and Sortino
        ratios.

    Returns a dict with ``total_return``, ``annual_return``, ``volatility``,
    ``max_drawdown`` and ``win_rate`` in percent, plus ``sharpe_ratio``,
    ``sortino_ratio`` and ``calmar_ratio``. Ratios whose denominator is not
    positive are reported as 0.

    Raises ValueError when fewer than two values are given, because no return
    can be computed.
    """
    series = pd.Series(np.asarray(values, dtype=float))
    if len(series) < 2:
        raise ValueError("at least two values are required to compute returns")

    returns = series.pct_change().dropna()

    # Annual return. N values span N - 1 periods.
    total_return = series.iloc[-1] / series.iloc[0] - 1
    n_years = (len(series) - 1) / periods_per_year
    annual_return = (1 + total_return) ** (1 / n_years) - 1

    # Volatility
    volatility = returns.std() * np.sqrt(periods_per_year)

    # Sharpe ratio
    sharpe_ratio = (annual_return - risk_free_rate) / volatility if volatility > 0 else 0

    # Maximum drawdown, measured on the value series itself so that the
    # starting value counts as a peak (a loss in the first period is a drawdown).
    running_max = series.cummax()
    drawdown = (series - running_max) / running_max
    max_drawdown = drawdown.min()

    # Calmar ratio
    calmar_ratio = annual_return / abs(max_drawdown) if max_drawdown != 0 else 0

    # Win rate: share of periods with a positive return
    win_rate = (returns > 0).sum() / len(returns) if len(returns) else 0

    # Sortino ratio (standard deviation of the negative returns only)
    downside_returns = returns[returns < 0]
    downside_std = downside_returns.std() * np.sqrt(periods_per_year)
    sortino_ratio = (annual_return - risk_free_rate) / downside_std if downside_std > 0 else 0

    return {
        'total_return': total_return * 100,
        'annual_return': annual_return * 100,
        'volatility': volatility * 100,
        'sharpe_ratio': sharpe_ratio,
        'sortino_ratio': sortino_ratio,
        'max_drawdown': max_drawdown * 100,
        'calmar_ratio': calmar_ratio,
        'win_rate': win_rate * 100
    }

In [None]:
# Value series of each strategy, keyed by display name
strategies = {
    'Buy and Hold': bh_values,
    'SD Time-based': sd_time.portfolio_values,
    'SD Threshold-based': sd_threshold.portfolio_values
}

# One metrics dict per strategy
metrics_comparison = {
    strategy_name: calculate_performance_metrics(value_series)
    for strategy_name, value_series in strategies.items()
}

# Strategies as rows, metrics as columns
metrics_df = pd.DataFrame(metrics_comparison).T

print(
    "Performance Metrics Comparison (Bitcoin):",
    "=" * 80,
    metrics_df.round(2).to_string(),
    sep="\n",
)

# One bar chart per key metric
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

metrics_to_plot = ['annual_return', 'sharpe_ratio', 'max_drawdown', 'calmar_ratio']
titles = ['Annual Return (%)', 'Sharpe Ratio', 'Maximum Drawdown (%)', 'Calmar Ratio']

bar_slots = range(len(strategies))
strategy_labels = list(strategies)

for ax, (metric, title) in zip(axes.flat, zip(metrics_to_plot, titles)):
    heights = metrics_df[metric].values
    bars = ax.bar(bar_slots, heights,
                  color=['blue', 'green', 'orange'])
    ax.set_xticks(bar_slots)
    ax.set_xticklabels(strategy_labels, rotation=45, ha='right')
    ax.set_title(title)
    ax.grid(True, alpha=0.3)

    # Write each value at the top edge of its bar, centred horizontally
    for bar, value in zip(bars, heights):
        label_x = bar.get_x() + bar.get_width()/2.
        ax.text(label_x, bar.get_height(),
                f'{value:.2f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

## 9. Save Results and Next Steps

In [None]:
# Gather the headline results in one dictionary (nothing is written to disk here)
sharpe_by_strategy = metrics_df['sharpe_ratio']

# The pair and cost tables come from earlier cells; fall back to empty dicts
# when those cells were not run
asset_pair_table = {}
if 'results_df' in locals():
    asset_pair_table = results_df.to_dict()

cost_table = {}
if 'cost_df' in locals():
    cost_table = cost_df.to_dict()

results_summary = {
    'strategies': metrics_df.to_dict(),
    'asset_pairs': asset_pair_table,
    'transaction_costs': cost_table,
    'best_strategy': sharpe_by_strategy.idxmax(),
    'best_sharpe': sharpe_by_strategy.max()
}

print(
    "\nKey Findings:",
    "=" * 50,
    f"1. Best strategy by Sharpe ratio: {results_summary['best_strategy']}",
    f"2. Best Sharpe ratio achieved: {results_summary['best_sharpe']:.2f}",
    f"3. Threshold-based rebalancing used {len(sd_threshold.trades)} trades",
    f"4. Time-based rebalancing used {len(sd_time.trades)} trades",
    f"\n5. Transaction costs significantly impact returns above 0.2%",
    sep="\n",
)

## Conclusions and Next Steps

### Key Insights:

1. **Volatility Matters**: Shannon's Demon performs best with highly volatile assets like cryptocurrencies
2. **Correlation Impact**: Lower correlation between assets generally improves performance
3. **Transaction Costs**: Critical factor - keep below 0.1% for profitability
4. **Rebalancing Method**: Threshold-based often outperforms time-based with fewer trades

### Next Steps:

1. **Machine Learning Enhancement** (see notebook 03):
   - Predict optimal rebalancing times
   - Dynamic threshold adjustment
   - Asset selection optimization

2. **Multi-Asset Portfolios**:
   - Extend to 3+ assets
   - Optimize allocation weights
   - Correlation-based rebalancing

3. **Risk Management**:
   - Add stop-loss mechanisms
   - Implement position sizing rules
   - Create drawdown controls

4. **Production Implementation**:
   - Real-time data feeds
   - Automated execution
   - Performance monitoring dashboard