# Research: Robustesse de BTC-EMA-Cross sur régimes étendus (2019-2025)

## Contexte

**Stratégie C# analysée**: CSharp-BTC-EMA-Cross (QC Project 21193838)

**Logique actuelle**:
- Asset: BTCUSDT Daily (Binance)
- Entrée: EMA(18) > EMA(23) * 1.001
- Sortie: EMA(18) < EMA(23) * 0.999
- Période actuelle: 2021-10-16 → 2026
- Sharpe actuel: 1.094

**Objectif de recherche**: Valider la robustesse de l'EMA 18/23 sur des régimes étendus (2019-2025) incluant:
- COVID crash (Mars 2020, BTC -50% en 2 jours)
- Bull market 2020-2021
- Bear market 2022
- Recovery 2023-2024

## Hypothèses

1. **H1**: L'EMA 18/23 survit au COVID crash avec Sharpe > 0.5
2. **H2**: Performance stable (Sharpe 0.7-0.9) sur l'ensemble 2019-2025
3. **H3**: Walk-forward efficiency > 60% (train 252j, test 63j)
4. **H4**: Pas de meilleurs paramètres EMA que (18,23) sur tous les régimes

## Méthodologie

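1. Charger BTCUSD Daily 2019-01-01 → aujourd'hui via QuantBook (attention : la stratégie analysée trade BTCUSDT sur Binance ; la source de données de cette recherche est donc un proxy, à garder en tête en comparant les Sharpe)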
2. Détecter régimes de marché (bear/bull/sideways via rendements 126j)
3. Backtester EMA(18,23) vectorisé sur chaque régime
4. Grid search EMA periods [(12,26), (15,20), (18,23), (21,55), (30,50)]
5. Walk-forward validation (train 252j, test 63j)
6. Exporter recommandations JSON

In [None]:
# Cell 2: QuantBook setup and data loading
from AlgorithmImports import *
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import json

# Instantiate QuantBook
qb = QuantBook()

# Request daily BTCUSD bars from 2019-01-01 up to the moment the cell is run.
# NOTE(review): the context section describes the analysed strategy as BTCUSDT
# on Binance, whereas no market is passed here — confirm this data source is
# the intended proxy.
btc = qb.AddCrypto("BTCUSD", Resolution.Daily).Symbol
history = qb.History(btc, datetime(2019, 1, 1), datetime.now(), Resolution.Daily)

# When the history has a MultiIndex, keep only the rows keyed by `btc`;
# then order the bars by index
df = history.loc[btc] if isinstance(history.index, pd.MultiIndex) else history
df = df.sort_index()

# Sanity-check output: bar count, first/last index value, column names, first rows.
# NOTE(review): `df.index[0]` raises IndexError if the history came back empty.
print(f"✓ BTC data loaded: {len(df)} bars")
print(f"  Period: {df.index[0]} → {df.index[-1]}")
print(f"  Columns: {df.columns.tolist()}")
print(f"\nFirst 3 bars:")
print(df.head(3))

In [None]:
# Cell 3: Market regime detection

# Return over the previous 126 bars.
# NOTE(review): 126 is the half-year count under the 252-trading-day equity
# convention; on daily crypto bars (the Sharpe ratio in this notebook is
# annualised with 365) it spans roughly four months, not six.
df['returns_126d'] = df['close'].pct_change(126)

# Map a trailing return to a regime label
def classify_regime(ret):
    """Return the market-regime label for one trailing-return value.

    Args:
        ret: Trailing return as a fraction (0.3 means +30%); may be missing.

    Returns:
        'unknown' when ``ret`` is missing, 'bull' when it is strictly above
        +30%, 'bear' when it is strictly below -20%, 'sideways' otherwise.
    """
    # Missing values come first: comparisons against NaN would all be False
    if pd.isna(ret):
        return 'unknown'
    if ret > 0.3:
        return 'bull'
    if ret < -0.2:
        return 'bear'
    return 'sideways'

df['regime'] = df['returns_126d'].apply(classify_regime)

# Per-regime summary of the close column: row count, first and last value,
# rounded to whole numbers
regime_stats = df.groupby('regime').agg({'close': ['count', 'first', 'last']}).round(0)

print("Régimes de marché détectés:")
print(regime_stats)

# Fixed calendar windows for the named market episodes (both bounds inclusive)
bar_index = df.index
covid_crash = df[(bar_index >= '2020-03-01') & (bar_index <= '2020-04-01')]
bull_2021 = df[(bar_index >= '2020-10-01') & (bar_index <= '2021-11-01')]
bear_2022 = df[(bar_index >= '2022-01-01') & (bar_index <= '2022-12-31')]
recovery_2023 = df[(bar_index >= '2023-01-01') & (bar_index <= '2024-12-31')]

# Report how many bars fall into each window
print("\nPériodes clés:")
for window_label, window_frame in (
    ("COVID crash (Mar 2020)", covid_crash),
    ("Bull 2020-2021", bull_2021),
    ("Bear 2022", bear_2022),
    ("Recovery 2023-2024", recovery_2023),
):
    print(f"  {window_label}: {len(window_frame)} bars")

In [None]:
# Cell 4: Backtester vectorisé EMA Cross

def ema_cross_backtest(data, fast_period=18, slow_period=23, up_margin=1.001, down_margin=0.999):
    """
    Vectorised long/flat backtest of the EMA-cross strategy.

    The position switches to long when the fast EMA rises above
    ``slow EMA * up_margin`` and back to flat when it falls below
    ``slow EMA * down_margin``. The position decided on one bar is applied to
    the next bar's close-to-close return, so no bar trades on its own close.

    Args:
        data: DataFrame with a 'close' column, one row per bar. Rows are
            treated as consecutive bars; the index is not inspected for gaps.
        fast_period: Span of the fast EMA.
        slow_period: Span of the slow EMA.
        up_margin: Entry buffer applied to the slow EMA (1.001 = +0.1%).
        down_margin: Exit buffer applied to the slow EMA (0.999 = -0.1%).

    Returns:
        dict with:
            sharpe: mean / std of the per-bar strategy returns scaled by
                sqrt(365); 0 when the standard deviation is zero or undefined.
            max_dd: largest peak-to-trough drop of the compounded equity
                curve (zero or negative).
            total_return: compounded strategy return over the sample.
            n_trades: number of long entries that were held for at least one
                bar, including a position still open on the last bar.
            win_rate: share of bars with a positive strategy return among
                bars with a non-zero strategy return (bar-level, not
                trade-level).
            days: number of rows in ``data``.
        With fewer than two rows no return can be formed, so every metric is
        reported as zero.
    """
    close = data['close']
    n_bars = len(close)

    # A return needs two bars; bail out instead of failing on iloc[-1] or
    # leaking NaN metrics to the caller
    if n_bars < 2:
        return {
            'sharpe': 0,
            'max_dd': 0.0,
            'total_return': 0.0,
            'n_trades': 0,
            'win_rate': 0,
            'days': n_bars
        }

    # EMAs as plain arrays: the state machine below reads them bar by bar
    ema_fast = close.ewm(span=fast_period, adjust=False).mean().to_numpy()
    ema_slow = close.ewm(span=slow_period, adjust=False).mean().to_numpy()
    enter = ema_fast > ema_slow * up_margin
    leave = ema_fast < ema_slow * down_margin

    # Hysteresis state machine (avoids flip-flopping inside the buffer zone).
    # Bar 0 is never acted upon, so the first decision is taken on bar 1.
    held = np.zeros(n_bars)
    in_position = False
    for i, (go_long, go_flat) in enumerate(zip(enter[1:], leave[1:]), start=1):
        if not in_position and go_long:
            in_position = True
        elif in_position and go_flat:
            in_position = False
        held[i] = in_position

    # Apply each decision to the following bar to avoid look-ahead bias
    signal = pd.Series(held, index=data.index).shift(1).fillna(0)

    # Strategy return per bar (the first bar has no previous close -> NaN,
    # which pandas skips in the aggregates below)
    returns = close.pct_change() * signal

    volatility = returns.std()
    sharpe = returns.mean() / volatility * np.sqrt(365) if volatility > 0 else 0

    cum_returns = (1 + returns).cumprod()
    running_peak = cum_returns.cummax()
    max_dd = ((cum_returns - running_peak) / running_peak).min()
    total_return = cum_returns.iloc[-1] - 1

    # Count entries (0 -> 1 transitions). Halving the number of position
    # changes would silently drop a position that is still open at the end.
    n_trades = int((signal.diff() > 0).sum())

    active_bars = returns[returns != 0].count()
    win_rate = (returns[returns > 0].count() / active_bars) if active_bars > 0 else 0

    return {
        'sharpe': round(sharpe, 3),
        'max_dd': round(max_dd, 3),
        'total_return': round(total_return, 3),
        'n_trades': n_trades,
        'win_rate': round(win_rate, 3),
        'days': n_bars
    }

# Baseline EMA(18,23) over the whole sample
rule = "=" * 60
print(rule)
print("BACKTEST EMA(18,23) - Période complète 2019-2025")
print(rule)

full_results = ema_cross_backtest(df)
for metric, value in full_results.items():
    print(f"  {metric}: {value}")

# Same backtest restricted to the rows carrying each regime label.
# NOTE(review): rows sharing a label need not be adjacent in time, yet the
# backtest treats the rows it receives as consecutive bars, so EMAs and
# returns may be computed across date gaps — confirm this is intended.
print("\n" + rule)
print("BACKTEST PAR RÉGIME")
print(rule)

regime_results = {}
for regime in ('bull', 'bear', 'sideways'):
    regime_data = df[df['regime'] == regime]
    if len(regime_data) >= 50:  # regimes with fewer than 50 bars are skipped
        results = ema_cross_backtest(regime_data)
        regime_results[regime] = results
        print(f"\n{regime.upper()} ({results['days']} days):")
        for metric, value in results.items():
            if metric == 'days':
                continue
            print(f"  {metric}: {value}")

# Named calendar windows.
# NOTE(review): each window is backtested on its own rows only, so both EMAs
# start from the window's first close; on the shortest windows the default
# 18/23 spans have little history to work with.
print("\n" + rule)
print("PÉRIODES CLÉS")
print(rule)

key_periods = {
    'COVID Crash (Mar 2020)': covid_crash,
    'Bull 2020-2021': bull_2021,
    'Bear 2022': bear_2022,
    'Recovery 2023-2024': recovery_2023
}

period_results = {}
for period_name, period_data in key_periods.items():
    if len(period_data) >= 20:  # windows with fewer than 20 bars are skipped
        results = ema_cross_backtest(period_data)
        period_results[period_name] = results
        print(f"\n{period_name} ({results['days']} days):")
        for metric, value in results.items():
            if metric == 'days':
                continue
            print(f"  {metric}: {value}")

In [None]:
# Cell 5: Grid search over EMA periods

rule = "=" * 60
print(rule)
print("GRID SEARCH EMA PERIODS")
print(rule)

# Candidate (fast, slow) spans
param_grid = [
    (12, 26),   # Classic MACD-like
    (15, 20),   # Tighter
    (18, 23),   # Current (baseline)
    (21, 55),   # Medium-term
    (30, 50)    # Slower
]

# One metrics row per candidate on the full sample; the spans are appended
# after the metric keys so the resulting column order stays metrics-first
grid_results = [
    {**ema_cross_backtest(df, fast_period=fast, slow_period=slow), 'fast': fast, 'slow': slow}
    for fast, slow in param_grid
]

grid_df = pd.DataFrame(grid_results)
grid_df = grid_df.sort_values('sharpe', ascending=False)

print("\nRésultats triés par Sharpe (période complète):")
print(grid_df[['fast', 'slow', 'sharpe', 'max_dd', 'total_return', 'n_trades']].to_string(index=False))

# Robustness: Sharpe of every candidate on each regime subset
print("\n" + rule)
print("ROBUSTESSE PAR RÉGIME (Sharpe moyen)")
print(rule)

# The regime subsets do not depend on the candidate, so build them once and
# keep only those with at least 50 bars.
# NOTE(review): rows sharing a regime label need not be adjacent in time, yet
# the backtest treats its input rows as consecutive bars — confirm intended.
regime_frames = [df[df['regime'] == regime] for regime in ('bull', 'bear', 'sideways')]
regime_frames = [frame for frame in regime_frames if len(frame) >= 50]

regime_robustness = []
for fast, slow in param_grid:
    sharpes = [
        ema_cross_backtest(frame, fast_period=fast, slow_period=slow)['sharpe']
        for frame in regime_frames
    ]
    regime_robustness.append({
        'fast': fast,
        'slow': slow,
        'avg_sharpe_regimes': round(np.mean(sharpes) if sharpes else 0, 3),
        'min_sharpe_regime': round(np.min(sharpes) if sharpes else 0, 3)
    })

robustness_df = pd.DataFrame(regime_robustness)
robustness_df = robustness_df.sort_values('avg_sharpe_regimes', ascending=False)

print("\nParamètres triés par Sharpe moyen sur tous les régimes:")
print(robustness_df.to_string(index=False))

In [None]:
# Cell 6: Walk-forward validation

print("=" * 60)
print("WALK-FORWARD VALIDATION")
print("Train: 252 days, Test: 63 days (rolling)")
print("=" * 60)

train_window = 252
test_window = 63
step = 63  # equal to test_window, so consecutive test windows do not overlap

wf_columns = [
    'test_start', 'test_end', 'best_fast', 'best_slow',
    'train_sharpe', 'test_sharpe', 'benchmark_sharpe'
]
wf_results = []
start_idx = train_window

# `<=` (not `<`) so that a test window ending exactly on the last bar is kept
while start_idx + test_window <= len(df):
    # The train window ends where the test window starts
    train_data = df.iloc[start_idx - train_window:start_idx]
    test_data = df.iloc[start_idx:start_idx + test_window]

    # In-sample Sharpe of every candidate; on ties the pair listed first in
    # param_grid wins
    train_sharpes = {
        (fast, slow): ema_cross_backtest(train_data, fast_period=fast, slow_period=slow)['sharpe']
        for fast, slow in param_grid
    }
    best_params = max(train_sharpes, key=train_sharpes.get)
    best_sharpe = train_sharpes[best_params]

    # Out-of-sample run with the selected pair, and with the fixed (18,23)
    # pair as benchmark.
    # NOTE(review): the test window is backtested on its own rows only, so
    # both EMAs restart from the window's first close — confirm intended.
    test_results = ema_cross_backtest(test_data, fast_period=best_params[0], slow_period=best_params[1])
    benchmark_results = ema_cross_backtest(test_data, fast_period=18, slow_period=23)

    # The Sharpe values are already rounded to 3 decimals by the backtest
    wf_results.append({
        'test_start': test_data.index[0],
        'test_end': test_data.index[-1],
        'best_fast': best_params[0],
        'best_slow': best_params[1],
        'train_sharpe': best_sharpe,
        'test_sharpe': test_results['sharpe'],
        'benchmark_sharpe': benchmark_results['sharpe']
    })

    start_idx += step

# Explicit columns keep the frame usable when no window fits in the data
wf_df = pd.DataFrame(wf_results, columns=wf_columns)

print(f"\nNombre de périodes walk-forward: {len(wf_df)}")
print("\nDétail par période:")
print(wf_df.to_string(index=False))

# Efficiency = average out-of-sample Sharpe as a percentage of the average
# in-sample Sharpe; reported as 0 when the in-sample average is not positive
if wf_df.empty:
    avg_train_sharpe = avg_test_sharpe = avg_benchmark_sharpe = float('nan')
else:
    avg_train_sharpe = wf_df['train_sharpe'].mean()
    avg_test_sharpe = wf_df['test_sharpe'].mean()
    avg_benchmark_sharpe = wf_df['benchmark_sharpe'].mean()
efficiency = (avg_test_sharpe / avg_train_sharpe * 100) if avg_train_sharpe > 0 else 0

print("\n" + "=" * 60)
print("WALK-FORWARD SUMMARY")
print("=" * 60)
print(f"Avg Train Sharpe (optimized): {avg_train_sharpe:.3f}")
print(f"Avg Test Sharpe (optimized): {avg_test_sharpe:.3f}")
print(f"Avg Benchmark Sharpe (18,23): {avg_benchmark_sharpe:.3f}")
print(f"Walk-Forward Efficiency: {efficiency:.1f}%")

if efficiency > 60:
    print("\n✓ Efficiency > 60%: Optimisation robuste")
else:
    print("\n✗ Efficiency < 60%: Risque d'overfitting")

## Résultats et Findings

*(Cette section sera remplie après exécution)*

### Hypothèse 1: COVID Crash (Mars 2020)
- **Status**: [À déterminer]
- **Sharpe observé**: [À calculer]
- **Conclusion**: [À analyser]

### Hypothèse 2: Performance 2019-2025
- **Status**: [À déterminer]
- **Sharpe observé**: [À calculer]
- **Objectif**: 0.7-0.9

### Hypothèse 3: Walk-Forward Efficiency
- **Status**: [À déterminer]
- **Efficiency observée**: [À calculer]
- **Objectif**: > 60%

### Hypothèse 4: Meilleurs Paramètres
- **Status**: [À déterminer]
- **Params optimaux période complète**: [À identifier]
- **Params optimaux par régime**: [À identifier]

## Conclusions Globales

[À compléter après analyse]

## Recommandations d'Implémentation

[À compléter - probablement juste SetStartDate(2019, 1, 1) si validation OK]

In [None]:
# Cell 8: Export recommendations as JSON

# Gather the findings of the previous cells and the pass/fail status of the
# four hypotheses (H1-H4) into one JSON-serialisable dict
recommendations = {
    "project_id": 21193838,
    "project_name": "CSharp-BTC-EMA-Cross",
    "analysis_date": datetime.now().strftime("%Y-%m-%d"),
    "current_period": "2021-10-16 → 2026",
    "tested_period": "2019-01-01 → 2025",
    "current_sharpe": 1.094,
    "findings": {
        "full_period_sharpe": full_results['sharpe'],
        "full_period_max_dd": full_results['max_dd'],
        "covid_crash_sharpe": period_results.get('COVID Crash (Mar 2020)', {}).get('sharpe', 'N/A'),
        "walk_forward_efficiency": round(efficiency, 1),
        "best_params_overall": {
            "fast": int(grid_df.iloc[0]['fast']),
            "slow": int(grid_df.iloc[0]['slow']),
            "sharpe": float(grid_df.iloc[0]['sharpe'])
        },
        "best_params_robust": {
            "fast": int(robustness_df.iloc[0]['fast']),
            "slow": int(robustness_df.iloc[0]['slow']),
            "avg_sharpe_regimes": float(robustness_df.iloc[0]['avg_sharpe_regimes'])
        }
    },
    "validation_status": {
        # A missing COVID-window result counts as a failure (Sharpe defaults to 0)
        "h1_covid_crash": "PASS" if period_results.get('COVID Crash (Mar 2020)', {}).get('sharpe', 0) > 0.5 else "FAIL",
        "h2_full_period": "PASS" if 0.7 <= full_results['sharpe'] <= 0.9 else "FAIL",
        "h3_walk_forward": "PASS" if efficiency > 60 else "FAIL",
        "h4_params_18_23": "PASS" if grid_df.iloc[0]['fast'] == 18 and grid_df.iloc[0]['slow'] == 23 else "FAIL"
    },
    "recommended_changes": []
}

# Start-date change: requires both the full-period Sharpe and the
# walk-forward efficiency to clear their thresholds
if full_results['sharpe'] >= 0.7 and efficiency > 60:
    recommendations["recommended_changes"].append({
        "file": "Main.cs",
        "line": "SetStartDate(2021, 10, 16)",
        "change_to": "SetStartDate(2019, 1, 1)",
        "reason": f"Extended period validation successful (Sharpe={full_results['sharpe']}, WF Eff={efficiency:.1f}%)"
    })
else:
    # Report both gated quantities with the thresholds actually applied above,
    # so the reason stays accurate whichever of the two failed
    recommendations["recommended_changes"].append({
        "file": "None",
        "reason": (
            f"Extended period validation failed (Sharpe={full_results['sharpe']}, target >= 0.7; "
            f"WF Eff={efficiency:.1f}%, target > 60%)"
        )
    })

# Parameter change: compare the top-ranked pair with the current (18,23) pair
# on the SAME metric (average Sharpe across regimes). The full-period Sharpe
# is only a fallback when (18,23) is absent from the robustness table.
best_fast = int(robustness_df.iloc[0]['fast'])
best_slow = int(robustness_df.iloc[0]['slow'])
best_avg_sharpe = float(robustness_df.iloc[0]['avg_sharpe_regimes'])

baseline_rows = robustness_df[(robustness_df['fast'] == 18) & (robustness_df['slow'] == 23)]
if baseline_rows.empty:
    baseline_avg_sharpe = float(full_results['sharpe'])
else:
    baseline_avg_sharpe = float(baseline_rows.iloc[0]['avg_sharpe_regimes'])

# Demand an improvement of more than 10% of the baseline's magnitude; scaling
# the baseline itself by 1.1 would lower the bar whenever it is negative
improvement = best_avg_sharpe - baseline_avg_sharpe
if (best_fast, best_slow) != (18, 23) and improvement > 0.1 * abs(baseline_avg_sharpe):
    recommendations["recommended_changes"].append({
        "file": "Main.cs",
        "line": "_fastEmaPeriod = 18; _slowEmaPeriod = 23;",
        "change_to": f"_fastEmaPeriod = {best_fast}; _slowEmaPeriod = {best_slow};",
        "reason": f"Better regime robustness (avg Sharpe={best_avg_sharpe} vs {baseline_avg_sharpe} for EMA(18,23))"
    })

# Write the JSON file into the current working directory
output_path = "research_robustness_recommendations.json"
with open(output_path, 'w') as f:
    json.dump(recommendations, f, indent=2)

print("=" * 60)
print("RECOMMENDATIONS EXPORTÉES")
print("=" * 60)
print(json.dumps(recommendations, indent=2))
print(f"\n✓ Saved to: {output_path}")