In [None]:
import ccxt
import pandas as pd
from backtest.utilities.data_manager import ExchangeDataManager
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [None]:
bitget = ccxt.bitget()
market = bitget.load_markets()
futures = [s for s in market.keys() if 'USDT:USDT' in s]

In [3]:
def get_pair_data(pair, timeframe, start = 2050, end = 2050):
    exchange = ExchangeDataManager(
        exchange_name="bitget",
        path_download="./database/exchanges"
    )

    return exchange.load_data(pair, timeframe, start, end)

In [4]:
start_date = "2025-01-01 00:00:00"
end_date = "2025-08-10 00:00:00"

In [7]:
df_list = []
for future in futures:
    df = get_pair_data(future, "4h", start_date, end_date)
    df["symbol"] = future
    df_list.append(df)

df = pd.concat(df_list).reset_index()
df_btc = df.loc[df["symbol"] == "BTC/USDT:USDT", ["index", "volume", "open", "close"]].rename({"volume": "volume_btc", "open": "open_btc", "close": "close_btc"}, axis=1)
df = df.loc[df["symbol"] != "BTC/USDT:USDT"]

In [8]:
df_final = pd.merge(df, df_btc, "left", "index")

In [19]:
df_final["volume_usdt"] = (df_final["close"] + df_final["open"]) / 2 * df_final["volume"]
df_final["volume_btc_usdt"] = (df_final["close_btc"] + df_final["open_btc"]) / 2 * df_final["volume_btc"]
df_final["volume_usdt_btc_prop"] = df_final["volume_usdt"] / df_final["volume_btc_usdt"] * 100
df_final["btc_vol"] = (df_final["close_btc"] - df_final["open_btc"]) / df_final["open_btc"] * 100

In [None]:
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap
import warnings
warnings.filterwarnings('ignore')

def analyze_first_candle_impact(df):
    """
    Analyse l'impact de la première bougie sur la performance future des cryptos

    Parameters:
    df: DataFrame avec colonnes ['index', 'low', 'high', 'close', 'open', 'volume', 'symbol']
    """

    # Conversion de la colonne index en datetime si nécessaire
    if not pd.api.types.is_datetime64_any_dtype(df['index']):
        df['index'] = pd.to_datetime(df['index'])

    # Trier par symbol et date
    df = df.sort_values(['symbol', 'index']).reset_index(drop=True)

    results = []

    # Analyser chaque crypto individuellement
    for symbol in df['symbol'].unique():
        symbol_data = df[df['symbol'] == symbol].copy()

        if len(symbol_data) < 10:  # Skip si pas assez de données
            continue

        # Première bougie (assumée être la première date après listing)
        first_candle = symbol_data.iloc[0]

        # Calcul de la performance de la première bougie
        first_candle_perf = (first_candle['close'] - first_candle['open']) / first_candle['open'] * 100
        first_candle_perf_low = (first_candle['low'] - first_candle['open']) / first_candle['open'] * 100
        first_candle_perf_high = (first_candle['high'] - first_candle['open']) / first_candle['open'] * 100

        # Calcul de la volatilité de la première bougie
        first_candle_volatility = (first_candle['high'] - first_candle['low']) / first_candle['open'] * 100

        # Performance et prix à différents horizons (7, 14, 30 jours si disponible)
        performances = {}
        prices = {}

        for days in [7, 14, 30]:
            if len(symbol_data) > days:
                future_price = symbol_data.iloc[days]['close']
                perf = (future_price - first_candle['open']) / first_candle['open'] * 100
                performances[f'perf_{days}d'] = perf
                prices[f'close_t{days}'] = future_price
                prices[f'max_price_t{days}'] = symbol_data.iloc[1:days]['high'].max()
                prices[f'min_price_t{days}'] = symbol_data.iloc[1:days]['low'].min()
            else:
                performances[f'perf_{days}d'] = np.nan
                prices[f'close_t{days}'] = np.nan
                prices[f'max_price_t{days}'] = np.nan
                prices[f'min_price_t{days}'] = np.nan

        # Performance finale (dernière valeur disponible)
        final_price = symbol_data.iloc[-1]['close']
        final_perf = (final_price - first_candle['open']) / first_candle['open'] * 100

        # Calcul du drawdown maximum depuis la première bougie
        symbol_data['cum_return'] = (symbol_data['close'] / first_candle['open'] - 1) * 100
        rolling_max = symbol_data['cum_return'].expanding().max()
        drawdown = symbol_data['cum_return'] - rolling_max
        max_drawdown = drawdown.min()

        results.append({
            'symbol': symbol,
            'open': first_candle['open'],
            'close': first_candle['close'],
            'btc_vol': first_candle['btc_vol'],
            'volume_usdt_btc_prop': first_candle['volume_usdt_btc_prop'],
            'close_t7': prices.get('close_t7', np.nan),
            'close_t14': prices.get('close_t14', np.nan),
            'close_t30': prices.get('close_t30', np.nan),
            'max_price_t7': prices.get('max_price_t7', np.nan),
            'max_price_t14': prices.get('max_price_t14', np.nan),
            'max_price_t30': prices.get('max_price_t30', np.nan),
            'min_price_t7': prices.get('min_price_t7', np.nan),
            'min_price_t14': prices.get('min_price_t14', np.nan),
            'min_price_t30': prices.get('min_price_t30', np.nan),
            'close_final': final_price,
            'first_candle_perf': first_candle_perf,
            'first_candle_perf_low': first_candle_perf_low,
            'first_candle_perf_high': first_candle_perf_high,
            'first_candle_volatility': first_candle_volatility,
            'final_perf': final_perf,
            'max_drawdown': max_drawdown,
            'data_points': len(symbol_data),
            **performances
        })

    return pd.DataFrame(results)

def create_visualization(df, analysis_results):
    """
    Crée les graphiques d'analyse
    """

    # Configuration du style
    plt.style.use('dark_background')
    fig, axes = plt.subplots(2, 2, figsize=(20, 15))
    fig.suptitle('Analyse de l\'Impact de la Première Bougie sur les Cryptos',
                 fontsize=16, fontweight='bold', color='white')

    # 1. Graphique principal : Evolution des prix avec code couleur
    ax1 = axes[0, 0]

    # Création d'une colormap rouge pour les mauvaises performances
    colors = ['#ff0000', '#ff4444', '#ff8888', '#ffaaaa', '#ffffff', '#aaffaa', '#88ff88', '#44ff44', '#00ff00']
    n_bins = 100
    cmap = LinearSegmentedColormap.from_list('performance', colors, N=n_bins)

    # Normalisation des performances de première bougie pour le code couleur
    first_candle_perfs = analysis_results['first_candle_perf'].values
    norm_min, norm_max = np.percentile(first_candle_perfs, [5, 95])  # Éviter les outliers

    for _, row in analysis_results.iterrows():
        symbol = row['symbol']
        symbol_data = df[df['symbol'] == symbol].copy()

        if len(symbol_data) < 2:
            continue

        # Normalisation des prix (base 100 à l'ouverture de la première bougie)
        first_open = symbol_data.iloc[0]['open']
        symbol_data['normalized_price'] = (symbol_data['close'] / first_open) * 100

        # Couleur basée sur la performance de la première bougie
        perf = row['first_candle_perf']
        color_intensity = np.clip((perf - norm_min) / (norm_max - norm_min), 0, 1)
        color = cmap(color_intensity)

        # Épaisseur de ligne basée sur la volatilité
        line_width = min(3, max(0.5, row['first_candle_volatility'] / 50))

        ax1.plot(range(len(symbol_data)), symbol_data['normalized_price'],
                color=color, alpha=0.7, linewidth=line_width, label=symbol if len(analysis_results) < 10 else "")

    ax1.set_title('Évolution des Prix (Base 100)\nCouleur: Rouge = Mauvaise 1ère bougie, Vert = Bonne 1ère bougie',
                  fontsize=12, color='white')
    ax1.set_xlabel('Jours depuis listing', color='white')
    ax1.set_ylabel('Prix normalisé (Base 100)', color='white')
    ax1.grid(True, alpha=0.3)
    ax1.axhline(y=100, color='yellow', linestyle='--', alpha=0.7, label='Prix initial')

    # 2. Scatter plot : Performance première bougie vs Performance finale
    ax2 = axes[0, 1]

    # Ligne de tendance
    z = np.polyfit(analysis_results['first_candle_perf'].dropna(),
                   analysis_results['final_perf'].dropna(), 1)
    p = np.poly1d(z)
    ax2.plot(analysis_results['first_candle_perf'],
             p(analysis_results['first_candle_perf']), "r--", alpha=0.8)

    ax2.set_title('Corrélation: Performance 1ère Bougie vs Performance Finale',
                  fontsize=12, color='white')
    ax2.set_xlabel('Performance 1ère bougie (%)', color='white')
    ax2.set_ylabel('Performance finale (%)', color='white')
    ax2.grid(True, alpha=0.3)
    ax2.axhline(y=0, color='white', linestyle='-', alpha=0.5)
    ax2.axvline(x=0, color='white', linestyle='-', alpha=0.5)

    # 3. Distribution des performances de première bougie
    ax3 = axes[1, 0]
    ax3.hist(analysis_results['first_candle_perf'], bins=30, color='skyblue',
             alpha=0.7, edgecolor='white')
    ax3.axvline(x=analysis_results['first_candle_perf'].mean(), color='red',
               linestyle='--', linewidth=2, label=f'Moyenne: {analysis_results["first_candle_perf"].mean():.1f}%')
    ax3.set_title('Distribution des Performances de 1ère Bougie', fontsize=12, color='white')
    ax3.set_xlabel('Performance 1ère bougie (%)', color='white')
    ax3.set_ylabel('Fréquence', color='white')
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # 4. Analyse des seuils critiques
    ax4 = axes[1, 1]

    # Définir des seuils
    thresholds = [-50, -30, -20, -10, 0, 10, 20, 30]
    success_rates = []

    for threshold in thresholds:
        if threshold < 0:
            # Pour les seuils négatifs, on regarde les cryptos qui ont fait MIEUX que le seuil
            subset = analysis_results[analysis_results['first_candle_perf'] >= threshold]
        else:
            # Pour les seuils positifs, on regarde les cryptos qui ont fait MIEUX que le seuil
            subset = analysis_results[analysis_results['first_candle_perf'] >= threshold]

        if len(subset) > 0:
            success_rate = (subset['final_perf'] > 0).mean() * 100
        else:
            success_rate = 0
        success_rates.append(success_rate)

    ax4.plot(thresholds, success_rates, 'o-', color='orange', linewidth=2, markersize=8)
    ax4.set_title('Taux de Succès selon le Seuil de 1ère Bougie', fontsize=12, color='white')
    ax4.set_xlabel('Seuil performance 1ère bougie (%)', color='white')
    ax4.set_ylabel('% de cryptos avec performance finale > 0', color='white')
    ax4.grid(True, alpha=0.3)
    ax4.axhline(y=50, color='red', linestyle='--', alpha=0.7, label='50% (aléatoire)')
    ax4.legend()

    plt.tight_layout()
    return fig

def print_analysis_summary(analysis_results):
    """
    Affiche un résumé de l'analyse
    """
    print("="*60)
    print("RÉSUMÉ DE L'ANALYSE DE LA PREMIÈRE BOUGIE")
    print("="*60)

    print(f"Nombre de cryptos analysées: {len(analysis_results)}")
    print(f"Performance moyenne 1ère bougie: {analysis_results['first_candle_perf'].mean():.2f}%")
    print(f"Performance médiane 1ère bougie: {analysis_results['first_candle_perf'].median():.2f}%")
    print(f"Écart-type 1ère bougie: {analysis_results['first_candle_perf'].std():.2f}%")

    print("\n--- SEUILS CRITIQUES ---")
    critical_thresholds = [-30, -20, -10]
    for threshold in critical_thresholds:
        below_threshold = analysis_results[analysis_results['first_candle_perf'] <= threshold]
        if len(below_threshold) > 0:
            avg_final_perf = below_threshold['final_perf'].mean()
            success_rate = len(below_threshold['final_perf']) > 0 / len(analysis_results) * 100
            print(f"Cryptos avec 1ère bougie < {threshold}%: {len(below_threshold)} cryptos")
            print(f"  → Performance finale moyenne: {avg_final_perf:.2f}%")
            print(f"  → Taux de succès (>0%): {success_rate:.1f}%")

    print("\n--- CORRÉLATIONS ---")
    correlation = analysis_results['first_candle_perf'].corr(analysis_results['final_perf'])
    print(f"Corrélation 1ère bougie vs performance finale: {correlation:.3f}")

    if 'perf_7d' in analysis_results.columns:
        corr_7d = analysis_results['first_candle_perf'].corr(analysis_results['perf_7d'])
        print(f"Corrélation 1ère bougie vs performance 7j: {corr_7d:.3f}")

    if 'perf_14d' in analysis_results.columns:
        corr_14d = analysis_results['first_candle_perf'].corr(analysis_results['perf_14d'])
        print(f"Corrélation 1ère bougie vs performance 14j: {corr_14d:.3f}")

    if 'perf_30d' in analysis_results.columns:
        corr_30d = analysis_results['first_candle_perf'].corr(analysis_results['perf_30d'])
        print(f"Corrélation 1ère bougie vs performance 30j: {corr_30d:.3f}")

    print("\n--- EXEMPLES DE PRIX ---")
    print("Échantillon des 5 premières cryptos avec leurs prix:")
    sample_columns = ['symbol', 'open', 'close_t7', 'close_t14', 'close_t30', 'close_final']
    print(analysis_results[sample_columns].head().to_string(index=False))

# Fonction principale
def main(df):
    """
    Fonction principale pour exécuter l'analyse complète
    """
    print("Démarrage de l'analyse...")

    # Analyse des données
    analysis_results = analyze_first_candle_impact(df)

    # Affichage du résumé
    print_analysis_summary(analysis_results)

    # Création des visualisations
    fig = create_visualization(df, analysis_results)

    plt.show()

    return analysis_results, fig

# UTILISATION :
# df = pd.read_csv('votre_fichier.csv')  # Chargez votre DataFrame ici
# results, figure = main(df)


In [None]:
results, figure = main(df_final)

In [22]:
results

Unnamed: 0,symbol,open,close,btc_vol,volume_usdt_btc_prop,close_t7,close_t14,close_t30,max_price_t7,max_price_t14,...,first_candle_perf,first_candle_perf_low,first_candle_perf_high,first_candle_volatility,final_perf,max_drawdown,data_points,perf_7d,perf_14d,perf_30d
0,10000000AIDOGE/USDT:USDT,0.002689,0.002697,-0.122889,0.111032,0.003091,0.001309,0.001099,0.004594,0.004594,...,0.297508,-4.053552,3.904797,7.958349,-54.369654,-104.425437,609,14.949795,-51.320193,-59.129788
1,A/USDT:USDT,0.769200,0.756400,0.059949,0.527592,0.746380,0.669500,0.628900,0.778400,0.778400,...,-1.664067,-3.065523,2.791212,5.856734,-24.676287,-38.636245,363,-2.966719,-12.961518,-18.239730
2,AGT/USDT:USDT,0.029663,0.032688,0.164973,0.196122,0.031602,0.031366,0.032952,0.033578,0.035746,...,10.197890,-3.779119,13.569093,17.348212,-81.920237,-103.863399,411,6.536763,5.741159,11.087887
3,AIN/USDT:USDT,0.102720,0.122780,0.321759,0.678794,0.136080,0.121920,0.128010,0.216710,0.216710,...,19.528816,-4.487928,29.049844,33.537773,7.486371,-72.897196,104,32.476636,18.691589,24.620327
4,ALPINE/USDT:USDT,1.041100,1.102700,-0.614732,0.454692,0.914300,0.972300,1.097600,1.253000,1.253000,...,5.916819,-1.940255,18.144270,20.084526,-0.028816,-76.582461,495,-12.179426,-6.608395,5.426952
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,WAL/USDT:USDT,0.439800,0.392600,-0.529171,0.549555,0.464800,0.390000,0.523100,0.592300,0.592300,...,-10.732151,-22.100955,0.977717,23.078672,9.640746,-88.972260,735,5.684402,-11.323329,18.940427
99,WCT/USDT:USDT,0.380800,0.347700,-0.136810,0.546396,0.394200,0.388400,0.467400,0.535500,0.535500,...,-8.692227,-14.784664,6.512605,21.297269,-4.569328,-282.011555,621,3.518908,1.995798,22.741597
100,XAUT/USDT:USDT,3102.000000,3129.800000,-1.271881,0.031618,3056.200000,3054.800000,3012.300000,3141.700000,3141.700000,...,0.896196,-1.479691,1.431335,2.911025,7.656351,-11.273372,692,-1.476467,-1.521599,-2.891683
101,ZBCN/USDT:USDT,0.004684,0.004736,0.577779,0.000237,0.004765,0.004300,0.005890,0.005390,0.005390,...,1.123020,-0.783552,1.123020,1.906572,-11.892053,-92.894658,377,1.731500,-8.183526,25.746189


In [36]:
results.loc[results["first_candle_perf_high"] > 20]

Unnamed: 0,symbol,open,close,btc_vol,volume_usdt_btc_prop,close_t7,close_t14,close_t30,max_price_t7,max_price_t14,...,first_candle_perf,first_candle_perf_low,first_candle_perf_high,first_candle_volatility,final_perf,max_drawdown,data_points,perf_7d,perf_14d,perf_30d
3,AIN/USDT:USDT,0.10272,0.12278,0.321759,0.678794,0.13608,0.12192,0.12801,0.21671,0.21671,...,19.528816,-4.487928,29.049844,33.537773,7.486371,-72.897196,104,32.476636,18.691589,24.620327
10,B/USDT:USDT,0.2265,0.2896,-0.067647,0.01979,0.2785,0.3306,0.3777,0.308,0.3415,...,27.85872,-9.713024,52.626932,62.339956,103.664459,-60.264901,399,22.958057,45.960265,66.754967
11,B2/USDT:USDT,0.4886,0.5363,0.579845,0.016259,0.5867,0.6317,0.6536,0.647,0.648,...,9.762587,-1.780598,29.65616,31.436758,-18.358576,-82.787556,531,20.077773,29.287761,33.769955
12,BABY/USDT:USDT,0.07337,0.09512,0.340479,1.735617,0.09403,0.13902,0.09557,0.108,0.16774,...,29.644269,-3.175685,42.156195,45.33188,-17.336786,-166.716642,651,28.158648,89.477988,30.257598
15,BANK/USDT:USDT,0.02504,0.03912,0.023047,0.020989,0.044,0.04253,0.04457,0.05748,0.05748,...,56.230032,-14.736422,73.003195,87.739617,139.217252,-193.410543,603,75.71885,69.848243,77.995208
16,BDXN/USDT:USDT,0.1,0.07379,0.113869,0.674788,0.06077,0.05171,0.04534,0.07668,0.07668,...,-26.21,-27.95,170.3,198.25,-65.75,-45.5,327,-39.23,-48.29,-54.66
17,BID/USDT:USDT,0.07492,0.09182,-0.354341,0.397808,0.06892,0.06011,0.05634,0.0959,0.0959,...,22.557395,-2.90977,30.672718,33.582488,13.027229,-124.199146,777,-8.008542,-19.767752,-24.799786
20,BR/USDT:USDT,0.1235,0.1617,-0.941132,0.297686,0.1763,0.17,0.1404,0.197,0.197,...,30.931174,-3.724696,56.761134,60.48583,-59.392713,-136.744939,776,42.753036,37.651822,13.684211
25,CUDIS/USDT:USDT,0.07215,0.10379,0.288304,0.022228,0.11621,0.09941,0.09959,0.17374,0.17374,...,43.853084,0.0,73.596674,73.596674,8.038808,-125.114345,315,61.067221,37.782398,38.031878
29,EPIC/USDT:USDT,1.5377,1.5998,-0.214994,0.598165,1.5912,1.7647,1.3551,1.6947,1.9468,...,4.038499,-5.039995,20.589192,25.629186,19.275541,-74.162711,772,3.479222,14.762307,-11.874878


- BTC Volat >= -0.3%
- BTC Volume prop. >= 0.5%

In [18]:
df["index"].unique()

<DatetimeArray>
['2025-03-13 00:00:00', '2025-03-13 04:00:00', '2025-03-13 08:00:00',
 '2025-03-13 12:00:00', '2025-03-13 16:00:00', '2025-03-13 20:00:00',
 '2025-03-14 00:00:00', '2025-03-14 04:00:00', '2025-03-14 08:00:00',
 '2025-03-14 12:00:00',
 ...
 '2025-07-26 04:00:00', '2025-07-26 08:00:00', '2025-07-26 12:00:00',
 '2025-07-26 16:00:00', '2025-07-26 20:00:00', '2025-07-27 00:00:00',
 '2025-07-27 04:00:00', '2025-07-27 08:00:00', '2025-07-27 12:00:00',
 '2025-07-27 16:00:00']
Length: 821, dtype: datetime64[ns]

In [32]:
df_final.loc[df_final["symbol"] == "ES/USDT:USDT"]

Unnamed: 0,index,open,high,low,close,volume,symbol,volume_btc,open_btc,close_btc,volume_usdt,volume_btc_usdt,volume_usdt_btc_prop,btc_vol
45797,2025-07-16 08:00:00,0.6000,0.6002,0.2555,0.3845,18582290.0,ES/USDT:USDT,11238.965053,118151.0,118741.1,9.147132e+06,1.331211e+09,0.687129,0.499446
45798,2025-07-16 12:00:00,0.3845,0.4012,0.3145,0.3426,36685044.0,ES/USDT:USDT,25878.212184,118741.1,119059.4,1.333685e+07,3.076926e+09,0.433447,0.268062
45799,2025-07-16 16:00:00,0.3426,0.3447,0.3052,0.3223,17069228.0,ES/USDT:USDT,12401.386008,119059.4,119193.7,5.674665e+06,1.477334e+09,0.384115,0.112801
45800,2025-07-16 20:00:00,0.3223,0.3384,0.2937,0.3070,11904508.0,ES/USDT:USDT,8808.025043,119193.7,118602.4,3.745753e+06,1.047257e+09,0.357673,-0.496083
45801,2025-07-17 00:00:00,0.3070,0.3262,0.3005,0.3197,12696002.0,ES/USDT:USDT,9893.050775,118602.4,118091.5,3.978292e+06,1.170812e+09,0.339789,-0.430767
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45861,2025-07-27 00:00:00,0.2312,0.2329,0.2308,0.2321,652502.0,ES/USDT:USDT,,,,1.511521e+05,,,
45862,2025-07-27 04:00:00,0.2321,0.2352,0.2321,0.2346,1580964.0,ES/USDT:USDT,,,,3.689179e+05,,,
45863,2025-07-27 08:00:00,0.2346,0.2390,0.2341,0.2360,2083250.0,ES/USDT:USDT,,,,4.901887e+05,,,
45864,2025-07-27 12:00:00,0.2360,0.2380,0.2331,0.2340,1490484.0,ES/USDT:USDT,,,,3.502637e+05,,,
