<a href="https://colab.research.google.com/github/raphaelassoun23/Projet-Python/blob/main/notebooks/MonteCarloCrypto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
#-----Installation des packages et importation des modules------
!pip install yfinance pandas

import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime




In [12]:
#-----Importation des données via API YahooFinance------

def get_data_yf(ticker, name):
    """Download the full daily history of one ticker from Yahoo Finance.

    Parameters
    ----------
    ticker : str
        Yahoo Finance symbol passed to ``yf.download`` (e.g. ``"BTC-USD"``).
    name : str
        Prefix used to build the output column names (e.g. ``"BTC"``).

    Returns
    -------
    pandas.DataFrame
        One row per date with the columns ``timestamp``, ``{name}_price``
        (taken from ``Close``), ``{name}_volume``, ``{name}_open``,
        ``{name}_high`` and ``{name}_low``, in that order.

    Raises
    ------
    ValueError
        If the download returns no data for ``ticker``.
    """
    # auto_adjust is pinned explicitly so the meaning of "Close" does not depend
    # on the installed yfinance version (recent releases warn when it is omitted;
    # True is the value they now default to).
    raw = yf.download(ticker, period="max", auto_adjust=True)

    # Fail early with a clear message instead of a confusing column error below.
    if raw is None or raw.empty:
        raise ValueError(f"No data returned by Yahoo Finance for ticker {ticker!r}")

    # Move the date index back into a regular column
    df = raw.reset_index()

    # The download may come back with two-level (field, ticker) column labels;
    # keep only the field name so the columns are plain strings.
    df.columns = [col[0] if isinstance(col, tuple) else col for col in df.columns]

    # Rename the columns to get a clean, asset-prefixed dataset
    df = df.rename(columns={
        "Date": "timestamp",
        "Open": f"{name}_open",
        "High": f"{name}_high",
        "Low": f"{name}_low",
        "Close": f"{name}_price",
        "Volume": f"{name}_volume"
    })

    # Keep only the useful columns, in a fixed order
    df = df[["timestamp",
             f"{name}_price",
             f"{name}_volume",
             f"{name}_open",
             f"{name}_high",
             f"{name}_low"]]

    return df

# Download the two cryptocurrencies (BTC and ETH)
btc_df = get_data_yf("BTC-USD", "BTC")
eth_df = get_data_yf("ETH-USD", "ETH")

# Download the two equity indices (S&P 500 and NASDAQ)
sp500_df = get_data_yf("^GSPC", "SP500")
nasdaq_df = get_data_yf("^IXIC", "NASDAQ")

# Merge the four series on their common dates. The inner join keeps only the
# timestamps present in every source, so dates missing from any one of them
# are dropped from the merged dataset.
df = (
    btc_df
    .merge(eth_df, on="timestamp", how="inner")
    .merge(sp500_df, on="timestamp", how="inner")
    .merge(nasdaq_df, on="timestamp", how="inner")
)

# Size of each dataset, one shape per line
print(
    btc_df.shape,
    eth_df.shape,
    sp500_df.shape,
    nasdaq_df.shape,
    df.shape,
    sep="\n",
)

# First and last date of each dataset, one "min max" pair per line.
# BTC starts in 2014 and ETH in 2017, so the merged data starts in 2017.
print(
    *(
        f"{frame['timestamp'].min()!s} {frame['timestamp'].max()!s}"
        for frame in (btc_df, eth_df, sp500_df, nasdaq_df, df)
    ),
    sep="\n",
)

# Preview the merged dataset
print(df.head())


  df = yf.download(ticker, period="max")
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, period="max")
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, period="max")
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, period="max")
[*********************100%***********************]  1 of 1 completed

(4100, 6)
(2951, 6)
(24600, 6)
(13826, 6)
(2029, 21)
2014-09-17 00:00:00 2025-12-07 00:00:00
2017-11-09 00:00:00 2025-12-07 00:00:00
1927-12-30 00:00:00 2025-12-05 00:00:00
1971-02-05 00:00:00 2025-12-05 00:00:00
2017-11-09 00:00:00 2025-12-05 00:00:00
   timestamp    BTC_price  BTC_volume     BTC_open     BTC_high      BTC_low  \
0 2017-11-09  7143.580078  3226249984  7446.830078  7446.830078  7101.520020   
1 2017-11-10  6618.140137  5208249856  7173.729980  7312.000000  6436.870117   
2 2017-11-13  6559.490234  6263249920  5938.250000  6811.189941  5844.290039   
3 2017-11-14  6635.750000  3197110016  6561.479980  6764.979980  6461.750000   
4 2017-11-15  7315.540039  4200880128  6634.759766  7342.250000  6634.759766   

    ETH_price  ETH_volume    ETH_open    ETH_high  ...  SP500_price  \
0  320.884003   893249984  308.644989  329.451996  ...  2584.620117   
1  299.252991   885985984  320.670990  324.717987  ...  2582.300049   
2  316.716003  1041889984  307.024994  328.415009  ..




In [13]:
#------Data cleaning------


# Check for and drop rows sharing the same timestamp
print("Doublons avant nettoyage :", df.duplicated(subset='timestamp').sum())
df = df.drop_duplicates(subset='timestamp')

# Convert the timestamp to datetime and order the rows chronologically BEFORE
# interpolating: method='linear' fills gaps by row position, so it only makes
# sense on a time-ordered frame.
df = (
    df.assign(timestamp=pd.to_datetime(df['timestamp']))
      .sort_values('timestamp')
      .reset_index(drop=True)
)

# Check for missing values
print("Valeurs manquantes avant interpolation :\n", df.isna().sum())

# Linear interpolation for the crypto price, volume and OHLC columns
# (not strictly needed on the recorded run, which reports 0 missing values)
cols_to_interpolate = [
    'BTC_price', 'BTC_volume', 'BTC_open', 'BTC_high', 'BTC_low',
    'ETH_price', 'ETH_volume', 'ETH_open', 'ETH_high', 'ETH_low'
]
df[cols_to_interpolate] = df[cols_to_interpolate].interpolate(method='linear')

# Final check
print("Valeurs manquantes après interpolation :\n", df.isna().sum())
# DataFrame.info() prints its summary itself and returns None, so it must not
# be wrapped in print() (that would add a stray "None" line to the output).
df.info()
print(df.head())

Doublons avant nettoyage : 0
Valeurs manquantes avant interpolation :
 timestamp        0
BTC_price        0
BTC_volume       0
BTC_open         0
BTC_high         0
BTC_low          0
ETH_price        0
ETH_volume       0
ETH_open         0
ETH_high         0
ETH_low          0
SP500_price      0
SP500_volume     0
SP500_open       0
SP500_high       0
SP500_low        0
NASDAQ_price     0
NASDAQ_volume    0
NASDAQ_open      0
NASDAQ_high      0
NASDAQ_low       0
dtype: int64
Valeurs manquantes après interpolation :
 timestamp        0
BTC_price        0
BTC_volume       0
BTC_open         0
BTC_high         0
BTC_low          0
ETH_price        0
ETH_volume       0
ETH_open         0
ETH_high         0
ETH_low          0
SP500_price      0
SP500_volume     0
SP500_open       0
SP500_high       0
SP500_low        0
NASDAQ_price     0
NASDAQ_volume    0
NASDAQ_open      0
NASDAQ_high      0
NASDAQ_low       0
dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2029 entries,

In [16]:
#-----Creation of new variables to enrich the dataset and address the research question-----


for col in ['BTC', 'ETH', 'SP500', 'NASDAQ']:
    # Daily log return, computed for all four assets
    df[f'{col}_return_daily'] = np.log(df[f'{col}_price'] / df[f'{col}_price'].shift(1))

    # The remaining features are only built for the two cryptocurrencies.
    # Rolling windows below count rows of df, not calendar days.
    if col in ['BTC', 'ETH']:

        # Rolling volatility over 7 rows
        df[f'{col}_volatility_7d'] = df[f'{col}_return_daily'].rolling(window=7).std()

        # Rolling volatility over 30 rows
        df[f'{col}_volatility_30d'] = df[f'{col}_return_daily'].rolling(window=30).std()

        # Moving average of the price over 7 rows
        df[f'{col}_moving_avg_7d'] = df[f'{col}_price'].rolling(window=7).mean()

        # Moving average of the price over 30 rows
        df[f'{col}_moving_avg_30d'] = df[f'{col}_price'].rolling(window=30).mean()

        # Daily range (High - Low)
        df[f'{col}_range_daily'] = df[f'{col}_high'] - df[f'{col}_low']

        # Relative change of the volume from one row to the next
        df[f'{col}_volume_change'] = df[f'{col}_volume'].pct_change()


# Check for missing values introduced by the shift / rolling computations
print("Valeurs manquantes sur df:\n", df.isna().sum())

# Drop every row that still contains a NaN and rebuild a clean 0..n-1 index
df_final = df.dropna().reset_index(drop=True)

# Quick check of the final dataset
print("Valeurs manquantes sur df_final :\n", df_final.isna().sum())
# DataFrame.info() prints its summary itself and returns None, so it must not
# be wrapped in print() (that would add a stray "None" line to the output).
df_final.info()
df_final

Valeurs manquantes sur df:
 timestamp               0
BTC_price               0
BTC_volume              0
BTC_open                0
BTC_high                0
BTC_low                 0
ETH_price               0
ETH_volume              0
ETH_open                0
ETH_high                0
ETH_low                 0
SP500_price             0
SP500_volume            0
SP500_open              0
SP500_high              0
SP500_low               0
NASDAQ_price            0
NASDAQ_volume           0
NASDAQ_open             0
NASDAQ_high             0
NASDAQ_low              0
BTC_return_daily        1
BTC_volatility_7d       7
BTC_volatility_30d     30
BTC_moving_avg_7d       6
BTC_moving_avg_30d     29
BTC_range_daily         0
BTC_volume_change       1
ETH_return_daily        1
ETH_volatility_7d       7
ETH_volatility_30d     30
ETH_moving_avg_7d       6
ETH_moving_avg_30d     29
ETH_range_daily         0
ETH_volume_change       1
SP500_return_daily      1
NASDAQ_return_daily     1
dtype: int

Unnamed: 0,timestamp,BTC_price,BTC_volume,BTC_open,BTC_high,BTC_low,ETH_price,ETH_volume,ETH_open,ETH_high,...,BTC_volume_change,ETH_return_daily,ETH_volatility_7d,ETH_volatility_30d,ETH_moving_avg_7d,ETH_moving_avg_30d,ETH_range_daily,ETH_volume_change,SP500_return_daily,NASDAQ_return_daily
0,2017-12-22,13831.799805,22197999616,15898.000000,15943.400391,11833.000000,674.859985,4977710080,822.643005,827.682983,...,0.343981,-0.196095,0.102098,0.086628,759.534424,508.866338,283.920959,0.394684,-0.000458,-0.000776
1,2017-12-26,16099.799805,13454300160,14036.599609,16461.199219,14028.900391,773.835999,2201159936,763.369995,786.789978,...,-0.393896,0.136855,0.115322,0.087064,770.680141,524.685772,25.854980,-0.557797,-0.001059,-0.003412
2,2017-12-27,15838.500000,12487600128,16163.500000,16930.900391,15114.299805,762.841980,2100029952,774.969971,789.252991,...,-0.071851,-0.014309,0.115226,0.087325,781.879281,539.556638,50.839966,-0.045944,0.000791,0.000445
3,2017-12-28,14606.500000,12336499712,15864.099609,15888.400391,13937.299805,737.023010,2389149952,762.208008,763.318970,...,-0.012100,-0.034432,0.099526,0.087824,773.647566,552.869704,62.131958,0.137674,0.001832,0.001558
4,2017-12-29,14656.200195,13025500160,14695.799805,15279.000000,14307.000000,753.591980,2648969984,740.388977,770.117004,...,0.055851,0.022232,0.098262,0.087524,763.185992,566.877537,40.509033,0.108750,-0.005197,-0.006752
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1994,2025-12-01,86321.570312,87962894424,90389.109375,90398.156250,83862.250000,2800.188965,36679598313,2991.923096,2996.881836,...,0.444481,-0.079636,0.050276,0.040410,2909.775984,3415.114860,276.445312,0.775396,-0.005338,-0.003849
1995,2025-12-02,91350.203125,78546798211,86322.539062,92316.632812,86202.195312,2997.939697,26593645111,2800.223145,3032.761230,...,-0.107046,0.068238,0.051515,0.042008,2933.513428,3382.354175,248.370605,-0.274974,0.002454,0.005901
1996,2025-12-03,93527.804688,77650204986,91345.093750,94060.773438,91056.390625,3191.571777,29949301036,2997.801514,3212.559814,...,-0.011415,0.062588,0.052942,0.043869,2994.352365,3359.514429,224.417725,0.126183,0.002975,0.001725
1997,2025-12-04,92141.625000,64538402681,93454.257812,94038.242188,90976.101562,3134.316406,27434991113,3188.343506,3238.555420,...,-0.168857,-0.018102,0.050473,0.043871,3020.295654,3337.054232,167.245361,-0.083952,0.001080,0.002174


Les valeurs manquantes présentes dans le dataset proviennent uniquement des transformations réalisées (rendements, volatilités, moyennes mobiles, variations de volume). Ces NaN correspondent aux premières observations, pour lesquelles les fenêtres de calcul ne sont pas encore complètes.
Ils ne représentent donc pas de véritables trous dans les données.
Leur suppression est justifiée, car elle ne retire qu’un très faible nombre d’observations (les 30 premières lignes sur 2 029) et évite d’introduire un biais par une imputation artificielle
(par la moyenne, par la médiane ou par régression).