<a href="https://colab.research.google.com/github/raphaelassoun23/Projet-Python/blob/main/notebooks/MonteCarloCrypto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
#-----Installation des packages et importation des modules------
!pip install yfinance pandas

import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime




In [26]:
#-----Importation des données via YahooFinance------

def get_crypto_yf(ticker, crypto_name):

    df = yf.download(ticker, period="max")
    df = df.reset_index()  # remettre la date comme colonne

    df.columns = [col[0] if isinstance(col, tuple) else col for col in df.columns]

    # Renommer les colonnes pour avoir un dataset propre
    df = df.rename(columns={
        "Date": "timestamp",
        "Open": f"{crypto_name}_open",
        "High": f"{crypto_name}_high",
        "Low": f"{crypto_name}_low",
        "Close": f"{crypto_name}_price",
        "Volume": f"{crypto_name}_volume"
    })

    # On garde uniquement les colonnes utiles
    df = df[["timestamp",
             f"{crypto_name}_price",
             f"{crypto_name}_volume",
             f"{crypto_name}_open",
             f"{crypto_name}_high",
             f"{crypto_name}_low"]]

    return df

# Récupération BTC & ETH

btc_df = get_crypto_yf("BTC-USD", "BTC")
eth_df = get_crypto_yf("ETH-USD", "ETH")

# Fusionner les deux cryptos dans un dataset sur la base temporelle commune

crypto_df = pd.merge(btc_df, eth_df, on='timestamp', how='inner')

#Taille du dataset
print(eth_df.shape)
print(eth_df.shape)
print(crypto_df.shape)

#Premiere et dernière date de chaque datset, bitcoin commence en 2014 et eth en 2017 > on reg a partir de 2017
print(btc_df['timestamp'].min(), btc_df['timestamp'].max())
print(eth_df['timestamp'].min(), eth_df['timestamp'].max())
print(crypto_df['timestamp'].min(), crypto_df['timestamp'].max())

#Afficher le dataset
print(crypto_df.head())


  df = yf.download(ticker, period="max")
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, period="max")
[*********************100%***********************]  1 of 1 completed

(2950, 6)
(2950, 6)
(2950, 11)
2014-09-17 00:00:00 2025-12-06 00:00:00
2017-11-09 00:00:00 2025-12-06 00:00:00
2017-11-09 00:00:00 2025-12-06 00:00:00
   timestamp    BTC_price  BTC_volume     BTC_open     BTC_high      BTC_low  \
0 2017-11-09  7143.580078  3226249984  7446.830078  7446.830078  7101.520020   
1 2017-11-10  6618.140137  5208249856  7173.729980  7312.000000  6436.870117   
2 2017-11-11  6357.600098  4908680192  6618.609863  6873.149902  6204.220215   
3 2017-11-12  5950.069824  8957349888  6295.450195  6625.049805  5519.009766   
4 2017-11-13  6559.490234  6263249920  5938.250000  6811.189941  5844.290039   

    ETH_price  ETH_volume    ETH_open    ETH_high     ETH_low  
0  320.884003   893249984  308.644989  329.451996  307.056000  
1  299.252991   885985984  320.670990  324.717987  294.541992  
2  314.681000   842300992  298.585999  319.453003  298.191986  
3  307.907990  1613479936  314.690002  319.153015  298.513000  
4  316.716003  1041889984  307.024994  328.41500




In [31]:
#------Nettoyage des données------

df_final = crypto_df.copy()

# Vérification et suppression des doublons
print("Doublons avant nettoyage :", df_final.duplicated(subset='timestamp').sum())
df_final = df_final.drop_duplicates(subset='timestamp')

# Vérification des valeurs manquantes
print("Valeurs manquantes avant interpolation :\n", df_final.isna().sum())

# Choix de l'interpolation linéaire pour prix et volume (et OHLC) (ici pas vrmt besoin car 0 valeurs manquantes)
cols_to_interpolate = [
    'BTC_price', 'BTC_volume', 'BTC_open', 'BTC_high', 'BTC_low',
    'ETH_price', 'ETH_volume', 'ETH_open', 'ETH_high', 'ETH_low'
]
df_final[cols_to_interpolate] = df_final[cols_to_interpolate].interpolate(method='linear')

# Conversion du timestamp en datetime
df_final['timestamp'] = pd.to_datetime(df_final['timestamp'])

# Vérification finale
print("Valeurs manquantes après interpolation :\n", df.isna().sum())
print(df_final.info())
print(df_final.head())

Doublons avant nettoyage : 0
Valeurs manquantes avant interpolation :
 timestamp     0
BTC_price     0
BTC_volume    0
BTC_open      0
BTC_high      0
BTC_low       0
ETH_price     0
ETH_volume    0
ETH_open      0
ETH_high      0
ETH_low       0
dtype: int64
Valeurs manquantes après interpolation :
 timestamp     0
BTC_price     0
BTC_volume    0
BTC_open      0
BTC_high      0
BTC_low       0
ETH_price     0
ETH_volume    0
ETH_open      0
ETH_high      0
ETH_low       0
dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2950 entries, 0 to 2949
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   timestamp   2950 non-null   datetime64[ns]
 1   BTC_price   2950 non-null   float64       
 2   BTC_volume  2950 non-null   int64         
 3   BTC_open    2950 non-null   float64       
 4   BTC_high    2950 non-null   float64       
 5   BTC_low     2950 non-null   float64       
 6   ETH_price   2

In [33]:
#-----Création de nouvelles varaibles pour enrichir notre dataset et répondre à la problématique-----

cryptos = ['BTC', 'ETH']

for crypto in cryptos:
    # Rendement journalier logarithmique
    df_final[f'{crypto}_return_daily'] = np.log(df_final[f'{crypto}_price'] / df_final[f'{crypto}_price'].shift(1))

    # Volatilité rolling 7 jours
    df_final[f'{crypto}_volatility_7d'] = df_final[f'{crypto}_return_daily'].rolling(window=7).std()

    # Volatilité rolling 30 jours
    df_final[f'{crypto}_volatility_30d'] = df_final[f'{crypto}_return_daily'].rolling(window=30).std()

    # Moyenne mobile 7 jours
    df_final[f'{crypto}_moving_avg_7d'] = df_final[f'{crypto}_price'].rolling(window=7).mean()

    # Moyenne mobile 30 jours
    df_final[f'{crypto}_moving_avg_30d'] = df_final[f'{crypto}_price'].rolling(window=30).mean()

    # Range journalier (High - Low)
    df_final[f'{crypto}_range_daily'] = df_final[f'{crypto}_high'] - df_final[f'{crypto}_low']

    # Variation journalière du volume
    df_final[f'{crypto}_volume_change'] = df_final[f'{crypto}_volume'].pct_change()

# Vérification rapide
print(df_final.info())
df_final



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2950 entries, 0 to 2949
Data columns (total 25 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   timestamp           2950 non-null   datetime64[ns]
 1   BTC_price           2950 non-null   float64       
 2   BTC_volume          2950 non-null   int64         
 3   BTC_open            2950 non-null   float64       
 4   BTC_high            2950 non-null   float64       
 5   BTC_low             2950 non-null   float64       
 6   ETH_price           2950 non-null   float64       
 7   ETH_volume          2950 non-null   int64         
 8   ETH_open            2950 non-null   float64       
 9   ETH_high            2950 non-null   float64       
 10  ETH_low             2950 non-null   float64       
 11  BTC_return_daily    2949 non-null   float64       
 12  BTC_volatility_7d   2943 non-null   float64       
 13  BTC_volatility_30d  2920 non-null   float64     

Unnamed: 0,timestamp,BTC_price,BTC_volume,BTC_open,BTC_high,BTC_low,ETH_price,ETH_volume,ETH_open,ETH_high,...,BTC_moving_avg_30d,BTC_range_daily,BTC_volume_change,ETH_return_daily,ETH_volatility_7d,ETH_volatility_30d,ETH_moving_avg_7d,ETH_moving_avg_30d,ETH_range_daily,ETH_volume_change
0,2017-11-09,7143.580078,3226249984,7446.830078,7446.830078,7101.520020,320.884003,893249984,308.644989,329.451996,...,,345.310059,,,,,,,22.395996,
1,2017-11-10,6618.140137,5208249856,7173.729980,7312.000000,6436.870117,299.252991,885985984,320.670990,324.717987,...,,875.129883,0.614335,-0.069790,,,,,30.175995,-0.008132
2,2017-11-11,6357.600098,4908680192,6618.609863,6873.149902,6204.220215,314.681000,842300992,298.585999,319.453003,...,,668.929688,-0.057518,0.050270,,,,,21.261017,-0.049307
3,2017-11-12,5950.069824,8957349888,6295.450195,6625.049805,5519.009766,307.907990,1613479936,314.690002,319.153015,...,,1106.040039,0.824798,-0.021758,,,,,20.640015,0.915562
4,2017-11-13,6559.490234,6263249920,5938.250000,6811.189941,5844.290039,316.716003,1041889984,307.024994,328.415009,...,,966.899902,-0.300770,0.028204,,,,,21.390015,-0.354259
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2945,2025-12-02,91350.203125,78546798211,86322.539062,92316.632812,86202.195312,2997.939697,26593645111,2800.223145,3032.761230,...,94837.403125,6114.437500,-0.107046,0.068238,0.040427,0.040253,2979.512172,3138.268384,248.370605,-0.274974
2946,2025-12-03,93527.804688,77650204986,91345.093750,94060.773438,91056.390625,3191.571777,29949301036,2997.801514,3212.559814,...,94403.412500,3004.382812,-0.011415,0.062588,0.046202,0.039833,3002.906424,3124.577173,224.417725,0.126183
2947,2025-12-04,92141.625000,64538402681,93454.257812,94038.242188,90976.101562,3134.316406,27434991113,3188.343506,3238.555420,...,94088.449219,3062.140625,-0.168857,-0.018102,0.047074,0.036514,3020.017020,3119.301904,167.245361,-0.083952
2948,2025-12-05,89387.757812,63256398633,92133.648438,92702.640625,88152.140625,3024.432861,28000268228,3134.357422,3192.457031,...,93604.979948,4550.500000,-0.019864,-0.035688,0.049583,0.036172,3018.892508,3105.943962,202.625488,0.020604


