In [1]:
!pip install -r requirements.txt



In [2]:
try:
    import pytorch_forecasting
    print("PyTorch Forecasting est correctement installé !")
except ModuleNotFoundError as e:
    print(f"Erreur : {e}")

PyTorch Forecasting est correctement installé !


In [3]:
!wandb login --relogin

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [12]:
# -----------------------------------------------
# Bloc : Importations et Pré-requis
# -----------------------------------------------

# 1. Bibliothèques Standard
import logging
import traceback
from datetime import datetime
from typing import Dict, Any

# 2. Bibliothèques Tierces
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import yfinance as yf
import wandb
import ta  # Bibliothèque pour les indicateurs techniques

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler

from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor

from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss

# Configuration du logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('log_importation_library.log'),  # Sauvegarde dans un fichier
        logging.StreamHandler()              # Affichage dans Colab
    ]
)
logger = logging.getLogger(__name__)


In [13]:
# -----------------------------------------------
# Bloc : Configuration WandB pour TFT
# -----------------------------------------------

# Configuration du logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('log_configuration_WandB.log'),  # Sauvegarde dans un fichier
        logging.StreamHandler()              # Affichage dans Colab
    ]
)
logger = logging.getLogger(__name__)



def init_wandb_config(stock_symbol, mode):
    """
    Initialise la configuration WandB complète pour le modèle TFT.

    Args:
        stock_symbol (str): Symbole du stock à analyser (par défaut 'BTC-USD').
        mode (str): Mode de WandB ('disabled' pour désactiver les logs).

    Returns:
        dict: Configuration WandB initialisée.
    """
    try:
        print("Initialisation de la configuration WandB...")

        # Forcer la fermeture de toute session WandB existante
        wandb.finish()

        # Initialisation de WandB avec la configuration complète
        run = wandb.init(
            project="stock_predictions_TFT",
            name=f"tft_{stock_symbol}_{datetime.now().strftime('%Y%m%d_%H%M')}",
            tags=[
                stock_symbol,
                "multi_horizon",
                "Deep Learning",
                "TFT",
                "Complete_Pipeline",
            ],
            mode=mode,
            config={
                "data": {
                    "symbol": stock_symbol,
                    "end_date": datetime.now().strftime('%Y-%m-%d'),
                },
                "preprocessing": {
                    "sort_index": True,
                    "handle_missing": {
                        "method": "drop",  # Options: ['drop', 'fill_forward', 'interpolate']
                        "max_missing_ratio": 0.1,
                        "interpolation_method": "linear",
                    },
                    "normalization": {
                        "type": "standard",  # ['standard', 'robust', 'minmax']
                        "center": True,
                        "exclude_features": ["Year", "Month", "Day", "series_id"],  # Retiré "Day_of_Week"
                        "clip": {  # Clipping des valeurs
                            "enabled": False,
                            "threshold": 3,  # Nombre d'écarts-types
                        },
                        "normalize_target": False,  # Ne pas normaliser la cible
                    },
                    "required_columns": ["Close", "High", "Low", "Open", "Volume"],

                    "target_transform": {
                        "method": "returns",
                        "prediction_mode": "cumulative",
                        "periods": list(range(1, 31)),
                        "use_log_returns": True,
                        "scaling": {
                            "type": "standard",  # Standardisation des returns
                            "center": True,
                            "clip_threshold": 3.0  # Clip les returns extrêmes à 3 écarts-types
                        }
                    }
                },
                "technical_indicators": {
                    "moving_averages": {
                        "sma_periods": [10, 30],
                        "ema_periods": [10],
                        "volume_ma_period": 10,
                    },
                    "oscillators": {
                        "rsi": {
                            "period": 14,
                            "overbought": 70,
                            "oversold": 30,
                        },
                        "stochastic": {
                            "k_period": 14,
                            "d_period": 3,
                            "overbought": 80,
                            "oversold": 20,
                        },
                    },
                    "bollinger_bands": {
                        "period": 20,
                        "std_dev": 2,
                    },
                    "macd": {
                        "fast_period": 12,
                        "slow_period": 26,
                        "signal_period": 9,
                    },
                    "volume_analysis": {
                        "enable_volume_ma": True,
                        "enable_volume_change": True,
                        "volume_ma_period": 10,  # Ajouté pour correspondre à normalize_data
                    },
                    "price_momentum": {
                        "enable_roc": True,
                        "roc_period": 1,
                    },
                    "atr": {
                        "window": 14,
                    },
                    "adx": {
                        "window": 14,
                    },
                    "ichimoku_cloud": {
                        "enabled": True,  # Ajouté pour activer Ichimoku
                        "window1": 9,
                        "window2": 26,
                        "window3": 52
                    },
                    "vwap": {
                        "window": 14,
                    },
                    "cmf": {
                        "window": 20,
                    },
                    "cci": {
                        "window": 20,
                    },
                    "trix": {
                        "window": 15,
                    },
                    "williams_r": {
                        "window": 14,
                    },
                },
                "feature_engineering": {
                    "price_features": {
                        "use_open": True,
                        "use_high": True,
                        "use_low": True,
                        "use_close": True,
                        "use_volume": True,
                    },
                    "advanced_features": {
                        "use_market_features": True,  # Features universelles
                        "use_extended_features": True,  # Features étendues
                        "market_windows": [5, 10, 20],  # Fenêtres pour les calculs
                        "volatility_windows": [5, 10, 20],  # Fenêtres pour la volatilité
                        "volume_analysis": {
                            "enable_volume_patterns": True,
                            "enable_volume_profile": True,
                            "profile_window": 10
                        },
                        "momentum_analysis": {
                            "enable_acceleration": True,
                            "enable_efficiency": True,
                            "efficiency_windows": [5, 10, 20]
                        },
                        "pattern_recognition": {
                            "enable_candlestick_patterns": True,
                            "enable_support_resistance": True,
                            "sr_windows": [10, 20, 50]
                        },
                    },  # Fermeture de 'advanced_features'
                    "technical_features": {
                        "moving_averages": ["SMA_10", "SMA_30", "EMA_10"],
                        "momentum": ["ROC", "ATR", "ADX"],
                        "oscillators": ["RSI", "Stochastic_%K", "Stochastic_%D", "MACD", "MACD_Signal", "MACD_Diff"],
                        "bollinger_bands": ["Bollinger_Middle", "Bollinger_Upper", "Bollinger_Lower"],
                        "ichimoku_cloud": ["Ichimoku_Conversion_Line", "Ichimoku_Base_Line", "Ichimoku_A", "Ichimoku_B"],
                        "volume_analysis": ["Volume_MA_10", "Volume_Change"],
                        "vwap": ["VWAP"],
                        "cmf": ["CMF"],
                        "cci": ["CCI"],
                        "trix": ["TRIX"],
                        "williams_r": ["Williams_%R"],
                        "time_varying_known_categoricals": ["Day_of_Week"],  # Ajouté ici
                    },
                    "temporal_features": {
                        "use_year": True,
                        "use_month": True,
                        "use_day": True,
                        "use_day_of_week": True,  # Utilisation de "Day_of_Week"
                    },
                    "target": "Close",
                },
                "data_split": {
                    "train_ratio": 0.8,
                    "validation_ratio": 0.1,
                    "test_ratio": 0.1,
                    "shuffle": False,  # Important pour les séries temporelles
                },
                "pytorch_forecasting": {
                    "time_series_params": {
                        "max_prediction_length": 30,
                        "max_encoder_length": 180,
                        "min_encoder_length": 30,
                        "min_prediction_length": 1,
                    },
                    "target_params": {
                        "target": "Close",
                        "normalizer": {
                            "type": "group",
                            "transformation": "softplus",
                            "center": True,
                        },
                    },
                    "training_params": {
                        "batch_size": {"value": 32},
                        "num_workers": 0,
                        "shuffle_training": True,
                    },
                    "dataset_params": {
                        "add_relative_time_idx": True,
                        "add_target_scales": True,
                        "add_encoder_length": True,
                    },
                },
                "model": {
                    "architecture": {
                        "hidden_size": {"value": 64},
                        "attention_head_size": {"value": 4},
                        "dropout": {"value": 0.1},
                        "hidden_continuous_size": {"value": 32},
                        "loss_fn": "QuantileLoss",
                        "log_interval": 10,
                    },
                    "training": {
                        "learning_rate": {"value": 0.001},
                        "optimizer": "adam",
                        "max_epochs": {"value": 30},
                        "gradient_clip_val": 0.1,
                        "reduce_on_plateau_patience": 4,
                    },
                    "early_stopping": {
                        "monitor": "val_loss",
                        "min_delta": 1e-4,
                        "patience": 10,
                        "mode": "min",
                    },
                    "scheduler": {
                        "factor": 0.1,
                        "patience": 4,
                        "mode": "min",
                    },
                },
                "evaluation": {
                    "metrics": {
                        "mae": True,
                        "mse": True,
                        "rmse": True,
                        "r2": True,
                        "mape": True,
                        "mase": True,
                        "directional_accuracy": True,
                        "rolling_window": 30,
                    },
                    "prediction": {
                        "batch_size": 1,
                        "num_workers": 0,
                        "quantile": {  # Intervalles de confiance
                            "lower": 0.1,
                            "upper": 0.9
                        },
                    },
                    "visualization": {
                        "plot_predictions": True,
                        "plot_errors": True,
                        "figure_size": {"width": 15, "height": 12},
                        "max_error_annotations": 3,
                        "show_metrics_on_plot": True,
                        "metrics_box_position": {
                            "x": 0.02,
                            "y": 0.98,
                            "font_size": 10
                        }
                    }
                }
            }
        )

        print("Configuration WandB initialisée avec succès.")
        return run.config

    except Exception as e:
        print(f"Erreur lors de l'initialisation de la configuration WandB: {str(e)}")
        traceback.print_exc()
        raise e

# -----------------------------------------------
# Code d'exécution principal
# -----------------------------------------------
if __name__ == "__main__":
    try:
        # On définit les valeurs une seule fois ici
        stock_symbol = 'BTC-USD'
        mode = 'disabled'

        print("\n=== DÉMARRAGE DE L'INITIALISATION WANDB ===")
        config = init_wandb_config(stock_symbol, mode)

        # Vérification de la configuration
        if config:
            print("\n=== VÉRIFICATION DE LA CONFIGURATION ===")
            print(f"Symbole configuré: {config['data']['symbol']}")
            print(f"Date de fin: {config['data']['end_date']}")
            print("\nConfiguration des indicateurs techniques:")
            print(f"- Périodes SMA: {config['technical_indicators']['moving_averages']['sma_periods']}")
            print(f"- Période RSI: {config['technical_indicators']['oscillators']['rsi']['period']}")

            print("\nConfiguration de la normalisation:")
            print(f"- Type: {config['preprocessing']['normalization']['type']}")
            print(f"- Normalisation de la cible: {config['preprocessing']['normalization']['normalize_target']}")

            print("\n=== CONFIGURATION WANDB RÉUSSIE ===")
        else:
            print("\n!!! ERREUR: La configuration n'a pas été initialisée correctement !!!")

    except Exception as e:
        print("\n!!! ERREUR LORS DE L'INITIALISATION !!!")
        print(f"Nature de l'erreur: {str(e)}")
        traceback.print_exc()

        # S'assurer que wandb est bien fermé en cas d'erreur
        try:
            wandb.finish()
        except:
            pass


=== DÉMARRAGE DE L'INITIALISATION WANDB ===
Initialisation de la configuration WandB...
Configuration WandB initialisée avec succès.

=== VÉRIFICATION DE LA CONFIGURATION ===
Symbole configuré: BTC-USD
Date de fin: 2025-01-18

Configuration des indicateurs techniques:
- Périodes SMA: [10, 30]
- Période RSI: 14

Configuration de la normalisation:
- Type: standard
- Normalisation de la cible: False

=== CONFIGURATION WANDB RÉUSSIE ===


In [14]:
# -----------------------------------------------
# Bloc : Téléchargement et Préparation des Données
# -----------------------------------------------

import pandas as pd
import yfinance as yf
from datetime import datetime
from typing import Dict, Any
import logging

# Configuration du logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('log_telechargement_data.log'),  # Sauvegarde dans un fichier
        logging.StreamHandler()              # Affichage dans Colab
    ]
)
logger = logging.getLogger(__name__)

def add_temporal_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Ajoute les caractéristiques temporelles au DataFrame.

    Args:
        df: DataFrame avec un index datetime

    Returns:
        DataFrame enrichi avec les caractéristiques temporelles
    """
    df = df.copy()

    # Extraction des caractéristiques temporelles de l'index
    df['Year'] = df.index.year
    df['Month'] = df.index.month
    df['Day'] = df.index.day
    df['Day_of_Week'] = df.index.dayofweek  # 0 = Lundi, 6 = Dimanche

    return df

def download_and_prepare_data(config: Dict[str, Any]) -> pd.DataFrame:
    """
    Télécharge et prépare les données financières depuis Yahoo Finance.

    Args:
        config: Configuration contenant 'data' et 'preprocessing'.
            data: Doit contenir 'symbol' et optionnellement 'end_date'.
            preprocessing: Doit contenir 'required_columns' et 'handle_missing'.

    Returns:
        pd.DataFrame: Données financières préparées.
    """
    # Validation de base
    symbol = config.get('data', {}).get('symbol')
    if not symbol:
        raise ValueError("Le symbole du stock est requis")

    end_date = config.get('data', {}).get('end_date', datetime.now().strftime('%Y-%m-%d'))
    required_cols = set(config.get('preprocessing', {}).get('required_columns', []))
    missing_method = config.get('preprocessing', {}).get('handle_missing', {}).get('method', 'drop')

    # Téléchargement des données
    logger.info(f"Téléchargement des données pour {symbol}")
    data = yf.download(symbol, end=end_date)

    if data.empty:
        raise ValueError(f"Aucune donnée disponible pour {symbol}")

    # Gestion du MultiIndex des colonnes
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = [col[0] for col in data.columns]

    # Vérification des colonnes requises
    if required_cols:
        available_cols = set(data.columns)
        missing = required_cols - available_cols
        if missing:
            logger.error(f"Colonnes disponibles: {available_cols}")
            raise ValueError(f"Colonnes manquantes: {missing}")

    # Ajout des caractéristiques temporelles
    data = add_temporal_features(data)

    # Gestion des valeurs manquantes
    if missing_method == 'drop':
        data = data.dropna()
    elif missing_method == 'fill_forward':
        data = data.ffill()
    elif missing_method == 'interpolate':
        data = data.interpolate(method='linear')

    logger.info(f"Données préparées: {len(data)} entrées")
    return data

# Appel de la fonction
df = download_and_prepare_data(config)

# Afficher les premières lignes pour vérifier
print(df.head())

[*********************100%***********************]  1 of 1 completed

                 Close        High         Low        Open    Volume  Year  \
Date                                                                         
2014-09-17  457.334015  468.174011  452.421997  465.864014  21056800  2014   
2014-09-18  424.440002  456.859985  413.104004  456.859985  34483200  2014   
2014-09-19  394.795990  427.834991  384.532013  424.102997  37919700  2014   
2014-09-20  408.903992  423.295990  389.882996  394.673004  36863600  2014   
2014-09-21  398.821014  412.425995  393.181000  408.084991  26580100  2014   

            Month  Day  Day_of_Week  
Date                                 
2014-09-17      9   17            2  
2014-09-18      9   18            3  
2014-09-19      9   19            4  
2014-09-20      9   20            5  
2014-09-21      9   21            6  





In [15]:
# -----------------------------------------------
# Bloc : Ajout des Indicateurs Techniques
# -----------------------------------------------

import ta
import pandas as pd
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def add_technical_indicators(data: pd.DataFrame, config: Dict[str, Any]) -> pd.DataFrame:
    """
    Ajoute les indicateurs techniques aux données financières.

    Args:
        data: DataFrame avec colonnes OHLCV
        config: Dict de configuration des indicateurs

    Returns:
        DataFrame enrichi avec les indicateurs techniques
    """
    df = data.copy()
    tech_indicators = config.get('technical_indicators', {})

    try:
        # 1. Moyennes Mobiles
        ma_config = tech_indicators.get('moving_averages', {})
        for period in ma_config.get('sma_periods', []):
            df[f'SMA_{period}'] = ta.trend.SMAIndicator(close=df['Close'], window=period).sma_indicator()
            logger.info(f"SMA_{period} calculé.")
        for period in ma_config.get('ema_periods', []):
            df[f'EMA_{period}'] = ta.trend.EMAIndicator(close=df['Close'], window=period).ema_indicator()
            logger.info(f"EMA_{period} calculé.")

        # 2. Oscillateurs
        osc_config = tech_indicators.get('oscillators', {})

        # RSI
        rsi_config = osc_config.get('rsi', {})
        if rsi_config:
            rsi_period = rsi_config.get('period', 14)
            df['RSI'] = ta.momentum.RSIIndicator(close=df['Close'], window=rsi_period).rsi()
            logger.info(f"RSI (période {rsi_period}) calculé.")

        # Stochastique
        stoch_config = osc_config.get('stochastic', {})
        if stoch_config:
            stoch = ta.momentum.StochasticOscillator(
                high=df['High'], low=df['Low'], close=df['Close'],
                window=stoch_config.get('k_period', 14),
                smooth_window=stoch_config.get('d_period', 3)
            )
            df['Stochastic_%K'] = stoch.stoch()
            df['Stochastic_%D'] = stoch.stoch_signal()
            logger.info("Stochastic_%K et Stochastic_%D calculés.")

        # 3. Bandes de Bollinger
        bb_config = tech_indicators.get('bollinger_bands', {})
        if bb_config:
            bollinger = ta.volatility.BollingerBands(
                close=df['Close'],
                window=bb_config.get('period', 20),
                window_dev=bb_config.get('std_dev', 2)
            )
            df[['Bollinger_Middle', 'Bollinger_Upper', 'Bollinger_Lower']] = pd.concat([
                bollinger.bollinger_mavg(),
                bollinger.bollinger_hband(),
                bollinger.bollinger_lband()
            ], axis=1)
            logger.info("Bandes de Bollinger calculées.")

        # 4. MACD
        macd_config = tech_indicators.get('macd', {})
        if macd_config:
            macd = ta.trend.MACD(
                close=df['Close'],
                window_slow=macd_config.get('slow_period', 26),
                window_fast=macd_config.get('fast_period', 12),
                window_sign=macd_config.get('signal_period', 9)
            )
            df[['MACD', 'MACD_Signal', 'MACD_Diff']] = pd.concat([
                macd.macd(), macd.macd_signal(), macd.macd_diff()
            ], axis=1)
            logger.info("MACD calculé.")

        # 5. Volume et Momentum
        volume_analysis = tech_indicators.get('volume_analysis', {})
        if volume_analysis.get('enable_volume_ma', False):
            period = volume_analysis.get('volume_ma_period', 10)
            df[f'Volume_MA_{period}'] = ta.trend.SMAIndicator(close=df['Volume'], window=period).sma_indicator()
            logger.info(f"Volume_MA_{period} calculé.")

        if volume_analysis.get('enable_volume_change', False):
            df['Volume_Change'] = df['Volume'].pct_change() * 100
            logger.info("Volume_Change calculé.")

        # 6. Indicateurs Supplémentaires
        price_momentum = tech_indicators.get('price_momentum', {})
        if price_momentum.get('enable_roc', False):
            roc_period = price_momentum.get('roc_period', 1)
            df['ROC'] = ta.momentum.ROCIndicator(close=df['Close'], window=roc_period).roc()
            logger.info(f"ROC (période {roc_period}) calculé.")

        # ATR
        atr_config = tech_indicators.get('atr', {})
        if atr_config:
            atr_period = atr_config.get('window', 14)
            df['ATR'] = ta.volatility.AverageTrueRange(
                high=df['High'], low=df['Low'], close=df['Close'], window=atr_period
            ).average_true_range()
            logger.info(f"ATR (période {atr_period}) calculé.")

        # ADX
        adx_config = tech_indicators.get('adx', {})
        if adx_config:
            adx_period = adx_config.get('window', 14)
            df['ADX'] = ta.trend.ADXIndicator(
                high=df['High'], low=df['Low'], close=df['Close'], window=adx_period
            ).adx()
            logger.info(f"ADX (période {adx_period}) calculé.")

        # Ichimoku Cloud
        ichimoku_config = tech_indicators.get('ichimoku_cloud', {})
        if ichimoku_config.get('enabled', False):
            ichimoku = ta.trend.IchimokuIndicator(
                high=df['High'], low=df['Low'],
                window1=ichimoku_config.get('window1', 9),
                window2=ichimoku_config.get('window2', 26),
                window3=ichimoku_config.get('window3', 52)
            )
            df['Ichimoku_Conversion_Line'] = ichimoku.ichimoku_conversion_line()
            df['Ichimoku_Base_Line'] = ichimoku.ichimoku_base_line()
            df['Ichimoku_A'] = ichimoku.ichimoku_a()
            df['Ichimoku_B'] = ichimoku.ichimoku_b()
            logger.info("Indicateurs Ichimoku Cloud calculés.")

        # VWAP
        vwap_config = tech_indicators.get('vwap', {})
        if vwap_config:
            vwap_period = vwap_config.get('window', 14)
            df['VWAP'] = ta.volume.VolumeWeightedAveragePrice(
                high=df['High'], low=df['Low'], close=df['Close'], volume=df['Volume'], window=vwap_period
            ).volume_weighted_average_price()
            logger.info(f"VWAP (période {vwap_period}) calculé.")

        # CMF
        cmf_config = tech_indicators.get('cmf', {})
        if cmf_config:
            cmf_period = cmf_config.get('window', 20)
            df['CMF'] = ta.volume.ChaikinMoneyFlowIndicator(
                high=df['High'], low=df['Low'], close=df['Close'], volume=df['Volume'], window=cmf_period
            ).chaikin_money_flow()
            logger.info(f"CMF (période {cmf_period}) calculé.")

        # CCI
        cci_config = tech_indicators.get('cci', {})
        if cci_config:
            cci_period = cci_config.get('window', 20)
            df['CCI'] = ta.trend.CCIIndicator(
                high=df['High'], low=df['Low'], close=df['Close'], window=cci_period
            ).cci()
            logger.info(f"CCI (période {cci_period}) calculé.")

        # TRIX
        trix_config = tech_indicators.get('trix', {})
        if trix_config:
            trix_period = trix_config.get('window', 15)
            df['TRIX'] = ta.trend.TRIXIndicator(close=df['Close'], window=trix_period).trix()
            logger.info(f"TRIX (période {trix_period}) calculé.")


        # Williams %R - Correction du paramètre window en lbp
        williams_r_config = tech_indicators.get('williams_r', {})
        if williams_r_config:
            williams_period = williams_r_config.get('window', 14)
            df['Williams_%R'] = ta.momentum.WilliamsRIndicator(
                high=df['High'],
                low=df['Low'],
                close=df['Close'],
                lbp=williams_period  # Utilisation de lbp au lieu de window
            ).williams_r()
            logger.info(f"Williams_%R (période {williams_period}) calculé.")

        # Nettoyage final
        rows_before = len(df)
        df.dropna(inplace=True)
        logger.info(f"Lignes: {rows_before} → {len(df)} après nettoyage")

        return df

    except Exception as e:
        logger.error(f"Erreur lors du calcul des indicateurs: {e}")
        raise


# 1. D'abord télécharger les données
#df = download_and_prepare_data(config)

# 2. Ensuite ajouter les indicateurs techniques
df_with_indicators = add_technical_indicators(df, config)

# Afficher les colonnes disponibles
print("Colonnes disponibles :")
print(df_with_indicators.columns.tolist())

# Afficher les premières lignes
print("\nPremières lignes :")
print(df_with_indicators.head())

Colonnes disponibles :
['Close', 'High', 'Low', 'Open', 'Volume', 'Year', 'Month', 'Day', 'Day_of_Week', 'SMA_10', 'SMA_30', 'EMA_10', 'RSI', 'Stochastic_%K', 'Stochastic_%D', 'Bollinger_Middle', 'Bollinger_Upper', 'Bollinger_Lower', 'MACD', 'MACD_Signal', 'MACD_Diff', 'Volume_MA_10', 'Volume_Change', 'ROC', 'ATR', 'ADX', 'Ichimoku_Conversion_Line', 'Ichimoku_Base_Line', 'Ichimoku_A', 'Ichimoku_B', 'VWAP', 'CMF', 'CCI', 'TRIX', 'Williams_%R']

Premières lignes :
                 Close        High         Low        Open    Volume  Year  \
Date                                                                         
2014-10-30  345.304993  350.912994  335.071991  335.709015  30177900  2014   
2014-10-31  338.321014  348.045013  337.141998  345.009003  12545400  2014   
2014-11-01  325.748993  340.528992  321.054993  338.649994  16677200  2014   
2014-11-02  325.891998  329.049988  320.626007  326.075012   8603620  2014   
2014-11-03  327.553986  334.002014  325.480988  325.569000  12948

In [271]:
# -----------------------------------------------
# Bloc : Features Avancées de Marché
# -----------------------------------------------


logger = logging.getLogger(__name__)

def add_market_features(data: pd.DataFrame) -> pd.DataFrame:
    """
    Ajoute des features universelles de marché applicables à tout type d'actif.

    Args:
        data: DataFrame avec les données OHLCV
    Returns:
        DataFrame enrichi avec les features de marché
    """
    try:
        df = data.copy()

        # 1. Volatilité relative
        for window in [5, 10, 20]:
            # Range normalisé
            df[f'Normalized_Range_{window}d'] = (
                (df['High'].rolling(window).max() - df['Low'].rolling(window).min()) /
                df['Close'].rolling(window).mean()
            ) * 100

            # Volatilité de Parkinson (basée sur High-Low)
            df[f'Parkinson_Volatility_{window}d'] = (
                np.sqrt(
                    1 / (4 * window * np.log(2)) *
                    ((np.log(df['High'] / df['Low'])) ** 2)
                ).rolling(window).mean() * np.sqrt(252)
            )

        # 2. Volume patterns
        df['Volume_Price_Impact'] = (
            (df['Close'] - df['Open']).abs() /
            (df['Volume'] * df['Close'])
        ).replace([np.inf, -np.inf], np.nan)

        df['Volume_Distribution'] = (
            df['Volume'] / df['Volume'].rolling(20).mean()
        )

        # 3. Momentum et accélération
        df['Price_Acceleration'] = df['Close'].pct_change().diff()
        df['Volume_Acceleration'] = df['Volume'].pct_change().diff()

        # 4. Patterns de trading
        df['Upper_Shadow'] = (
            (df['High'] - df[['Open', 'Close']].max(axis=1)) /
            df['Close']
        ) * 100

        df['Lower_Shadow'] = (
            (df[['Open', 'Close']].min(axis=1) - df['Low']) /
            df['Close']
        ) * 100

        df['Body_Size'] = (
            (df['Close'] - df['Open']).abs() /
            df['Close']
        ) * 100

        # 5. Gap analysis
        df['Daily_Gap'] = (
            (df['Open'] - df['Close'].shift(1)) /
            df['Close'].shift(1)
        ) * 100

        # 6. Momentum confirmations
        df['Price_Volume_Trend'] = (
            df['Close'].pct_change() *
            df['Volume'] /
            df['Volume'].rolling(20).mean()
        )

        # 7. Support/Resistance proxies
        for window in [10, 20, 50]:
            # Distance from moving average
            df[f'MA_Distance_{window}d'] = (
                (df['Close'] - df['Close'].rolling(window).mean()) /
                df['Close'].rolling(window).mean()
            ) * 100

            # Rolling support/resistance levels
            df[f'Support_Distance_{window}d'] = (
                (df['Close'] - df['Low'].rolling(window).min()) /
                df['Low'].rolling(window).min()
            ) * 100

            df[f'Resistance_Distance_{window}d'] = (
                (df['High'].rolling(window).max() - df['Close']) /
                df['Close']
            ) * 100

        # Nettoyage des valeurs infinies ou NA
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.fillna(method='ffill').fillna(method='bfill')

        logger.info(f"Features de marché ajoutées: {[col for col in df.columns if col not in data.columns]}")
        return df

    except Exception as e:
        logger.error(f"Erreur lors de l'ajout des features de marché: {str(e)}")
        raise e

def add_extended_market_features(data: pd.DataFrame) -> pd.DataFrame:
    """
    Ajoute des features de marché plus avancées.

    Args:
        data: DataFrame avec les données OHLCV et features de base
    Returns:
        DataFrame enrichi avec les features étendues
    """
    try:
        df = data.copy()

        # 1. Consolidation/Expansion patterns
        for window in [5, 10, 20]:
            # Volatility expansion/contraction
            df[f'Volatility_Change_{window}d'] = (
                df['Close'].rolling(window).std() /
                df['Close'].rolling(window).std().shift(window)
            ) - 1

            # Volume expansion/contraction
            df[f'Volume_Change_{window}d'] = (
                df['Volume'].rolling(window).mean() /
                df['Volume'].rolling(window).mean().shift(window)
            ) - 1

            # Price range expansion/contraction
            df[f'Range_Change_{window}d'] = (
                (df['High'].rolling(window).max() - df['Low'].rolling(window).min()) /
                (df['High'].rolling(window).max().shift(window) -
                 df['Low'].rolling(window).min().shift(window))
            ) - 1

        # 2. Trend strength indicators
        df['ADX_Trend_Strength'] = df['ADX'] / 100 if 'ADX' in df.columns else np.nan
        df['Trend_Intensity'] = (
            df['Close'].diff().rolling(20).apply(lambda x: (x > 0).sum() / 20)
        )

        # 3. Volume profile
        df['Volume_Profile_High'] = (
            df['Volume'] * (df['High'] / df['Close'] - 1)
        ).rolling(10).mean()

        df['Volume_Profile_Low'] = (
            df['Volume'] * (1 - df['Low'] / df['Close'])
        ).rolling(10).mean()

        # 4. Price efficiency
        for window in [5, 10, 20]:
            df[f'Price_Efficiency_{window}d'] = (
                (df['Close'] - df['Close'].shift(window)).abs() /
                (df['High'].rolling(window).max() - df['Low'].rolling(window).min())
            )

        # Nettoyage des valeurs infinies ou NA
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.fillna(method='ffill').fillna(method='bfill')

        logger.info(f"Features étendues ajoutées: {[col for col in df.columns if col not in data.columns]}")
        return df

    except Exception as e:
        logger.error(f"Erreur lors de l'ajout des features étendues: {str(e)}")
        raise e

# Étape 1: Télécharger les données
#df = download_and_prepare_data(config)

# Étape 2: Ajouter les indicateurs techniques
#df = add_technical_indicators(df, config)

# Étape 3: Ajouter les features de marché de base
df = add_market_features(df)

# Étape 4: Ajouter les features de marché étendues
df_final = add_extended_market_features(df)

# Afficher les informations
print("\nColonnes disponibles :")
print(df_final.columns.tolist())

print("\nAperçu des données :")
print(df_final.head())

# Optionnel : Sauvegarder les données
#df_final.to_csv('analysis_results.csv')

  df = df.fillna(method='ffill').fillna(method='bfill')



Colonnes disponibles :
['Close', 'High', 'Low', 'Open', 'Volume', 'Year', 'Month', 'Day', 'Day_of_Week', 'Normalized_Range_5d', 'Parkinson_Volatility_5d', 'Normalized_Range_10d', 'Parkinson_Volatility_10d', 'Normalized_Range_20d', 'Parkinson_Volatility_20d', 'Volume_Price_Impact', 'Volume_Distribution', 'Price_Acceleration', 'Volume_Acceleration', 'Upper_Shadow', 'Lower_Shadow', 'Body_Size', 'Daily_Gap', 'Price_Volume_Trend', 'MA_Distance_10d', 'Support_Distance_10d', 'Resistance_Distance_10d', 'MA_Distance_20d', 'Support_Distance_20d', 'Resistance_Distance_20d', 'MA_Distance_50d', 'Support_Distance_50d', 'Resistance_Distance_50d', 'Volatility_Change_5d', 'Volume_Change_5d', 'Range_Change_5d', 'Volatility_Change_10d', 'Volume_Change_10d', 'Range_Change_10d', 'Volatility_Change_20d', 'Volume_Change_20d', 'Range_Change_20d', 'ADX_Trend_Strength', 'Trend_Intensity', 'Volume_Profile_High', 'Volume_Profile_Low', 'Price_Efficiency_5d', 'Price_Efficiency_10d', 'Price_Efficiency_20d']

Aperçu

  df = df.fillna(method='ffill').fillna(method='bfill')


In [272]:
# -----------------------------------------------
# Bloc : Définition des Features et Division des Données
# -----------------------------------------------
import pandas as pd
import numpy as np
from typing import Dict, Any, Tuple, List
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def add_scale_invariant_features(data: pd.DataFrame) -> pd.DataFrame:
    """
    Ajoute des features invariantes à l'échelle des prix.
    Utilise principalement des variations relatives et des ratios.
    """
    df = data.copy()

    # 1. Returns sur différentes périodes
    for period in [1, 3, 7, 14, 30]:
        # Returns arithmétiques
        df[f'Return_{period}d'] = df['Close'].pct_change(period) * 100
        # Returns logarithmiques
        df[f'Log_Return_{period}d'] = np.log(df['Close'] / df['Close'].shift(period))

    # 2. Ratios de prix (déjà invariants à l'échelle)
    df['High_Low_Ratio'] = df['High'] / df['Low']
    df['Close_Open_Ratio'] = df['Close'] / df['Open']

    # 3. Ratios de volume
    df['Volume_Price_Ratio'] = df['Volume'] / df['Close']  # Volume/Price ratio
    df['Relative_Volume'] = df['Volume'] / df['Volume'].rolling(30).mean()  # Volume relatif

    # 4. Momentum relatif
    for period in [7, 14, 30]:
        df[f'Price_ROC_{period}d'] = (df['Close'] - df['Close'].shift(period)) / df['Close'].shift(period) * 100
        df[f'Volume_ROC_{period}d'] = (df['Volume'] - df['Volume'].shift(period)) / df['Volume'].shift(period) * 100

    # 5. Volatilité relative
    for period in [7, 14, 30]:
        # Volatilité basée sur les returns
        df[f'Volatility_{period}d'] = df['Return_1d'].rolling(window=period).std()
        # Volatilité normalisée par le prix
        df[f'Normalized_TR_{period}d'] = (df['High'] - df['Low']) / df['Close'].rolling(period).mean() * 100

    # 6. Indicateurs techniques normalisés
    # Normaliser Bollinger Bands
    if 'Bollinger_Middle' in df.columns:
        df['BB_Width'] = (df['Bollinger_Upper'] - df['Bollinger_Lower']) / df['Bollinger_Middle'] * 100
        df['BB_Position'] = (df['Close'] - df['Bollinger_Lower']) / (df['Bollinger_Upper'] - df['Bollinger_Lower'])

    # 7. Position relative des prix
    for period in [30, 90]:
        df[f'Price_Position_{period}d'] = (df['Close'] - df['Low'].rolling(period).min()) / \
                                        (df['High'].rolling(period).max() - df['Low'].rolling(period).min())

    return df

def prepare_target_scaling(config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prépare la configuration pour le scaling de la cible.
    """
    config['preprocessing']['target_transform'] = {
        'method': 'returns',  # ['returns', 'log_returns']
        'prediction_mode': 'cumulative',  # ['cumulative', 'direct']
        'periods': list(range(1, 31)),  # Prédiction à 30 jours
        'use_log_returns': True
    }

    # Modification de la configuration du modèle pour prendre en compte les returns
    config['model']['architecture'].update({
        'output_mode': 'returns',
        'return_scaling': 'log' if config['preprocessing']['target_transform']['use_log_returns'] else 'standard'
    })

    return config


def transform_target_to_returns(data: pd.DataFrame, config: Dict[str, Any]) -> pd.DataFrame:
    """
    Transforme la variable cible en returns pour la rendre invariante à l'échelle.
    """
    df = data.copy()
    target_config = config['preprocessing']['target_transform']

    if target_config['method'] == 'returns':
        # Calculer les returns pour chaque horizon de prédiction
        for period in target_config['periods']:
            if target_config['use_log_returns']:
                df[f'Target_Return_{period}d'] = np.log(df['Close'].shift(-period) / df['Close'])
            else:
                df[f'Target_Return_{period}d'] = (df['Close'].shift(-period) / df['Close'] - 1) * 100

    return df

def prepare_features_and_split(data: pd.DataFrame, config: Dict[str, Any]) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[str], str]:
    """
    Prépare les features et divise les données pour l'entraînement.
    """
    def _validate_split_ratios(ratios: Dict[str, float]) -> None:
        total = sum(ratios.values())
        if not np.isclose(total, 1.0):
            raise ValueError(f"Les ratios doivent sommer à 1.0 (actuel: {total})")

    try:
        # 1. Préparation des données de base
        df = data.copy()

        # 2. Création des returns (cible)
        # Calcul des returns pour différentes périodes
        for period in range(1, 31):
            df[f'Target_Return_{period}d'] = df['Close'].pct_change(period) * 100
            if config.get('preprocessing', {}).get('target_transform', {}).get('use_log_returns', True):
                df[f'Target_Log_Return_{period}d'] = np.log(df['Close']/df['Close'].shift(period)) * 100

        # 3. Features invariantes à l'échelle
        df = add_scale_invariant_features(df)

        # 4. Features avancées de marché
        advanced_config = config.get('feature_engineering', {}).get('advanced_features', {})

        if advanced_config.get('use_market_features', True):
            df = add_market_features(df)
            logger.info("Features de marché universelles ajoutées")

        if advanced_config.get('use_extended_features', True):
            df = add_extended_market_features(df)
            logger.info("Features de marché étendues ajoutées")

        # 5. Sélection des features
        feature_config = config.get('feature_engineering', {})
        features = []

        # Features de prix et techniques
        for category in ['market_features', 'extended_features']:
            if advanced_config.get(f'use_{category}', True):
                features.extend([col for col in df.columns if col.startswith(tuple([
                    'Return_', 'Log_Return_', 'Normalized_', 'Volume_', 'Price_',
                    'MA_', 'Support_', 'Resistance_', 'Volatility_'
                ]))])

        # Features temporelles
        temp_features = feature_config.get('temporal_features', {})
        temp_cols = {'year': 'Year', 'month': 'Month', 'day': 'Day', 'day_of_week': 'Day_of_Week'}
        features.extend([col for key, col in temp_cols.items() if temp_features.get(f'use_{key}', False)])

        # 6. Target selection
        target = 'Target_Return_30d'  # Ou Target_Log_Return_30d selon la config
        if config.get('preprocessing', {}).get('target_transform', {}).get('use_log_returns', True):
            target = 'Target_Log_Return_30d'

        # Retrait de la cible des features
        features = [f for f in features if not f.startswith('Target_')]

        # 7. Gestion des données manquantes
        missing_config = config.get('preprocessing', {}).get('handle_missing', {})
        method = missing_config.get('method', 'drop')

        if method == 'drop':
            df = df.dropna(subset=features + [target])
        elif method == 'fill_forward':
            df = df.ffill().bfill()  # Utilisation de ffill() et bfill() au lieu de fillna
        elif method == 'interpolate':
            df = df.interpolate(method=missing_config.get('interpolation_method', 'linear'))

        # 8. Division des données avec chevauchement
        split_config = config.get('data_split', {})
        overlap_days = 30

        n = len(df)
        train_size = int(n * split_config.get('train_ratio', 0.8))
        val_size = int(n * split_config.get('validation_ratio', 0.1))

        train_data = df.iloc[:train_size]
        val_data = df.iloc[train_size-overlap_days:train_size+val_size]
        test_data = df.iloc[train_size+val_size-overlap_days:]

        # Log des statistiques
        logger.info(f"""
        Division des données (avec chevauchement de {overlap_days} jours):
        Train: {len(train_data)} observations ({train_data.index[0]} à {train_data.index[-1]})
        Val  : {len(val_data)} observations ({val_data.index[0]} à {val_data.index[-1]})
        Test : {len(test_data)} observations ({test_data.index[0]} à {test_data.index[-1]})

        Statistiques des returns cibles ({target}):
        Train: mean={train_data[target].mean():.2f}%, std={train_data[target].std():.2f}%
        Val  : mean={val_data[target].mean():.2f}%, std={val_data[target].std():.2f}%
        Test : mean={test_data[target].mean():.2f}%, std={test_data[target].std():.2f}%
        """)

        return train_data, val_data, test_data, features, target

    except Exception as e:
        logger.error(f"Erreur lors de la préparation des données: {e}")
        raise


# Pipeline complet
# 1. Télécharger les données
#df = download_and_prepare_data(config)

# 2. Ajouter les indicateurs techniques
#df = add_technical_indicators(df, config)

# 3. Ajouter les features de marché
#df = add_market_features(df)
#df = add_extended_market_features(df)

# 4. Configurer le scaling de la cible
config = prepare_target_scaling(config)

# 5. Transformer la cible en returns
df = transform_target_to_returns(df, config)

# 6. Préparer les features et diviser les données
train_data, val_data, test_data, features, target = prepare_features_and_split(df, config)

# Afficher les informations
print("\nFeatures sélectionnées :")
print(features)

print(f"\nVariable cible : {target}")

print("\nDimensions des ensembles :")
print(f"Train : {train_data.shape}")
print(f"Validation : {val_data.shape}")
print(f"Test : {test_data.shape}")

  df = df.fillna(method='ffill').fillna(method='bfill')



Features sélectionnées :
['Normalized_Range_5d', 'Normalized_Range_10d', 'Normalized_Range_20d', 'Volume_Price_Impact', 'Volume_Distribution', 'Price_Acceleration', 'Volume_Acceleration', 'Price_Volume_Trend', 'MA_Distance_10d', 'Support_Distance_10d', 'Resistance_Distance_10d', 'MA_Distance_20d', 'Support_Distance_20d', 'Resistance_Distance_20d', 'MA_Distance_50d', 'Support_Distance_50d', 'Resistance_Distance_50d', 'Return_1d', 'Log_Return_1d', 'Return_3d', 'Log_Return_3d', 'Return_7d', 'Log_Return_7d', 'Return_14d', 'Log_Return_14d', 'Return_30d', 'Log_Return_30d', 'Volume_Price_Ratio', 'Price_ROC_7d', 'Volume_ROC_7d', 'Price_ROC_14d', 'Volume_ROC_14d', 'Price_ROC_30d', 'Volume_ROC_30d', 'Volatility_7d', 'Normalized_TR_7d', 'Volatility_14d', 'Normalized_TR_14d', 'Volatility_30d', 'Normalized_TR_30d', 'Price_Position_30d', 'Price_Position_90d', 'Volatility_Change_5d', 'Volume_Change_5d', 'Volatility_Change_10d', 'Volume_Change_10d', 'Volatility_Change_20d', 'Volume_Change_20d', 'Volu

  df = df.fillna(method='ffill').fillna(method='bfill')


In [273]:
# -----------------------------------------------
# Bloc : Normalisation des données
# -----------------------------------------------
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
import pandas as pd
import numpy as np
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def normalize_data(train_data: pd.DataFrame,
                  val_data: pd.DataFrame,
                  test_data: pd.DataFrame,
                  features: list,
                  config: dict,
                  target: str = 'Close') -> tuple:
    """
    Normalise les données financières en évitant la fuite de données.

    Args:
        train_data: Données d'entraînement
        val_data: Données de validation
        test_data: Données de test
        features: Liste des features à utiliser
        config: Configuration de la normalisation
        target: Variable cible

    Returns:
        tuple: (train_norm, val_norm, test_norm, scalers)
    """
    def clean_outliers(df: pd.DataFrame, cols: list, threshold: float, train_stats: dict = None) -> pd.DataFrame:
        """Nettoie les valeurs aberrantes des données numériques."""
        df_clean = df.copy()
        for col in cols:
            if df_clean[col].dtype in ['float64', 'int64']:
                if train_stats is None:
                    mean, std = df_clean[col].mean(), df_clean[col].std()
                else:
                    mean, std = train_stats[col]
                df_clean[col] = df_clean[col].clip(
                    lower=mean - threshold * std,
                    upper=mean + threshold * std
                )
        return df_clean.replace([np.inf, -np.inf], np.nan).ffill().bfill()

    try:
        # 1. Configuration et validation initiale
        norm_config = config['preprocessing']['normalization']
        normalize_target = norm_config.get('normalize_target', False)
        exclude_features = set(norm_config.get('exclude_features', []))
        logger.info(f"Features exclues de la normalisation: {exclude_features}")
        logger.info(f"Normalisation de la cible {'activée' if normalize_target else 'désactivée'}")

        # 2. Identification des features à normaliser
        features_to_normalize = [
            f for f in features
            if (f not in exclude_features and
                train_data[f].dtype in ['float64', 'int64'])
        ]
        logger.info(f"Features à normaliser: {features_to_normalize}")

        # 3. Préparation des données
        datasets = {
            'train': train_data.copy(),
            'val': val_data.copy(),
            'test': test_data.copy()
        }

        # 4. Nettoyage des outliers si activé
        if norm_config.get('clip', {}).get('enabled', False):
            threshold = norm_config['clip'].get('threshold', 3)
            logger.info(f"Nettoyage des outliers activé (seuil: {threshold})")

            # Calcul des statistiques sur le train set
            train_stats = {
                col: (datasets['train'][col].mean(), datasets['train'][col].std())
                for col in features_to_normalize
            }

            # Application du nettoyage
            datasets['train'] = clean_outliers(datasets['train'], features_to_normalize, threshold)
            datasets['val'] = clean_outliers(datasets['val'], features_to_normalize, threshold, train_stats)
            datasets['test'] = clean_outliers(datasets['test'], features_to_normalize, threshold, train_stats)
        else:
            logger.info("Nettoyage des outliers désactivé")

        # 5. Sélection et validation du scaler
        scaler_map = {
            'standard': StandardScaler,
            'robust': RobustScaler,
            'minmax': MinMaxScaler
        }
        scaler_type = norm_config.get('type', 'standard').lower()
        ScalerClass = scaler_map.get(scaler_type)
        if not ScalerClass:
            raise ValueError(f"Type de normalisation invalide: {scaler_type}")
        logger.info(f"Utilisation du scaler: {scaler_type}")

        # 6. Normalisation
        scalers = {}
        # Correction de l'expression ternaire
        features_and_target = features_to_normalize + [target] if normalize_target else features_to_normalize
        logger.info(f"Features and target pour la normalisation: {features_and_target}")

        # Exclure manuellement la cible si nécessaire
        if not normalize_target:
            exclude_features.add(target)
            logger.info(f"Exclusion manuelle de la cible {target} de la normalisation.")

        normalized_datasets = {k: v.copy() for k, v in datasets.items()}

        for feature in features_and_target:
            if feature not in exclude_features:
                # Création et fit du scaler sur le train set
                scaler = ScalerClass()
                train_values = datasets['train'][feature].values.reshape(-1, 1)
                scaler.fit(train_values)
                scalers[feature] = scaler

                # Application de la transformation sur chaque dataset
                for dataset_type in ['train', 'val', 'test']:
                    values = datasets[dataset_type][feature].values.reshape(-1, 1)
                    normalized_datasets[dataset_type][feature] = scaler.transform(values).flatten()

                # Vérification des statistiques après normalisation
                for dataset_type in ['train', 'val', 'test']:
                    data = normalized_datasets[dataset_type][feature]
                    logger.info(f"{dataset_type} {feature} - Mean: {data.mean():.3f}, Std: {data.std():.3f}")

        # 7. Vérification finale de la qualité des données
        for dataset_type, dataset in normalized_datasets.items():
            # Vérification des valeurs manquantes et infinies
            has_nan = dataset[features_and_target].isna().any().any()
            has_inf = np.isinf(dataset[features_and_target].values).any()

            if has_nan or has_inf:
                raise ValueError(f"Valeurs invalides détectées dans {dataset_type}")

            # Vérification des limites des valeurs normalisées
            if norm_config['type'] == 'minmax':
                if (dataset[features_and_target].values < 0).any() or (dataset[features_and_target].values > 1).any():
                    raise ValueError(f"Valeurs hors limites [0,1] détectées dans {dataset_type}")

        logger.info("Normalisation terminée avec succès")
        return (normalized_datasets['train'],
                normalized_datasets['val'],
                normalized_datasets['test'],
                scalers)

    except Exception as e:
        logger.error(f"Erreur lors de la normalisation: {e}")
        raise

# Pipeline complet
# 1. Télécharger et préparer les données
#df = download_and_prepare_data(config)
#df = add_technical_indicators(df, config)
#df = add_market_features(df)
#df = add_extended_market_features(df)

# 2. Préparer les features et diviser les données
#train_data, val_data, test_data, features, target = prepare_features_and_split(df, config)

# 3. Normaliser les données
train_normalized, val_normalized, test_normalized, scalers = normalize_data(
    train_data=train_data,
    val_data=val_data,
    test_data=test_data,
    features=features,
    config=config,
    target=target
)

# Afficher les résultats
print("\nAperçu des données normalisées :")
print("\nTrain :")
print(train_normalized[features].describe())

print("\nValidation :")
print(val_normalized[features].describe())

print("\nTest :")
print(test_normalized[features].describe())


Aperçu des données normalisées :

Train :
       Normalized_Range_5d  Normalized_Range_10d  Normalized_Range_20d  \
count         3.020000e+03          3.020000e+03          3.020000e+03   
mean         -8.470046e-17         -9.411162e-18          5.646697e-17   
std           1.000166e+00          1.000166e+00          1.000166e+00   
min          -1.359455e+00         -1.424767e+00         -1.540005e+00   
25%          -6.756420e-01         -7.155990e-01         -7.248465e-01   
50%          -2.682604e-01         -2.186006e-01         -2.202685e-01   
75%           4.652737e-01          4.282251e-01          4.833896e-01   
max           7.627660e+00          5.350257e+00          5.195962e+00   

       Volume_Price_Impact  Volume_Distribution  Price_Acceleration  \
count         3.020000e+03         3.020000e+03        3.020000e+03   
mean         -3.764465e-17        -2.070456e-16        1.260949e-17   
std           1.000166e+00         1.000166e+00        1.000166e+00   
min   

In [275]:
# -----------------------------------------------
# Bloc : Utilisation du Pipeline Complet (Mis à Jour)
# -----------------------------------------------

import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def run_pipeline(config: Dict[str, Any]) -> tuple:
    """
    Exécute le pipeline complet de préparation des données.
    """
    print("\n" + "="*50)
    print("DÉBUT DU PIPELINE DE PRÉPARATION DES DONNÉES")
    print("="*50)

    # 1. Validation de la configuration
    print("\n1. VALIDATION DE LA CONFIGURATION")
    print("-"*30)
    required_keys = ['preprocessing', 'data', 'technical_indicators', 'feature_engineering']
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        raise ValueError(f"Configuration invalide. Clés manquantes: {missing_keys}")
    print(f"Configuration validée - Symbole: {config['data']['symbol']}")

    # 2. Téléchargement et préparation
    print("\n2. TÉLÉCHARGEMENT DES DONNÉES")
    print("-"*30)
    raw_data = download_and_prepare_data(config)
    print(f"Données brutes téléchargées:")
    print(f"- Shape: {raw_data.shape}")
    print(f"- Période: {raw_data.index[0]} à {raw_data.index[-1]}")
    print(f"- Colonnes: {', '.join(raw_data.columns)}")

    # 3. Ajout des indicateurs techniques
    print("\n3. CALCUL DES INDICATEURS TECHNIQUES")
    print("-"*30)
    processed_data = add_technical_indicators(raw_data, config)
    new_indicators = set(processed_data.columns) - set(raw_data.columns)
    print(f"Indicateurs ajoutés ({len(new_indicators)}):")
    print(f"- {', '.join(sorted(new_indicators))}")
    print(f"Lignes avant/après calcul des indicateurs: {len(raw_data)} → {len(processed_data)}")
    print(f"Lignes retirées: {len(raw_data) - len(processed_data)} (période d'initialisation des indicateurs)")

    # 4. Préparation des features et division
    print("\n4. PRÉPARATION ET DIVISION DES DONNÉES")
    print("-"*30)
    train_data, val_data, test_data, features, target = prepare_features_and_split(
        processed_data,
        config
    )
    print(f"Target sélectionnée: {target}")
    print(f"Nombre de features: {len(features)}")
    print(f"Distribution des données:")
    print(f"- Train: {len(train_data)} ({len(train_data)/len(processed_data)*100:.1f}%)")
    print(f"- Validation: {len(val_data)} ({len(val_data)/len(processed_data)*100:.1f}%)")
    print(f"- Test: {len(test_data)} ({len(test_data)/len(processed_data)*100:.1f}%)")

    # 5. Normalisation
    print("\n5. NORMALISATION DES DONNÉES")
    print("-"*30)
    logger.info("Normalisation des données...")
    train_norm, val_norm, test_norm, scalers = normalize_data(
        train_data,
        val_data,
        test_data,
        features,
        config,
        target='Close'
    )
    print("Statistiques après normalisation (ensemble d'entraînement):")
    print(train_norm[features[:5]].describe().round(3))
    print("\nStatistiques de la variable cible (Close) après normalisation:")
    print("-" * 30)
    print(f"Train 'Close'    - Mean: {train_norm['Close'].mean():.3f}, Std: {train_norm['Close'].std():.3f}")
    print(f"Validation 'Close' - Mean: {val_norm['Close'].mean():.3f}, Std: {val_norm['Close'].std():.3f}")
    print(f"Test 'Close'      - Mean: {test_norm['Close'].mean():.3f}, Std: {test_norm['Close'].std():.3f}")

    # Vérification de la configuration de la normalisation
    normalize_target = config.get('preprocessing', {}).get('normalization', {}).get('normalize_target', False)
    logger.info(f"Normalisation de la cible {'activée' if normalize_target else 'désactivée'}")

    # Vérifier que 'Close' n'est pas normalisé
    if normalize_target:
        logger.warning("La normalisation de la cible est activée, ce qui peut causer des incohérences.")
    else:
        logger.info("'Close' est correctement exclu de la normalisation.")

    print("\n" + "="*50)
    print("PIPELINE TERMINÉ AVEC SUCCÈS")
    print("="*50)
    print(f"""
Résumé final:
- Nombre total d'observations: {len(processed_data)}
- Nombre de features: {len(features)}
- Features principales: {', '.join(features[:5])}...
- Période couverte: {processed_data.index[0]} à {processed_data.index[-1]}
- Train: {len(train_norm)} observations
- Validation: {len(val_norm)} observations
- Test: {len(test_norm)} observations
""")

    # Final Verification: Assurer que 'Close' n'est pas normalisé
    if normalize_target:
        # Si normalize_target est True, 'Close' est normalisé
        if 'Close' in scalers:
            logger.error("'Close' a été normalisé malgré la désactivation.")
        else:
            logger.info("'Close' n'a pas été normalisé comme prévu.")
    else:
        # Si normalize_target est False, 'Close' ne doit pas être normalisé
        if 'Close' in scalers:
            logger.error("'Close' a été normalisé alors que cela ne devrait pas être le cas.")
        else:
            logger.info("'Close' n'a pas été normalisé comme prévu.")

    return train_norm, val_norm, test_norm, features, target, scalers

if __name__ == "__main__":
    try:
        # Initialisation de la configuration WandB
        #base_config = init_wandb_config(stock_symbol='BTC-USD', mode='disabled')
        # S'assurer que 'normalize_target' est bien désactivé
        base_config['preprocessing']['normalization']['normalize_target'] = False

        # Exécution du pipeline
        train_data, val_data, test_data, features, target, scalers = run_pipeline(base_config)

    except Exception as e:
        print("\n" + "!"*50)
        print("ERREUR DANS LE PIPELINE")
        print("!"*50)
        logger.error(f"Nature de l'erreur: {str(e)}")
        raise



DÉBUT DU PIPELINE DE PRÉPARATION DES DONNÉES

1. VALIDATION DE LA CONFIGURATION
------------------------------
Configuration validée - Symbole: BTC-USD

2. TÉLÉCHARGEMENT DES DONNÉES
------------------------------


[*********************100%***********************]  1 of 1 completed


Données brutes téléchargées:
- Shape: (3776, 9)
- Période: 2014-09-17 00:00:00 à 2025-01-17 00:00:00
- Colonnes: Close, High, Low, Open, Volume, Year, Month, Day, Day_of_Week

3. CALCUL DES INDICATEURS TECHNIQUES
------------------------------
Indicateurs ajoutés (26):
- ADX, ATR, Bollinger_Lower, Bollinger_Middle, Bollinger_Upper, CCI, CMF, EMA_10, Ichimoku_A, Ichimoku_B, Ichimoku_Base_Line, Ichimoku_Conversion_Line, MACD, MACD_Diff, MACD_Signal, ROC, RSI, SMA_10, SMA_30, Stochastic_%D, Stochastic_%K, TRIX, VWAP, Volume_Change, Volume_MA_10, Williams_%R
Lignes avant/après calcul des indicateurs: 3776 → 3733
Lignes retirées: 43 (période d'initialisation des indicateurs)

4. PRÉPARATION ET DIVISION DES DONNÉES
------------------------------


  df = df.fillna(method='ffill').fillna(method='bfill')
  df = df.fillna(method='ffill').fillna(method='bfill')


Target sélectionnée: Target_Log_Return_30d
Nombre de features: 114
Distribution des données:
- Train: 2986 (80.0%)
- Validation: 403 (10.8%)
- Test: 404 (10.8%)

5. NORMALISATION DES DONNÉES
------------------------------
Statistiques après normalisation (ensemble d'entraînement):
       Volume_MA_10  Volume_Change  Return_1d  Log_Return_1d  Return_3d
count      2986.000       2986.000   2986.000       2986.000   2986.000
mean          0.000         -0.000     -0.000          0.000     -0.000
std           1.000          1.000      1.000          1.000      1.000
min          -0.890         -2.526     -9.748        -12.051     -5.738
25%          -0.884         -0.531     -0.405         -0.384     -0.477
50%          -0.454         -0.170     -0.006          0.013     -0.017
75%           0.681          0.327      0.408          0.420      0.487
max           4.400         14.805      6.532          5.788      8.023

Statistiques de la variable cible (Close) après normalisation:
------

In [255]:
# -----------------------------------------------
# Bloc : Préparation des données pour le TFT
# -----------------------------------------------

import torch
from typing import Tuple, Dict
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import NaNLabelEncoder
import logging

logger = logging.getLogger(__name__)

def prepare_tft_datasets(
    train_data: pd.DataFrame,
    val_data: pd.DataFrame,
    config: Dict
) -> Tuple[TimeSeriesDataSet, torch.utils.data.DataLoader, TimeSeriesDataSet, torch.utils.data.DataLoader]:
    """
    Prépare les datasets et dataloaders pour le TFT (Temporal Fusion Transformer).

    Args:
        train_data: DataFrame d'entraînement
        val_data: DataFrame de validation
        config: Configuration du modèle

    Returns:
        training: Dataset d'entraînement
        train_dataloader: DataLoader d'entraînement
        validation: Dataset de validation
        val_dataloader: DataLoader de validation
    """
    try:
        # 1. Préparation des indices temporels et identifiants
        for df in [train_data, val_data]:
            df['time_idx'] = range(len(df))
            df['series_id'] = config['data'].get('symbol', 'BTC-USD')
            if config['feature_engineering']['temporal_features'].get('use_day_of_week', True):
                df['Day_of_Week'] = df['Day_of_Week'].astype(str)

        # 2. Configuration des features
        excluded_cols = ['Year', 'Month', 'Day', 'series_id', 'time_idx']
        categorical_cols = ['Day_of_Week'] if config['feature_engineering']['temporal_features'].get('use_day_of_week', True) else []

        # Sélection des features numériques (tout sauf les colonnes exclues et catégorielles)
        numeric_features = [col for col in train_data.columns
                          if col not in excluded_cols + categorical_cols + ['Target_Return_30d']]

        # 3. Paramètres du TimeSeriesDataSet
        ts_params = config['pytorch_forecasting']['time_series_params']
        training = TimeSeriesDataSet(
            train_data,
            time_idx="time_idx",
            target="Target_Return_30d",  # Notre nouvelle cible
            group_ids=["series_id"],
            max_encoder_length=ts_params['max_encoder_length'],
            min_encoder_length=ts_params['min_encoder_length'],
            max_prediction_length=ts_params['max_prediction_length'],
            min_prediction_length=ts_params['min_prediction_length'],
            static_categoricals=[],
            time_varying_known_categoricals=categorical_cols,
            time_varying_known_reals=[],
            time_varying_unknown_reals=numeric_features,
            target_normalizer=None,  # Déjà normalisé
            categorical_encoders={col: NaNLabelEncoder() for col in categorical_cols}
        )

        # 4. Création des dataloaders
        batch_size = config['pytorch_forecasting']['training_params']['batch_size']['value']
        train_dataloader = training.to_dataloader(
            train=True,
            batch_size=batch_size,
            num_workers=0,
            shuffle=True
        )

        validation = TimeSeriesDataSet.from_dataset(training, val_data, predict=True)
        val_dataloader = validation.to_dataloader(
            train=False,
            batch_size=batch_size * 2,
            num_workers=0,
            shuffle=False
        )

        # 5. Logging des informations importantes
        logger.info(f"""
        Configuration TFT:
        - Features numériques: {len(numeric_features)}
        - Features catégorielles: {categorical_cols}
        - Longueur d'encodage: {ts_params['max_encoder_length']}
        - Horizon de prédiction: {ts_params['max_prediction_length']}
        - Batch size: {batch_size}
        """)

        return training, train_dataloader, validation, val_dataloader

    except Exception as e:
        logger.error(f"Erreur dans la préparation des données TFT: {str(e)}")
        raise

# Configuration du logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

try:
    # 1. Initialisation de la configuration WandB
    config = init_wandb_config(stock_symbol='BTC-USD', mode='disabled')

    # 2. Préparation des datasets
    training, train_dataloader, validation, val_dataloader = prepare_tft_datasets(
        train_data=train_data,  # De l'étape précédente
        val_data=val_data,      # De l'étape précédente
        config=config
    )

    # 3. Vérification rapide des dataloaders
    batch = next(iter(train_dataloader))
    x, y = batch
    print("\nDimensions d'un batch d'entraînement:")
    print(f"Input dimensions: {x['encoder_cont'].shape}")
    print(f"Target dimensions: {y[0].shape}")

except Exception as e:
    logger.error(f"Erreur lors de la préparation des données: {e}")

Initialisation de la configuration WandB...
Configuration WandB initialisée avec succès.





Dimensions d'un batch d'entraînement:
Input dimensions: torch.Size([32, 180, 161])
Target dimensions: torch.Size([32, 30])


In [259]:
import torch
import logging
from typing import Tuple, Dict, Optional

logger = logging.getLogger(__name__)

def verify_dataloaders(train_dataloader, val_dataloader) -> None:
    """
    Vérifie la structure et les statistiques des dataloaders TFT.

    Args:
        train_dataloader: DataLoader d'entraînement
        val_dataloader: DataLoader de validation
    """
    def get_tensor_stats(tensor: torch.Tensor) -> Optional[Tuple[float, float]]:
        """Calcule moyenne et écart-type d'un tensor."""
        try:
            if not isinstance(tensor, torch.Tensor) or tensor.numel() == 0:
                return None
            return tensor.float().mean().item(), tensor.float().std().item()
        except Exception:
            return None

    def analyze_batch(x: Dict[str, torch.Tensor], y: torch.Tensor, name: str) -> None:
        """Analyse détaillée d'un batch."""
        print(f"\n{name} Dataloader:")

        # Features continues (encoder et decoder)
        for key in ['encoder_cont', 'decoder_cont']:
            if key in x and x[key] is not None:
                stats = get_tensor_stats(x[key])
                if stats:
                    mean, std = stats
                    print(f"  {key:12s}: Shape={x[key].shape}, Mean={mean:.3f}, Std={std:.3f}")

        # Features catégorielles (encoder et decoder)
        for key in ['encoder_cat', 'decoder_cat']:
            if key in x and x[key] is not None:
                stats = get_tensor_stats(x[key])
                if stats:
                    mean, std = stats
                    print(f"  {key:12s}: Shape={x[key].shape}, Mean={mean:.3f}, Std={std:.3f}")

        # Target
        y_tensor = y[0] if isinstance(y, (tuple, list)) else y
        stats = get_tensor_stats(y_tensor)
        if stats:
            mean, std = stats
            print(f"  target      : Shape={y_tensor.shape}, Mean={mean:.3f}, Std={std:.3f}")

        # Informations récapitulatives
        if 'encoder_cont' in x:
            print("\n  Récapitulatif:")
            print(f"  - Batch Size      : {x['encoder_cont'].shape[0]}")
            print(f"  - Sequence Length : {x['encoder_cont'].shape[1]}")
            print(f"  - Nb Features     : {x['encoder_cont'].shape[2]}")

    # Vérification des dataloaders
    for name, loader in [("Train", train_dataloader), ("Validation", val_dataloader)]:
        try:
            batch = next(iter(loader))
            x, y = batch
            analyze_batch(x, y, name)
        except StopIteration:
            logger.warning(f"Dataloader vide pour {name}")
        except Exception as e:
            logger.error(f"Erreur lors de l'analyse du {name} dataloader: {e}")

if __name__ == "__main__":
    # Configuration du logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )

    try:
        verify_dataloaders(train_dataloader, val_dataloader)
    except Exception as e:
        logger.error(f"Erreur lors de la vérification des dataloaders: {e}")

# Configuration du logging si pas déjà fait
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Vérification des dataloaders
verify_dataloaders(train_dataloader, val_dataloader)



Train Dataloader:
  encoder_cont: Shape=torch.Size([32, 180, 161]), Mean=-0.027, Std=0.941
  decoder_cont: Shape=torch.Size([32, 30, 161]), Mean=-0.048, Std=1.048
  encoder_cat : Shape=torch.Size([32, 180, 1]), Mean=2.938, Std=2.022
  decoder_cat : Shape=torch.Size([32, 30, 1]), Mean=2.997, Std=2.021
  target      : Shape=torch.Size([32, 30]), Mean=1.961, Std=23.380

  Récapitulatif:
  - Batch Size      : 32
  - Sequence Length : 180
  - Nb Features     : 161

Validation Dataloader:
  encoder_cont: Shape=torch.Size([1, 180, 161]), Mean=0.058, Std=0.762
  decoder_cont: Shape=torch.Size([1, 30, 161]), Mean=0.196, Std=0.752
  encoder_cat : Shape=torch.Size([1, 180, 1]), Mean=3.028, Std=1.998
  decoder_cat : Shape=torch.Size([1, 30, 1]), Mean=2.833, Std=2.069
  target      : Shape=torch.Size([1, 30]), Mean=11.710, Std=6.113

  Récapitulatif:
  - Batch Size      : 1
  - Sequence Length : 180
  - Nb Features     : 161

Train Dataloader:
  encoder_cont: Shape=torch.Size([32, 180, 161]), Mean

In [264]:
# -----------------------------------------------
# Bloc : Configuration et Entraînement du Modèle TFT
# -----------------------------------------------

import pytorch_lightning as pl
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
import torch
from datetime import datetime
import logging
from typing import Tuple, Dict, Optional

logger = logging.getLogger(__name__)

class TFTModel(pl.LightningModule):
    """Modèle TFT avec Lightning."""

    def __init__(self, tft_model: TemporalFusionTransformer):
        """
        Args:
            tft_model: Instance de TemporalFusionTransformer
        """
        super().__init__()
        self.model = tft_model
        self.save_hyperparameters(ignore=['tft_model'])
        self.loss = QuantileLoss()

    def forward(self, x: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Forward pass avec gestion des types de tenseurs."""
        # Conversion des tenseurs
        x = {
            k: v.long() if k.endswith('_cat') else v.to(self.device)
            for k, v in x.items()
            if torch.is_tensor(v)
        }
        return self.model(x)

    def training_step(self, batch: Tuple[Dict, torch.Tensor], batch_idx: int) -> torch.Tensor:
        """Étape d'entraînement."""
        x, y = batch
        y = y[0] if isinstance(y, tuple) else y
        loss = self.loss(self(x).prediction, y)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch: Tuple[Dict, torch.Tensor], batch_idx: int) -> torch.Tensor:
        """Étape de validation."""
        x, y = batch
        y = y[0] if isinstance(y, tuple) else y
        loss = self.loss(self(x).prediction, y)
        self.log("val_loss", loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def configure_optimizers(self) -> Dict:
        """Configuration de l'optimiseur et du scheduler."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.model.hparams.learning_rate)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode="min",
            factor=0.1,
            patience=self.model.hparams.reduce_on_plateau_patience,
            verbose=True,
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": scheduler, "monitor": "val_loss", "frequency": 1},
        }

def setup_trainer(config: Dict) -> Tuple[pl.Trainer, pl.loggers.WandbLogger]:
    """
    Configure le trainer et le logger.

    Args:
        config: Configuration du modèle

    Returns:
        trainer: Instance de Trainer configurée
        logger: Instance de WandbLogger
    """
    model_config = config['model']

    # Callbacks
    callbacks = [
        EarlyStopping(
            monitor="val_loss",
            patience=model_config['early_stopping']['patience'],
            mode="min"
        ),
        LearningRateMonitor(logging_interval="epoch"),
        ModelCheckpoint(
            monitor="val_loss",
            dirpath="checkpoints",
            filename="tft-{epoch:02d}-{val_loss:.2f}",
            save_top_k=3,
            mode="min"
        )
    ]

    # Logger
    wandb_logger = pl.loggers.WandbLogger(
        project="stock_predictions_TFT",
        name=f"tft_{config['data']['symbol']}_{datetime.now().strftime('%Y%m%d_%H%M')}",
        log_model=True
    )

    # Trainer
    trainer = pl.Trainer(
        max_epochs=model_config['training']['max_epochs']['value'],
        accelerator='gpu' if torch.cuda.is_available() else 'cpu',
        devices=1,
        gradient_clip_val=model_config['training']['gradient_clip_val'],
        callbacks=callbacks,
        logger=wandb_logger,
        precision=32
    )

    return trainer, wandb_logger

def train_model(
    training_dataset: TimeSeriesDataSet,
    train_dataloader: torch.utils.data.DataLoader,
    val_dataloader: torch.utils.data.DataLoader,
    config: Dict
) -> Tuple[TFTModel, pl.Trainer]:
    """
    Entraîne le modèle TFT.

    Args:
        training_dataset: Dataset d'entraînement
        train_dataloader: DataLoader d'entraînement
        val_dataloader: DataLoader de validation
        config: Configuration du modèle

    Returns:
        model: Modèle entraîné
        trainer: Trainer utilisé
    """
    try:
        # Configuration du trainer et du logger
        trainer, logger = setup_trainer(config)

        # Création du modèle TFT
        architecture = config['model']['architecture']
        tft = TemporalFusionTransformer.from_dataset(
            training_dataset,
            learning_rate=config['model']['training']['learning_rate']['value'],
            hidden_size=architecture['hidden_size']['value'],
            attention_head_size=architecture['attention_head_size']['value'],
            dropout=architecture['dropout']['value'],
            hidden_continuous_size=architecture['hidden_continuous_size']['value'],
            loss=QuantileLoss(),
            log_interval=architecture['log_interval']
        )

        # Création du modèle Lightning
        model = TFTModel(tft)

        # Log des hyperparamètres
        logger.log_hyperparams(architecture)

        # Entraînement
        trainer.fit(model, train_dataloader, val_dataloader)

        return model, trainer

    except Exception as e:
        logger.error(f"Erreur pendant l'entraînement: {str(e)}")
        raise

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    try:
        # Initialisation de la configuration
        config = init_wandb_config(stock_symbol='BTC-USD', mode='disabled')

        # Entraînement
        model, trainer = train_model(training_dataset, train_dataloader, val_dataloader, config)
        logger.info("Entraînement terminé avec succès")

    except Exception as e:
        logger.error(f"Erreur: {str(e)}")


model, trainer, test_data = run_complete_pipeline(symbol='BTC-USD')

ERROR:__main__:Erreur: name 'training_dataset' is not defined
[*********************100%***********************]  1 of 1 completed

Initialisation de la configuration WandB...
Configuration WandB initialisée avec succès.
Initialisation de la configuration WandB...
Configuration WandB initialisée avec succès.



  df = df.fillna(method='ffill').fillna(method='bfill')
  df = df.fillna(method='ffill').fillna(method='bfill')



Train Dataloader:
  encoder_cont: Shape=torch.Size([32, 180, 161]), Mean=0.009, Std=0.963
  decoder_cont: Shape=torch.Size([32, 30, 161]), Mean=0.027, Std=0.970
  encoder_cat : Shape=torch.Size([32, 180, 1]), Mean=2.977, Std=2.011
  decoder_cat : Shape=torch.Size([32, 30, 1]), Mean=2.983, Std=1.997
  target      : Shape=torch.Size([32, 30]), Mean=8.981, Std=23.582

  Récapitulatif:
  - Batch Size      : 32
  - Sequence Length : 180
  - Nb Features     : 161

Validation Dataloader:
  encoder_cont: Shape=torch.Size([1, 180, 161]), Mean=0.058, Std=0.762
  decoder_cont: Shape=torch.Size([1, 30, 161]), Mean=0.196, Std=0.752
  encoder_cat : Shape=torch.Size([1, 180, 1]), Mean=3.028, Std=1.998
  decoder_cat : Shape=torch.Size([1, 30, 1]), Mean=2.833, Std=2.069
  target      : Shape=torch.Size([1, 30]), Mean=11.710, Std=6.113

  Récapitulatif:
  - Batch Size      : 1
  - Sequence Length : 180
  - Nb Features     : 161


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.11/dist-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/usr/local/lib/python3.11/dist-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instance

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:
Detected KeyboardInterrupt, attempting graceful shutdown ...
ERROR:__main__:Erreur dans le pipeline: 'WandbLogger' object has no attribute 'error'


AttributeError: 'WandbLogger' object has no attribute 'error'

In [None]:
# -----------------------------------------------
# Bloc : Evaluation du modèle et Prédictions
# -----------------------------------------------

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import traceback
import wandb
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from pytorch_forecasting import TimeSeriesDataSet

def evaluate_and_predict(model, training_dataset, test_data, config):
    """
    Évalue le modèle et génère des prédictions avec logging WandB.
    """
    # 0. Configuration initiale
    eval_config = config['evaluation']
    device = next(model.parameters()).device
    print(f"Évaluation sur device: {device}")

    # Accéder au normalizer
    target_normalizer = training_dataset.target_normalizer

    def validate_predictions(predictions, name="predictions"):
        """Valide la qualité des prédictions."""
        issues = []
        if np.isnan(predictions).any():
            issues.append("NaN détectés")
        if np.isinf(predictions).any():
            issues.append("Inf détectés")
        if len(issues) > 0:
            print(f"Warning - {name}: {', '.join(issues)}")
        return len(issues) == 0

    def calculate_rolling_metrics(actual, predicted, window=30):
        """Calcule les métriques sur une fenêtre glissante."""
        rolling_metrics = {
            'mae': [],
            'mape': [],
            'r2': []
        }
        for i in range(len(actual) - window + 1):
            slice_actual = actual[i:i+window]
            slice_pred = predicted[i:i+window]
            rolling_metrics['mae'].append(
                mean_absolute_error(slice_actual, slice_pred))
            rolling_metrics['mape'].append(
                np.mean(np.abs((slice_actual - slice_pred) / slice_actual)) * 100)
            rolling_metrics['r2'].append(
                r2_score(slice_actual, slice_pred))
        return rolling_metrics

    # 1. Préparation des données
    test_data = test_data.reset_index()
    test_data['time_idx'] = range(len(test_data))
    test_data['series_id'] = 'ACCOR'
    test_data['Day_of_Week'] = test_data['Day_of_Week'].astype(str)

    # 2. Création du dataset et dataloader
    predict_dataset = TimeSeriesDataSet.from_dataset(
        training_dataset,
        test_data,
        stop_randomization=True,
        predict=True
    )

    predict_dataloader = predict_dataset.to_dataloader(
        train=False,
        batch_size=eval_config['prediction']['batch_size'],
        num_workers=eval_config['prediction']['num_workers'],
        shuffle=False
    )

    # 3. Génération des prédictions
    model.eval()
    all_predictions = []
    all_dates = []
    quantiles_predictions = []  # Pour les intervalles de confiance

    with torch.no_grad():
        for batch_idx, (batch, _) in enumerate(predict_dataloader):
            # Déplacer les données sur le bon device
            batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v
                    for k, v in batch.items()}

            outputs = model.model(batch)  # Accès correct à TFT model

            # Vérifier la forme de outputs.prediction
            # Supposons que outputs.prediction a la forme [batch, prediction_length, num_quantiles]
            pred_mean = outputs.prediction.mean(dim=-1)
            all_predictions.append(pred_mean)

            # Extraire les quantiles spécifiques, par exemple 0.1 et 0.9
            pred_lower = outputs.prediction[..., 0]  # Premier quantile
            pred_upper = outputs.prediction[..., -1]  # Dernier quantile
            quantiles_predictions.extend(zip(pred_lower.flatten(), pred_upper.flatten()))

            # Extraire les dates
            dates = batch.get('decoder_time_idx', None)
            if dates is not None:
                dates = dates.cpu().numpy()
                all_dates.extend(dates.flatten())
            else:
                # Si 'decoder_time_idx' n'est pas disponible, utilisez 'time_idx'
                dates = batch.get('time_idx', None)
                if dates is not None:
                    dates = dates.cpu().numpy()
                    all_dates.extend(dates.flatten())

            # Log de la progression
            wandb.log({
                "prediction_progress": batch_idx / len(predict_dataloader)
            })

    # Concatenation des prédictions
    all_predictions = torch.cat(all_predictions).to(device)
    # Inverse transformation des prédictions
    all_predictions = target_normalizer.inverse_transform({"prediction": all_predictions})["prediction"].cpu().numpy()

    # 4. Création du DataFrame des prédictions
    predictions = pd.DataFrame(
        all_predictions,
        index=all_dates,
        columns=['Predicted_Close']
    )

    # Ajout des intervalles de confiance si disponibles
    if len(quantiles_predictions) > 0:
        quantiles = torch.tensor(quantiles_predictions, dtype=torch.float32).to(device)
        quantiles = target_normalizer.inverse_transform({"prediction": quantiles})["prediction"].cpu().numpy()
        predictions['Lower_bound'] = quantiles[:, 0]
        predictions['Upper_bound'] = quantiles[:, 1]

    predictions = predictions.loc[~predictions.index.duplicated(keep='first')]

    # Inverse transformation des valeurs réelles
    actual_prices_tensor = torch.from_numpy(test_data.loc[predictions.index, 'Close'].values.reshape(-1, 1)).to(device)
    actual_prices = target_normalizer.inverse_transform({"prediction": actual_prices_tensor})["prediction"].cpu().numpy().flatten()

    # Validation des prédictions
    if not validate_predictions(predictions['Predicted_Close'].values):
        print("Attention : Les prédictions contiennent des valeurs invalides.")

    # 5. Calcul des métriques
    metrics = {
        'mae': mean_absolute_error(actual_prices, predictions['Predicted_Close']),
        'mse': mean_squared_error(actual_prices, predictions['Predicted_Close']),
        'rmse': np.sqrt(mean_squared_error(actual_prices, predictions['Predicted_Close'])),
        'r2': r2_score(actual_prices, predictions['Predicted_Close']),
        'mape': np.mean(np.abs((actual_prices - predictions['Predicted_Close']) / actual_prices)) * 100,
    }

    # Calcul du MASE
    naive_forecast = actual_prices[:-1]
    naive_errors = np.abs(np.diff(actual_prices))
    metrics['mase'] = metrics['mae'] / np.mean(naive_errors)

    # Calcul de la précision directionnelle
    direction_actual = np.sign(np.diff(actual_prices))
    direction_pred = np.sign(np.diff(predictions['Predicted_Close'].values))
    directional_accuracy = np.mean(direction_actual == direction_pred) * 100
    metrics['directional_accuracy'] = directional_accuracy

    # 6. Calcul des métriques glissantes
    rolling_metrics = calculate_rolling_metrics(
        actual_prices,
        predictions['Predicted_Close'].values
    )

    # 7. Visualisations
    fig = plt.figure(figsize=(15, 12))
    gs = plt.GridSpec(2, 1, height_ratios=[3, 1])

    # Plot principal
    ax1 = fig.add_subplot(gs[0])
    ax1.plot(test_data.loc[predictions.index, 'Close'].index, actual_prices, label='Prix réels', color='blue')
    ax1.plot(predictions.index, predictions['Predicted_Close'], label='Prédictions', color='orange')

    if 'Lower_bound' in predictions.columns:
        ax1.fill_between(predictions.index,
                         predictions['Lower_bound'],
                         predictions['Upper_bound'],
                         color='orange', alpha=0.2,
                         label='Intervalle de confiance')

    # Affichage des métriques
    metrics_text = (
        f"MAE: {metrics['mae']:.2f}\n"
        f"RMSE: {metrics['rmse']:.2f}\n"
        f"R²: {metrics['r2']:.4f}\n"
        f"MAPE: {metrics['mape']:.2f}%\n"
        f"MASE: {metrics['mase']:.4f}\n"
        f"Dir. Acc: {metrics['directional_accuracy']:.1f}%"
    )

    ax1.text(0.02, 0.98, metrics_text,
            transform=ax1.transAxes,
            bbox=dict(facecolor='white', alpha=0.8),
            verticalalignment='top',
            fontsize=10)

    ax1.set_title('Prédictions vs Prix réels')
    ax1.set_ylabel('Prix de clôture (EUR)')
    ax1.legend()
    ax1.grid(True)

    # Plot des erreurs
    ax2 = fig.add_subplot(gs[1])
    errors = actual_prices - predictions['Predicted_Close'].values
    ax2.plot(predictions.index, errors, color='red', label='Erreur')
    ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
    ax2.set_xlabel('Date')
    ax2.set_ylabel('Erreur')
    ax2.legend()
    ax2.grid(True)

    plt.tight_layout()

    # Loguer le graphique avant de l'afficher ou le fermer
    wandb.log({
        "predictions_plot": wandb.Image(fig),
        "metrics": metrics,
        "rolling_metrics": {
            f"rolling_{k}": v for k, v in rolling_metrics.items()
        }
    })

    # Afficher le graphique dans le notebook
    plt.show()

    # 8. Sauvegarde des prédictions
    predictions_path = os.path.join(wandb.run.dir, "predictions.csv")
    predictions.to_csv(predictions_path)
    wandb.save(predictions_path)

    plt.close()

    # 9. Affichage des résultats
    print("\nMétriques d'évaluation détaillées:")
    for metric_name, value in metrics.items():
        print(f"{metric_name.upper()}: {value:.4f}")

    # 10. Analyse statistique détaillée
    error_stats = pd.DataFrame({
        'Prix_reels': actual_prices,
        'Prix_predits': predictions['Predicted_Close'].values,
        'Erreur_absolue': abs(errors),
        'Erreur_relative': (abs(errors) / actual_prices) * 100
    })

    wandb.log({
        "error_statistics": wandb.Table(dataframe=error_stats.describe().round(2))
    })

    return predictions, metrics, error_stats


# -----------------------------------------------
# Bloc : Utilisation de la Fonction d'Évaluation
# -----------------------------------------------
try:
    # Remplacez 'run.config' par 'config'
    predictions, metrics, error_stats = evaluate_and_predict(
        model,
        training_dataset,
        test_data,
        config  # Utilisez 'config' au lieu de 'run.config'
    )
    print("\nÉvaluation terminée avec succès!")
except Exception as e:
    print(f"Erreur lors de l'évaluation: {e}")
    # Optionnel : Loguer l'erreur dans WandB
    wandb.log({"evaluation_error": str(e)})
    # Afficher la trace complète de l'erreur pour le débogage
    traceback.print_exc()


In [None]:
#Cloture de la session WandB
wandb.finish()