In [None]:
# Make the cloned AlphaBot repository importable from Colab: both the parent
# folder and the repository root are put on the module search path.
import sys

for _candidate in ('/content', '/content/alphabot'):
    # Notebook cells are often re-run; skip entries that are already present
    # so sys.path does not grow with duplicates.
    if _candidate not in sys.path:
        sys.path.append(_candidate)

# 🔄 Suivi de Progression et Reprise Automatique

Cette cellule vérifie l'état d'avancement du notebook et permet de reprendre là où le processus s'est arrêté.

In [None]:
# üîÑ Syst√®me de suivi et reprise automatique
import os
import json
import pickle
from datetime import datetime

# Base folder for everything this notebook persists.
# NOTE(review): this folder is created under /content/drive before the cell
# that mounts Google Drive has run; on a fresh runtime that leaves local
# files inside the future mount point - confirm the intended cell order.
base_path = '/content/drive/MyDrive/AlphaBot_ML_Training'
os.makedirs(base_path, exist_ok=True)

# JSON file that keeps the progress between sessions
progress_file = f'{base_path}/progress_tracker.json'

# Initial state: one boolean per notebook step plus bookkeeping fields
default_progress = {
    'cell_1_setup': False,
    'cell_2_data_download': False,
    'cell_3_data_analysis': False,
    'cell_4_pattern_training': False,
    'cell_5_sentiment_training': False,
    'cell_6_rag_training': False,
    'cell_7_integration': False,
    'cell_8_testing': False,
    'cell_9_deployment': False,
    'last_cell_executed': None,
    'start_time': None,
    'last_update': None
}

# Load the saved tracker, or start a new one when the file is missing,
# unreadable or not valid JSON (json.JSONDecodeError is a ValueError).
try:
    with open(progress_file, 'r') as f:
        _saved_progress = json.load(f)
    if not isinstance(_saved_progress, dict):
        raise ValueError('progress tracker must be a JSON object')
except (OSError, ValueError):
    progress = default_progress.copy()
    progress['start_time'] = datetime.now().isoformat()
    print("üÜï Nouveau suivi de progression initialis√©")
else:
    # Defaults go first so that every expected key exists even when the file
    # was written by an older version of this cell.
    progress = {**default_progress, **_saved_progress}
    print("üìä Suivi de progression charg√©")

# Record a finished step and persist the tracker
def update_progress(cell_name):
    """Mark ``cell_name`` as completed and write the tracker to disk.

    Reads and mutates the module-level ``progress`` dict and rewrites the
    JSON file at ``progress_file`` on every call.

    Args:
        cell_name: Tracker key of the step that just finished. Keys that are
            not yet in ``progress`` are added.

    Raises:
        OSError: If the tracker file cannot be written.
    """
    # Known and unknown step names are treated alike: the key is set to True.
    progress[cell_name] = True
    progress['last_cell_executed'] = cell_name
    progress['last_update'] = datetime.now().isoformat()

    with open(progress_file, 'w') as f:
        json.dump(progress, f, indent=2)

    print(f"‚úÖ Progression mise √† jour: {cell_name}")

# Print a progress report and suggest the next step
def check_progress():
    """Print the state of every notebook step and the next one to run.

    Reads the module-level ``progress`` dict. Only the nine known steps are
    counted, so extra keys stored in the tracker cannot push the counter
    above the total, and keys missing from the tracker count as "not done"
    instead of raising ``KeyError``.
    """
    # (tracker key, label in the status list, short label used in the hint)
    steps = [
        ('cell_1_setup', '1. Configuration initiale', 'Configuration'),
        ('cell_2_data_download', '2. T√©l√©chargement des donn√©es', 'T√©l√©chargement des donn√©es'),
        ('cell_3_data_analysis', '3. Analyse des donn√©es', 'Analyse des donn√©es'),
        ('cell_4_pattern_training', '4. Entra√Ænement Pattern Detector', 'Pattern Detector'),
        ('cell_5_sentiment_training', '5. Entra√Ænement Sentiment Analyzer', 'Sentiment Analyzer'),
        ('cell_6_rag_training', '6. Entra√Ænement RAG', 'RAG'),
        ('cell_7_integration', '7. Int√©gration', 'Int√©gration'),
        ('cell_8_testing', '8. Tests', 'Tests'),
        ('cell_9_deployment', '9. D√©ploiement', 'D√©ploiement'),
    ]
    done = [bool(progress.get(step_key, False)) for step_key, _, _ in steps]

    print("\nüìã √âtat actuel de la progression:")
    print("=" * 50)

    completed = sum(done)
    total = len(steps)

    print(f"üìä Progression: {completed}/{total} √©tapes compl√©t√©es ({completed / total * 100:.1f}%)")
    # `or` also covers keys that exist but still hold None
    print(f"‚è∞ D√©marr√©: {progress.get('start_time') or 'N/A'}")
    print(f"üîÑ Derni√®re mise √† jour: {progress.get('last_update') or 'N/A'}")
    print(f"üìç Derni√®re cellule: {progress.get('last_cell_executed') or 'Aucune'}")

    print("\nüìù Statut des √©tapes:")
    for (_, step_name, _), is_done in zip(steps, done):
        status = "‚úÖ" if is_done else "‚è≥"
        print(f"  {status} {step_name}")

    print("=" * 50)

    # Suggest the first step that is not completed yet
    for number, ((_, _, short_name), is_done) in enumerate(zip(steps, done), start=1):
        if not is_done:
            print(f"\nüöÄ Prochaine √©tape: Ex√©cuter la cellule {number} ({short_name})")
            break
    else:
        print("\nüéâ Toutes les √©tapes sont compl√©t√©es !")

# Show the current state as soon as the cell runs
check_progress()

# Usage notes for the person running the notebook, one message per line
print(
    "\nüí° Instructions:",
    "1. Ex√©cutez cette cellule pour voir l'√©tat d'avancement",
    "2. Chaque cellule mettra √† jour automatiquement sa progression",
    "3. Si le processus s'arr√™te, relancez simplement cette cellule",
    "4. Continuez avec la cellule sugg√©r√©e",
    "\nüîÑ Note: Le syst√®me est con√ßu pour supporter les arr√™ts/red√©marrages",
    sep="\n",
)


# 🚀 AlphaBot ML/DL Training - Google Colab

## 📋 Vue d'ensemble

Ce notebook entraîne les modèles Machine Learning et Deep Learning d'AlphaBot :
- Pattern Detector (LSTM + CNN)
- Sentiment Analyzer (FinBERT + RoBERTa)
- RAG Integrator (Embeddings + FAISS)

## ⚡ Optimisations
- GPU/TPU acceleration
- Mixed precision training
- Memory management
- Automatic checkpoints
- Timeout protection

In [None]:
# CELLULE 1: Setup GPU/TPU optimis√©
import tensorflow as tf
import torch
import json
import logging
from datetime import datetime
import numpy as np
import pandas as pd

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick a distribution strategy: TPU first, then GPU(s), then the default one.
# `except Exception` (instead of a bare `except`) keeps Ctrl-C / kernel
# interrupts working while the TPU resolver is probing.
strategy = None
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print('‚úÖ TPU d√©tect√©e et configur√©e')
except Exception as tpu_error:
    # No TPU is the common case, so the reason is only logged at debug level
    logger.debug("TPU initialisation failed: %s", tpu_error)

if strategy is None:
    try:
        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            # Allocate GPU memory on demand rather than all at once
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            strategy = tf.distribute.MirroredStrategy()
            print(f'‚úÖ {len(gpus)} GPU(s) d√©tect√©e(s)')
        else:
            strategy = tf.distribute.get_strategy()
            print('‚ö†Ô∏è Aucun GPU/TPU d√©tect√©, utilisation du CPU')
    except Exception as e:
        strategy = tf.distribute.get_strategy()
        print(f'‚ö†Ô∏è Erreur de configuration GPU: {e}')

# Enable mixed precision globally.
# NOTE(review): the policy is set whatever device was found above; the
# TensorFlow guide recommends 'mixed_bfloat16' on TPUs - confirm this is
# the intended policy for TPU/CPU runs.
try:
    policy = tf.keras.mixed_precision.Policy('mixed_float16')
    tf.keras.mixed_precision.set_global_policy(policy)
    print('‚úÖ Mixed precision activ√©e')
except Exception:
    print('‚ö†Ô∏è Mixed precision non disponible')

# Summary of the detected configuration
print("\nüìä Configuration:")
print(f"- TensorFlow: {tf.__version__}")
print(f"- PyTorch: {torch.__version__}")
print(f"- Strategy: {strategy}")
print(f"- GPUs disponibles: {tf.config.list_physical_devices('GPU')}")

# CUDA details as seen by PyTorch
if torch.cuda.is_available():
    print(f"- CUDA disponible: {torch.version.cuda}")
    print(f"- GPU courant: {torch.cuda.get_device_name(0)}")
    print(f"- M√©moire GPU: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

In [None]:
print("üîß Configuration Google Drive (r√©siliente v2)...")
from google.colab import drive
import os, shutil, time

MOUNT_POINT = '/content/drive'

def _safe_cleanup_mount_point(mp: str):
    try:
        # S√©curiser: si bind√© ou symlink, supprimer l'entr√©e
        if os.path.islink(mp):
            print("‚ÑπÔ∏è Le point de montage est un symlink ‚Äî suppression...")
            os.unlink(mp)
        # Si dossier existe et contient des fichiers r√©siduels locaux (pas Drive), on nettoie
        if os.path.isdir(mp):
            for entry in os.listdir(mp):
                p = os.path.join(mp, entry)
                try:
                    if os.path.isfile(p) or os.path.islink(p):
                        os.remove(p)
                    elif os.path.isdir(p):
                        shutil.rmtree(p)
                except Exception as e:
                    print(f"‚ö†Ô∏è Ignor√© pendant nettoyage: {p} -> {e}")
        else:
            os.makedirs(mp, exist_ok=True)
    except Exception as e:
        print(f"‚ö†Ô∏è Probl√®me nettoyage mount point: {e}")

def _force_unmount(mount_point: str = '/content/drive') -> None:
    """Unmount Google Drive, first through Colab and then at system level.

    Every step is best effort: failures are reported with ``print`` (or
    ignored when the system tool simply returns a non-zero status) and
    never raised.

    Args:
        mount_point: Directory to unmount; defaults to the path that was
            previously hard-coded in the shell commands.
    """
    import subprocess

    try:
        drive.flush_and_unmount()
        print("‚ÑπÔ∏è flush_and_unmount ex√©cut√©")
    except Exception as e:
        print(f"‚ÑπÔ∏è flush_and_unmount non n√©cessaire: {e}")

    # System-level fallback. Argument lists avoid going through a shell, and
    # a missing binary surfaces as OSError (os.system never raised, so the
    # former except clause could not trigger).
    for command in (['fusermount', '-u', mount_point], ['umount', mount_point]):
        try:
            subprocess.run(command, stderr=subprocess.DEVNULL, check=False)
        except OSError as e:
            print(f"‚ÑπÔ∏è umount non n√©cessaire: {e}")

print("üîé √âtat initial:")
print(f" - ismount: {os.path.ismount(MOUNT_POINT)}")
print(f" - existe: {os.path.exists(MOUNT_POINT)}")
try:
    print(f" - contenu: {os.listdir(MOUNT_POINT) if os.path.isdir(MOUNT_POINT) else 'N/A'}")
except Exception as _:
    print(" - contenu: N/A")

# √âtape 1: forcer un d√©montage (au cas o√π)
_force_unmount()
time.sleep(1)

# √âtape 2: nettoyage du point de montage
_safe_cleanup_mount_point(MOUNT_POINT)
time.sleep(0.5)

# √âtape 3: montage forc√© avec gestion des erreurs
try:
    drive.mount(MOUNT_POINT, force_remount=True)
    print("‚úÖ Drive mont√© (v2)")
except Exception as e:
    msg = str(e)
    print(f"‚ùå drive.mount a √©chou√©: {msg}")
    if 'Mountpoint must not already contain files' in msg or 'symlink' in msg.lower():
        print("üîß Correction approfondie: suppression et recr√©ation du dossier de montage")
        try:
            # Supprimer compl√®tement et recr√©er /content/drive
            if os.path.exists(MOUNT_POINT):
                shutil.rmtree(MOUNT_POINT, ignore_errors=True)
            os.makedirs(MOUNT_POINT, exist_ok=True)
        except Exception as e2:
            print(f"‚ö†Ô∏è Impossible de recr√©er {MOUNT_POINT}: {e2}")
        # Retenter un dernier montage
        drive.mount(MOUNT_POINT, force_remount=True)
        print("‚úÖ Drive mont√© apr√®s recr√©ation du dossier")
    else:
        raise

# √âtape 4: pr√©parer l'arborescence projet
base_path = '/content/drive/MyDrive/AlphaBot_ML_Training'
os.makedirs(base_path, exist_ok=True)
for sub in ('data', 'models', 'checkpoints', 'logs'):
    os.makedirs(f"{base_path}/{sub}", exist_ok=True)
print(f"üìÅ R√©pertoires pr√™ts sous: {base_path}")

# V√©rification finale
print("üîé V√©rification finale:")
print(f" - ismount: {os.path.ismount(MOUNT_POINT)}")
try:
    print(f" - contenu: {os.listdir(MOUNT_POINT)}")
except Exception:
    print(" - contenu: N/A")


In [None]:
# CELL 3: AlphaBot code setup
# `os` is imported here, before its first use just below, so the cell does
# not rely on an earlier cell having imported it.
import os
import subprocess
import sys
from pathlib import Path

# Define the base path if an earlier cell has not done so
if 'base_path' not in globals():
    base_path = '/content/drive/MyDrive/AlphaBot_ML_Training'
    os.makedirs(base_path, exist_ok=True)

# Clone the AlphaBot repository (skipped when the folder already exists)
if not Path('/content/alphabot').exists():
    print("üì• Clonage du d√©p√¥t AlphaBot...")
    subprocess.run(['git', 'clone', 'https://github.com/thomy03/alphabot.git', '/content/alphabot'], check=True)
else:
    print("üìÇ D√©p√¥t AlphaBot d√©j√† pr√©sent")

# Install the dependencies. `sys.executable -m pip` targets the interpreter
# that runs this notebook, whichever `pip` happens to be first on PATH.
print("üì¶ Installation des d√©pendances...")
try:
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-r', '/content/alphabot/requirements_colab.txt'],
        check=True,
    )
    print("‚úÖ D√©pendances install√©es avec succ√®s")
except subprocess.CalledProcessError as e:
    print(f"‚ö†Ô∏è Erreur lors de l'installation: {e}")
    print("üîß Installation des d√©pendances essentielles manuellement...")
    essential_packages = [
        'tensorflow', 'torch', 'transformers', 'sentence-transformers',
        'faiss-cpu', 'yfinance', 'pandas', 'numpy', 'scikit-learn',
        'matplotlib', 'seaborn', 'tqdm', 'requests'
    ]
    # One package at a time so a single failure does not block the others
    for package in essential_packages:
        try:
            subprocess.run(
                [sys.executable, '-m', 'pip', 'install', package],
                check=True, capture_output=True, text=True,
            )
            print(f"‚úÖ {package} install√©")
        except subprocess.CalledProcessError as e:
            print(f"‚ùå √âchec installation {package}: {e.stderr.strip() if e.stderr else 'Erreur inconnue'}")
        except Exception as e:
            print(f"‚ùå √âchec installation {package}: {str(e)}")

# Import the AlphaBot modules and create the three ML components.
# Stand-in classes are used whenever the real modules cannot be imported,
# including when the repository folder is missing, so that
# `pattern_detector`, `sentiment_analyzer` and `rag_integrator` exist in
# both cases.
import os
import sys

for _path in ('/content', '/content/alphabot'):
    if _path not in sys.path:
        sys.path.append(_path)

_alphabot_imported = False

if not os.path.exists('/content/alphabot/alphabot/ml'):
    print("‚ùå Dossier alphabot/ml non trouv√©")
    print("üìÇ Structure du dossier:")
    if os.path.exists('/content/alphabot'):
        # Print an indented tree, at most five files per folder
        for root, dirs, files in os.walk('/content/alphabot'):
            level = root.replace('/content/alphabot', '').count(os.sep)
            indent = ' ' * 2 * level
            print(f"{indent}{os.path.basename(root)}/")
            subindent = ' ' * 2 * (level + 1)
            for file in files[:5]:
                print(f"{subindent}{file}")
            if len(files) > 5:
                print(f"{subindent}... et {len(files)-5} autres fichiers")
else:
    try:
        from alphabot.ml.pattern_detector import MLPatternDetector
        from alphabot.ml.sentiment_analyzer import SentimentAnalyzer
        from alphabot.ml.rag_integrator import RAGIntegrator
        print("‚úÖ Modules AlphaBot import√©s avec succ√®s")
        _alphabot_imported = True
    except Exception as e:
        print(f"‚ùå Erreur d'import: {e}")

if _alphabot_imported:
    # Real classes are available: build the components
    try:
        pattern_detector = MLPatternDetector()
        sentiment_analyzer = SentimentAnalyzer()
        rag_integrator = RAGIntegrator()
        print("‚úÖ Composants ML initialis√©s")
    except Exception as e:
        print(f"‚ùå Erreur d'initialisation: {e}")
        print("üîß Les composants seront cr√©√©s plus tard dans le notebook")
else:
    print("üîß Cr√©ation des modules de secours...")

    # Minimal placeholders that let the rest of the notebook keep running
    class MLPatternDetector:
        def __init__(self):
            print("üîß MLPatternDetector de secours cr√©√©")

    class SentimentAnalyzer:
        def __init__(self):
            print("üîß SentimentAnalyzer de secours cr√©√©")

    class RAGIntegrator:
        def __init__(self):
            print("üîß RAGIntegrator de secours cr√©√©")

    pattern_detector = MLPatternDetector()
    sentiment_analyzer = SentimentAnalyzer()
    rag_integrator = RAGIntegrator()
    print("‚úÖ Composants de secours initialis√©s")

# Helper utilities: use the project's own when importable, otherwise fall
# back to small local stand-ins with the same names.
try:
    from colab_utils import ColabMemoryMonitor, create_colab_callbacks
    from drive_manager import DriveManager
    drive_manager = DriveManager(base_path)
    memory_monitor = ColabMemoryMonitor()
    print("‚úÖ Utilitaires import√©s")
except Exception as e:
    print(f"‚ö†Ô∏è Utilitaires non disponibles: {e}")

    class ColabMemoryMonitor:
        """Stand-in monitor that reports a fixed usage figure."""

        def get_memory_usage(self):
            return dict(percent_used=50.0)

    class DriveManager:
        """Stand-in manager: remembers the target path and only logs saves."""

        def __init__(self, path):
            self.path = path

        def save_model(self, **kwargs):
            # Nothing is written; the call is only announced
            print(f"üîß Sauvegarde simul√©e dans {self.path}")

    drive_manager = DriveManager(base_path)
    memory_monitor = ColabMemoryMonitor()
    print("‚úÖ Utilitaires de secours cr√©√©s")


In [None]:
# CELL 4: market data download
import os
import pickle
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import yfinance as yf

# Define the base path if an earlier cell has not done so
if 'base_path' not in globals():
    base_path = '/content/drive/MyDrive/AlphaBot_ML_Training'
    os.makedirs(base_path, exist_ok=True)

# Configuration: five symbols, two years of daily history
symbols = ['AAPL', 'GOOGL', 'MSFT', 'TSLA', 'AMZN']
end_date = datetime.now()
start_date = end_date - timedelta(days=365*2)

print(f"üì• T√©l√©chargement des donn√©es pour {symbols}...")
print(f"üìÖ P√©riode: {start_date.strftime('%Y-%m-%d')} √† {end_date.strftime('%Y-%m-%d')}")

# Download each symbol independently so one failure does not stop the rest
all_data = {}
for symbol in symbols:
    try:
        # auto_adjust is passed explicitly to avoid the library warning
        data = yf.download(symbol, start=start_date, end=end_date, auto_adjust=False)
        if not data.empty:
            all_data[symbol] = data
            print(f"‚úÖ {symbol}: {len(data)} jours de donn√©es")
        else:
            print(f"‚ùå {symbol}: Pas de donn√©es disponibles")
    except Exception as e:
        print(f"‚ùå {symbol}: Erreur de t√©l√©chargement - {e}")

# Save the data. The target folder is created when missing, and an empty
# result is not written so that it cannot replace a previously saved dataset.
data_path = f"{base_path}/data/market_data.pkl"
if all_data:
    os.makedirs(os.path.dirname(data_path), exist_ok=True)
    with open(data_path, 'wb') as f:
        pickle.dump(all_data, f)
    print(f"\nüíæ Donn√©es sauvegard√©es dans: {data_path}")
else:
    print(f"\n⚠️ Aucune donnée téléchargée — {data_path} n'a pas été modifié")
print(f"üìä Total symboles: {len(all_data)}")

# Show a sample
if all_data:
    sample_symbol = list(all_data.keys())[0]
    sample_data = all_data[sample_symbol]
    print(f"\nüìà Exemple pour {sample_symbol}:")
    print(f"- Premi√®re date: {sample_data.index[0].strftime('%Y-%m-%d')}")
    print(f"- Derni√®re date: {sample_data.index[-1].strftime('%Y-%m-%d')}")

    # With two-level (field, ticker) columns, ['Close'] is a one-column
    # DataFrame; reduce it to a Series so float() receives a scalar.
    close_prices = sample_data['Close']
    if isinstance(close_prices, pd.DataFrame):
        close_prices = close_prices.iloc[:, 0]

    mean_price = float(close_prices.mean())
    volatility = float(close_prices.pct_change().std() * 100)

    print(f"- Prix moyen: ${mean_price:.2f}")
    print(f"- Volatilit√©: {volatility:.2f}%")


In [None]:
print("üß† Entra√Ænement du Pattern Detector (LSTM + CNN)...")

# Importer les biblioth√®ques n√©cessaires
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
from datetime import datetime, timedelta
import yfinance as yf

# Load the dataset written by the download cell, or download it again.
# NOTE: pickle.load executes whatever the file contains; only load a file
# that this notebook wrote itself.
all_data = None
try:
    with open(f'{base_path}/data/market_data.pkl', 'rb') as f:
        all_data = pickle.load(f)
except Exception:
    # Missing or unreadable file: handled by the re-download below
    all_data = None

if all_data:
    print("‚úÖ Donn√©es charg√©es depuis le pickle")
else:
    # Also reached when the pickle holds an empty dataset
    print("üîß Re-t√©l√©chargement des donn√©es...")
    symbols = ['AAPL', 'GOOGL', 'MSFT', 'TSLA', 'AMZN']
    end_date = datetime.now().strftime('%Y-%m-%d')
    start_date = (datetime.now() - timedelta(days=730)).strftime('%Y-%m-%d')

    all_data = {}
    for symbol in symbols:
        print(f"üì• T√©l√©chargement des donn√©es pour {symbol}...")
        try:
            data = yf.download(symbol, start=start_date, end=end_date, auto_adjust=False)
        except Exception as e:
            # Same per-symbol tolerance as the download cell
            print(f"‚ùå {symbol}: Erreur de t√©l√©chargement - {e}")
            continue
        if not data.empty:
            all_data[symbol] = data
            print(f"‚úÖ {symbol}: {len(data)} jours de donn√©es")
        else:
            print(f"‚ö†Ô∏è {symbol}: Pas de donn√©es disponibles")

    # Cache the result so the next run does not download again; an empty
    # result is not worth caching.
    if all_data:
        try:
            os.makedirs(f'{base_path}/data', exist_ok=True)
            with open(f'{base_path}/data/market_data.pkl', 'wb') as f:
                pickle.dump(all_data, f)
            print(f"üíæ Donn√©es sauvegard√©es dans: {base_path}/data/market_data.pkl")
        except Exception as e:
            print(f"‚ö†Ô∏è Erreur de sauvegarde: {e}")

print(f"üìä Total symboles: {len(all_data)}")

# Data preparation banner, printed once (an older copy of this banner used
# to be printed right before it).
print("üîß Pr√©paration des donn√©es (s√©curis√©e v2 - yfinance compatibles)...")
def prepare_pattern_training_data(all_data):
    """Build sliding-window training samples from per-symbol price frames.

    Every run of 35 consecutive rows (after dropping rows with missing
    values) yields one sample: the first 30 rows give the features
    ``[close, volume, high - low]`` and the mean close of the following
    5 rows, compared with the last close of the window, gives the label
    (2: return above +2 %, 0: below -2 %, 1: otherwise).

    Column names are resolved case-insensitively. Two-level columns and
    ticker-affixed names such as ``Close_AAPL`` are supported.

    Args:
        all_data: Mapping ``symbol -> pandas.DataFrame`` with close, volume,
            high and low columns. Empty or ``None`` entries are skipped.
            The input frames are not modified.

    Returns:
        ``(X, y)`` where ``X`` is float32 of shape ``(n, 30, 3)`` and ``y``
        is int32 of shape ``(n,)``; ``n`` is 0 when nothing usable is found.
    """
    import numpy as np
    import pandas as pd

    window, horizon, threshold = 30, 5, 0.02
    span = window + horizon  # rows needed for one sample

    # Accepted spellings per logical column, in order of preference
    field_aliases = {
        'Close': ['Close', 'Adj Close', 'Adj_Close', 'Close_Adj Close'],
        'Volume': ['Volume'],
        'High': ['High'],
        'Low': ['Low'],
    }
    known_fields = {'open', 'high', 'low', 'close', 'adjclose', 'adj_close', 'volume'}

    def _canon(name):
        """Lower-case a column name and drop spaces / unify dashes."""
        return str(name).lower().replace(' ', '').replace('-', '_')

    def _flatten_columns(df):
        """Return single-level column names for ``df``."""
        cols = df.columns
        if not isinstance(cols, pd.MultiIndex):
            return [str(c) for c in cols]
        # Prefer the level that carries the field names (Close, High, ...)
        hits = [
            len({_canon(v) for v in cols.get_level_values(lvl)} & known_fields)
            for lvl in range(cols.nlevels)
        ]
        best = max(range(cols.nlevels), key=hits.__getitem__)
        names = [str(v) for v in cols.get_level_values(best)]
        if hits[best] and len(set(names)) == len(names):
            return names
        # Otherwise join the levels, e.g. ('Close', 'AAPL') -> 'Close_AAPL'
        return ['_'.join(str(c) for c in col if c is not None and str(c) != '') for col in cols]

    def _pick(columns, aliases):
        """Find the column for one field: exact name first, then affixed."""
        canon = [(col, _canon(col)) for col in columns]
        for alias in aliases:
            key = _canon(alias)
            for col, c in canon:
                if c == key:
                    return col
        for alias in aliases:
            key = _canon(alias)
            affixed = [col for col, c in canon
                       if c.startswith(key + '_') or c.endswith('_' + key)]
            if affixed:
                # Shortest name wins, e.g. 'AAPL_Close' over 'AAPL_Adj_Close'
                return min(affixed, key=len)
        return None

    X_train, y_train = [], []
    for symbol, data in all_data.items():
        try:
            if data is None or not hasattr(data, 'empty') or data.empty:
                print(f"‚ö†Ô∏è {symbol}: dataset vide/None, ignor√©")
                continue

            # Work on a sorted copy so the caller's frame stays untouched
            data = data.copy().sort_index()
            data.columns = _flatten_columns(data)

            cols = {field: _pick(data.columns, aliases)
                    for field, aliases in field_aliases.items()}
            if not all(cols.values()):
                print(f"‚ö†Ô∏è {symbol}: colonnes manquantes apr√®s normalisation {cols}, ignor√©")
                continue

            close_col, vol_col, hi_col, lo_col = (
                cols['Close'], cols['Volume'], cols['High'], cols['Low']
            )
            data = data.dropna(subset=[close_col, vol_col, hi_col, lo_col])

            n = len(data)
            if n < span:
                print(f"‚ÑπÔ∏è {symbol}: pas assez de points ({n}), ignor√©")
                continue

            # Pull the columns out once; the windows below are array slices.
            # Prices stay in float64 for the return computation.
            prices = data[close_col].to_numpy(dtype=np.float64)
            features = np.column_stack([
                prices,
                data[vol_col].to_numpy(dtype=np.float64),
                (data[hi_col] - data[lo_col]).to_numpy(dtype=np.float64),
            ]).astype(np.float32)

            # The last valid window starts at n - span, hence the + 1
            for start in range(n - span + 1):
                stop = start + window
                current_price = float(prices[stop - 1])
                if current_price == 0:
                    continue  # the return would divide by zero
                future_mean = float(prices[stop:stop + horizon].mean())
                future_return = (future_mean - current_price) / current_price

                if future_return > threshold:
                    label = 2
                elif future_return < -threshold:
                    label = 0
                else:
                    label = 1

                X_train.append(features[start:stop])
                y_train.append(label)

        except Exception as e:
            print(f"‚ö†Ô∏è Erreur sur {symbol}, segment ignor√©: {e}")
            continue

    # reshape keeps a 3-D array even when no sample was produced
    X_train = np.array(X_train, dtype=np.float32).reshape(-1, window, 3)
    y_train = np.array(y_train, dtype=np.int32)
    print(f"‚úÖ Pr√©paration termin√©e: X={X_train.shape}, y={y_train.shape}")
    return X_train, y_train

# Build X/y once: the helper walks every symbol, so a second call would only
# repeat the same work and print the same message again.
X_train, y_train = prepare_pattern_training_data(all_data)
print(f"üìä Donn√©es pr√©par√©es: {X_train.shape[0]} √©chantillons")

# Build the pattern-detector network
print("üîß Cr√©ation du mod√®le GPU (compatible L4)...")

# Report the visible GPUs
print(f"üìä GPU disponible: {tf.config.list_physical_devices('GPU')}")

# Mixed precision is deliberately not switched on in this cell:
# tf.keras.mixed_precision.set_global_policy('mixed_float16')

import os
# NOTE(review): this variable is set after TensorFlow has been imported;
# whether it still has an effect at this point should be confirmed.
os.environ.setdefault('TF_FORCE_GPU_ALLOW_GROWTH', 'true')
# When True, the LSTM layer is called under a CPU device scope to stay
# away from the CuDNN code path.
use_cpu_lstm = True

strategy = tf.distribute.get_strategy()

with strategy.scope():
    # Deliberately small model: normalisation -> one LSTM -> dense head
    inputs = tf.keras.Input(shape=(30, 3), name='input_layer')

    x = tf.keras.layers.BatchNormalization()(inputs)

    # The units count must come first, before any keyword argument.
    # `time_major` is not passed: False was its default and the argument is
    # not accepted by newer Keras releases.
    lstm_main = tf.keras.layers.LSTM(
        64,
        activation='tanh',
        recurrent_activation='sigmoid',
        use_bias=True,
        unit_forget_bias=True,
        unroll=False,
        return_sequences=False,
        kernel_initializer='glorot_uniform',
        recurrent_initializer='orthogonal',
        name='lstm_main',
    )
    if use_cpu_lstm:
        with tf.device('/CPU:0'):
            x = lstm_main(x)
    else:
        x = lstm_main(x)
    x = tf.keras.layers.Dropout(0.3)(x)

    # Dense head
    x = tf.keras.layers.Dense(32, activation='relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.3)(x)

    # The setup cell may have installed a global mixed_float16 policy;
    # pinning the softmax output to float32 keeps the probabilities and the
    # loss in full precision in that case.
    outputs = tf.keras.layers.Dense(3, activation='softmax', dtype='float32', name='output')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs, name='simplified_gpu_model')

    # Integer class labels (0, 1, 2) -> sparse categorical cross-entropy
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

# Show the architecture that was just built
print("‚úÖ Mod√®le GPU optimis√© cr√©√©:")
model.summary()

# Two lightweight callbacks, both watching the validation loss
_early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)
_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=1,
)
callbacks = [_early_stopping, _lr_on_plateau]

# Quick look at the arrays before training
print("üìä V√©rification des donn√©es:")
print(f"  - X_train shape: {X_train.shape}")
print(f"  - y_train shape: {y_train.shape}")
print(f"  - X_train dtype: {X_train.dtype}")
print(f"  - y_train dtype: {y_train.dtype}")
print(f"  - Valeurs uniques dans y_train: {np.unique(y_train)}")

# Enforce the dtypes used for training
X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.int32)

# Without any real sample, fall back to a small random dataset so the rest
# of the cell can still run
if not X_train.shape[0]:
    print("‚ö†Ô∏è Aucun √©chantillon r√©el. G√©n√©ration d'un dataset synth√©tique minimal (CPU)...")
    import numpy as np
    X_train = np.random.randn(256, 30, 3).astype(np.float32)
    y_train = np.random.randint(0, 3, size=(256,)).astype(np.int32)
    print(f"‚úÖ Dataset synth√©tique: X={X_train.shape}, y={y_train.shape}")

# Standardise each feature: flatten to 2-D for the scaler, then restore the
# original 3-D shape
scaler = StandardScaler()
_feature_count = X_train.shape[-1]
X_train_scaled = scaler.fit_transform(
    X_train.reshape(-1, _feature_count)
).reshape(X_train.shape)

# Train the model. On a CuDNN/DNN failure a Dense-only replacement model is
# trained instead; whichever model ends up trained is then saved and plotted.
print("üöÄ D√©but de l'entra√Ænement GPU (compatible L4)...")
try:
    history = model.fit(
        X_train_scaled, y_train,
        epochs=20,  # kept low to limit the risk of crashes
        batch_size=64,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )
    print("‚úÖ Entra√Ænement GPU termin√© avec succ√®s")
except Exception as e:
    print(f"‚ùå Erreur lors de l'entra√Ænement GPU: {e}")
    print("üîß Analyse de l'erreur:")
    print(f"  - Type d'erreur: {type(e).__name__}")
    print(f"  - Message: {str(e)}")

    # Only CuDNN/DNN errors have a fallback ("DNN" also matches "CuDNN");
    # anything else is re-raised with its original traceback
    if "DNN" not in str(e):
        raise

    print("üîß D√©tection d'erreur CuDNN, passage en mode CPU...")
    import os
    # NOTE(review): TensorFlow is already initialised at this point; whether
    # hiding the GPUs through this variable still takes effect should be
    # confirmed.
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Dense-only replacement model (no recurrent layer)
    with tf.distribute.get_strategy().scope():
        cpu_model = tf.keras.Sequential([
            tf.keras.layers.Input(shape=(30, 3)),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dense(3, activation='softmax')
        ])

        cpu_model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

    history = cpu_model.fit(
        X_train_scaled, y_train,
        epochs=15,  # kept low to limit the risk of crashes
        batch_size=32,
        validation_split=0.2,
        verbose=1
    )

    model = cpu_model
    print("‚úÖ Mod√®le CPU de secours entra√Æn√©")

# Save the trained model and the scaler (best effort)
try:
    import os
    os.makedirs(f'{base_path}/models', exist_ok=True)
    model.save(f'{base_path}/models/simplified_gpu_model.keras')
    with open(f'{base_path}/models/simplified_scaler.pkl', 'wb') as f:
        pickle.dump(scaler, f)
    print("‚úÖ Mod√®le GPU et scaler sauvegard√©s")
except Exception as e:
    print(f"‚ö†Ô∏è Erreur de sauvegarde: {e}")

# Plot the learning curves (best effort)
try:
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training')
    if 'val_accuracy' in history.history:
        plt.plot(history.history['val_accuracy'], label='Validation')
    plt.title('Model Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training')
    if 'val_loss' in history.history:
        plt.plot(history.history['val_loss'], label='Validation')
    plt.title('Model Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()
except Exception as e:
    print(f"‚ö†Ô∏è Erreur lors de l'affichage des courbes: {e}")


In [None]:
# CELLULE 6: Entra√Ænement Sentiment Analyzer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset
import numpy as np
import pandas as pd

# Define the base directory where this notebook writes checkpoints, logs and
# models, creating it if needed.
# `os` is imported here so the cell also runs on a fresh kernel instead of
# relying on an earlier cell having imported it.
import os

base_path = '/content/drive/MyDrive/AlphaBot_ML_Training'
os.makedirs(base_path, exist_ok=True)

print("üí≠ Entra√Ænement du Sentiment Analyzer (FinBERT + RoBERTa)...")

# Cr√©er un dataset de d√©monstration
class SentimentDataset(Dataset):
    """Map-style dataset pairing raw texts with integer class labels.

    Items are tokenized lazily on access. Each item is a dict with the keys
    ``input_ids``, ``attention_mask`` and ``labels``.

    Args:
        texts: Indexable collection of texts (each is passed through ``str``).
        labels: Indexable collection of integer labels, aligned with ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=...,
            padding=..., max_length=..., return_tensors='pt')`` and returning
            a mapping with ``input_ids`` and ``attention_mask`` tensors.
        max_length: Length every sequence is padded/truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return it with its label tensor."""
        sample_text = str(self.texts[idx])
        sample_label = self.labels[idx]

        encoded = self.tokenizer(
            sample_text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )

        # The tokenizer output carries a leading batch axis; flatten() drops
        # it so each field is a 1-D tensor.
        item = {
            field: encoded[field].flatten()
            for field in ('input_ids', 'attention_mask')
        }
        item['labels'] = torch.tensor(sample_label, dtype=torch.long)
        return item

# Demonstration data: ten short financial headlines.
demo_texts = [
    "Apple reports record quarterly earnings",
    "Google stock drops on regulatory concerns",
    "Tesla announces new battery technology",
    "Microsoft cloud growth exceeds expectations",
    "Amazon faces antitrust investigation",
    "Meta launches new VR platform",
    "NVIDIA chips power AI revolution",
    "Bitcoin reaches new all-time high",
    "Federal Reserve raises interest rates",
    "Oil prices surge on supply concerns"
]

# One integer class per headline, in the same order as demo_texts.
demo_labels = [2, 0, 2, 2, 0, 2, 2, 2, 0, 0]  # 0=negative, 1=neutral, 2=positive
# NOTE(review): the pretrained checkpoint ships its own id -> label mapping
# (see model.config.id2label); it may not follow the 0/1/2 convention above.
# Confirm before trusting predictions after such a short fine-tuning run.

# Initialize FinBERT (tokenizer + sequence-classification model).
model_name = "yiyanghkust/finbert-tone"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Build the training dataset.
dataset = SentimentDataset(demo_texts, demo_labels, tokenizer)

# Training configuration.
# The former `evaluation_strategy="no"` argument is intentionally omitted:
# "no" is the default, and the keyword was renamed to `eval_strategy` in recent
# transformers releases, where the old spelling raises a TypeError.
# NOTE(review): with 10 samples and a batch size of 4 the run has only a
# handful of optimizer steps, so warmup_steps=10 may span the whole training
# — TODO confirm this is intended.
training_args = TrainingArguments(
    output_dir=f'{base_path}/checkpoints/sentiment',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    warmup_steps=10,
    weight_decay=0.01,
    logging_dir=f'{base_path}/logs/sentiment',
    logging_steps=1,
    save_steps=10,
    save_strategy="epoch"
)

# Create the trainer.
# Newer transformers releases take the tokenizer as `processing_class`, older
# ones as `tokenizer`; pick whichever keyword the installed Trainer declares.
import inspect

tokenizer_kwarg = (
    'processing_class'
    if 'processing_class' in inspect.signature(Trainer.__init__).parameters
    else 'tokenizer'
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    **{tokenizer_kwarg: tokenizer}
)

# Train.
print("üöÄ D√©but de l'entra√Ænement FinBERT...")
trainer.train()

# Save the fine-tuned model and its tokenizer side by side so both can be
# reloaded from the same directory.
model.save_pretrained(f'{base_path}/models/finbert_sentiment')
tokenizer.save_pretrained(f'{base_path}/models/finbert_sentiment')
print("‚úÖ Mod√®le FinBERT sauvegard√©")

# Smoke-test the fine-tuned model on a few unseen headlines.
print("\nüß™ Test du mod√®le:")
test_texts = [
    "Strong earnings report drives stock higher",
    "Company faces bankruptcy concerns",
    "Stable performance in challenging market"
]

# Class index -> display name (same 0/1/2 convention as the training labels).
sentiment_map = {0: "N√©gatif", 1: "Neutre", 2: "Positif"}

# Training may have left the model on an accelerator and in training mode:
# switch to eval mode (disables dropout) and run inference on the device the
# weights actually live on, otherwise CPU inputs raise a device-mismatch error.
model.eval()
device = next(model.parameters()).device

for text in test_texts:
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
    with torch.no_grad():  # inference only: no autograd graph needed
        outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment_idx = torch.argmax(predictions).item()
    confidence = predictions[0][sentiment_idx].item()
    print(f"'{text[:50]}...' -> {sentiment_map[sentiment_idx]} ({confidence:.2f})")

In [None]:
# CELLULE 7: Entraînement RAG Integrator
# Define the base path if it is not already defined, e.g. when this cell is
# run on its own. `os` is imported here for the same reason: on a fresh
# kernel no earlier cell has imported it and os.makedirs would raise NameError.
import os

if 'base_path' not in globals():
    base_path = '/content/drive/MyDrive/AlphaBot_ML_Training'
    os.makedirs(base_path, exist_ok=True)

from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pandas as pd
from pathlib import Path

print("üîç Entra√Ænement du RAG Integrator (Embeddings + FAISS)...")

# Demonstration corpus: one short description per company.
demo_documents = [
    "Apple Inc. is a technology company that designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide.",
    "Alphabet Inc. provides various products and platforms in the United States, Europe, the Middle East, Africa, the Asia-Pacific, Canada, and Latin America.",
    "Microsoft Corporation develops, licenses, and supports software, services, devices, and solutions worldwide.",
    "Amazon.com, Inc. engages in the retail sale of consumer products and subscriptions in North America and internationally.",
    "Tesla, Inc. designs, develops, manufactures, leases, and sells electric vehicles, and energy generation and storage systems.",
    "Meta Platforms, Inc. develops products that enable people to connect and share with friends and family through mobile devices, personal computers, virtual reality headsets, and wearables.",
    "NVIDIA Corporation provides graphics, computing, and networking solutions in the United States, Taiwan, China, and internationally.",
    "Netflix, Inc. provides entertainment services worldwide. It offers a TV shows, movies, and games.",
    "The Goldman Sachs Group, Inc. provides a range of financial services worldwide.",
    "JPMorgan Chase & Co. provides financial services worldwide.",
    "The Bank of America Corporation provides banking and financial products and services for consumers, small businesses, and institutions.",
    "Walmart Inc. engages in the operation of retail, wholesale, and other units worldwide.",
    "The Procter & Gamble Company provides branded consumer packaged goods worldwide.",
    "The Coca-Cola Company is a beverage company.",
    "PepsiCo, Inc. manufactures, markets, and distributes various beverages and convenient foods worldwide.",
    "The Home Depot, Inc. operates as a home improvement retailer.",
    "The Boeing Company operates in the aerospace industry.",
    "The Exxon Mobil Corporation explores for and produces crude oil and natural gas.",
    "The Chevron Corporation operates through its Upstream and Downstream segments.",
    "The Johnson & Johnson engages in the research and development, manufacture, and sale of various products in the healthcare field.",
    "The Visa Inc. operates as a payments technology company worldwide."
]

# Load the sentence-embedding model.
print("üì• Chargement du mod√®le d'embeddings...")
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every document into a dense vector.
print("üî¢ Cr√©ation des embeddings...")
# FAISS expects a C-contiguous float32 matrix; this conversion is normally a
# no-op but protects against an encoder returning another dtype or layout.
document_embeddings = np.ascontiguousarray(
    embedding_model.encode(demo_documents), dtype=np.float32
)
print(f"‚úÖ Embeddings cr√©√©s: {document_embeddings.shape}")

# Build an exact (brute-force) L2 index over the document vectors.
dimension = document_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(document_embeddings)

print(f"‚úÖ Index FAISS cr√©√© avec {index.ntotal} documents")

# Persist the index and the documents it refers to.
import os
import pickle

# Neither faiss.write_index nor open() creates missing parent directories, so
# make sure the target folder exists before writing.
models_dir = f'{base_path}/models'
os.makedirs(models_dir, exist_ok=True)

faiss.write_index(index, f'{models_dir}/faiss_index.bin')

with open(f'{models_dir}/documents.pkl', 'wb') as f:
    pickle.dump(demo_documents, f)

print("üíæ Index et documents sauvegard√©s")

# Try the semantic search with a few free-text queries.
print("\nüß™ Test de recherche s√©mantique:")
test_queries = [
    "electric vehicle company",
    "social media platform",
    "banking services",
    "beverage company",
    "technology giant"
]

# Number of nearest documents to retrieve per query.
k = 3

for query in test_queries:
    # Encoding a one-element list already yields the (1, dim) matrix that
    # index.search expects.
    query_embedding = embedding_model.encode([query])

    # Retrieve the k closest documents (smaller L2 distance = more similar).
    distances, indices = index.search(query_embedding, k)

    print(f"\nüîç Requ√™te: '{query}'")
    for rank, (dist, idx) in enumerate(zip(distances[0], indices[0]), start=1):
        print(f"  {rank}. {demo_documents[idx][:80]}... (distance: {dist:.4f})")

In [None]:
# CELLULE 8: Intégration et tests
# Define the base path if it is not already defined, e.g. when this cell is
# run on its own. `os` is imported here for the same reason: on a fresh
# kernel no earlier cell has imported it and os.makedirs would raise NameError.
import os

if 'base_path' not in globals():
    base_path = '/content/drive/MyDrive/AlphaBot_ML_Training'
    os.makedirs(base_path, exist_ok=True)

import json
import numpy as np
from datetime import datetime
import pickle

print("üîß Int√©gration des mod√®les et tests finaux...")

# Load every trained artefact.
# Each artefact is loaded in its own try block so one failure is reported
# without preventing the others from loading. The names are reset to None
# first so a failed load can never leave a stale object from a previous run.
lstm_model = None
sentiment_tokenizer = None
sentiment_model = None
faiss_index = None
documents = None

# LSTM pattern model.
try:
    lstm_model = tf.keras.models.load_model(f'{base_path}/models/lstm_pattern_model.h5')
    print("‚úÖ Mod√®le LSTM charg√©")
except Exception as e:
    print(f"‚ùå Erreur de chargement des mod√®les: {e}")

# FinBERT sentiment model and its tokenizer (kept as a pair: both or neither).
try:
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    loaded_tokenizer = AutoTokenizer.from_pretrained(f'{base_path}/models/finbert_sentiment')
    loaded_sentiment = AutoModelForSequenceClassification.from_pretrained(f'{base_path}/models/finbert_sentiment')
    sentiment_tokenizer, sentiment_model = loaded_tokenizer, loaded_sentiment
    print("‚úÖ Mod√®le FinBERT charg√©")
except Exception as e:
    print(f"‚ùå Erreur de chargement des mod√®les: {e}")

# FAISS index and the documents it refers to (kept as a pair as well).
try:
    loaded_index = faiss.read_index(f'{base_path}/models/faiss_index.bin')
    # NOTE: pickle can execute arbitrary code while loading; only open files
    # this notebook wrote itself.
    with open(f'{base_path}/models/documents.pkl', 'rb') as f:
        loaded_documents = pickle.load(f)
    faiss_index, documents = loaded_index, loaded_documents
    print("‚úÖ Index FAISS et documents charg√©s")
except Exception as e:
    print(f"‚ùå Erreur de chargement des mod√®les: {e}")

def build_pattern_features(prices, volumes, window=30):
    """Build the (1, window, 3) input tensor for the pattern model.

    The three channels are, for the last ``window`` observations:
    price relative to the first price of the window, one-step simple return,
    and volume relative to the mean volume of the window.

    Args:
        prices: 1-D sequence of closing prices, oldest first.
        volumes: 1-D sequence of volumes aligned with ``prices``.
        window: Number of time steps in the returned sequence.

    Returns:
        A float array of shape ``(1, window, 3)``, or ``None`` when the
        history is too short (``window + 1`` prices are needed to obtain
        ``window`` returns).
    """
    prices = np.asarray(prices, dtype=float)
    volumes = np.asarray(volumes, dtype=float)
    if len(prices) < window + 1 or len(volumes) < window:
        return None

    returns = np.diff(prices) / prices[:-1]
    recent_volume = volumes[-window:]

    seq_prices = prices[-window:] / prices[-window]
    seq_returns = returns[-window:]
    seq_volume = recent_volume / np.mean(recent_volume)

    return np.column_stack([seq_prices, seq_returns, seq_volume]).reshape(1, window, 3)


# Full integration test
print("\nüß™ Test d'int√©gration complet:")

# 1. Pattern detection test
print("\n1. Test Pattern Detector:")
test_symbol = 'AAPL'
class_map = {0: 'DOWN', 1: 'SIDEWAYS', 2: 'UP'}

# Look the notebook state up defensively: the model may have failed to load
# and the market data comes from an earlier cell that may not have run.
pattern_model = globals().get('lstm_model')
market_data = globals().get('all_data') or {}

if pattern_model is None:
    print("   Test impossible: LSTM non disponible")
elif test_symbol not in market_data:
    print(f"   Test impossible: aucun historique pour {test_symbol}")
else:
    test_data = market_data[test_symbol].tail(50)
    features = build_pattern_features(
        test_data['Close'].values, test_data['Volume'].values
    )
    if features is None:
        print(f"   Test impossible: historique insuffisant pour {test_symbol}")
    else:
        # Prediction: pick the most probable class and report its score.
        prediction = pattern_model.predict(features, verbose=0)
        predicted_class = int(np.argmax(prediction[0]))
        confidence = float(np.max(prediction[0]))
        print(f"   Pr√©diction: {class_map[predicted_class]} (confiance: {confidence:.2f})")

# 2. Sentiment analysis test
print("\n2. Test Sentiment Analyzer:")
test_news = [
    "Apple announces breakthrough AI technology",
    "Market volatility concerns investors",
    "Tech sector shows steady growth"
]

# Class index -> display name.
sentiment_map = {0: "N√©gatif", 1: "Neutre", 2: "Positif"}

# The model/tokenizer pair may be missing if loading failed earlier; skip the
# test with a clear message instead of crashing on an undefined name.
news_tokenizer = globals().get('sentiment_tokenizer')
news_model = globals().get('sentiment_model')

if news_tokenizer is None or news_model is None:
    print("   Test impossible: FinBERT non disponible")
else:
    import torch  # local import: this cell does not import torch itself

    news_model.eval()
    news_device = next(news_model.parameters()).device

    for news in test_news:
        inputs = news_tokenizer(news, return_tensors="pt", truncation=True, padding=True)
        # Keep the inputs on the same device as the model weights.
        inputs = {name: tensor.to(news_device) for name, tensor in inputs.items()}
        with torch.no_grad():  # inference only: no autograd graph needed
            outputs = news_model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        sentiment_idx = torch.argmax(predictions).item()
        confidence = predictions[0][sentiment_idx].item()
        print(f"   '{news[:40]}...' -> {sentiment_map[sentiment_idx]} ({confidence:.2f})")

# 3. RAG test
print("\n3. Test RAG Integrator:")
rag_queries = [
    "Which company focuses on electric vehicles?",
    "Find information about social media companies",
    "What companies are in the banking sector?"
]

# The index/documents may be missing if loading failed earlier, and the
# encoder is only created by the RAG training cell; resolve all three
# defensively so the cell reports the problem instead of raising NameError.
rag_index = globals().get('faiss_index')
rag_documents = globals().get('documents')
rag_encoder = globals().get('embedding_model')

if rag_encoder is None and rag_index is not None and rag_documents is not None:
    # Rebuild the same encoder the index was created with.
    try:
        from sentence_transformers import SentenceTransformer
        rag_encoder = SentenceTransformer('all-MiniLM-L6-v2')
    except Exception as e:
        print(f"   Test impossible: encodeur non disponible ({e})")

if rag_index is None or rag_documents is None:
    print("   Test impossible: index FAISS ou documents non disponibles")
elif rag_encoder is not None:
    k = 2  # number of nearest documents to show per query
    for query in rag_queries:
        # Encoding a one-element list already yields a (1, dim) matrix.
        query_embedding = rag_encoder.encode([query])
        distances, indices = rag_index.search(query_embedding, k)

        print(f"   Query: '{query}'")
        for rank, idx in enumerate(indices[0], start=1):
            # FAISS pads the result with -1 when fewer than k neighbours
            # exist; indexing the list with -1 would silently show the last
            # document instead.
            if idx < 0:
                continue
            doc_preview = rag_documents[idx][:60] + "..."
            print(f"     Result {rank}: {doc_preview}")

# Build and save a performance report.
# Everything produced by earlier cells is fetched through globals() so a
# missing piece (failed load, cell not run) degrades the report instead of
# aborting it with a NameError.
import os

notebook_state = globals()

# Keras-style training history, if the LSTM cell ran in this session.
history_metrics = getattr(notebook_state.get('history'), 'history', None) or {}
val_accuracy = history_metrics.get('val_accuracy') or []
train_accuracy = history_metrics.get('accuracy') or []

indexed_documents = notebook_state.get('documents')
embeddings = notebook_state.get('document_embeddings')
loaded_index = notebook_state.get('faiss_index')
market_data = notebook_state.get('all_data') or {}
tf_module = notebook_state.get('tf')
torch_module = notebook_state.get('torch')

# Prefer the in-memory embeddings; fall back to the dimension stored in the
# FAISS index when only the saved index was loaded.
if embeddings is not None:
    embedding_dimension = int(embeddings.shape[1])
elif loaded_index is not None:
    embedding_dimension = int(loaded_index.d)
else:
    embedding_dimension = 0

# Report a model as "success" only if it is actually available in this
# session, rather than unconditionally.
lstm_ok = notebook_state.get('lstm_model') is not None
finbert_ok = notebook_state.get('sentiment_model') is not None
rag_ok = loaded_index is not None and indexed_documents is not None

performance_report = {
    "timestamp": datetime.now().isoformat(),
    "models_trained": {
        "lstm_pattern_detector": {
            "status": "success" if lstm_ok else "failed",
            "accuracy": float(val_accuracy[-1]) if val_accuracy else 0.0,
            "epochs_trained": len(train_accuracy)
        },
        "finbert_sentiment": {
            "status": "success" if finbert_ok else "failed",
            "epochs_trained": 3,
            "model_size": "base"
        },
        "rag_integrator": {
            "status": "success" if rag_ok else "failed",
            "documents_indexed": len(indexed_documents) if indexed_documents is not None else 0,
            "embedding_dimension": embedding_dimension
        }
    },
    "system_info": {
        "tensorflow_version": tf_module.__version__ if tf_module is not None else "N/A",
        "torch_version": torch_module.__version__ if torch_module is not None else "N/A",
        "gpu_available": (
            len(tf_module.config.list_physical_devices('GPU')) > 0
            if tf_module is not None else False
        ),
        "tpu_available": 'tpu' in notebook_state
    },
    "training_duration": "N/A",
    "data_used": {
        "symbols": list(market_data.keys()),
        "total_data_points": sum(len(data) for data in market_data.values())
    }
}

# Save the report; open() does not create missing parent directories.
report_path = f'{base_path}/exports/performance_report.json'
os.makedirs(os.path.dirname(report_path), exist_ok=True)
with open(report_path, 'w') as f:
    json.dump(performance_report, f, indent=2)

print(f"\nüìä Rapport de performance sauvegard√©: {report_path}")
print("‚úÖ Entra√Ænement ML/DL termin√© avec succ√®s!")

# Final summary banner: date, models, data volume, output locations and the
# suggested next steps.
separator = "=" * 50

print("\n" + separator)
print("üéâ R√âSUM√â DE L'ENTRA√éNEMENT")
print(separator)

stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"üìÖ Date: {stamp}")
print("üß† Mod√®les entra√Æn√©s: 3 (LSTM, FinBERT, RAG)")

symbol_count = len(all_data)
print(f"üìä Donn√©es utilis√©es: {symbol_count} symboles")
print(f"üíæ Mod√®les sauvegard√©s dans: {base_path}/models/")
print(f"üìã Rapport disponible: {report_path}")

print("\nüöÄ Prochaines √©tapes:")
for step in (
    "   1. Pousser les modifications sur GitHub",
    "   2. Lancer les tests locaux",
    "   3. D√©ployer en production",
):
    print(step)

print(separator)