# 🚀 AlphaBot ML/DL Training - Google Colab

Notebook complet pour entraîner les modèles ML/DL d'AlphaBot sous Colab (GPU L4 ou TPU), avec suivi/reprise, téléchargement de données robuste, et fallback sûrs.

In [None]:
# Make the AlphaBot checkout importable: Colab's working directory and the
# repository folder are both added to the module search path.
import sys
sys.path.extend(['/content', '/content/alphabot'])

## 🔄 Suivi de Progression et Reprise Automatique

In [None]:
# Progress tracking with automatic resume
import os
import json
import pickle
from datetime import datetime

# Base directory for every artifact produced by this notebook.
# NOTE(review): Drive is only mounted by the section-2 cell; if this cell runs
# first, the path below is a plain local directory - confirm the intended order.
base_path = '/content/drive/MyDrive/AlphaBot_ML_Training'
os.makedirs(base_path, exist_ok=True)

# JSON file that persists the progress state between sessions.
progress_file = f'{base_path}/progress_tracker.json'

# Initial state: one boolean flag per step plus bookkeeping fields.
default_progress = {
    'cell_1_setup': False,
    'cell_2_data_download': False,
    'cell_3_data_analysis': False,
    'cell_4_pattern_training': False,
    'cell_5_sentiment_training': False,
    'cell_6_rag_training': False,
    'cell_7_integration': False,
    'cell_8_testing': False,
    'cell_9_deployment': False,
    'last_cell_executed': None,
    'start_time': None,
    'last_update': None
}

# Load the saved tracker, or start a new one when it is missing or unreadable.
try:
    with open(progress_file, 'r') as f:
        _loaded_progress = json.load(f)
    if not isinstance(_loaded_progress, dict):
        # A tracker that is valid JSON but not an object cannot be used.
        raise ValueError('progress tracker is not a JSON object')
    # Merge over the defaults so that a file written by an older version of
    # the notebook still exposes every expected key.
    progress = {**default_progress, **_loaded_progress}
    print('üìä Suivi de progression charg√©')
except (OSError, ValueError):
    # OSError: file missing/unreadable; ValueError: invalid JSON or wrong type.
    progress = default_progress.copy()
    progress['start_time'] = datetime.now().isoformat()
    print('üÜï Nouveau suivi de progression initialis√©')

def update_progress(cell_name):
    """Flag ``cell_name`` as done and write the whole tracker back to disk.

    Updates the module-level ``progress`` dict in place (completion flag,
    last executed cell, timestamp) and rewrites ``progress_file`` as
    indented JSON.
    """
    progress[cell_name] = True
    progress['last_cell_executed'] = cell_name
    progress['last_update'] = datetime.now().isoformat()
    serialized = json.dumps(progress, indent=2)
    with open(progress_file, 'w') as tracker:
        tracker.write(serialized)
    print(f'‚úÖ Progression mise √† jour: {cell_name}')

def check_progress():
    """Print a progress summary and the next step to run.

    Reads the module-level ``progress`` and ``default_progress`` dicts and
    prints: the number of completed steps, the start/last-update timestamps,
    the last executed cell, a per-step status list, and a suggestion for the
    next cell. Returns nothing.
    """
    # (tracker key, label in the status list, short label in the suggestion)
    steps = [
        ('cell_1_setup', '1. Configuration initiale', 'Configuration'),
        ('cell_2_data_download', '2. T√©l√©chargement des donn√©es', 'T√©l√©chargement des donn√©es'),
        ('cell_3_data_analysis', '3. Analyse des donn√©es', 'Analyse des donn√©es'),
        ('cell_4_pattern_training', '4. Entra√Ænement Pattern Detector', 'Pattern Detector'),
        ('cell_5_sentiment_training', '5. Entra√Ænement Sentiment Analyzer', 'Sentiment Analyzer'),
        ('cell_6_rag_training', '6. Entra√Ænement RAG', 'RAG'),
        ('cell_7_integration', '7. Int√©gration', 'Int√©gration'),
        ('cell_8_testing', '8. Tests', 'Tests'),
        ('cell_9_deployment', '9. D√©ploiement', 'D√©ploiement')
    ]
    print('\nüìã √âtat actuel de la progression:')
    print('=' * 50)
    # Count only the steps declared in default_progress: update_progress() can
    # add arbitrary 'cell_*' keys, which must not push the ratio above 100%.
    known_steps = [k for k in default_progress if k.startswith('cell_')]
    completed = sum(1 for k in known_steps if progress.get(k) is True)
    total = len(known_steps)
    pct = (completed / total * 100) if total else 0.0
    print(f'üìä Progression: {completed}/{total} √©tapes compl√©t√©es ({pct:.1f}%)')
    # The bookkeeping keys exist with a None value in a fresh tracker, so a
    # .get() default would never apply; use `or` to show the placeholder.
    print(f'‚è∞ D√©marr√©: {progress.get("start_time") or "N/A"}')
    print(f'üîÑ Derni√®re mise √† jour: {progress.get("last_update") or "N/A"}')
    print(f'üìç Derni√®re cellule: {progress.get("last_cell_executed") or "Aucune"}')
    print('\nüìù Statut des √©tapes:')
    for step_key, step_name, _ in steps:
        status = '‚úÖ' if progress.get(step_key, False) else '‚è≥'
        print(f'  {status} {step_name}')
    print('=' * 50)
    # The first step that is not flagged as done is the next one to execute.
    pending = next(
        ((number, short) for number, (key, _, short) in enumerate(steps, start=1)
         if not progress.get(key, False)),
        None,
    )
    if pending is None:
        print('\nüéâ Toutes les √©tapes sont compl√©t√©es !')
    else:
        number, short = pending
        print(f'\nüöÄ Prochaine √©tape: Ex√©cuter la cellule {number} ({short})')

# Display the current state, then the usage instructions for the tracker.
check_progress()
print('\n'.join([
    '\nüí° Instructions:',
    "1. Ex√©cutez cette cellule pour voir l'√©tat d'avancement",
    '2. Chaque cellule mettra √† jour automatiquement sa progression',
    "3. Si le processus s'arr√™te, relancez simplement cette cellule",
    '4. Continuez avec la cellule sugg√©r√©e',
    '\nüîÑ Note: Le syst√®me est con√ßu pour supporter les arr√™ts/red√©marrages',
]))

## 1) Setup GPU/TPU et environnement Colab

In [None]:
import tensorflow as tf
import torch
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Select a distribution strategy: TPU first, then GPU(s), then the default
# (single-device) strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print('‚úÖ TPU d√©tect√©e et configur√©e')
except Exception:
    # No TPU reachable: fall back to GPU detection.
    try:
        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            for gpu in gpus:
                # Allocate GPU memory on demand instead of reserving it all.
                tf.config.experimental.set_memory_growth(gpu, True)
            strategy = tf.distribute.MirroredStrategy()
            print(f'‚úÖ {len(gpus)} GPU(s) d√©tect√©e(s)')
        else:
            strategy = tf.distribute.get_strategy()
            print('‚ö†Ô∏è Aucun GPU/TPU d√©tect√©, utilisation du CPU')
    except Exception as e:
        strategy = tf.distribute.get_strategy()
        print(f'‚ö†Ô∏è Erreur de configuration GPU: {e}')

# Mixed precision: the right policy depends on the accelerator. TPUs use
# bfloat16, GPUs use float16, and a CPU-only runtime stays in float32.
try:
    if isinstance(strategy, tf.distribute.TPUStrategy):
        policy_name = 'mixed_bfloat16'
    elif tf.config.list_physical_devices('GPU'):
        policy_name = 'mixed_float16'
    else:
        policy_name = None
    if policy_name is not None:
        policy = tf.keras.mixed_precision.Policy(policy_name)
        tf.keras.mixed_precision.set_global_policy(policy)
        print('‚úÖ Mixed precision activ√©e')
    else:
        print('‚ö†Ô∏è Mixed precision non disponible')
except Exception:
    print('‚ö†Ô∏è Mixed precision non disponible')

# Summary of the detected environment.
print('\nüìä Configuration:')
print(f'- TensorFlow: {tf.__version__}')
print(f'- PyTorch: {torch.__version__}')
print(f'- Strategy: {strategy}')
print(f"- GPUs disponibles: {tf.config.list_physical_devices('GPU')}")
if torch.cuda.is_available():
    print(f'- CUDA: {torch.version.cuda}')
    print(f'- GPU: {torch.cuda.get_device_name(0)}')
update_progress('cell_1_setup')

## 2) Montage Google Drive (résilient v2)

In [None]:
print('üîß Configuration Google Drive (r√©siliente v2)...')
from google.colab import drive
import os, shutil, time
MOUNT_POINT = '/content/drive'

def _safe_cleanup_mount_point(mp: str):
    try:
        if os.path.islink(mp):
            os.unlink(mp)
        if os.path.isdir(mp):
            for entry in os.listdir(mp):
                p = os.path.join(mp, entry)
                try:
                    if os.path.isfile(p) or os.path.islink(p): os.remove(p)
                    elif os.path.isdir(p): shutil.rmtree(p)
                except Exception:
                    pass
        else:
            os.makedirs(mp, exist_ok=True)
    except Exception as e:
        print(f'‚ö†Ô∏è Nettoyage mount point: {e}')

def _force_unmount():
    """Try to detach Google Drive, ignoring every failure.

    First asks the Colab helper to flush and unmount, then issues the
    ``fusermount`` and ``umount`` shell commands on ``/content/drive``.
    """
    try:
        drive.flush_and_unmount()
    except Exception:
        pass
    shell_commands = (
        'fusermount -u /content/drive 2>/dev/null || true',
        'umount /content/drive 2>/dev/null || true',
    )
    try:
        for command in shell_commands:
            os.system(command)
    except Exception:
        pass

# Detach any previous mount, clear local leftovers, then mount Drive.
_force_unmount(); time.sleep(1)
_safe_cleanup_mount_point(MOUNT_POINT); time.sleep(0.5)
try:
    drive.mount(MOUNT_POINT, force_remount=True)
    print('‚úÖ Drive mont√© (v2)')
except Exception as e:
    print(f'‚ùå drive.mount a √©chou√©: {e}')
    if 'Mountpoint must not already contain files' not in str(e):
        # Any other failure is not recoverable here. Continuing would create
        # the output folders on the local disk and mark the step as done.
        raise
    try:
        # Recoverable case: recreate an empty mount directory and retry once.
        shutil.rmtree(MOUNT_POINT, ignore_errors=True)
        os.makedirs(MOUNT_POINT, exist_ok=True)
        drive.mount(MOUNT_POINT, force_remount=True)
        print('‚úÖ Drive mont√© apr√®s recr√©ation du dossier')
    except Exception as e2:
        print(f'‚ö†Ô∏è Impossible de recr√©er {MOUNT_POINT}: {e2}')
        raise
# Create the output directory layout used by the cells that follow.
base_path = '/content/drive/MyDrive/AlphaBot_ML_Training'
for sub in ('data', 'models', 'checkpoints', 'logs', 'exports'):
    os.makedirs(f'{base_path}/{sub}', exist_ok=True)
print(f'üìÅ R√©pertoires pr√™ts sous: {base_path}')
# NOTE(review): this cell mounts Drive but records the 'data_download' step;
# confirm that the tracker keys are meant to follow the section numbers only.
update_progress('cell_2_data_download')

## 3) Setup du dépôt AlphaBot et dépendances Colab

In [None]:
import subprocess, sys, os
from pathlib import Path

# Clone the repository once; later runs reuse the existing checkout.
if not Path('/content/alphabot').exists():
    print('üì• Clonage du d√©p√¥t AlphaBot...')
    subprocess.run(['git', 'clone', 'https://github.com/thomy03/alphabot.git', '/content/alphabot'], check=True)
else:
    print('üìÇ D√©p√¥t AlphaBot d√©j√† pr√©sent')

# Run pip through the interpreter executing this notebook so the packages are
# installed into the environment the kernel actually imports from.
_pip_install = [sys.executable, '-m', 'pip', 'install']
print('üì¶ Installation des d√©pendances...')
try:
    subprocess.run(_pip_install + ['-r', '/content/alphabot/requirements_colab.txt'], check=True)
    print('‚úÖ D√©pendances install√©es')
except subprocess.CalledProcessError as e:
    # The requirements file failed as a whole: install the core packages one
    # by one so that a single broken package does not block the others.
    print(f'‚ö†Ô∏è Erreur requirements_colab.txt: {e}')
    for pkg in ['tensorflow','torch','transformers','sentence-transformers','faiss-cpu','yfinance','pandas','numpy','scikit-learn','matplotlib','seaborn','tqdm','requests']:
        try:
            subprocess.run(_pip_install + [pkg], check=True)
            print(f'‚úÖ {pkg} install√©')
        except Exception as e2:
            print(f'‚ùå {pkg} √©chec: {e2}')

# Make the checkout importable; skip entries already present so that
# re-running the cell does not keep growing sys.path.
for _path_entry in ('/content', '/content/alphabot'):
    if _path_entry not in sys.path:
        sys.path.append(_path_entry)
# NOTE(review): this cell sets up the repository but records the
# 'data_analysis' step; confirm the intended key.
update_progress('cell_3_data_analysis')

## 4) Téléchargement des données (robuste)

In [None]:
import yfinance as yf, pandas as pd, numpy as np
from datetime import datetime, timedelta
import time, pickle, os
# Tickers to fetch and the two-year window that ends at the current time.
symbols = ['AAPL', 'GOOGL', 'MSFT', 'TSLA', 'AMZN']
end_date = datetime.now()
start_date = end_date - timedelta(days=2 * 365)
print(f'üì• T√©l√©chargement pour {symbols} sur 2 ans...')
def safe_download(symbol, start, end, tries=3, sleep_s=1.0):
    """Download price history for ``symbol`` with a bounded number of retries.

    Calls ``yf.download`` up to ``tries`` times, waiting ``sleep_s`` seconds
    between attempts, and returns the first non-empty result.

    Returns:
        The downloaded frame, or ``None`` when every attempt raised or
        returned an empty result. The last exception seen, if any, is printed.
    """
    last_err = None
    for attempt in range(tries):
        try:
            df = yf.download(symbol, start=start, end=end, auto_adjust=False)
            if df is not None and not df.empty:
                return df
        except Exception as e:
            last_err = e
        # Wait only when another attempt follows; sleeping after the final
        # failure would just delay the caller.
        if attempt < tries - 1:
            time.sleep(sleep_s)
    if last_err is not None:
        print(f'‚ùå {symbol}: {last_err}')
    return None
# Download every symbol, keeping only those that returned rows.
all_data = {}
for s in symbols:
    d = safe_download(s, start_date, end_date)
    if d is None or d.empty:
        print(f'‚ùå {s}: vide')
        continue
    all_data[s] = d
    print(f'‚úÖ {s}: {len(d)} jours')

# Persist the collected frames as one pickle file for the next cells.
data_path = f'{base_path}/data/market_data.pkl'
os.makedirs(f'{base_path}/data', exist_ok=True)
with open(data_path, 'wb') as f:
    pickle.dump(all_data, f)
print(f'üíæ Donn√©es sauvegard√©es: {data_path}')
update_progress('cell_4_pattern_training')

## 5) Entraînement Pattern Detector — L4 GPU sans cuDNN (non-fused) + fallback sûrs

In [None]:
import os
# Disable the cuDNN RNN operator to force the generic code path compatible with L4.
# NOTE(review): the setup cell already imports tensorflow before this line runs;
# confirm that this variable is still taken into account at that point.
os.environ['TF_KERAS_ALLOW_CUDNN_RNN']='0'
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
import numpy as np, pickle, matplotlib.pyplot as plt
# Reload the market data saved by the download cell.
try:
    with open(f'{base_path}/data/market_data.pkl','rb') as f:
        all_data = pickle.load(f)
    print('‚úÖ Donn√©es charg√©es')
except Exception:
    # Only a message is printed: all_data is not assigned in this branch, so
    # the code below relies on a value left by a previous cell, if any.
    print('‚ùå Donn√©es introuvables ‚Äî ex√©cutez la cellule 4')

def prepare_pattern_training_data(all_data):
    """Build sliding-window training samples from per-symbol price frames.

    For every frame in ``all_data`` (a mapping symbol -> DataFrame), each run
    of 30 consecutive rows becomes one sample with three features per row
    (close, volume, high - low). The label compares the mean close of the
    following 5 rows with the last close of the window: 2 when the relative
    change is above +2%, 0 when it is below -2%, 1 otherwise.

    Column names are matched case-insensitively. MultiIndex columns are
    flattened with '_' first, so both plain names ('Close') and flattened
    ones ('Close_AAPL', 'AAPL_Close') are recognised.

    Returns:
        ``(X, y)``: ``X`` is a float32 array of shape (samples, 30, 3) and
        ``y`` an int32 array of shape (samples,). With no usable sample both
        arrays are empty.
    """
    import pandas as pd

    window, horizon = 30, 5
    fields = ('Close', 'Volume', 'High', 'Low')

    def normalize(df):
        """Flatten the column labels in place and map each field to a column."""
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = ['_'.join([str(c) for c in col if c is not None]) for col in df.columns]
        else:
            df.columns = [str(c) for c in df.columns]

        def find(field):
            key = field.lower()
            # Ordered from strictest to loosest. The case-sensitive affix test
            # runs before the case-insensitive one so that a ticker spelled
            # like a field (e.g. 'LOW') is not mistaken for the field itself.
            tests = (
                lambda n: n.strip().lower() == key,
                lambda n: n.startswith(field + '_') or n.endswith('_' + field),
                lambda n: n.lower().startswith(key + '_') or n.lower().endswith('_' + key),
            )
            for test in tests:
                for name in df.columns:
                    if test(name):
                        return name
            return None

        return {field: find(field) for field in fields}

    X, y = [], []
    for sym, df in all_data.items():
        try:
            if df is None or not hasattr(df, 'empty') or df.empty:
                continue
            df = df.copy().sort_index()
            cols = normalize(df)
            if not all(cols.get(k) for k in fields):
                continue
            close, vol, hi, lo = (cols[k] for k in fields)
            df = df.dropna(subset=[close, vol, hi, lo])
            n = len(df)
            if n < window + horizon:
                continue
            # The last valid start index is n - (window + horizon), inclusive.
            for i in range(n - window - horizon + 1):
                seq = df.iloc[i:i + window]
                fut = df.iloc[i + window:i + window + horizon]
                c = np.asarray(seq[close].values, dtype=np.float32).reshape(-1, 1)
                v = np.asarray(seq[vol].values, dtype=np.float32).reshape(-1, 1)
                s = np.asarray((seq[hi] - seq[lo]).values, dtype=np.float32).reshape(-1, 1)
                feat = np.concatenate([c, v, s], axis=1)
                if feat.shape != (window, 3):
                    continue
                cur = float(seq[close].iloc[-1])
                if cur == 0:
                    # The relative return is undefined for a zero reference.
                    continue
                fm = float(np.mean(fut[close].values))
                ret = (fm - cur) / cur
                label = 2 if ret > 0.02 else (0 if ret < -0.02 else 1)
                X.append(feat)
                y.append(label)
        except Exception as e:
            # One malformed frame must not abort the others, but say so.
            print(f'[skip] {sym}: {e}')
            continue
    X = np.array(X, dtype=np.float32)
    y = np.array(y, dtype=np.int32)
    print(f'‚úÖ Pr√©paration: X={X.shape}, y={y.shape}')
    return X, y

# Build the training set from the downloaded market data.
X_train,y_train=prepare_pattern_training_data(all_data)
print(f'üìä √âchantillons: {X_train.shape[0]}')
# Use the current default strategy for this model (replaces the one chosen in
# the setup cell).
strategy=tf.distribute.get_strategy()
with strategy.scope():
    inputs=tf.keras.Input(shape=(30,3),name='input')
    x=tf.keras.layers.BatchNormalization()(inputs)
    # dropout/recurrent_dropout > 0 keeps the LSTM off the fused cuDNN kernel.
    x=tf.keras.layers.LSTM(64,return_sequences=False,activation='tanh',recurrent_activation='sigmoid',
                           use_bias=True,unit_forget_bias=True,unroll=False,
                           dropout=0.1,recurrent_dropout=0.1,
                           kernel_initializer='glorot_uniform',recurrent_initializer='orthogonal',
                           name='lstm_main_ncudnn')(x)
    x=tf.keras.layers.Dropout(0.3)(x)
    x=tf.keras.layers.Dense(32,activation='relu')(x)
    x=tf.keras.layers.BatchNormalization()(x)
    x=tf.keras.layers.Dropout(0.3)(x)
    # The setup cell may enable a float16 policy; the softmax output is kept in
    # float32 so that the probabilities and the loss stay numerically stable.
    outputs=tf.keras.layers.Dense(3,activation='softmax',dtype='float32',name='output')(x)
    model=tf.keras.Model(inputs,outputs,name='l4_ncudnn_lstm_model')
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),loss='sparse_categorical_crossentropy',metrics=['accuracy'])
print('‚úÖ Mod√®le L4 (non-cuDNN) cr√©√©')
X_train=X_train.astype(np.float32); y_train=y_train.astype(np.int32)
if X_train.shape[0]==0:
    # No real sample available: train on random data so the pipeline still runs.
    print('‚ö†Ô∏è Dataset vide. G√©n√©ration synth√©tique minimale...')
    X_train=np.random.randn(256,30,3).astype(np.float32)
    y_train=np.random.randint(0,3,size=(256,)).astype(np.int32)
# Standardise each of the 3 features over all time steps of all samples.
scaler=StandardScaler(); Xs=scaler.fit_transform(X_train.reshape(-1,3)).reshape(X_train.shape)
callbacks=[tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True,monitor='val_loss',verbose=1),
           tf.keras.callbacks.ReduceLROnPlateau(factor=0.5,patience=3,monitor='val_loss',verbose=1)]
try:
    hist=model.fit(Xs,y_train,epochs=10,batch_size=64,validation_split=0.2,callbacks=callbacks,verbose=1)
    print('‚úÖ Entra√Ænement termin√©')
except Exception as e:
    # The LSTM could not be trained: fall back to a dense-only classifier.
    print(f'‚ùå Erreur entra√Ænement (GPU non-cuDNN). Fallback Dense-only: {e}')
    with tf.distribute.get_strategy().scope():
        cpu_model=tf.keras.Sequential([
            tf.keras.layers.Input(shape=(30,3)),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(128,activation='relu'), tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(64,activation='relu'),  tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(32,activation='relu'),
            tf.keras.layers.Dense(3,activation='softmax',dtype='float32')
        ])
        cpu_model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
    hist=cpu_model.fit(Xs,y_train,epochs=8,batch_size=64,validation_split=0.2,verbose=1)
    model=cpu_model
    print('‚úÖ Mod√®le Dense-only entra√Æn√©')
import os
os.makedirs(f'{base_path}/models',exist_ok=True)
model.save(f'{base_path}/models/pattern_model_l4_ncudnn.keras')
with open(f'{base_path}/models/pattern_scaler.pkl','wb') as f: pickle.dump(scaler,f)
print('üíæ Mod√®le/scaler sauvegard√©s')
# Training curves: one panel per metric, drawn from whichever series exist.
# The figure is always shown, even when a validation series is missing.
try:
    plt.figure(figsize=(12,4))
    for _panel,(_metric,_title) in enumerate((('accuracy','Accuracy'),('loss','Loss')),start=1):
        plt.subplot(1,2,_panel)
        for _key,_label in ((_metric,'train'),(f'val_{_metric}','val')):
            if _key in hist.history:
                plt.plot(hist.history[_key],label=_label)
        plt.title(_title); plt.legend()
    plt.tight_layout(); plt.show()
except Exception as e:
    # Plotting is optional; report the problem without failing the cell.
    print(f'[plot] {e}')
# NOTE(review): this cell trains the pattern detector but records the
# 'sentiment_training' step; confirm the intended key.
update_progress('cell_5_sentiment_training')

## 6) Entraînement Sentiment Analyzer (FinBERT)

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset
import os
class SentimentDataset(Dataset):
    """Torch dataset pairing raw texts with integer labels.

    Each item is tokenized on access, truncated/padded to ``max_length``, and
    returned as a dict with ``input_ids``, ``attention_mask`` and ``labels``.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        target = self.labels[idx]
        encoded = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        # The tokenizer output is flattened to 1-D tensors per sample.
        sample = {field: encoded[field].flatten() for field in ('input_ids', 'attention_mask')}
        sample['labels'] = torch.tensor(target)
        return sample
# Tiny demonstration corpus: five headlines with hand-assigned labels.
texts=[
    'Apple reports record quarterly earnings',
    'Google stock drops on regulatory concerns',
    'Tesla announces new battery technology',
    'Microsoft cloud growth exceeds expectations',
    'Amazon faces antitrust investigation']
# NOTE(review): these ids must follow the label order of the pretrained
# checkpoint (see its id2label mapping) - confirm that 2/0 mean what is
# intended for 'yiyanghkust/finbert-tone'.
labels=[2,0,2,2,0]
model_name='yiyanghkust/finbert-tone'
tok=AutoTokenizer.from_pretrained(model_name)
mdl=AutoModelForSequenceClassification.from_pretrained(model_name)
ds=SentimentDataset(texts,labels,tok)
# No evaluation argument is passed: evaluation is off by default, and the name
# of that argument differs between transformers releases.
args=TrainingArguments(output_dir=f'{base_path}/checkpoints/sentiment',num_train_epochs=1,per_device_train_batch_size=4,
                       logging_dir=f'{base_path}/logs/sentiment',logging_steps=1,save_strategy='no')
# The dataset already pads every sample to max_length, so the default collator
# is enough and the tokenizer does not need to be handed to the Trainer; it is
# saved explicitly below.
trainer=Trainer(model=mdl,args=args,train_dataset=ds)
print('üöÄ Entra√Ænement FinBERT...'); trainer.train()
os.makedirs(f'{base_path}/models',exist_ok=True)
mdl.save_pretrained(f'{base_path}/models/finbert_sentiment'); tok.save_pretrained(f'{base_path}/models/finbert_sentiment')
print('‚úÖ FinBERT sauvegard√©')
# NOTE(review): this cell trains the sentiment model but records the
# 'rag_training' step; confirm the intended key.
update_progress('cell_6_rag_training')

## 7) RAG Integrator (Embeddings + FAISS)

In [None]:
from sentence_transformers import SentenceTransformer
import faiss, numpy as np, pickle, os
# Small demonstration corpus: one short description per company.
docs=[
 'Apple Inc. is a technology company...',
 'Alphabet Inc. provides various products...',
 'Microsoft Corporation develops software...',
 'Amazon.com, Inc. engages in retail...',
 'Tesla, Inc. designs electric vehicles...']
emb=SentenceTransformer('all-MiniLM-L6-v2')
# FAISS indexes take float32 matrices; make the dtype explicit.
vecs=np.asarray(emb.encode(docs),dtype=np.float32)
# Exact L2 index sized to the embedding dimension.
index=faiss.IndexFlatL2(vecs.shape[1]); index.add(vecs)
# Create the target folder here too, as the other training cells do, so this
# cell does not depend on the Drive cell having created it.
os.makedirs(f'{base_path}/models',exist_ok=True)
faiss.write_index(index,f'{base_path}/models/faiss_index.bin')
with open(f'{base_path}/models/documents.pkl','wb') as f: pickle.dump(docs,f)
print('‚úÖ Index/documents sauvegard√©s')
# NOTE(review): this cell builds the RAG index but records the 'integration'
# step; confirm the intended key.
update_progress('cell_7_integration')

## 8) Intégration et tests rapides

In [None]:
import json, numpy as np, pickle
# Imported here as well so that this cell can run on its own after a runtime
# restart, when the names imported by earlier cells are gone.
import os
from datetime import datetime
import tensorflow as tf
print('üîß Int√©gration finale...')

# Each artifact is reloaded independently and its outcome recorded, so the
# report reflects what actually loaded instead of assuming success.
model_status={'pattern':'ok','sentiment':'ok','rag':'ok'}

try:
    pm=tf.keras.models.load_model(f'{base_path}/models/pattern_model_l4_ncudnn.keras')
    print('‚úÖ Pattern model charg√©')
except Exception as e:
    model_status['pattern']=f'error: {e}'
    print(f'‚ùå Erreur chargement: {e}')

try:
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    st=AutoTokenizer.from_pretrained(f'{base_path}/models/finbert_sentiment')
    sm=AutoModelForSequenceClassification.from_pretrained(f'{base_path}/models/finbert_sentiment')
    print('‚úÖ FinBERT charg√©')
except Exception as e:
    model_status['sentiment']=f'error: {e}'
    print(f'‚ùå Erreur chargement: {e}')

try:
    import faiss
    fx=faiss.read_index(f'{base_path}/models/faiss_index.bin')
    # The pickle was written by the RAG cell of this notebook.
    with open(f'{base_path}/models/documents.pkl','rb') as f: DD=pickle.load(f)
    print('‚úÖ FAISS/documents charg√©s')
except Exception as e:
    model_status['rag']=f'error: {e}'
    print(f'‚ùå Erreur chargement: {e}')

# Same report layout as before; a value is 'ok' or 'error: <reason>'.
report={'timestamp': datetime.now().isoformat(),'models_trained':model_status}
os.makedirs(f'{base_path}/exports',exist_ok=True)
rp=f'{base_path}/exports/performance_report.json'
with open(rp,'w') as f: json.dump(report,f,indent=2)
print(f'üìä Rapport sauvegard√©: {rp}')
update_progress('cell_8_testing')
print('‚úÖ Pipeline termin√©')