# Pipeline Principal - Prédiction des Allocations d'Actifs

Ce notebook orchestre l'ensemble des modules du projet pour exécuter, étape par étape, un pipeline de prédiction complet et modulaire.

In [None]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by any library used downstream — confirm it is intended.
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt

# Project-local pipeline modules
from data_loading import load_data, explore_data
from data_preparation import prepare_data, split_temporal_data, prepare_features_for_modeling
from feature_engineering import create_all_features
from exploratory_analysis import analyze_correlations, plot_correlations, analyze_features_by_class
from model_training import train_base_models, create_results_dataframe
from model_training import train_lightgbm
from cross_validation import perform_time_series_cross_validation, plot_cv_results, get_feature_importance
from predictions import train_final_model, generate_predictions_with_baseline
from utils import plot_model_comparison, print_summary_report

# Reached only if every import above succeeded.
print("✓ Tous les modules importés avec succès")

## ÉTAPE 1: Chargement et Exploration des Données

In [None]:
# load_data() returns four objects, unpacked in this order. X_train, y_train
# and X_test feed the preparation step; sample_submission is only used again
# when generating the final predictions.
X_train, y_train, X_test, sample_submission = load_data()
# Exploration helper from data_loading; any return value is discarded here.
explore_data(X_train, y_train, X_test)

## ÉTAPE 2: Préparation des Données

In [None]:
# prepare_data() returns two objects, bound to train_data and X_test.
# NOTE(review): X_test is rebound to the function's output, so re-running this
# cell alone feeds the already-prepared X_test back in — re-run the loading
# cell first unless prepare_data is known to be idempotent.
train_data, X_test = prepare_data(X_train, y_train, X_test)

## ÉTAPE 3: Ingénierie des Features

In [None]:
# create_all_features() returns updated train_data and X_test plus
# all_features, which every later step receives as its feature list.
# NOTE(review): train_data and X_test are rebound in place, so re-running this
# cell alone applies the feature step to its own previous output.
train_data, X_test, all_features = create_all_features(train_data, X_test)

## ÉTAPE 4: Analyse Exploratoire

In [None]:
# Correlation analysis over all_features; the result is reused by the
# per-class analysis below.
correlations = analyze_correlations(train_data, all_features)
# Plotting helper; any return value is discarded here.
plot_correlations(train_data, all_features)
# Per-class feature analysis; comparison_df is not referenced again in the
# cells visible in this notebook.
comparison_df = analyze_features_by_class(train_data, all_features, correlations)

## ÉTAPE 5: Préparation pour la Modélisation

In [None]:
# prepare_features_for_modeling() returns three objects: X, y and
# X_test_features (presumably the train features, train target and test
# features restricted to all_features — confirm in data_preparation).
X, y, X_test_features = prepare_features_for_modeling(
    train_data, X_test, all_features
)

# Temporal split: two indexers over train_data, one per subset.
train_mask, test_mask = split_temporal_data(train_data)

# Apply each indexer to X and y together so features and targets stay paired.
X_train_split, y_train_split = X[train_mask], y[train_mask]
X_val_split, y_val_split = X[test_mask], y[test_mask]

## ÉTAPE 6: Entraînement des Modèles de Base

In [None]:
# Run the baseline models on the temporal train/validation split and
# collect their results into a summary table.
results = train_base_models(
    X_train_split,
    y_train_split,
    X_val_split,
    y_val_split,
)
summary_df = create_results_dataframe(results)

# Model comparison figure
fig, ax = plot_model_comparison(summary_df)
plt.show()

## ÉTAPE 7: Cross-Validation

In [None]:
# Time-series cross-validation over the full X / y. The call returns four
# objects: the scores, the fitted models, and the mean and standard deviation
# of the scores (per the names they are bound to).
(
    cv_scores,
    cv_models,
    mean_score,
    std_score,
) = perform_time_series_cross_validation(X, y, train_data, all_features)

# Plot the cross-validation scores with their mean and spread.
plot_cv_results(cv_scores, mean_score, std_score)

# Feature importance derived from the cross-validation models.
feature_importance_df = get_feature_importance(cv_models, all_features)

## ÉTAPE 8: Prédictions Finales

In [None]:
# Final model, trained on the full X / y rather than on the temporal split.
model_final = train_final_model(X, y, all_features)

# Predictions for the test features. The call also receives the
# cross-validation models and the sample submission, and returns a second
# object (submissions_info) that is passed on to the summary report.
predictions, submissions_info = generate_predictions_with_baseline(
    X,
    y,
    X_test_features,
    model_final,
    cv_models,
    sample_submission,
)

## ÉTAPE 9: Résumé Exécutif

In [None]:
# Final report: passes the data, the feature list, the cross-validation
# scores and their spread, the feature importances, the baseline summary and
# the submission info. mean_score is not passed to the report.
print_summary_report(
    X,
    y,
    X_test_features,
    all_features,
    cv_scores,
    std_score,
    feature_importance_df,
    summary_df,
    submissions_info,
)