# Load data

In [None]:
import os
import random
import pandas as pd


# Root of the Kaggle dataset inside the kagglehub cache. It is built from the
# current user's home directory so the notebook is not tied to one account.
dataset_path = os.path.join(
    os.path.expanduser("~"),
    ".cache",
    "kagglehub",
    "datasets",
    "borismarjanovic",
    "price-volume-data-for-all-us-stocks-etfs",
    "versions",
    "3",
)
subdirs = ["Stocks", "ETFs"]


# Map each expected sub-directory to its full path, keeping only the ones that
# exist on disk.
candidate_paths = {subdir: os.path.join(dataset_path, subdir) for subdir in subdirs}
available_subdirs = {subdir: path for subdir, path in candidate_paths.items() if os.path.exists(path)}


# File names found in each sub-directory (empty list when it is missing).
stock_files = os.listdir(available_subdirs["Stocks"]) if "Stocks" in available_subdirs else []
etf_files = os.listdir(available_subdirs["ETFs"]) if "ETFs" in available_subdirs else []

def load_data_with_dates(files, subdir):
    """Load every non-empty ``.txt`` price file of one dataset sub-directory.

    Args:
        files: File names (not paths) to consider; names that do not end in
            ``.txt`` are skipped.
        subdir: Key looked up in the module-level ``available_subdirs``
            mapping to find the directory that contains ``files``.

    Returns:
        A list with one dict per file that has a ``Date`` column. Each dict
        holds ``df`` (rows sorted by date, plus an ``Asset`` column with the
        file name minus its ``.txt`` extension), ``start_date`` and
        ``end_date`` (earliest and latest ``Date``). The list is empty when
        ``subdir`` is not in ``available_subdirs``.
    """
    subdir_path = available_subdirs.get(subdir)
    if not subdir_path:
        return []

    asset_data = []
    for file in files:
        if not file.endswith(".txt"):
            continue

        file_path = os.path.join(subdir_path, file)
        try:
            # Zero-byte files are skipped silently.
            if os.path.getsize(file_path) == 0:
                continue
            df = pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            # Non-zero size but nothing parseable (e.g. whitespace only).
            print(f"⚠️ Fichier vide ignoré : {file_path}")
            continue

        if 'Date' not in df.columns:
            continue

        df['Date'] = pd.to_datetime(df['Date'])
        df = df.sort_values(by='Date')

        # Strip only the trailing extension: str.replace would also remove any
        # ".txt" that occurs inside the name itself.
        df['Asset'] = os.path.splitext(file)[0]

        asset_data.append({
            'df': df,
            'start_date': df['Date'].min(),
            'end_date': df['Date'].max(),
        })

    return asset_data


stocks_data = load_data_with_dates(stock_files, "Stocks")
etfs_data = load_data_with_dates(etf_files, "ETFs")


# Most recent date reached by any asset of each class. ``default=None`` keeps
# the notebook running when a class has no data at all (max() of an empty
# sequence raises ValueError); the filters below then select nothing.
max_date_stocks = max((data['end_date'] for data in stocks_data), default=None)
max_date_etfs = max((data['end_date'] for data in etfs_data), default=None)


# Keep stocks whose history starts between 1960 and 1990 and runs up to the
# latest date seen among stocks.
filtered_stocks = [
    data['df'] for data in stocks_data
    if pd.Timestamp("1960-01-01") <= data['start_date'] <= pd.Timestamp("1990-12-31")
    and data['end_date'] == max_date_stocks
]

# Same idea for ETFs, with a 1995-2005 start window.
filtered_etfs = [
    data['df'] for data in etfs_data
    if pd.Timestamp("1995-01-01") <= data['start_date'] <= pd.Timestamp("2005-12-31")
    and data['end_date'] == max_date_etfs
]

print(f"📊 Nombre d'actions disponibles après filtrage : {len(filtered_stocks)}")
print(f"📊 Nombre d'ETFs disponibles après filtrage : {len(filtered_etfs)}")


# Random subset: at most 60 stocks and 160 ETFs.
# NOTE(review): no seed is set in this cell, so the selection presumably
# changes from run to run — confirm whether that is intended.
filtered_stocks = random.sample(filtered_stocks, min(60, len(filtered_stocks)))
filtered_etfs = random.sample(filtered_etfs, min(160, len(filtered_etfs)))


# Stack the per-asset frames into one frame per class (empty frame if none).
df_stocks = pd.concat(filtered_stocks, ignore_index=True) if filtered_stocks else pd.DataFrame()
df_etfs = pd.concat(filtered_etfs, ignore_index=True) if filtered_etfs else pd.DataFrame()


print("✅ Nombre d'actions retenues :", df_stocks['Asset'].nunique() if not df_stocks.empty else 0)
print("✅ Nombre d'ETFs retenus :", df_etfs['Asset'].nunique() if not df_etfs.empty else 0)

# Cell output: first rows of both frames.
df_stocks.head(), df_etfs.head()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One scatter plot per asset class: a point for every (date, asset) row, which
# shows the period covered by each asset.
for frame, point_color, chart_title, y_label in (
    (df_stocks, "blue", "Répartition des Actions par Date", "Actions"),
    (df_etfs, "red", "Répartition des ETFs par Date", "ETFs"),
):
    plt.figure(figsize=(20, 10))
    sns.scatterplot(x=frame['Date'], y=frame['Asset'], alpha=0.5, color=point_color)
    plt.title(chart_title)
    plt.xlabel("Date")
    plt.ylabel(y_label)

    # Small horizontal y labels and tilted x labels to limit overlap.
    plt.yticks(rotation=0, fontsize=8)
    plt.xticks(rotation=45)

    plt.grid(True, linestyle="--", alpha=0.5)
    plt.show()


In [None]:
# Show the column index of each combined frame.
for frame in (df_stocks, df_etfs):
    print(frame.columns)


# Analyse de données


In [182]:
import matplotlib.pyplot as plt
import seaborn as sns

# Drop rows with missing values. The explicit copy makes each cleaned frame an
# independent object, so columns assigned to it afterwards are not flagged by
# pandas as writes to a slice of the original frame (SettingWithCopyWarning).
df_stocks_clean = df_stocks.dropna().copy()
df_etfs_clean = df_etfs.dropna().copy()


In [None]:
# Show the columns that remain in each cleaned frame.
for message, frame in (
    ("Colonnes disponibles dans df_stocks:", df_stocks_clean),
    ("Colonnes disponibles dans df_etfs:", df_etfs_clean),
):
    print(message, frame.columns)


In [None]:
def _top_performers(clean_frame):
    """Return the 10 assets with the highest first-to-last Close return (%).

    The result is indexed by Asset and has the columns ``first``, ``last``
    and ``Return``, sorted by ``Return`` in descending order.
    """
    endpoints = clean_frame.groupby("Asset")['Close'].agg(['first', 'last'])
    endpoints['Return'] = (endpoints['last'] - endpoints['first']) / endpoints['first'] * 100
    return endpoints.sort_values(by="Return", ascending=False).head(10)


df_stocks_perf = _top_performers(df_stocks_clean)
df_etfs_perf = _top_performers(df_etfs_clean)

# Side-by-side bar charts of the two top-10 rankings.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

panels = (
    (axes[0], df_stocks_perf, "Blues_r", "Top 10 des actions les plus performantes (%)"),
    (axes[1], df_etfs_perf, "Greens_r", "Top 10 des ETFs les plus performants (%)"),
)
for panel_ax, perf, palette_name, panel_title in panels:
    sns.barplot(x=perf.index, y=perf['Return'], ax=panel_ax, palette=palette_name)
    panel_ax.set_title(panel_title)
    panel_ax.set_xticklabels(perf.index, rotation=45)

plt.tight_layout()
plt.show()


In [None]:
# Standard deviation of Close for each asset, then the mean of those values
# within each asset class.
per_stock_std = df_stocks_clean.groupby("Asset")['Close'].std()
per_etf_std = df_etfs_clean.groupby("Asset")['Close'].std()
volatility_stocks = per_stock_std.mean()
volatility_etfs = per_etf_std.mean()

class_labels = ["Actions", "ETFs"]
class_volatility = [volatility_stocks, volatility_etfs]

plt.figure(figsize=(8, 6))
sns.barplot(x=class_labels, y=class_volatility, palette=["blue", "green"])
plt.title("Volatilité moyenne des actions et des ETFs")
plt.ylabel("Écart-type du prix de clôture")
plt.show()


In [None]:
# Recharger les bibliothèques nécessaires après la réinitialisation
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Uses df_stocks_clean and df_etfs_clean, which must already be defined when
# this cell runs.

# Median Close over all rows of each asset class.
median_by_class = {
    "Actions": df_stocks_clean['Close'].median(),
    "ETFs": df_etfs_clean['Close'].median(),
}
median_stock_price = median_by_class["Actions"]
median_etf_price = median_by_class["ETFs"]

# Bar chart comparing the two medians.
plt.figure(figsize=(8, 6))
sns.barplot(x=list(median_by_class), y=list(median_by_class.values()), palette=["blue", "green"])
plt.title("Comparaison de la médiane des prix de clôture des actions et des ETFs")
plt.ylabel("Médiane du prix de clôture")
plt.show()

# Cell output: the two medians as numbers.
median_stock_price, median_etf_price


In [None]:
# Close grouped by asset, built once and reused for every statistic below.
stock_close_by_asset = df_stocks_clean.groupby("Asset")['Close']
etf_close_by_asset = df_etfs_clean.groupby("Asset")['Close']

# (stocks statistic, ETFs statistic, histogram title, value label, boxplot title)
per_asset_summaries = (
    (
        stock_close_by_asset.median(),
        etf_close_by_asset.median(),
        "Distribution des médianes des prix de clôture par entreprise (Actions vs ETFs)",
        "Médiane du prix de clôture",
        "Boxplot des médianes des prix de clôture (Actions vs ETFs)",
    ),
    (
        stock_close_by_asset.std(),
        etf_close_by_asset.std(),
        "Distribution de la volatilité des prix de clôture par entreprise (Actions vs ETFs)",
        "Écart-type du prix de clôture",
        "Boxplot de la volatilité des prix de clôture (Actions vs ETFs)",
    ),
)

for stock_stat, etf_stat, hist_title, value_label, box_title in per_asset_summaries:
    # Overlaid histograms of the per-asset statistic for both classes.
    plt.figure(figsize=(12, 6))
    sns.histplot(stock_stat, bins=50, kde=True, color="blue", label="Actions")
    sns.histplot(etf_stat, bins=50, kde=True, color="green", label="ETFs")
    plt.title(hist_title)
    plt.xlabel(value_label)
    plt.ylabel("Fréquence")
    plt.legend()
    plt.show()

    # Boxplot of the same statistic, one box per class.
    plt.figure(figsize=(12, 6))
    sns.boxplot(data=[stock_stat, etf_stat], palette=["blue", "green"])
    plt.xticks([0, 1], ["Actions", "ETFs"])
    plt.title(box_title)
    plt.ylabel(value_label)
    plt.show()


In [None]:
import scipy.stats as stats
import numpy as np

# Rows of the cleaned frames that belong to the top-10 assets of each class.
top_10_stocks = df_stocks_clean[df_stocks_clean['Asset'].isin(df_stocks_perf.index)]
top_10_etfs = df_etfs_clean[df_etfs_clean['Asset'].isin(df_etfs_perf.index)]

# (rows, tickers, histogram colour, label used in the subplot titles)
asset_groups = (
    (top_10_stocks, df_stocks_perf.index, "blue", "Action"),
    (top_10_etfs, df_etfs_perf.index, "green", "ETF"),
)

# One 5x2 grid of Close histograms per asset class.
for group_rows, tickers, hist_color, kind_label in asset_groups:
    fig, axes = plt.subplots(5, 2, figsize=(14, 20))

    for position, asset in enumerate(tickers):
        grid_row, grid_col = divmod(position, 2)
        closes = group_rows.loc[group_rows['Asset'] == asset, 'Close']
        sns.histplot(closes, bins=30, kde=True, ax=axes[grid_row, grid_col], color=hist_color)
        axes[grid_row, grid_col].set_title(f"Distribution du prix de clôture - {asset} ({kind_label})")

    plt.tight_layout()
    plt.show()

# For each top-10 asset, fit several candidate distributions to Close and keep
# the name of the one with the highest Kolmogorov-Smirnov p-value.
distributions = ['norm', 'lognorm', 'expon', 'gamma']
fitted_results = {}

for group_rows, tickers, _, _ in asset_groups:
    for asset in tickers:
        closes = group_rows.loc[group_rows['Asset'] == asset, 'Close']
        p_values = {}

        for dist_name in distributions:
            fitted_params = getattr(stats, dist_name).fit(closes)
            _, p_value = stats.kstest(closes, dist_name, args=fitted_params)
            p_values[dist_name] = p_value

        fitted_results[asset] = max(p_values, key=p_values.get)

# Cell output: best-fitting distribution name per asset.
fitted_results


# Machine Learning

In [190]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Création des features

In [216]:
# Rolling indicators on Close, computed within each asset so that a window
# never mixes rows of two different assets.
for frame in (df_stocks_clean, df_etfs_clean):
    close_by_asset = frame.groupby('Asset')['Close']

    # Simple moving averages over 10 and 50 rows.
    sma_10 = close_by_asset.transform(lambda x: x.rolling(window=10).mean())
    sma_50 = close_by_asset.transform(lambda x: x.rolling(window=50).mean())

    # Volatility: standard deviation over 10 rows.
    volatility_10 = close_by_asset.transform(lambda x: x.rolling(window=10).std())

    # Momentum: Close minus the same asset's Close 10 rows earlier.
    momentum_10 = frame['Close'] - close_by_asset.shift(10)

    frame['SMA_10'] = sma_10
    frame['SMA_50'] = sma_50
    frame['Volatility_10'] = volatility_10
    frame['Momentum_10'] = momentum_10

# RSI (Relative Strength Index)
def compute_rsi(series, window=14):
    """Return the RSI of ``series`` based on simple rolling means.

    Args:
        series: pandas Series of prices.
        window: Number of rows in the rolling window (default 14).

    Returns:
        A Series aligned with ``series``: ``100 - 100 / (1 + RS)``, where RS
        is the rolling mean of gains divided by the rolling mean of losses.
        Positions with fewer than ``window`` rows available are NaN.
    """
    change = series.diff()

    # Keep only rises (resp. falls); everything else, including the leading
    # NaN produced by diff(), counts as 0.
    rises = change.where(change > 0, 0)
    falls = -change.where(change < 0, 0)

    mean_rise = rises.rolling(window=window).mean()
    mean_fall = falls.rolling(window=window).mean()

    relative_strength = mean_rise / mean_fall
    return 100 - (100 / (1 + relative_strength))

# RSI of Close with the default window, computed separately for each asset.
for frame in (df_stocks_clean, df_etfs_clean):
    frame['RSI_14'] = frame.groupby('Asset')['Close'].transform(compute_rsi)


# Définition de la Target

In [217]:
# Target = 1 when the same asset's next Close is strictly higher than the
# current one, else 0. The last row of each asset has no next Close: comparing
# against NaN is False, which would silently label that row 0. It is set to
# NaN instead so that the dropna() below removes it.
# NOTE: re-running this cell on its own trims one more row per asset.
for frame in (df_stocks_clean, df_etfs_clean):
    next_close = frame.groupby('Asset')['Close'].shift(-1)
    frame['Target'] = (next_close > frame['Close']).astype(float).where(next_close.notna())

# Remove rows with NaN left by the earlier transformations and the unlabeled
# last row of each asset.
df_stocks_clean.dropna(inplace=True)
df_etfs_clean.dropna(inplace=True)

# Every remaining Target is 0.0 or 1.0; store it as an integer label again.
df_stocks_clean['Target'] = df_stocks_clean['Target'].astype(int)
df_etfs_clean['Target'] = df_etfs_clean['Target'].astype(int)

# Feature selection + correlation matrix (presentation)

In [None]:
# All candidate features are kept for both asset classes.
features = ['Open', 'High', 'Low', 'Close', 'SMA_10', 'SMA_50', 'Volatility_10', 'Momentum_10', 'RSI_14']
selected_features_stocks = features
selected_features_etfs = features

# Pairwise correlations between the features, one matrix per class.
corr_matrix_stocks = df_stocks_clean[features].corr()
corr_matrix_etfs = df_etfs_clean[features].corr()

fig, axes = plt.subplots(1, 2, figsize=(14, 6))

heatmap_panels = (
    (axes[0], corr_matrix_stocks, "Matrice de Corrélation - Actions (Stocks)"),
    (axes[1], corr_matrix_etfs, "Matrice de Corrélation - ETFs"),
)
for panel_ax, corr_matrix, panel_title in heatmap_panels:
    sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5, ax=panel_ax)
    panel_ax.set_title(panel_title)

# Tidy the layout before display.
plt.tight_layout()
plt.show()


# Split data

In [219]:
X_stocks = df_stocks_clean[selected_features_stocks]
y_stocks = df_stocks_clean['Target']
X_etfs = df_etfs_clean[selected_features_etfs]
y_etfs = df_etfs_clean['Target']


def _split_by_date(frame, X, y, test_size=0.2):
    """Split X and y chronologically using the dates in ``frame['Date']``.

    Args:
        frame: DataFrame sharing its index with ``X`` and ``y`` and holding a
            datetime ``Date`` column.
        X: Feature DataFrame.
        y: Target Series.
        test_size: Approximate fraction of rows (the most recent ones) to hold
            out for testing.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` where train rows are dated on
        or before the ``1 - test_size`` quantile of the dates and test rows
        after it.
    """
    cutoff = frame['Date'].quantile(1 - test_size)
    is_train = frame['Date'] <= cutoff
    return X[is_train], X[~is_train], y[is_train], y[~is_train]


# The frames are stacked asset by asset, so an unshuffled positional split
# holds out the last assets in full instead of the most recent dates (and
# random_state has no effect when shuffle=False). Splitting on a date cutoff
# makes every test row later than every train row.
X_train_stocks, X_test_stocks, y_train_stocks, y_test_stocks = _split_by_date(df_stocks_clean, X_stocks, y_stocks)
X_train_etfs, X_test_etfs, y_train_etfs, y_test_etfs = _split_by_date(df_etfs_clean, X_etfs, y_etfs)


# Répartition de la target par asset et etf

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Count Target values (0 / 1) for each asset. Reindexing on [0, 1] guarantees
# that both columns exist, in that order, even if one class never occurs.
target_distribution = (
    df_stocks_clean.groupby("Asset")["Target"]
    .value_counts()
    .unstack(fill_value=0)
    .reindex(columns=[0, 1], fill_value=0)
)

# Rename the columns for readability.
target_distribution.columns = ["Target_0", "Target_1"]

# Print the full per-asset table.
print(target_distribution)

# Stacked bar chart. DataFrame.plot creates its own figure, so no separate
# plt.figure() call is made (it would only leave an empty figure behind).
target_distribution.plot(kind="bar", stacked=True, figsize=(15, 6), width=0.8)
plt.title("Répartition des cibles (Target) par Asset, action")
plt.xlabel("Asset")
plt.ylabel("Nombre d'occurrences")
plt.legend(["Target 0", "Target 1"])
plt.xticks(rotation=90)  # Vertical labels for readability
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Count Target values (0 / 1) for each ETF. Reindexing on [0, 1] guarantees
# that both columns exist, in that order, even if one class never occurs.
target_distribution = (
    df_etfs_clean.groupby("Asset")["Target"]
    .value_counts()
    .unstack(fill_value=0)
    .reindex(columns=[0, 1], fill_value=0)
)

# Rename the columns for readability.
target_distribution.columns = ["Target_0", "Target_1"]

# Print the full per-ETF table.
print(target_distribution)

# Stacked bar chart. DataFrame.plot creates its own figure, so no separate
# plt.figure() call is made (it would only leave an empty figure behind).
target_distribution.plot(kind="bar", stacked=True, figsize=(15, 6), width=0.8)
plt.title("Répartition des cibles (Target) par etf")
plt.xlabel("etf")
plt.ylabel("Nombre d'occurrences")
plt.legend(["Target 0", "Target 1"])
plt.xticks(rotation=90)  # Vertical labels for readability
plt.show()


# Entrainement des modèles

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

def _build_baseline_models():
    """Return a fresh, unfitted instance of each baseline classifier."""
    return {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "KNN": KNeighborsClassifier(n_neighbors=5),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss")
    }


# Separate model instances per asset class, so each keeps its own fit.
models_stocks = _build_baseline_models()
models_etfs = _build_baseline_models()

# (header, models, train features, train labels, test features, test labels)
baseline_runs = (
    ("\n📊 Résultats des modèles - Actions (Stocks) 📊", models_stocks,
     X_train_stocks, y_train_stocks, X_test_stocks, y_test_stocks),
    ("\n📊 Résultats des modèles - ETFs 📊", models_etfs,
     X_train_etfs, y_train_etfs, X_test_etfs, y_test_etfs),
)

# Fit every model on its training set and report test-set metrics.
for header, class_models, train_X, train_y, test_X, test_y in baseline_runs:
    print(header)
    for name, model in class_models.items():
        model.fit(train_X, train_y)
        y_pred = model.predict(test_X)

        acc = accuracy_score(test_y, y_pred)
        precision = precision_score(test_y, y_pred)
        recall = recall_score(test_y, y_pred)
        f1 = f1_score(test_y, y_pred)

        print(f"{name} - Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}")


# Optimisation avec grid search

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Hyper-parameter search space of each model, keyed by the model's name.
logistic_regression_grid = {
    "C": [0.01, 0.1, 1, 10, 100],  # Regularisation
    "solver": ["lbfgs", "liblinear"],  # Solver algorithm
}
knn_grid = {
    "n_neighbors": [7, 9],  # Number of neighbours
    "weights": ["uniform", "distance"],  # Neighbour weighting
    "metric": ["euclidean", "manhattan"],  # Distance used
}
random_forest_grid = {
    "n_estimators": [100, 200],  # Number of trees
    "max_depth": [10, 20],  # Tree depth
    "min_samples_split": [5, 10],  # Min samples needed to split a node
    "min_samples_leaf": [2],  # Min samples per leaf
}
xgboost_grid = {
    "n_estimators": [100, 200],
    "learning_rate": [0.01, 0.1],
    "max_depth": [5, 7],
    "subsample": [1],  # Row sampling ratio
}

param_grids = {
    "Logistic Regression": logistic_regression_grid,
    "KNN": knn_grid,
    "Random Forest": random_forest_grid,
    "XGBoost": xgboost_grid,
}

# Base estimators handed to the grid search, keyed like param_grids.
models = {}
models["Logistic Regression"] = LogisticRegression(max_iter=1000)
models["KNN"] = KNeighborsClassifier()
models["Random Forest"] = RandomForestClassifier(random_state=42)
models["XGBoost"] = XGBClassifier(use_label_encoder=False, eval_metric="logloss")

def optimize_and_evaluate(model_name, model, param_grid, X_train, y_train, X_test, y_test,
                          cv=5, scoring="accuracy"):
    """Tune ``model`` with a grid search, then score the best estimator.

    Args:
        model_name: Label stored in the result under ``"Modèle"``.
        model: Estimator passed to ``GridSearchCV``.
        param_grid: Parameter grid passed to ``GridSearchCV``.
        X_train, y_train: Data used to run the grid search.
        X_test, y_test: Data used to compute the reported metrics.
        cv: Cross-validation setting forwarded to ``GridSearchCV``
            (default 5, the value previously hard-coded).
        scoring: Metric forwarded to ``GridSearchCV`` to rank candidates
            (default ``"accuracy"``, the value previously hard-coded).

    Returns:
        A dict with the model name, the best parameters as a string, and the
        test-set accuracy, precision, recall and F1-score rounded to 4
        decimals.
    """
    grid_search = GridSearchCV(model, param_grid, cv=cv, scoring=scoring, n_jobs=-1, verbose=1)
    grid_search.fit(X_train, y_train)

    # Evaluate the best parameter combination on the test set.
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    return {
        "Modèle": model_name,
        "Meilleurs Paramètres": str(grid_search.best_params_),
        "Accuracy": round(acc, 4),
        "Precision": round(precision, 4),
        "Recall": round(recall, 4),
        "F1-score": round(f1, 4)
    }

# Tune and evaluate every model on the stock data.
print("\n🔍 Optimisation et évaluation des modèles - Actions (Stocks) 🔍")
results_stocks = [
    optimize_and_evaluate(name, model, param_grids[name], X_train_stocks, y_train_stocks, X_test_stocks, y_test_stocks)
    for name, model in models.items()
]

# Same procedure on the ETF data.
print("\n🔍 Optimisation et évaluation des modèles - ETFs 🔍")
results_etfs = [
    optimize_and_evaluate(name, model, param_grids[name], X_train_etfs, y_train_etfs, X_test_etfs, y_test_etfs)
    for name, model in models.items()
]

# One row per model in each results table.
df_results_stocks = pd.DataFrame(results_stocks)
df_results_etfs = pd.DataFrame(results_etfs)

for heading, results_frame in (
    ("\n📊 Résultats des modèles - Actions (Stocks)", df_results_stocks),
    ("\n📊 Résultats des modèles - ETFs", df_results_etfs),
):
    print(heading)
    print(results_frame)

def plot_results(df, title):
    """Draw a grouped bar chart of the four metrics for every model.

    Args:
        df: Results table with a ``"Modèle"`` column and the columns
            ``"Accuracy"``, ``"Precision"``, ``"Recall"`` and ``"F1-score"``.
        title: Title of the chart.
    """
    metric_columns = ["Accuracy", "Precision", "Recall", "F1-score"]

    plt.figure(figsize=(10, 6))
    # Long format: one row per (model, metric) pair, as expected by `hue`.
    long_scores = df.melt(id_vars=["Modèle"], value_vars=metric_columns)
    sns.barplot(data=long_scores, x="Modèle", y="value", hue="variable", palette="viridis")

    plt.title(title)
    plt.xlabel("Modèle")
    plt.ylabel("Score")
    plt.ylim(0, 1)
    plt.xticks(rotation=45)
    plt.legend(title="Métrique")
    plt.show()

# One comparison chart per asset class.
for results_frame, chart_title in (
    (df_results_stocks, "📊 Comparaison des modèles - Actions (Stocks)"),
    (df_results_etfs, "📊 Comparaison des modèles - ETFs"),
):
    plot_results(results_frame, chart_title)

# Feature Importance + Matrice de confusion

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import confusion_matrix

# Tree-based models, which expose feature_importances_ once fitted.
feature_models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss")
}

# (train features, train labels, bar palette, label used in the chart title)
importance_runs = (
    (X_train_stocks, y_train_stocks, "Blues_r", "Actions (Stocks)"),
    (X_train_etfs, y_train_etfs, "Reds_r", "ETFs"),
)

for train_X, train_y, palette_name, class_label in importance_runs:
    for name, model in feature_models.items():
        # Refit on this class's training data before reading importances.
        model.fit(train_X, train_y)
        importance = model.feature_importances_

        plt.figure(figsize=(10, 5))
        sns.barplot(x=train_X.columns, y=importance, palette=palette_name)
        plt.xticks(rotation=45)
        plt.title(f"Feature Importance ({name}) - {class_label}")
        plt.xlabel("Features")
        plt.ylabel("Importance")
        plt.show()

def plot_confusion_matrix(y_true, y_pred, title):
    """Draw the confusion matrix of binary 0/1 labels as an annotated heatmap.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
        title: Title of the figure.
    """
    # Fix the label set so the matrix is always 2x2 and lines up with the
    # hard-coded "0"/"1" tick labels, even when one class is absent from both
    # y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="coolwarm", xticklabels=["0", "1"], yticklabels=["0", "1"])
    plt.xlabel("Prédictions")
    plt.ylabel("Vraies valeurs")
    plt.title(title)
    plt.show()

# (models, train features, train labels, test features, test labels, title label)
confusion_runs = (
    (models_stocks, X_train_stocks, y_train_stocks, X_test_stocks, y_test_stocks, "Stocks"),
    (models_etfs, X_train_etfs, y_train_etfs, X_test_etfs, y_test_etfs, "ETFs"),
)

# Refit each baseline model and plot the confusion matrix of its test predictions.
for class_models, train_X, train_y, test_X, test_y, class_label in confusion_runs:
    for name, model in class_models.items():
        model.fit(train_X, train_y)
        y_pred = model.predict(test_X)
        plot_confusion_matrix(test_y, y_pred, f"Matrice de Confusion - {name} ({class_label})")
