In [None]:
import pandas as pd
import numpy as np
import math
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns

Définitions des fonctions statistiques et graphiques des caractéristiques

In [None]:
# Load the feature table from CSV; the path is relative to the notebook's
# working directory.
df = pd.read_csv("Plant_V_Seg_all_features.csv")

In [None]:
# Display the column index to see which columns the loaded table provides.
df.columns

In [None]:
# Step 1: flag rows that are exact copies of an earlier row (all columns
# equal), then report how many there are.
duplicate_rows = df.duplicated()
n_dup = duplicate_rows.sum()
print(f"Il y a {n_dup} doublons exacts dans df.")

In [None]:
# Quantitative features are taken to be the float64 columns.
quant_cols = df.select_dtypes(include=['float64']).columns


def _tukey_fences(series):
    """Return (Q1, Q3, IQR, lower fence, upper fence) for the 1.5 * IQR rule."""
    q1, q3 = series.quantile(0.25), series.quantile(0.75)
    spread = q3 - q1
    return q1, q3, spread, q1 - 1.5 * spread, q3 + 1.5 * spread


# Class membership does not depend on the feature, so build it once.
rows_saine = df["Est_Saine"] == 1
rows_malade = df["Est_Saine"] == 0

# feature name -> {"saine": count, "malade": count}
outlier_stats = {}

for col in quant_cols:
    q1, q3, iqr, lower, upper = _tukey_fences(df[col])

    # True where the value lies strictly outside the fences (NaN is never flagged).
    flagged = df[col].lt(lower) | df[col].gt(upper)

    saine = (flagged & rows_saine).sum()
    malade = (flagged & rows_malade).sum()
    outlier_stats[col] = {"saine": saine, "malade": malade}

    # Verbose trace of the thresholds and counts for each feature.
    print(f"Feature: {col}\n  Q1: {q1:.3f}, Q3: {q3:.3f}, IQR: {iqr:.3f}")
    print(f"  lower: {lower:.3f}, upper: {upper:.3f}")
    print(f"  Outliers (saine): {saine}, Outliers (malade): {malade}\n")

Outliers

Nombre d’outliers par plante et par maladie

In [None]:
# One-hot indicator columns for plants and for diseases.
plant_cols = [col for col in df.columns if col.startswith('plant_')]
disease_cols = [col for col in df.columns if col.startswith('disease_')]

# NOTE(review): 'is_outlier' is not created in the visible part of this
# notebook; it is assumed to already be a column of the loaded CSV -- confirm.
# Comparing with 1 yields a proper boolean mask for both True/False and 0/1
# encodings. Passing a 0/1 integer Series to .loc would be interpreted as row
# labels and give wrong counts.
outlier_flag = df['is_outlier'].eq(1)

# Number of outlier rows for each plant (prefix stripped from the key).
outliers_per_plant = {
    plant.replace('plant_', ''): int((df[plant].eq(1) & outlier_flag).sum())
    for plant in plant_cols
}

# Same count for each disease.
outliers_per_disease = {
    disease.replace('disease_', ''): int((df[disease].eq(1) & outlier_flag).sum())
    for disease in disease_cols
}

Visualisation interactive Plotly

In [None]:
# One bar trace per view; only the plant view is visible when the figure opens.
views = [
    ("Plantes", outliers_per_plant, True),
    ("Maladies", outliers_per_disease, False),
]
fig = go.Figure(data=[
    go.Bar(
        x=list(counts.keys()),
        y=list(counts.values()),
        name=trace_name,
        visible=shown,
    )
    for trace_name, counts, shown in views
])

# Dropdown entries: each one swaps the visible trace and the figure title.
view_buttons = [
    dict(
        label="Par Plante",
        method="update",
        args=[{"visible": [True, False]}, {"title": "Nombre d'outliers par Plante"}],
    ),
    dict(
        label="Par Maladie",
        method="update",
        args=[{"visible": [False, True]}, {"title": "Nombre d'outliers par Maladie"}],
    ),
]

fig.update_layout(
    updatemenus=[dict(buttons=view_buttons, direction="down")],
    title="Nombre d'outliers par Plante",
    xaxis_title="Plante/Maladie",
    yaxis_title="Nombre d'outliers",
)

fig.show()

In [None]:

quant_cols = df.select_dtypes(include=['float64']).columns

# Row masks for the two classes of the binary target.
healthy_rows = df["Est_Saine"] == 1
diseased_rows = df["Est_Saine"] == 0


def _count_iqr_outliers(values):
    """Count values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR], split by class."""
    first_q = values.quantile(0.25)
    third_q = values.quantile(0.75)
    spread = third_q - first_q
    # Strict comparisons: NaN entries are never flagged.
    flagged = values.lt(first_q - 1.5 * spread) | values.gt(third_q + 1.5 * spread)
    return {
        "saine": (flagged & healthy_rows).sum(),
        "malade": (flagged & diseased_rows).sum(),
    }


# feature name -> per-class outlier counts
outlier_stats = {col: _count_iqr_outliers(df[col]) for col in quant_cols}


In [None]:
features = list(outlier_stats.keys())

# One two-bar trace (Saine vs Malade) per feature; only the first starts visible.
fig = go.Figure(data=[
    go.Bar(
        x=["Saine", "Malade"],
        y=[counts["saine"], counts["malade"]],
        name=feature_name,
        visible=(position == 0),
    )
    for position, (feature_name, counts) in enumerate(outlier_stats.items())
])

# One dropdown entry per feature: show only that feature's trace and retitle.
dropdown_buttons = [
    dict(
        label=feature_name,
        method="update",
        args=[
            {"visible": [other == position for other in range(len(features))]},
            {"title": f"Outliers pour la feature '{feature_name}'"},
        ],
    )
    for position, feature_name in enumerate(features)
]

feature_menu = dict(
    active=0,
    buttons=dropdown_buttons,
    direction="down",
    showactive=True,
)
fig.update_layout(
    updatemenus=[feature_menu],
    title=f"Outliers pour la feature '{features[0]}'",
    xaxis_title="Classe",
    yaxis_title="Nombre d'outliers",
)

fig.show()
# Also export a standalone HTML copy next to the notebook.
fig.write_html("outliers_par_feature.html")


In [None]:
features = list(outlier_stats)

# Outlier counts per class, in the same order as `features`.
class_counts = {
    status: [outlier_stats[feat][status] for feat in features]
    for status in ("saine", "malade")
}
outliers_saine = class_counts["saine"]
outliers_malade = class_counts["malade"]

fig = go.Figure()
fig.add_trace(go.Bar(name='Saine', x=features, y=outliers_saine))
fig.add_trace(go.Bar(name='Malade', x=features, y=outliers_malade))

# 'group' places the two class bars side by side for each feature.
fig.update_layout(
    title="Nombre d'outliers par feature et par classe",
    xaxis_title="Feature",
    yaxis_title="Nombre d'outliers",
    barmode='group',
)

fig.show()


Histogrammes interactifs pour objectif 1 

In [None]:
# 0. Build 'plant_label' from the one-hot 'plant_*' columns.
plant_cols = [c for c in df.columns if c.startswith("plant_")]
# Vectorised replacement for the deprecated DataFrame.applymap: True where the
# flag is True or 1.
mask = df[plant_cols].eq(1)
idx = mask.to_numpy().argmax(axis=1)
plant_names = np.array([c.replace("plant_", "") for c in plant_cols], dtype=object)
# argmax returns 0 for a row with no flag set; leave such rows unlabelled (NaN)
# instead of silently assigning them to the first plant column.
df["plant_label"] = pd.Series(plant_names[idx], index=df.index).where(mask.any(axis=1))

# 1. Features plotted for objective 1.
features = [
    'aire', 'périmètre', 'circularité', 'excentricité', 'aspect_ratio', 'netteté',
    'contour_density', 'mean_R', 'mean_G', 'mean_B', 'std_R', 'std_G', 'std_B','mean_H','mean_S','mean_V', 'hog_mean', 'hog_std',
    'hog_entropy', 'fft_energy', 'fft_entropy', 'fft_low_freq_power',
    'fft_high_freq_power', 'hu_1', 'hu_2', 'hu_3', 'hu_4', 'hu_5', 'hu_6', 'hu_7'
]

# 2. Classes (order of first appearance) and colour palette.
classes = pd.unique(df["plant_label"].dropna()).tolist()
palette = px.colors.qualitative.Plotly

# 3. Coerce each feature to numeric once, then derive the 2.5-97.5 percentile
#    window used to clip the x-axis.
numeric = {feat: pd.to_numeric(df[feat], errors='coerce') for feat in features}
percentile_ranges = {}
for feat in features:
    low, high = np.percentile(numeric[feat].dropna().to_numpy(), [2.5, 97.5])
    percentile_ranges[feat] = (float(low), float(high))

# 4. Add one bar trace per (feature, class); only the first feature is visible.
fig = go.Figure()
n_classes = len(classes)
class_masks = {cls: df["plant_label"] == cls for cls in classes}
for i, feat in enumerate(features):
    # Bin edges are shared by all classes so the bars line up.
    edges = np.histogram_bin_edges(numeric[feat].dropna().to_numpy(), bins=25)
    centers = (edges[:-1] + edges[1:]) / 2

    for j, cls in enumerate(classes):
        vals = numeric[feat][class_masks[cls]].dropna().to_numpy()
        counts, _ = np.histogram(vals, bins=edges)

        fig.add_trace(go.Bar(
            x=centers,
            y=counts,
            name=str(cls),
            marker_color=palette[j % len(palette)],
            visible=(i == 0),
            legendgroup=str(cls)
        ))

# 5. Dropdown: one entry per feature, toggling its traces and x-axis window.
buttons = []
for i, feat in enumerate(features):
    # Traces were added feature-major, so feature i owns a contiguous slice.
    vis = [False] * (len(features) * n_classes)
    for j in range(n_classes):
        vis[i * n_classes + j] = True
    low, high = percentile_ranges[feat]
    buttons.append(dict(
        label=feat.replace("_"," "),
        method="update",
        args=[
            {"visible": vis},
            {
                "title": f"Histogramme de {feat.replace('_',' ')}",
                "xaxis": {"title": feat.replace("_"," "), "range": [low, high]}
            }
        ]
    ))

# 6. Layout. The initial x-axis uses the first feature's percentile window so
#    the opening view matches what the dropdown shows for that feature.
first_low, first_high = percentile_ranges[features[0]]
fig.update_layout(
    updatemenus=[dict(
        active=0,
        buttons=buttons,
        x=0.1, y=1.15,
        xanchor="left", yanchor="top"
    )],
    barmode="group",
    title=f"Histogramme de {features[0].replace('_',' ')}",
    xaxis=dict(title=features[0].replace("_"," "), range=[first_low, first_high]),
    yaxis_title="Count",
    legend_title="Espèce",
    margin=dict(l=50, r=50, t=100, b=50)
)

fig.show()
fig.write_html("objectif1_histos.html", include_plotlyjs='cdn')


Histogrammes interactifs pour objectif 2

In [None]:
def plot_feature_dists_plotly(df, features, target, n_cols=3, nbins=30,
                              output_html="objectif2_histos.html"):
    """Plot a grid of per-class histograms, one subplot per feature.

    For every feature the values are coerced to numeric, binned with edges
    shared by all classes of ``target``, and drawn as grouped bars. The figure
    is shown and, unless ``output_html`` is None, written to an HTML file.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the feature columns and the ``target`` column.
    features : list of str
        Column names to plot; one subplot each.
    target : str
        Column whose distinct non-null values define the classes.
    n_cols : int, default 3
        Number of subplot columns in the grid.
    nbins : int, default 30
        Number of histogram bins per feature.
    output_html : str or None, default "objectif2_histos.html"
        Destination of the HTML export; pass None to skip writing.

    Returns
    -------
    None
    """
    # Classes in order of first appearance (no sorting).
    raw_cats   = pd.unique(df[target].dropna()).tolist()
    cat_labels = [str(c) for c in raw_cats]
    # Cycle through the default qualitative palette.
    palette = px.colors.qualitative.Plotly
    color_map = {lbl: palette[i % len(palette)] for i, lbl in enumerate(cat_labels)}
    # Row masks per class do not depend on the feature: build them once.
    cat_masks = [df[target] == cat for cat in raw_cats]

    n_plots = len(features)
    n_rows  = math.ceil(n_plots / n_cols)

    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=[f.replace("_"," ") for f in features],
        horizontal_spacing=0.04, vertical_spacing=0.08
    )

    for idx, feat in enumerate(features):
        # Coerce once per feature; non-numeric entries become NaN and are dropped.
        numeric = pd.to_numeric(df[feat], errors='coerce')
        edges   = np.histogram_bin_edges(numeric.dropna().to_numpy(), bins=nbins)
        centers = (edges[:-1] + edges[1:]) / 2

        # 1-based grid position, filled row by row.
        row = idx // n_cols + 1
        col = idx % n_cols + 1

        for cat_mask, lbl in zip(cat_masks, cat_labels):
            vals = numeric[cat_mask].dropna().to_numpy()
            counts, _ = np.histogram(vals, bins=edges)
            fig.add_trace(
                go.Bar(
                    x=centers,
                    y=counts,
                    name=lbl,
                    legendgroup=lbl,
                    marker_color=color_map[lbl],
                    opacity=0.8,
                    # One legend entry per class, taken from the first subplot.
                    showlegend=(idx==0)
                ),
                row=row, col=col
            )

        fig.update_xaxes(title_text=feat, row=row, col=col)
        fig.update_yaxes(title_text="Count",  row=row, col=col)

    fig.update_layout(
        title_text=f"Distribution des features par '{target}'",
        height=300 * n_rows, width=350 * n_cols,
        barmode='group',
        legend=dict(title=target, x=1.02, y=1),
        margin=dict(l=50, r=150, t=80, b=50)
    )
    fig.show()
    if output_html is not None:
        fig.write_html(output_html, include_plotlyjs='cdn')

# Features compared between the classes of 'Est_Saine', grouped by family.
quant_vars_Est_Saine = [
    'aire', 'périmètre', 'circularité', 'excentricité', 'aspect_ratio',
    'netteté', 'contour_density',
    'mean_R', 'mean_G', 'mean_B',
    'std_R', 'std_G', 'std_B',
    'mean_H', 'mean_S', 'mean_V',
    'hog_mean', 'hog_std', 'hog_entropy',
    'fft_energy', 'fft_entropy', 'fft_low_freq_power', 'fft_high_freq_power',
    'hu_1', 'hu_2', 'hu_3', 'hu_4', 'hu_5', 'hu_6', 'hu_7',
]

# Three subplots per row, 25 bins per histogram.
plot_feature_dists_plotly(
    df=df,
    features=quant_vars_Est_Saine,
    target='Est_Saine',
    n_cols=3,
    nbins=25,
)

Histogrammes interactifs pour objectif 3

In [None]:
# --- Build 'disease_label' from the one-hot 'disease_*' columns ---
disease_cols = [c for c in df.columns if c.startswith("disease_")]
# Vectorised replacement for the deprecated DataFrame.applymap: True where the
# flag is True or 1.
mask_d = df[disease_cols].eq(1)
idx_d  = mask_d.to_numpy().argmax(axis=1)
disease_names = np.array([c.replace("disease_", "") for c in disease_cols], dtype=object)
# argmax returns 0 for a row with no flag set; leave such rows unlabelled (NaN)
# instead of silently assigning them to the first disease column.
df["disease_label"] = pd.Series(disease_names[idx_d], index=df.index).where(mask_d.any(axis=1))

# --- Features and classes ---
quant_vars_disease = ['aire', 'périmètre', 'circularité', 'excentricité', 'aspect_ratio', 'netteté',
    'contour_density', 'mean_R', 'mean_G', 'mean_B', 'std_R', 'std_G', 'std_B','mean_H','mean_S','mean_V', 'hog_mean', 'hog_std',
    'hog_entropy', 'fft_energy', 'fft_entropy', 'fft_low_freq_power',
    'fft_high_freq_power', 'hu_1', 'hu_2', 'hu_3', 'hu_4', 'hu_5', 'hu_6', 'hu_7']

features = quant_vars_disease
classes  = pd.unique(df["disease_label"].dropna()).tolist()
palette  = px.colors.qualitative.Plotly

# --- 2.5-97.5 percentile window per feature, used to clip the x-axis ---
percentile_ranges = {
    feat: np.percentile(df[feat].dropna(), [2.5, 97.5])
    for feat in features
}

# --- Figure: one bar trace per (feature, class) ---
fig = go.Figure()
class_masks = {cls: df["disease_label"] == cls for cls in classes}

for i, feat in enumerate(features):
    # Bin edges are shared by all classes so the bars line up.
    edges = np.histogram_bin_edges(df[feat].dropna(), bins=25)
    centers = (edges[:-1] + edges[1:]) / 2
    for j, cls in enumerate(classes):
        vals = df.loc[class_masks[cls], feat].dropna()
        counts, _ = np.histogram(vals, bins=edges)
        fig.add_trace(go.Bar(
            x=centers, y=counts,
            name=str(cls),
            marker_color=palette[j % len(palette)],
            # Show the first feature on load. With every trace hidden the
            # figure opens empty although the dropdown marks entry 0 as active.
            visible=(i == 0),
            legendgroup=str(cls)
        ))

# --- Dropdown: choose the feature ---
n_feat   = len(features)
n_class  = len(classes)
buttons  = []

for i, feat in enumerate(features):
    # Traces were added feature-major, so feature i owns a contiguous slice.
    vis_feat = [False]*(n_feat*n_class)
    for j in range(n_class):
        vis_feat[i*n_class + j] = True
    low, high = percentile_ranges[feat]
    buttons.append(dict(
        method="update",
        label=feat.replace("_"," "),
        args=[{"visible": vis_feat},
              {"xaxis": {"range":[low,high], "title":feat.replace("_"," ")},
               "title": f"Histogramme de {feat} par maladie"}]
    ))

# A second dropdown selecting a single class was sketched in the original cell
# but never implemented.

# Initial title and x-axis mirror what dropdown entry 0 would set.
first_low, first_high = percentile_ranges[features[0]]
fig.update_layout(
    updatemenus=[dict(active=0, buttons=buttons, x=0, y=1.2)],
    barmode="group",
    title=f"Histogramme de {features[0]} par maladie",
    xaxis=dict(title=features[0].replace("_"," "), range=[first_low, first_high]),
    margin=dict(l=50, r=50, t=100, b=50),
    height=500, width=800
)

fig.show()
fig.write_html("objectif3_histos.html", include_plotlyjs='cdn')


Alternative objectif 3 avec boxplot

In [None]:
# --- Build 'disease_label' from the one-hot 'disease_*' columns ---
disease_cols = [c for c in df.columns if c.startswith("disease_")]
# Vectorised replacement for the deprecated DataFrame.applymap: True where the
# flag is True or 1.
mask_d = df[disease_cols].eq(1)
idx_d  = mask_d.to_numpy().argmax(axis=1)
disease_names = np.array([c.replace("disease_", "") for c in disease_cols], dtype=object)
# argmax returns 0 for a row with no flag set; leave such rows unlabelled (NaN)
# instead of silently assigning them to the first disease column.
df["disease_label"] = pd.Series(disease_names[idx_d], index=df.index).where(mask_d.any(axis=1))

quant_vars_disease = ['aire', 'périmètre', 'circularité', 'excentricité', 'aspect_ratio', 'netteté',
    'contour_density', 'mean_R', 'mean_G', 'mean_B', 'std_R', 'std_G', 'std_B','mean_H','mean_S','mean_V', 'hog_mean', 'hog_std',
    'hog_entropy', 'fft_energy', 'fft_entropy', 'fft_low_freq_power',
    'fft_high_freq_power', 'hu_1', 'hu_2', 'hu_3', 'hu_4', 'hu_5', 'hu_6', 'hu_7']


# The features are split into several figures of at most `plots_per_file`
# subplots, each saved to its own HTML file.
plots_per_file = 4
n_files = math.ceil(len(quant_vars_disease) / plots_per_file)
classes = pd.unique(df["disease_label"].dropna()).tolist()
palette = px.colors.qualitative.Plotly
color_map = {cls: palette[i % len(palette)] for i, cls in enumerate(classes)}
class_masks = {cls: df["disease_label"] == cls for cls in classes}

for k in range(n_files):
    vars_this = quant_vars_disease[k*plots_per_file : (k+1)*plots_per_file]
    n = len(vars_this)
    n_cols = 2
    n_rows = (n + n_cols - 1) // n_cols  # ceiling division

    # The original cell built the grid twice and discarded the first one; only
    # the spacing that actually took effect (0.14) is kept.
    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=[v.replace("_", " ") for v in vars_this],
        vertical_spacing=0.14, horizontal_spacing=0.10
    )

    for i, feat in enumerate(vars_this):
        # 1-based grid position, filled row by row.
        row = i // n_cols + 1
        col = i % n_cols + 1
        for j, cls in enumerate(classes):
            y_vals = df.loc[class_masks[cls], feat]
            fig.add_trace(
                go.Box(
                    y=y_vals,
                    name=str(cls),
                    marker_color=color_map[cls],
                    boxmean='sd',
                    # One legend entry per class (from the first subplot). The
                    # shared legendgroup makes it toggle that class everywhere.
                    legendgroup=str(cls),
                    showlegend=(i==0)
                ),
                row=row, col=col
            )
        fig.update_yaxes(title_text=feat.replace("_", " "), row=row, col=col)
        fig.update_xaxes(title_text="Maladie", row=row, col=col)

    fig.update_layout(
        title=f"Boxplots maladies – Partie {k+1}/{n_files}",
        height=600 * n_rows,
        width=900 * n_cols,
        margin=dict(l=40, r=40, t=100, b=60),
        legend=dict(title="Maladie", orientation="v", x=1.03, y=1)
    )

    fig.show()
    html_name = f"Boxplot{k+1}.html"
    fig.write_html(html_name, include_plotlyjs='cdn')
    print(f"→ Sauvegardé : {html_name}")


Distribution des features conditionnée à la classe cible

In [None]:
# Features whose distribution is shown.
features = ['aire', 'périmètre', 'circularité', 'excentricité', 'aspect_ratio', 'netteté',
    'contour_density', 'mean_R', 'mean_G', 'mean_B', 'std_R', 'std_G', 'std_B','mean_H','mean_S','mean_V', 'hog_mean', 'hog_std',
    'hog_entropy', 'fft_energy', 'fft_entropy', 'fft_low_freq_power',
    'fft_high_freq_power', 'hu_1', 'hu_2', 'hu_3', 'hu_4', 'hu_5', 'hu_6', 'hu_7']


# Target columns the distributions are conditioned on. 'plant_label' and
# 'disease_label' are created by earlier cells of this notebook.
targets = ["plant_label", "Est_Saine", "disease_label"]

# Sorted distinct non-null values of each target.
target_labels = {
    t: sorted(df[t].dropna().unique()) for t in targets
}

# One histogram trace per (feature, target, class). The (feature index,
# target index) of each trace is recorded in insertion order so the dropdown
# masks below always match the real trace order.
fig = go.Figure()
trace_keys = []
for i, feat in enumerate(features):
    for j, target in enumerate(targets):
        for cls in target_labels[target]:
            # Values of this feature for the rows belonging to the class.
            data = df.loc[df[target] == cls, feat].dropna().values
            fig.add_trace(go.Histogram(
                x=data,
                nbinsx=30,
                name=f"{feat} - {target}={cls}",
                visible=(i == 0 and j == 0),  # only the first combination is shown on load
                legendgroup=f"{target}_{cls}",
                opacity=0.6
            ))
            trace_keys.append((i, j))

# One dropdown entry per (feature, target) pair, showing only its traces.
menu_buttons = [
    dict(
        method="update",
        label=f"{feat} | {target}",
        args=[
            {"visible": [key == (i, j) for key in trace_keys]},
            {"title": f"Distribution de {feat} conditionnée à {target}"}
        ]
    )
    for i, feat in enumerate(features)
    for j, target in enumerate(targets)
]

fig.update_layout(
    updatemenus=[dict(
        buttons=menu_buttons,
        direction="down",
        x=0.1, y=1.15,
        xanchor="left", yanchor="top"
    )],
    # Overlay the semi-transparent class histograms on the same axes.
    barmode="overlay",
    title=f"Distribution de {features[0]} conditionnée à {targets[0]}",
    xaxis_title="Valeurs",
    yaxis_title="Effectif",
    height=600, width=900,
    margin=dict(l=50, r=50, t=100, b=50)
)

fig.show()
fig.write_html("distributions_features_targets.html", include_plotlyjs='cdn')


Analyse des corrélations : matrice de corrélation (Heatmap)

In [None]:
# Numeric candidates: float64 and int64 columns.
numerical_features = df.select_dtypes(include=['float64', 'int64']).columns.tolist()

# For readability, drop the one-hot 'plant_' / 'disease_' columns as well as
# 'is_black' and 'ID_Image'.
excluded_names = {'is_black', 'ID_Image'}
excluded_prefixes = ('plant_', 'disease_')
numerical_to_plot = [
    col for col in numerical_features
    if col not in excluded_names and not col.startswith(excluded_prefixes)
]

# Pairwise correlation of the retained columns.
corr_matrix = df.loc[:, numerical_to_plot].corr()

# Diverging colour map centred on zero, without cell annotations.
plt.figure(figsize=(16,12))
sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', center=0)
plt.title('Matrice de corrélation des variables numériques')
plt.show()

# Rank the other variables by absolute correlation with 'Est_Saine' and print
# the ten strongest.
est_saine_corr = corr_matrix['Est_Saine'].drop('Est_Saine')
corrs = est_saine_corr.sort_values(key=abs, ascending=False)
print(corrs.head(10))


Heatmap des moyennes par classe

In [None]:
# Features averaged per plant, grouped by family.
features = [
    'aire', 'périmètre', 'circularité', 'excentricité', 'aspect_ratio',
    'netteté', 'contour_density',
    'mean_R', 'mean_G', 'mean_B',
    'std_R', 'std_G', 'std_B',
    'mean_H', 'mean_S', 'mean_V',
    'hog_mean', 'hog_std', 'hog_entropy',
    'fft_energy', 'fft_entropy', 'fft_low_freq_power', 'fft_high_freq_power',
    'hu_1', 'hu_2', 'hu_3', 'hu_4', 'hu_5', 'hu_6', 'hu_7',
]

# One row per 'plant_label' value, one column per feature.
rows_by_plant = df.groupby('plant_label')
pivot = rows_by_plant[features].mean()

# Annotated heatmap of the raw (unscaled) means.
plt.figure(figsize=(20, 8))
sns.heatmap(pivot, cmap="viridis", annot=True)
plt.title("Heatmap des moyennes des features par plante")
plt.show()

In [None]:
# Standardised heatmap of per-group feature means, with a dropdown to switch
# the grouping column.

# --- 1. Features included in the heatmap ---
features = [
    'aire', 'périmètre', 'circularité', 'excentricité', 'aspect_ratio',
    'netteté', 'contour_density',
    'mean_R', 'mean_G', 'mean_B',
    'std_R', 'std_G', 'std_B',
    'mean_H', 'mean_S', 'mean_V',
    'hog_mean', 'hog_std', 'hog_entropy',
    'fft_energy', 'fft_entropy', 'fft_low_freq_power', 'fft_high_freq_power',
    'hu_1', 'hu_2', 'hu_3', 'hu_4', 'hu_5', 'hu_6', 'hu_7',
]

# Dropdown label -> grouping column.
groupings = {
    "Par maladie": "disease_label",
    "Par plante": "plant_label",
    "Par statut (Est_Saine)": "Est_Saine"
}


def _standardised_group_means(group_col):
    """Return per-group feature means, z-scored column-wise across the groups."""
    group_means = df.groupby(group_col)[features].mean()
    return group_means.sub(group_means.mean()).div(group_means.std())


# --- 2. One standardised matrix per grouping ---
heatmaps = {
    title: _standardised_group_means(group_col)
    for title, group_col in groupings.items()
}

# --- 3. One heatmap trace per grouping; only the first is visible on load ---
fig = go.Figure(data=[
    go.Heatmap(
        z=matrix.values,
        x=matrix.columns,
        y=matrix.index.astype(str),
        coloraxis="coloraxis",  # all traces share one colour scale
        visible=(position == 0),
    )
    for position, matrix in enumerate(heatmaps.values())
])

# --- 4. Dropdown: show a single grouping and update the title ---
buttons = [
    dict(
        label=title,
        method="update",
        args=[
            {"visible": [other == position for other in range(len(heatmaps))]},
            {"title": f"Heatmap standardisée (z-score) des features<br>{title}"},
        ],
    )
    for position, title in enumerate(heatmaps)
]

grouping_menu = dict(
    buttons=buttons,
    direction="down",
    showactive=True,
    x=1.02, xanchor="left",
    y=1, yanchor="top",
)
fig.update_layout(
    # Shared diverging scale, clipped to +/- 2.5.
    coloraxis={'colorscale': 'RdBu', 'cmin': -2.5, 'cmax': 2.5},
    updatemenus=[grouping_menu],
    title=f"Heatmap standardisée (z-score) des features<br>Par maladie",
    xaxis_title="Feature",
    yaxis_title="Classe",
    height=700,
    width=1200,
)

fig.show()
fig.write_html("heatmap_features_grouped.html")
print(" Heatmap interactive enregistrée sous 'heatmap_features_grouped.html'")
