In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import os
import cv2
from PIL import Image
import plotly.express as px
import plotly.graph_objects as go
from skimage.feature.texture import graycomatrix, graycoprops
from skimage.measure import label, regionprops
from skimage.measure import moments_hu
from tqdm import tqdm
from sklearn.cluster import DBSCAN
from scipy.stats import spearmanr
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from collections import defaultdict
import shutil
import hashlib

Définitions des fonctions statistiques et graphiques des caractéristiques

In [None]:
# Location of the feature CSV. The original absolute path stays the default so
# existing environments behave exactly as before; set the PLANT_FEATURES_CSV
# environment variable to load the file from another location without editing
# the notebook.
FEATURES_CSV = Path(
    os.environ.get(
        "PLANT_FEATURES_CSV",
        "/workspaces/datasciencetest_reco_plante/notebooks/plant_V_Seg_all_features.csv",
    )
)
df = pd.read_csv(FEATURES_CSV)
df.head()

In [None]:
# Show the column names of the loaded DataFrame.
df.columns

In [None]:
# Statistical analysis - interactive Plotly violin plot (distribution per class)
def plot_violin_interactive(df, feature, classe='nom_plante'):
    """Show an interactive violin plot of ``feature`` grouped by ``classe``.

    Args:
        df: DataFrame handed to ``px.violin``; all of its columns are passed
            as hover data.
        feature: Column name used for the y axis.
        classe: Column name used for the x axis.

    Returns:
        None. The figure is rendered with ``show()``.
    """
    violin_options = dict(
        x=classe,
        y=feature,
        box=True,       # draw a box plot inside each violin
        points="all",   # plot every observation, not only outliers
        hover_data=df.columns,
        title=f"Distribution de {feature} par {classe}",
    )
    figure = px.violin(df, **violin_options)
    # Tilt the x tick labels by -45 degrees.
    figure.update_layout(xaxis_tickangle=-45)
    figure.show()

In [None]:
# Statistical analysis - interactive Plotly box plot
def plot_box_interactive(df, feature, classe='nom_plante'):
    """Show an interactive box plot of ``feature`` grouped by ``classe``.

    Args:
        df: DataFrame handed to ``px.box``; all of its columns are passed as
            hover data.
        feature: Column name used for the y axis.
        classe: Column name used for the x axis.

    Returns:
        None. The figure is rendered with ``show()``.
    """
    plot_title = f"Boxplot de {feature} par {classe}"
    figure = px.box(
        df,
        x=classe,
        y=feature,
        points="all",  # plot every observation, not only outliers
        title=plot_title,
        hover_data=df.columns,
    )
    # Tilt the x tick labels by -45 degrees.
    figure.update_layout(xaxis_tickangle=-45)
    figure.show()

In [None]:
# Statistical analysis - histogram (per class or global)
def plot_hist_interactive(df, feature, classe='Est_Saine'):
    """Show an interactive histogram of ``feature`` coloured by ``classe``.

    Args:
        df: DataFrame handed to ``px.histogram``; all of its columns are
            passed as hover data.
        feature: Column name used for the x axis.
        classe: Column name used to colour the bars.

    Returns:
        None. The figure is rendered with ``show()``.
    """
    histogram_options = {
        "x": feature,
        "color": classe,
        "marginal": "box",  # "rug" is an alternative
        "hover_data": df.columns,
        "title": f"Histogramme de {feature} (par {classe})",
    }
    px.histogram(df, **histogram_options).show()

In [None]:
# Statistical analysis - interactive correlation matrix (Plotly heatmap)
def plot_corr_heatmap_interactive(root_dir_img, colonnes_features, method='spearman'):
    """Show a heatmap of the pairwise correlations between the given columns.

    Args:
        root_dir_img: Despite its name, this is used as a DataFrame: it is
            indexed with ``colonnes_features`` and ``.corr`` is called on the
            result.
        colonnes_features: Columns to include in the correlation matrix.
        method: Correlation method forwarded to ``DataFrame.corr``.

    Returns:
        None. The figure is rendered with ``show()``.
    """
    selected_columns = root_dir_img[colonnes_features]
    correlation_matrix = selected_columns.corr(method=method)

    heatmap_options = dict(
        text_auto=True,
        aspect='auto',
        color_continuous_scale='RdBu_r',
        title=f"Matrice de corrélation ({method})",
    )
    figure = px.imshow(correlation_matrix, **heatmap_options)
    figure.update_layout(width=1000, height=800)
    figure.show()

In [None]:
def correlation_with_target(df, colonnes_features, target):
    """Display each feature's Spearman correlation with the label-encoded target.

    The target column is converted to integer codes with ``LabelEncoder``,
    ``spearmanr`` is evaluated between every feature column and those codes,
    and the resulting one-column table is displayed sorted from the highest
    to the lowest correlation.

    Args:
        df: DataFrame containing the feature columns and the target column.
        colonnes_features: Iterable of feature column names to evaluate.
        target: Name of the target column.

    Returns:
        None. The table is shown with ``display``.
    """
    encoded_target = LabelEncoder().fit_transform(df[target])

    # spearmanr yields (correlation, p-value); only the correlation is kept.
    rho_by_feature = {
        name: spearmanr(df[name], encoded_target)[0]
        for name in colonnes_features
    }

    ranking = pd.DataFrame.from_dict(
        rho_by_feature, orient='index', columns=['Corrélation']
    ).sort_values(by='Corrélation', ascending=False)
    display(ranking)

In [None]:
# Recommended plots for Objective 3: identify the disease (diseased leaves only).
# Compare nom_maladie only on the rows where Est_Saine == 0.

df_malade = df.loc[df["Est_Saine"].eq(0)]

# Each entry pairs a plotting function with the feature it should display.
for plot_function, feature_name in (
    (plot_box_interactive, 'mean_B'),
    (plot_violin_interactive, 'contrast'),
    (plot_violin_interactive, 'hu_4'),
):
    plot_function(df_malade, feature=feature_name, classe='nom_maladie')