In [None]:
# ============================================================
# === 🧠 Medical AI — Diabetes Detection ===
# ============================================================

!pip -q install imbalanced-learn xgboost shap gradio

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import shap
import gradio as gr
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    accuracy_score, classification_report,
    roc_auc_score, roc_curve, precision_recall_curve,
    average_precision_score, confusion_matrix
)
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from google.colab import drive

# Single seed reused by the train/test split, SMOTE, the CV folds, the randomized
# search and the XGBoost model so that a run is reproducible.
RANDOM_STATE = 42
# Matplotlib style sheet applied to the figures drawn in this file.
plt.style.use("seaborn-v0_8-whitegrid")

# === 1️⃣ Load the data ===
def load_data():
    """Mount Google Drive and read the diabetes CSV into a DataFrame.

    Returns:
        The DataFrame parsed from ``IA_Diabete/diabetes.csv`` on the mounted Drive.

    Raises:
        FileNotFoundError: If the CSV does not exist at the expected location.
    """
    csv_path = "/content/drive/MyDrive/IA_Diabete/diabetes.csv"

    # The Drive is (re)mounted on every call before the file is looked up.
    drive.mount("/content/drive", force_remount=True)

    if os.path.exists(csv_path):
        frame = pd.read_csv(csv_path)
        print("‚úÖ Fichier charg√© :", csv_path)
        print("Shape :", frame.shape)
        return frame

    raise FileNotFoundError(f"‚ö†Ô∏è Fichier introuvable : {csv_path}")

# === 2️⃣ Cleaning and feature engineering ===
def clean_and_engineer(df):
    """Impute placeholder zeros and add the derived features used by the model.

    In the columns ``Glucose``, ``BloodPressure``, ``SkinThickness``, ``Insulin``
    and ``BMI`` a value of 0 is treated as "not measured" and replaced by the
    median of the measured (non-zero) values of that column. Five derived
    columns are then appended: ``Glucose_Insulin_ratio``, ``BMI2``,
    ``Glucose_BMI``, ``Glucose_Age`` and ``Pregnancies_Age``.

    Args:
        df: DataFrame holding at least the five columns above plus ``Age`` and
            ``Pregnancies``. It is not modified.

    Returns:
        A new DataFrame with the imputed columns and the derived features.
    """
    df = df.copy()
    cols_zero = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
    for c in cols_zero:
        # Turn the zeros into NaN *before* taking the median: otherwise the
        # placeholder zeros pull the median down (and a column that is mostly
        # zeros would get a median of 0, i.e. no imputation at all).
        measured = df[c].replace(0, np.nan)
        # With no measured value there is nothing to impute from; keep the
        # column as it is instead of filling it with NaN.
        if measured.notna().any():
            df[c] = measured.fillna(measured.median())

    # NOTE(review): the medians come from whatever frame is passed in; main()
    # calls this before the train/test split, so test rows influence them.

    # Denominators are clipped to 1 so the ratios can never divide by zero.
    df["Glucose_Insulin_ratio"] = df["Glucose"] / np.clip(df["Insulin"], 1, None)
    df["BMI2"] = df["BMI"] ** 2
    df["Glucose_BMI"] = df["Glucose"] * df["BMI"]
    df["Glucose_Age"] = df["Glucose"] / np.clip(df["Age"], 1, None)
    df["Pregnancies_Age"] = df["Pregnancies"] * df["Age"]
    return df

# === 3️⃣ Prepare the data ===
def prepare_xy(df):
    """Split features from the target, hold out a test set and balance the rest.

    Args:
        df: DataFrame containing an ``Outcome`` column; every other column is
            used as a feature.

    Returns:
        ``(X, y, X_train_b, X_test, y_train_b, y_test)``: the full feature
        frame and integer target, the SMOTE-resampled training split, and the
        untouched 20 % stratified test split.
    """
    target = df["Outcome"].astype(int)
    features = df.drop(columns=["Outcome"])

    split = train_test_split(
        features,
        target,
        test_size=0.2,
        stratify=target,
        random_state=RANDOM_STATE,
    )
    X_train, X_test, y_train, y_test = split

    # SMOTE is fitted on the training split only; the test split is returned as is.
    oversampler = SMOTE(random_state=RANDOM_STATE)
    X_train_b, y_train_b = oversampler.fit_resample(X_train, y_train)

    return features, target, X_train_b, X_test, y_train_b, y_test

# === 4️⃣ Tuned training ===
def tune_xgb(X_train, y_train):
    """Run a randomized hyper-parameter search over a scaler + XGBoost pipeline.

    Fifteen parameter combinations are scored by ROC AUC with a shuffled
    3-fold stratified cross-validation, and the best mean score is printed.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The best pipeline found by the search (steps ``"scaler"`` and ``"model"``).
    """
    # Keys carry the "model__" prefix so they address the pipeline's XGBoost step.
    search_space = {
        "model__max_depth": [3, 4, 5],
        "model__learning_rate": [0.03, 0.05, 0.1],
        "model__subsample": [0.7, 0.9, 1.0],
        "model__colsample_bytree": [0.7, 0.9, 1.0],
        "model__min_child_weight": [1, 3, 5],
        "model__gamma": [0, 0.5, 1.0],
    }

    classifier = XGBClassifier(
        eval_metric="logloss", random_state=RANDOM_STATE, n_estimators=400
    )
    pipeline = Pipeline([("scaler", StandardScaler()), ("model", classifier)])
    folds = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)

    search = RandomizedSearchCV(
        pipeline,
        param_distributions=search_space,
        n_iter=15,
        cv=folds,
        scoring="roc_auc",
        n_jobs=-1,
        random_state=RANDOM_STATE,
    )
    search.fit(X_train, y_train)

    print(f"‚úÖ Meilleur AUC (CV) : {search.best_score_:.3f}")
    return search.best_estimator_

# === 5️⃣ Model evaluation ===
def evaluate(model, X_test, y_test):
    """Print test-set metrics and show the ROC and precision-recall curves.

    Class labels are obtained by thresholding the positive-class probability
    at 0.5.

    Args:
        model: Fitted estimator exposing ``predict_proba``.
        X_test: Held-out features.
        y_test: Held-out true labels.
    """
    scores = model.predict_proba(X_test)[:, 1]
    labels = (scores >= 0.5).astype(int)

    # --- Text report ---
    print("\nüéØ Performance du mod√®le :")
    print("Accuracy :", accuracy_score(y_test, labels))
    auc = roc_auc_score(y_test, scores)
    print("ROC AUC :", auc)
    print(classification_report(y_test, labels))
    print("Matrice de confusion :\n", confusion_matrix(y_test, labels))

    # --- ROC curve, with the chance diagonal as a reference ---
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, scores)
    _, roc_ax = plt.subplots(figsize=(6, 4))
    roc_ax.plot(false_pos_rate, true_pos_rate, label=f"AUC = {auc:.3f}")
    roc_ax.plot([0, 1], [0, 1], "--", color="gray")
    roc_ax.set_title("Courbe ROC")
    roc_ax.set_xlabel("FPR")
    roc_ax.set_ylabel("TPR")
    roc_ax.legend(loc="lower right")
    roc_ax.grid(True)
    plt.show()

    # --- Precision-recall curve ---
    precision, recall, _ = precision_recall_curve(y_test, scores)
    avg_precision = average_precision_score(y_test, scores)
    _, pr_ax = plt.subplots(figsize=(6, 4))
    pr_ax.plot(recall, precision, label=f"AP = {avg_precision:.3f}")
    pr_ax.set_title("Courbe Pr√©cision / Rappel")
    pr_ax.set_xlabel("Recall")
    pr_ax.set_ylabel("Precision")
    pr_ax.legend(loc="lower left")
    pr_ax.grid(True)
    plt.show()

# === 6️⃣ Save the pipeline ===
def save_pipeline(pipe):
    """Serialize ``pipe`` with joblib into the project's Drive folder.

    Args:
        pipe: The object to persist (the fitted pipeline in this file).
    """
    destination = "/content/drive/MyDrive/IA_Diabete/diabetes_pipeline_prof.pkl"
    joblib.dump(pipe, destination)
    print("‚úÖ Mod√®le sauvegard√© :", destination)

# === 7️⃣ Full pipeline ===
def main():
    """Run the whole workflow: load, clean, split, tune, evaluate and save.

    Returns:
        ``(best_pipe, X_test)``: the tuned pipeline and the held-out features.
    """
    dataset = clean_and_engineer(load_data())

    # The full X / y returned first by prepare_xy are not needed here.
    _, _, X_train_b, X_test, y_train_b, y_test = prepare_xy(dataset)

    best_pipe = tune_xgb(X_train_b, y_train_b)
    evaluate(best_pipe, X_test, y_test)
    save_pipeline(best_pipe)
    return best_pipe, X_test

# Train at cell-execution time. The fitted pipeline and the held-out features
# become module globals that the prediction and SHAP callbacks below read.
best_pipe, X_test = main()

# ========= CORRECTED PREDICTION FUNCTION =========
def predict_diabetes(Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI,
                     DiabetesPedigreeFunction, Age):
    """Score one patient and explain the score with a SHAP bar chart.

    The eight raw inputs are combined with the same derived features used at
    training time, scaled and scored by the globals ``best_pipe`` / ``X_test``
    (column order), then a Markdown report and a per-patient SHAP chart are
    produced.

    Args:
        Pregnancies: Number of pregnancies.
        Glucose: Glucose value (shown as mg/dL in the report).
        BloodPressure: Blood pressure value.
        SkinThickness: Skin thickness value.
        Insulin: Insulin value (shown as µU/mL in the report).
        BMI: Body-mass index.
        DiabetesPedigreeFunction: Pedigree score.
        Age: Age in years.

    Returns:
        ``(markdown, probability_percent, image_path)``. ``probability_percent``
        is rounded to two decimals. ``image_path`` is the PNG of the ten
        largest SHAP impacts, or ``None`` when the SHAP step failed (the error
        is then appended to ``markdown``).
    """
    # Local import so this block does not depend on the file's import section.
    import tempfile

    # Build the single-row frame, ordered like the training columns.
    # NOTE(review): training replaces zeros in Insulin (and four other columns)
    # by a median, but a zero entered here is used as is — confirm intended.
    df_patient = pd.DataFrame([{
        "Pregnancies": Pregnancies,
        "Glucose": Glucose,
        "BloodPressure": BloodPressure,
        "SkinThickness": SkinThickness,
        "Insulin": Insulin,
        "BMI": BMI,
        "DiabetesPedigreeFunction": DiabetesPedigreeFunction,
        "Age": Age,
        "Glucose_Insulin_ratio": Glucose / max(Insulin, 1),
        "BMI2": BMI ** 2,
        "Glucose_BMI": Glucose * BMI,
        "Glucose_Age": Glucose / max(Age, 1),
        "Pregnancies_Age": Pregnancies * Age
    }])[X_test.columns]

    # Prediction: the two pipeline steps are applied by hand so the scaled
    # matrix can be reused for the SHAP explanation below.
    scaler = best_pipe.named_steps["scaler"]
    model = best_pipe.named_steps["model"]
    X_scaled = scaler.transform(df_patient)
    y_prob = float(model.predict_proba(X_scaled)[0, 1])
    y_pred = int(y_prob >= 0.5)
    prob_pct = round(y_prob * 100, 2)

    # Medical report
    verdict = "ü©∫ **Diab√©tique**" if y_pred == 1 else "‚úÖ **Non diab√©tique**"
    niveau = "√©lev√©" if y_prob > 0.7 else ("mod√©r√©" if y_prob > 0.4 else "faible")
    couleur = "üî¥" if y_prob > 0.7 else ("üü†" if y_prob > 0.4 else "üü¢")

    markdown = f"""
## {couleur} Diagnostic IA : {verdict}

**Probabilit√© de diab√®te :** {y_prob*100:.1f}%
**Niveau de risque :** {niveau}

### üìã R√©sum√© des param√®tres :
- **Glyc√©mie** : {Glucose} mg/dL
- **IMC** : {BMI:.1f} kg/m¬≤
- **√Çge** : {Age} ans
- **Insuline** : {Insulin} ¬µU/mL
"""

    # SHAP chart: best effort — a failure here must not hide the prediction.
    try:
        explainer = shap.Explainer(model)
        shap_values = explainer(X_scaled)

        # Ten features with the largest absolute impact for this patient.
        shap_df = pd.DataFrame({
            "Feature": X_test.columns,
            "Impact": shap_values.values[0]
        }).sort_values("Impact", key=abs, ascending=False).head(10)

        # Explicit figure/axes instead of pyplot's implicit "current figure",
        # so the drawing calls cannot land on a figure opened elsewhere.
        fig, ax = plt.subplots(figsize=(8, 5))
        try:
            colors = ["#00bfa6" if v > 0 else "#ff6b6b" for v in shap_df["Impact"]]
            ax.barh(shap_df["Feature"], shap_df["Impact"], color=colors)
            ax.set_title("üß† Analyse SHAP ‚Äî Impact des variables (Leith Chqoubi)", fontsize=13, pad=10)
            ax.set_xlabel("Influence sur le risque de diab√®te", fontsize=11)
            ax.invert_yaxis()
            fig.tight_layout()

            # One file per request: a fixed path would let concurrent users
            # overwrite (and be shown) each other's chart.
            with tempfile.NamedTemporaryFile(
                prefix="shap_patient_", suffix=".png", delete=False
            ) as tmp:
                shap_path = tmp.name
            fig.savefig(shap_path, bbox_inches="tight", dpi=120)
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)

        return markdown, prob_pct, shap_path

    except Exception as e:
        print(f"‚ùå Erreur SHAP : {e}")
        return markdown + f"\n\n‚ö†Ô∏è Erreur lors de la g√©n√©ration SHAP : {e}", prob_pct, None

# ========= FONCTION SHAP GLOBALE =========
def global_shap_summary():
    """Render the model-wide SHAP summary plot to a PNG.

    Up to 200 rows of the global ``X_test`` are sampled with a fixed seed,
    scaled with the pipeline's scaler and explained with the pipeline's model.

    Returns:
        The path of the saved image, or ``None`` if anything failed (the error
        is printed).
    """
    try:
        X_sample = X_test.sample(min(200, len(X_test)), random_state=42)
        xgb_model = best_pipe.named_steps["model"]
        scaler = best_pipe.named_steps["scaler"]
        X_scaled = scaler.transform(X_sample)

        explainer = shap.Explainer(xgb_model)
        shap_values = explainer(X_scaled)

        fig = plt.figure(figsize=(10, 6))
        try:
            # SHAP values come from the scaled matrix; the unscaled sample is
            # passed as the feature frame given to the plot.
            shap.summary_plot(shap_values.values, X_sample, show=False)
            plt.title("üìä Analyse SHAP globale du mod√®le XGBoost", fontsize=14, pad=15)

            shap_global_path = "/tmp/shap_global.png"
            fig.savefig(shap_global_path, bbox_inches="tight", dpi=120)
        finally:
            # Close the figure on failure too, so repeated clicks that raise
            # do not pile up open figures.
            plt.close(fig)

        return shap_global_path

    except Exception as e:
        print(f"‚ùå Erreur SHAP global : {e}")
        return None

# ========= INTERFACE GRADIO =========
# Build the Gradio app: a header, a prediction tab, a global-SHAP tab and a footer.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="gray")) as app:
    # Page header (static HTML).
    gr.HTML("""
    <div style='text-align:center; margin-bottom:20px'>
        <h1>üß† IA M√©dicale ‚Äî D√©tection du Diab√®te</h1>
        <h3>Analyse intelligente d√©velopp√©e par <b style='color:#00bfa6'>Leith Chqoubi</b></h3>
        <p>Mod√®le XGBoost optimis√© (SMOTE + standardisation + SHAP explainer)</p>
        <hr style='margin:10px 0'>
    </div>
    """)

    # Tab 1: per-patient prediction.
    with gr.Tab("üîç Pr√©diction IA"):
        with gr.Row():
            # Left column: one slider per raw input of predict_diabetes.
            with gr.Column(scale=1):
                Pregnancies = gr.Slider(0, 15, 2, step=1, label="Grossesses (Pregnancies)")
                Glucose = gr.Slider(50, 220, 120, step=1, label="Glyc√©mie (mg/dL)")
                BloodPressure = gr.Slider(40, 120, 70, step=1, label="Pression art√©rielle (mmHg)")
                SkinThickness = gr.Slider(5, 80, 25, step=1, label="√âpaisseur de peau (mm)")
                Insulin = gr.Slider(0, 400, 80, step=1, label="Insuline (¬µU/mL)")
                BMI = gr.Slider(15.0, 60.0, 28.5, step=0.1, label="IMC (kg/m¬≤)")
                DPF = gr.Slider(0.0, 2.5, 0.5, step=0.01, label="Facteur h√©r√©ditaire (DPF)")
                Age = gr.Slider(18, 90, 35, step=1, label="√Çge")
                btn = gr.Button("‚ö° Lancer l'analyse IA", variant="primary")

            # Right column: the three values returned by predict_diabetes.
            with gr.Column(scale=1):
                out_text = gr.Markdown(label="üß© Rapport m√©dical IA")
                out_prob = gr.Number(label="Probabilit√© (%)", precision=2)
                out_img = gr.Image(label="Explication SHAP (impact des variables)")

        # Inputs are listed in the same order as predict_diabetes' parameters
        # (DPF is passed as DiabetesPedigreeFunction).
        btn.click(
            predict_diabetes,
            inputs=[Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DPF, Age],
            outputs=[out_text, out_prob, out_img]
        )

    # Tab 2: model-wide SHAP summary, generated on demand.
    with gr.Tab("üìä Analyse Interpr√©table SHAP"):
        gr.Markdown("""
        ### üåê Analyse globale du mod√®le XGBoost
        Ce graphique montre **l'influence moyenne de chaque variable** sur la pr√©diction du diab√®te.
        Les points rouges indiquent des valeurs √©lev√©es, les bleus des valeurs faibles.
        """)

        shap_btn = gr.Button("üìà G√©n√©rer l'analyse SHAP compl√®te", variant="primary")
        shap_img = gr.Image(label="SHAP Summary Plot (vue globale du mod√®le)")

        # global_shap_summary takes no input and returns an image path (or None).
        shap_btn.click(fn=global_shap_summary, inputs=[], outputs=[shap_img])

    # Page footer (static HTML).
    gr.HTML("""
    <hr>
    <div style='text-align:center; font-size:14px; color:gray'>
        D√©velopp√© avec üí° Intelligence Artificielle ‚Äî <b>Leith Chqoubi</b><br>
        <i>Prototype acad√©mique de diagnostic IA m√©dicale (XGBoost + SHAP)</i>
    </div>
    """)

# NOTE(review): share=True presumably exposes the app through a public Gradio
# link — confirm this is intended for a medical prototype.
app.launch(share=True)

# Nouvelle section

# Nouvelle section