In [12]:
# advanced_stroke_app.py
# Full fixed Gradio app — copy / run

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
# ---------- DATA LOADING & MODEL TRAINING ----------
# Path of the CSV file the models are trained on.
DATA_PATH = "/content/healthcare-dataset-stroke-data (1).csv"

# Read the CSV, drop every row that contains a missing value, renumber rows.
df = pd.read_csv(DATA_PATH).dropna().reset_index(drop=True)

# Text columns are replaced by integer codes. The fitted encoder of each
# column is kept in `encoders` so prediction-time input is coded identically.
categorical_cols = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
encoders = {}
for column in categorical_cols:
    text_values = df[column].astype(str)  # encoders are fitted on the string form
    encoders[column] = LabelEncoder().fit(text_values)
    df[column] = encoders[column].transform(text_values)

# Target is the `stroke` column; features are everything except it and `id`.
y = df['stroke']
X = df.drop(columns=['id', 'stroke'])
# Column order seen at training time; prediction input is arranged to match.
FEATURE_ORDER = list(X.columns)

# 80/20 split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# The scaler is fitted on the training part only and then applied to both parts.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Both classifiers are trained on the same scaled training data.
log_reg = LogisticRegression(max_iter=2000).fit(X_train, y_train)
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
def safe_encode(encoder, value):
    """Turn a categorical value into the integer code a fitted encoder uses.

    Args:
        encoder: A fitted encoder exposing ``transform`` (and, normally,
            ``classes_``), e.g. one of the per-column label encoders.
        value: Either the category as text, or a number that is already a code.

    Returns:
        int: The code for ``value``. The fallback code ``0`` is returned when
        the text is a label the encoder does not know, when a numeric value
        cannot be converted to an integer (NaN, infinity), or when a numeric
        code lies outside the encoder's known codes.

    Note:
        The fallback is a best-effort choice that keeps the UI from crashing.
        Code 0 is simply whichever class the encoder lists first; nothing here
        makes it the most frequent class.
    """
    fallback = 0

    # Numbers are treated as already-encoded values (e.g. 0/1).
    if isinstance(value, (int, np.integer, float, np.floating)):
        try:
            code = int(value)
        except (ValueError, OverflowError):
            # int() rejects NaN (ValueError) and +/-infinity (OverflowError).
            return fallback
        known = getattr(encoder, "classes_", None)
        # A code the encoder never produced would feed the model a category
        # it was not trained on, so it gets the same fallback as unseen text.
        if known is not None and not 0 <= code < len(known):
            return fallback
        return code

    try:
        return int(encoder.transform([str(value)])[0])
    except ValueError:
        # Unseen labels are reported as ValueError; other exception types
        # indicate a real bug and are deliberately allowed to propagate.
        return fallback

def make_progress_html(prob):
    """Render a probability as a small inline HTML bar with a caption.

    Args:
        prob: Probability to display; converted with ``float()`` and
            multiplied by 100 to get a percentage.

    Returns:
        str: HTML whose inner bar is as wide as the percentage (two decimals)
        and is coloured green below 30%, amber below 70% and red otherwise,
        followed by a "Risk: NN.NN%" caption.
    """
    percent = float(prob) * 100.0

    # Threshold colours: green < 30 <= amber < 70 <= red.
    fill = "#28a745" if percent < 30 else "#ffc107" if percent < 70 else "#dc3545"

    # The markup is assembled line by line; the leading empty entry and the
    # trailing indentation-only entry keep the surrounding whitespace intact.
    pieces = [
        "",
        '    <div style="width:100%; background:#eee; border-radius:8px; padding:2px;">',
        f'      <div style="width:{percent:.2f}%; background:{fill}; height:18px; border-radius:6px;"></div>',
        "    </div>",
        f'    <div style="font-size:0.9rem; margin-top:4px;">Risk: {percent:.2f}%</div>',
        "    ",
    ]
    return "\n".join(pieces)

# ---------- PREDICTION FUNCTION ----------
def predict_stroke(
    gender, age, hypertension, heart_disease, ever_married, work_type,
    residence_type, avg_glucose_level, bmi, smoking_status, model_choice
):
    """Predict stroke risk for one patient and format it for the UI.

    Args:
        gender, ever_married, work_type, residence_type, smoking_status:
            Categorical values; each is encoded with the encoder fitted for
            its column at training time (via ``safe_encode``).
        age, avg_glucose_level, bmi: Values convertible with ``float()``.
        hypertension, heart_disease: Values convertible with ``int()``.
        model_choice: ``"Logistic Regression"`` selects the logistic model;
            any other value selects the random forest.

    Returns:
        tuple: ``(readable_text, probability_percent, html)``. On success the
        percentage is the class-1 probability rounded to two decimals and the
        HTML is the progress bar. If anything fails, the tuple is
        ``(error_message, 0.0, red_error_html)`` instead of raising, so the
        problem is visible in the UI.
    """
    # Local import: only the error path needs it, and the module-level import
    # block is left untouched.
    from html import escape

    try:
        # Parse the plain numeric fields first so malformed input is rejected
        # before any encoder or model is touched.
        encoded = {
            'age': float(age),
            'hypertension': int(hypertension),
            'heart_disease': int(heart_disease),
            'avg_glucose_level': float(avg_glucose_level),
            'bmi': float(bmi),
        }
        # Categorical fields use the same encoders that were used at training.
        encoded.update({
            'gender': safe_encode(encoders['gender'], gender),
            'ever_married': safe_encode(encoders['ever_married'], ever_married),
            'work_type': safe_encode(encoders['work_type'], work_type),
            'Residence_type': safe_encode(encoders['Residence_type'], residence_type),
            'smoking_status': safe_encode(encoders['smoking_status'], smoking_status),
        })

        # Selecting by FEATURE_ORDER puts the columns in training order,
        # independent of the dict's insertion order above.
        input_df = pd.DataFrame([encoded])[FEATURE_ORDER]

        # Apply the scaling fitted on the training data.
        input_scaled = scaler.transform(input_df)

        model = log_reg if model_choice == "Logistic Regression" else rf

        pred = int(model.predict(input_scaled)[0])
        prob = float(model.predict_proba(input_scaled)[0][1])  # probability of class 1 (stroke)

        if pred == 1:
            text = f"⚠️ High risk of stroke — probability {prob*100:.2f}%"
        else:
            text = f"✅ Low risk of stroke — probability {prob*100:.2f}%"

        bar_html = make_progress_html(prob)
        return text, round(prob*100, 2), bar_html

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        msg = f"Error during prediction: {e}"
        # The exception text can echo caller-supplied input (e.g. a value that
        # failed float()), and this string is rendered as HTML, so it must be
        # escaped. The plain-text output keeps the raw message.
        return msg, 0.0, f"<div style='color:red'>{escape(msg)}</div>"

# ---------- VISUALS FOR INSIGHTS ----------
def show_confusion_matrix():
    """Draw the random forest's confusion matrix for the test split.

    Returns:
        matplotlib.figure.Figure: An annotated heatmap with predicted labels
        on the x-axis and actual labels on the y-axis.
    """
    # Local import so the module-level import block stays untouched.
    from matplotlib.figure import Figure

    cm = confusion_matrix(y_test, rf.predict(X_test))

    # The figure is built without pyplot on purpose: figures made through
    # plt.subplots() stay registered in pyplot's global state until they are
    # closed, so every button click would keep one more figure alive.
    fig = Figure(figsize=(4.5, 4))
    ax = fig.subplots()
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title("Confusion Matrix (Random Forest)")
    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout()
    return fig

def show_roc_curve():
    """Draw the random forest's ROC curve for the test split.

    Returns:
        matplotlib.figure.Figure: The ROC curve with its AUC in the legend,
        plus a dashed grey diagonal as the chance-level reference.
    """
    # Local import so the module-level import block stays untouched.
    from matplotlib.figure import Figure

    y_prob = rf.predict_proba(X_test)[:, 1]  # probability of class 1
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    auc = roc_auc_score(y_test, y_prob)

    # The figure is built without pyplot on purpose: figures made through
    # plt.subplots() stay registered in pyplot's global state until they are
    # closed, so every button click would keep one more figure alive.
    fig = Figure(figsize=(5, 4))
    ax = fig.subplots()
    ax.plot(fpr, tpr, label=f"AUC = {auc:.3f}")
    ax.plot([0, 1], [0, 1], '--', color='gray')
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("ROC Curve (Random Forest)")
    ax.legend()
    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout()
    return fig

def show_feature_importance():
    """Draw the random forest's feature importances as a horizontal bar chart.

    Returns:
        matplotlib.figure.Figure: One bar per entry of ``FEATURE_ORDER``,
        sorted from most to least important.
    """
    # Local import so the module-level import block stays untouched.
    from matplotlib.figure import Figure

    importances = rf.feature_importances_
    feat_df = pd.DataFrame(
        {'Feature': FEATURE_ORDER, 'Importance': importances}
    ).sort_values('Importance', ascending=False)

    # The figure is built without pyplot on purpose: figures made through
    # plt.subplots() stay registered in pyplot's global state until they are
    # closed, so every button click would keep one more figure alive.
    fig = Figure(figsize=(6, 4.5))
    ax = fig.subplots()
    sns.barplot(x='Importance', y='Feature', data=feat_df, ax=ax)
    ax.set_title("Feature Importance (Random Forest)")
    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout()
    return fig

# ---------- GRADIO UI ----------
# Dropdown choices are the class labels each encoder was fitted on, so every
# value a user can select is one the matching encoder can transform.
gender_options = list(encoders['gender'].classes_)
ever_married_options = list(encoders['ever_married'].classes_)
work_type_options = list(encoders['work_type'].classes_)
residence_type_options = list(encoders['Residence_type'].classes_)
smoking_status_options = list(encoders['smoking_status'].classes_)

# Build the whole interface inside one Blocks context; `demo` is launched below.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 Stroke Prediction (Fixed & Improved)")
    gr.Markdown("Enter patient details and choose a model. The app returns readable result, probability, and a progress bar.")

    # Patient inputs, grouped into rows. Each dropdown defaults to its first option.
    with gr.Row():
        gender = gr.Dropdown(choices=gender_options, value=gender_options[0], label="Gender")
        age = gr.Slider(minimum=0, maximum=120, value=45, step=1, label="Age")
        bmi = gr.Number(value=25.0, label="BMI")

    with gr.Row():
        hypertension = gr.Radio([0,1], value=0, label="Hypertension (0=No, 1=Yes)")
        heart_disease = gr.Radio([0,1], value=0, label="Heart Disease (0=No, 1=Yes)")
        ever_married = gr.Dropdown(choices=ever_married_options, value=ever_married_options[0], label="Ever Married")

    with gr.Row():
        work_type = gr.Dropdown(choices=work_type_options, value=work_type_options[0], label="Work Type")
        residence_type = gr.Dropdown(choices=residence_type_options, value=residence_type_options[0], label="Residence Type")
        smoking_status = gr.Dropdown(choices=smoking_status_options, value=smoking_status_options[0], label="Smoking Status")

    avg_glucose_level = gr.Slider(minimum=40, maximum=300, value=100, step=1, label="Average Glucose Level")
    # The two labels here must stay in sync with predict_stroke, which compares
    # the selection against the exact string "Logistic Regression".
    model_choice = gr.Radio(["Logistic Regression", "Random Forest"], value="Random Forest", label="Model")

    # Output widgets, one per element of the tuple predict_stroke returns.
    predict_btn = gr.Button("🔍 Predict")
    output_text = gr.Textbox(label="Prediction (readable)")
    prob_num = gr.Number(label="Probability (%)")
    progress_html = gr.HTML()

    # `inputs` is listed in the same order as predict_stroke's parameters, and
    # `outputs` in the order of its returned (text, percentage, html) tuple.
    predict_btn.click(
        fn=predict_stroke,
        inputs=[gender, age, hypertension, heart_disease, ever_married, work_type,
                residence_type, avg_glucose_level, bmi, smoking_status, model_choice],
        outputs=[output_text, prob_num, progress_html]
    )

    gr.Markdown("### 📊 Model Insights")
    # Three plot areas side by side; each is filled by its own button below.
    with gr.Row():
        cm_plot = gr.Plot()
        roc_plot = gr.Plot()
        feat_plot = gr.Plot()

    # The plotting functions take no inputs and return a figure for their plot area.
    gr.Button("Show Confusion Matrix").click(show_confusion_matrix, inputs=None, outputs=cm_plot)
    gr.Button("Show ROC Curve").click(show_roc_curve, inputs=None, outputs=roc_plot)
    gr.Button("Show Feature Importance").click(show_feature_importance, inputs=None, outputs=feat_plot)
# Launch the app only when this file is executed directly; share=False means
# no public link is requested.
if __name__ == "__main__":
    demo.launch(share=False)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.
* To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>