In [59]:
import csv
import os

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [61]:
# Load the data
DIABETES_DATA_URL = (
    "https://raw.githubusercontent.com/juliocezarcarneiro/diabetes-risk-prediction-model/main/Resources/diabetes_binary_5050split_health_indicators_BRFSS2015.csv"
)


def load_and_prepare_data(url=DIABETES_DATA_URL):
    """Read the diabetes indicators CSV and split it into features and target.

    Args:
        url: Path or URL accepted by ``pandas.read_csv``. Defaults to the
            project's hosted copy of the dataset.

    Returns:
        A ``(X, y, df)`` tuple where ``X`` is the frame without the
        ``Diabetes_binary`` column, ``y`` is that column as a 1-D NumPy array,
        and ``df`` is the full frame as read.

    Raises:
        KeyError: if the CSV has no ``Diabetes_binary`` column.
    """
    df = pd.read_csv(url)
    X = df.drop(columns="Diabetes_binary")
    # Series.ravel() is deprecated in recent pandas; a Series is already 1-D,
    # so to_numpy() yields the same array without the FutureWarning.
    y = df["Diabetes_binary"].to_numpy()
    return X, y, df

# Display the data
# Load the frame in this cell so it runs on a fresh kernel (Restart & Run All)
# instead of relying on a `df` left over from a previous session.
X, y, df = load_and_prepare_data()
display(df.head())

Unnamed: 0,Diabetes_binary,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
0,0.0,1.0,0.0,1.0,26.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,3.0,5.0,30.0,0.0,1.0,4.0,6.0,8.0
1,0.0,1.0,1.0,1.0,26.0,1.0,1.0,0.0,0.0,1.0,...,1.0,0.0,3.0,0.0,0.0,0.0,1.0,12.0,6.0,8.0
2,0.0,0.0,0.0,1.0,26.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,1.0,0.0,10.0,0.0,1.0,13.0,6.0,8.0
3,0.0,1.0,1.0,1.0,28.0,1.0,0.0,0.0,1.0,1.0,...,1.0,0.0,3.0,0.0,3.0,0.0,1.0,11.0,6.0,8.0
4,0.0,0.0,0.0,1.0,29.0,1.0,0.0,0.0,1.0,1.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,8.0,5.0,8.0


In [63]:
# Train model
def train_model(X_train, y_train, **rf_overrides):
    """Fit a random-forest classifier on the training split.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels aligned with ``X_train``.
        **rf_overrides: Optional ``RandomForestClassifier`` keyword arguments
            that replace the defaults below (e.g. ``n_estimators=50`` for a
            quick run).

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # Defaults reproduce the original fixed configuration exactly.
    rf_params = {
        "n_estimators": 200,
        "max_depth": 10,
        "class_weight": "balanced",
        "random_state": 42,
    }
    rf_params.update(rf_overrides)

    model = RandomForestClassifier(**rf_params)
    model.fit(X_train, y_train)
    return model

In [65]:


def _age_to_brfss_category(age):
    """Convert an age in years to the BRFSS five-year age category (1-13).

    The training data's ``Age`` column holds the BRFSS age-group code rather
    than an age in years: 1 = 18-24, 2 = 25-29, ..., 12 = 75-79, 13 = 80+.
    """
    years = int(age)
    if years < 25:
        return 1
    return min(13, (years - 25) // 5 + 2)


def create_gradio_interface(model, scaler, feature_names):
    """Build the Gradio Blocks app for interactive diabetes-risk prediction.

    Args:
        model: Fitted classifier exposing ``predict_proba``; column 1 of its
            output is treated as the diabetes probability.
        scaler: Fitted transformer exposing ``transform``; applied to the raw
            feature row before it is passed to ``model``.
        feature_names: Ordered feature names matching the columns the scaler
            and model were fitted on. Must contain ``BMI``, ``Age``,
            ``HighBP``, ``GenHlth``, ``HighChol``, ``CholCheck`` and
            ``Smoker``.

    Returns:
        The assembled ``gr.Blocks`` app (not launched).

    Side effects:
        Every prediction appends one row to ``user_predictions.csv`` in the
        current working directory.
    """
    # Accept any sequence (list, tuple, pandas Index) while keeping .index().
    feature_names = list(feature_names)
    default_weight, default_height = 150, 65
    log_filename = "user_predictions.csv"
    log_header = [
        "weight", "height", "bmi", "age", "high_bp",
        "gen_hlth", "high_chol", "risk_level", "probability", "confidence"
    ]

    def compute_bmi(weight, height):
        # Imperial BMI formula: 703 converts lb/in^2 to kg/m^2.
        return (weight / (height ** 2)) * 703

    # BMI calculation block
    def update_bmi(weight, height):
        return round(compute_bmi(weight, height), 1)

    # Prediction block with CSV logging
    def predict(weight, height, age, high_bp, gen_hlth, high_chol):
        bmi = compute_bmi(weight, height)

        # Features without a UI control stay at 0.
        # NOTE(review): 0 may not be a valid code for every column (e.g.
        # Education/Income) - confirm against the dataset codebook.
        input_values = np.zeros(len(feature_names))
        supplied = {
            'BMI': bmi,
            # The model was trained on age-group codes, not years.
            'Age': _age_to_brfss_category(age),
            'HighBP': high_bp,
            'GenHlth': gen_hlth,
            'HighChol': high_chol,
            'CholCheck': 1,
            'Smoker': 0,
        }
        for name, value in supplied.items():
            input_values[feature_names.index(name)] = value

        scaled_input = scaler.transform([input_values])
        proba = model.predict_proba(scaled_input)[0]

        risk_level = "High" if proba[1] > 0.5 else "Low"
        prob = round(float(proba[1]) * 100, 1)
        conf = round(float(max(proba)) * 100, 1)

        # Save user input and prediction to CSV. The header is written when
        # the file is new or exists but is still empty.
        needs_header = (
            not os.path.isfile(log_filename)
            or os.path.getsize(log_filename) == 0
        )
        with open(log_filename, mode='a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            if needs_header:
                writer.writerow(log_header)
            writer.writerow([
                weight, height, round(bmi, 1), age, high_bp,
                gen_hlth, high_chol, risk_level, prob, conf
            ])

        return risk_level, prob, conf, round(bmi, 1)

    # Blocks UI
    with gr.Blocks() as demo:
        gr.Markdown("## 🩺 Diabetes Risk Predictor\n_BMI is auto-calculated from your weight and height_")

        with gr.Row():
            weight_input = gr.Slider(50, 400, step=0.1, label="Weight (lbs)", value=default_weight)
            height_input = gr.Slider(48, 84, step=0.1, label="Height (inches)", value=default_height)
            # Pre-fill so the box is not blank before the first slider move.
            bmi_output = gr.Number(
                label="BMI (auto-calculated)",
                interactive=False,
                value=update_bmi(default_weight, default_height),
            )

        weight_input.change(fn=update_bmi, inputs=[weight_input, height_input], outputs=bmi_output)
        height_input.change(fn=update_bmi, inputs=[weight_input, height_input], outputs=bmi_output)

        age_input = gr.Dropdown(list(range(18, 100)), label="Age", value=45)
        high_bp_input = gr.Radio([0, 1], label="High Blood Pressure (0=No, 1=Yes)", value=0)
        # BRFSS codes general health as 1 = excellent ... 5 = poor.
        gen_hlth_input = gr.Slider(1, 5, step=1, label="General Health (1=Excellent, 5=Poor)", value=3)
        high_chol_input = gr.Radio([0, 1], label="High Cholesterol (0=No, 1=Yes)", value=0)

        predict_btn = gr.Button("Predict Risk")

        risk_label = gr.Label(label="Risk Level")
        prob_output = gr.Number(label="Diabetes Probability (%)")
        conf_output = gr.Number(label="Confidence Score (%)")
        bmi_display = gr.Number(label="Calculated BMI")

        predict_btn.click(
            fn=predict,
            inputs=[weight_input, height_input, age_input, high_bp_input, gen_hlth_input, high_chol_input],
            outputs=[risk_label, prob_output, conf_output, bmi_display]
        )

    return demo


# Execute block
if __name__ == "__main__":
    # Fetch the dataset and hold out 20% of the rows for evaluation.
    X, y, df = load_and_prepare_data()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # The scaler is fitted on the training split only and reused for the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = train_model(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Report hold-out accuracy and the five largest feature importances.
    accuracy = accuracy_score(y_test, y_pred)
    print(f"✅ Model Accuracy: {accuracy:.2f}")
    print("\n🔍 Top 5 Features:")
    importances = pd.Series(model.feature_importances_, index=X.columns)
    print(importances.sort_values(ascending=False).head(5))

    # Persist the model, the scaler and the feature order with joblib.
    feature_names = X.columns.tolist()
    joblib.dump(model, "diabetes_model.pkl")
    joblib.dump(scaler, "scaler.pkl")
    joblib.dump(feature_names, "feature_names.pkl")

    # Build the UI and launch it with a public share link.
    demo = create_gradio_interface(model, scaler, feature_names)
    demo.launch(share=True)


  y = df["Diabetes_binary"].ravel()


✅ Model Accuracy: 0.75

🔍 Top 5 Features:
GenHlth     0.231163
HighBP      0.211973
BMI         0.134882
Age         0.098457
HighChol    0.088772
dtype: float64
* Running on local URL:  http://127.0.0.1:7868
* Running on public URL: https://911cf4116bc76d5aca.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
