<a href="https://colab.research.google.com/github/saket1923/Multiple-Disease-Prediction-Model/blob/main/ProjectFinal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required libraries
!pip install gradio scikit-learn pandas numpy matplotlib seaborn plotly -q



In [2]:
# Import necessary libraries
import logging
import traceback
import urllib.request

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.multioutput import MultiOutputClassifier



In [3]:
# Configure logging for debugging
# `basicConfig` silently does nothing when the root logger already has handlers
# (as it often does inside hosted notebook kernels); `force=True` replaces them
# so the DEBUG level requested here actually takes effect.
logging.basicConfig(level=logging.DEBUG, force=True)

# Root-level DEBUG also enables the very chatty debug output of plotting and
# HTTP libraries; cap those at WARNING so the app's own messages stay readable.
for _noisy_logger in ("matplotlib", "PIL", "urllib3", "httpx", "httpcore"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)


In [4]:
# Global variables
# Input columns, in the exact order the model is trained and queried with.
feature_names = [
    'Age',
    'BMI',
    'BloodPressure',
    'Cholesterol',
    'Glucose',
    'Exercise',
    'Smoking',
    'FamilyHistory',
]

# Shared state filled in by `load_and_train_model`:
#   global_model       - the fitted classifier (None until training succeeds)
#   data_summary       - text description of the most recently loaded dataset
#   evaluation_results - text of the most recent evaluation report
global_model = data_summary = None
evaluation_results = str()


In [5]:
# Enhanced training function with corrected output handling
def load_and_train_model(file, progress=gr.Progress()):
    """Train the multi-output disease model from an uploaded CSV file.

    The CSV must contain every column in ``feature_names`` plus the target
    columns ``Diabetes``, ``HeartDisease`` and ``Stroke``. Files with more than
    10,000 rows are randomly down-sampled to 10,000 rows (fixed seed) before
    training. On success the fitted model is stored in ``global_model`` and the
    report text in ``evaluation_results``.

    Args:
        file: The uploaded file, given either as a path string or as an object
            whose ``name`` attribute holds the path.
        progress: Gradio progress tracker, called with a completion fraction
            and a status message at each stage.

    Returns:
        tuple: ``(message, figure)``. ``message`` is the training report, or an
        error description if anything failed; ``figure`` is the matplotlib
        feature-importance figure, or ``None`` on failure.
    """
    global global_model, evaluation_results, data_summary
    target_names = ['Diabetes', 'HeartDisease', 'Stroke']
    feature_importance_fig = None
    try:
        progress(0, "Reading CSV file...")
        logging.debug(f"File input received: {file}")

        # Accept both a plain path string and a file-like wrapper exposing `.name`.
        csv_path = file if isinstance(file, str) else getattr(file, 'name', None)
        if not csv_path:
            return "Error: Please upload a valid CSV file", None

        df = pd.read_csv(csv_path)

        required_columns = feature_names + target_names
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            return f"Error: Missing columns: {', '.join(missing_columns)}", None

        progress(0.2, "Preparing data...")

        # The summary describes the file as uploaded, i.e. before any down-sampling.
        data_summary = f"Dataset Size: {len(df)} records\n"
        data_summary += f"Features: {', '.join(feature_names)}\n"
        data_summary += "Target Variables: Diabetes, HeartDisease, Stroke\n"

        # Cap the training set size, then derive X and y from the same frame so
        # features and targets cannot drift out of alignment.
        if len(df) > 10000:
            df = df.sample(n=10000, random_state=42)
        X = df[feature_names]
        y = df[target_names]

        progress(0.4, "Splitting data...")
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        progress(0.6, "Training model...")
        rf = RandomForestClassifier(n_estimators=100, max_depth=15, n_jobs=-1, random_state=42)
        model = MultiOutputClassifier(rf)
        model.fit(X_train, y_train)

        # Feature importance, averaged over the per-target forests. The figure
        # is drawn through an explicit Axes instead of pyplot's implicit
        # "current figure" state.
        importances = np.mean([est.feature_importances_ for est in model.estimators_], axis=0)
        feature_importance_fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=importances, y=feature_names, ax=ax)
        ax.set_title("Feature Importance Across All Predictions")
        ax.set_xlabel("Importance Score")

        progress(0.8, "Evaluating model...")
        y_pred = model.predict(X_test)
        cv_scores = cross_val_score(model, X, y, cv=5)

        eval_results = f"Cross-validation Score: {cv_scores.mean():.2f} (±{cv_scores.std()*2:.2f})\n\n"
        for i, disease in enumerate(target_names):
            y_true_col = y_test.iloc[:, i]
            y_pred_col = y_pred[:, i]
            acc = accuracy_score(y_true_col, y_pred_col)
            report = classification_report(y_true_col, y_pred_col)
            eval_results += f"\n**{disease} Prediction**\n"
            eval_results += f"Accuracy: {acc:.2f}\n"
            # The report is a whitespace-aligned table; fence it so the Markdown
            # renderer keeps the columns intact.
            eval_results += f"Detailed Report:\n```\n{report}\n```\n"

        global_model = model
        evaluation_results = eval_results
        progress(1.0, "Complete!")
        return data_summary + "\nModel trained successfully!\n" + eval_results, feature_importance_fig

    except Exception as e:
        # `logging.exception` records the traceback alongside the message.
        logging.exception(f"Training error: {str(e)}")
        return f"Error during training: {str(e)}\n```\n{traceback.format_exc()}\n```", None

    finally:
        # Deregister the figure from pyplot so repeated training runs (and
        # failures after plotting) do not accumulate open figures. The Figure
        # object itself stays usable for rendering by the caller.
        if feature_importance_fig is not None:
            plt.close(feature_importance_fig)


In [6]:
# Enhanced prediction function with corrected output handling
def predict_disease(age, bmi, blood_pressure, cholesterol, glucose, exercise, smoking, family_history):
    """Score one patient with the trained model and build the UI outputs.

    Args:
        age, bmi, blood_pressure, cholesterol, glucose, exercise: Numeric
            inputs, placed in the model's input row in ``feature_names`` order.
        smoking, family_history: Flags; converted with ``int()`` before being
            passed to the model.

    Returns:
        tuple: ``(result, figure, suggestions, risk_summary)`` where the three
        text items are Markdown strings and ``figure`` is a Plotly bar chart.
        If no model has been trained, or prediction fails, the text items carry
        a warning message and ``figure`` is ``None``.
    """
    if global_model is None:
        return "⚠️ Please train a model first", None, "⚠️ No suggestions available", "⚠️ No risk summary available"

    try:
        input_data = pd.DataFrame(
            [[age, bmi, blood_pressure, cholesterol, glucose, exercise, int(smoking), int(family_history)]],
            columns=feature_names
        )
        prediction = global_model.predict(input_data)[0]
        class_probabilities = global_model.predict_proba(input_data)

        diseases = ['Diabetes', 'Heart Disease', 'Stroke']

        # Look up the column of the positive class (label 1) per estimator
        # instead of assuming it is column 1: an estimator trained on a target
        # with a single class only has one probability column.
        probabilities = []
        for estimator, proba in zip(global_model.estimators_, class_probabilities):
            classes = list(estimator.classes_)
            if 1 in classes:
                probabilities.append(float(proba[0, classes.index(1)]))
            else:
                probabilities.append(0.0)

        # Detailed prediction results
        result = "### Health Risk Assessment\n"
        for i, disease in enumerate(diseases):
            pred = prediction[i]
            prob_positive = probabilities[i]
            status = "High Risk" if pred == 1 else "Low Risk"
            color = "#e74c3c" if pred == 1 else "#2ecc71"
            # The figure shown is the probability of the disease, not the
            # model's confidence in the displayed status, so label it as such.
            result += f"- **{disease}**: <span style='color: {color};'>{status}</span> (Estimated probability: {prob_positive:.2%})\n"

        # Interactive Plotly visualization. Axis and colour ranges are pinned to
        # [0, 1]; otherwise the colour scale stretches between the smallest and
        # largest of the three values and paints the highest bar red even when
        # every risk is low.
        fig = px.bar(
            x=diseases,
            y=probabilities,
            text=[f'{p:.2%}' for p in probabilities],
            title="Disease Risk Probabilities",
            labels={'y': 'Probability', 'x': 'Disease'},
            color=probabilities,
            color_continuous_scale='RdYlGn_r',
            range_color=[0, 1],
            range_y=[0, 1]
        )
        fig.update_traces(textposition='auto')
        fig.update_layout(showlegend=False, height=400)

        # Detailed health suggestions: each section is triggered either by the
        # model's prediction or by a fixed threshold on the raw inputs.
        suggestions = "### Personalized Health Recommendations\n"
        risk_factors = []

        if prediction[0] == 1 or glucose > 140 or bmi > 30:
            suggestions += "- **Diabetes Prevention**: Monitor blood glucose, reduce refined carbs, aim for BMI < 25\n"
            risk_factors.append("Diabetes")
        if prediction[1] == 1 or blood_pressure > 140 or cholesterol > 200:
            suggestions += "- **Heart Health**: Reduce sodium (<2300mg/day), increase omega-3 intake, regular cardio\n"
            risk_factors.append("Heart Disease")
        if prediction[2] == 1 or (blood_pressure > 140 and smoking):
            suggestions += "- **Stroke Prevention**: Quit smoking immediately, monitor BP, maintain healthy weight\n"
            risk_factors.append("Stroke")
        if smoking:
            suggestions += "- **Smoking**: Join cessation program, consider nicotine replacement therapy\n"
        if exercise < 3:
            suggestions += f"- **Activity**: Increase to {max(3, exercise+1)} hours/week of moderate exercise\n"
        if not risk_factors:
            suggestions += "- **Maintenance**: Continue healthy habits, annual check-ups recommended\n"

        # Risk factor summary
        risk_summary = f"### Risk Factor Summary\nIdentified risks: {', '.join(risk_factors) if risk_factors else 'None'}\n"

        return result, fig, suggestions, risk_summary

    except Exception as e:
        # `logging.exception` records the traceback alongside the message.
        logging.exception(f"Prediction error: {str(e)}")
        return f"⚠️ Prediction error: {str(e)}", None, f"⚠️ Error: {str(e)}", f"⚠️ Error: {str(e)}"


In [7]:
# Advanced UI Theme
# Start from Gradio's default theme and override its colour hues and sizing.
theme = gr.themes.Default(
    # sizing
    text_size="md",
    radius_size="lg",
    # colour hues
    neutral_hue="slate",
    secondary_hue="gray",
    primary_hue="blue",
)


In [8]:
# Create enhanced Gradio interface
# Custom CSS, passed to the Blocks constructor (the documented entry point for
# custom styles) rather than assigned to the instance after it has been built.
# NOTE(review): `.gr-accordion` and `.gr-plot` are not classes assigned anywhere
# in this layout; confirm the installed Gradio version emits them, otherwise
# those two rules have no effect.
CUSTOM_CSS = """
.header { text-align: center; color: #1a3c5e; padding: 20px; background: #f0f6ff; border-radius: 10px; }
.section-title { color: #2c5282; border-bottom: 2px solid #bee3f8; padding-bottom: 5px; }
button { transition: all 0.3s ease; }
button:hover { transform: scale(1.05); }
.gr-accordion { background: #f7fafc; border-radius: 8px; padding: 15px; }
.gr-plot { border: 1px solid #e2e8f0; border-radius: 8px; padding: 10px; }
"""

with gr.Blocks(theme=theme, title="Health Risk Prediction Suite", css=CUSTOM_CSS) as interface:
    gr.Markdown(
        """
        # 🩺 Health Risk Prediction Suite
        AI-powered disease risk assessment and prevention tool
        """,
        elem_classes="header"
    )

    with gr.Tabs():
        # Training tab: upload a CSV, train the model, show metrics and the
        # feature-importance plot.
        with gr.Tab("📊 Model Training"):
            gr.Markdown("## Model Training & Data Analysis", elem_classes="section-title")
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(label="Upload Health Data (CSV)", file_types=['.csv'])
                    train_button = gr.Button("Train Model", variant="primary", size="lg")
                with gr.Column(scale=2):
                    train_output = gr.Markdown(label="Training Results & Metrics")
                    feature_plot = gr.Plot(label="Feature Importance")

            # `load_and_train_model` returns (report text, figure) in this order.
            train_button.click(
                fn=load_and_train_model,
                inputs=file_input,
                outputs=[train_output, feature_plot]
            )

        # Prediction tab: patient inputs on the left, results on the right.
        with gr.Tab("🔍 Risk Assessment"):
            gr.Markdown("## Personal Health Risk Assessment", elem_classes="section-title")
            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Accordion("Patient Data Input", open=True):
                        age = gr.Slider(20, 100, step=1, label="Age", info="Years")
                        bmi = gr.Slider(15, 50, step=0.1, label="BMI", info="kg/m²")
                        bp = gr.Slider(80, 200, step=1, label="Blood Pressure", info="mmHg")
                        chol = gr.Slider(100, 300, step=1, label="Cholesterol", info="mg/dL")
                        gluc = gr.Slider(50, 200, step=1, label="Glucose", info="mg/dL")
                        exer = gr.Slider(0, 7, step=0.5, label="Exercise", info="Hours/week")
                        smoke = gr.Checkbox(label="Smoking", info="Current smoker?")
                        fam = gr.Checkbox(label="Family History", info="Of chronic diseases?")
                        predict_button = gr.Button("Assess Risks", variant="primary", size="lg")

                with gr.Column(scale=2):
                    with gr.Tabs():
                        with gr.TabItem("Results"):
                            prediction_text = gr.Markdown()
                            prediction_plot = gr.Plot(label="Risk Visualization")
                        with gr.TabItem("Recommendations"):
                            suggestion_text = gr.Markdown()
                            risk_summary = gr.Markdown()

            # Input order must match the parameter order of `predict_disease`;
            # output order must match its four-element return tuple.
            predict_button.click(
                fn=predict_disease,
                inputs=[age, bmi, bp, chol, gluc, exer, smoke, fam],
                outputs=[prediction_text, prediction_plot, suggestion_text, risk_summary]
            )


In [None]:
# Launch with additional options
FAVICON_URL = "https://cdn-icons-png.flaticon.com/512/684/684908.png"


def _download_favicon(url, destination="favicon.png"):
    """Download the icon at `url` to `destination` and return that local path.

    Gradio's `favicon_path` is documented as a path to a file, so a remote
    icon has to be fetched first. Returns None if the download fails, in
    which case Gradio falls back to its default icon.
    """
    try:
        with urllib.request.urlopen(url, timeout=10) as response:
            payload = response.read()
        with open(destination, "wb") as icon_file:
            icon_file.write(payload)
    except (OSError, ValueError) as exc:
        # URLError/HTTPError and socket timeouts are OSError subclasses;
        # ValueError covers a malformed URL.
        logging.warning(f"Could not download favicon ({exc}); using the default icon")
        return None
    return destination


# NOTE(review): `share=True` publishes the app on a public URL while
# `show_error=True` surfaces error details to every visitor — confirm this is
# intended before sharing the link outside of debugging sessions.
interface.launch(
    debug=True,
    show_error=True,
    favicon_path=_download_favicon(FAVICON_URL),
    share=True
)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://7aafabf8285187ebac.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
