In [1]:
# ==============================================================
# 📘 Multi-Disease Prediction - Professional & Clean Output
# ==============================================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

# ==============================================================
# Load datasets & prepare
# ==============================================================
def load_and_clean(path):
    df = pd.read_csv(path)
    df = df.drop(columns=[col for col in df.columns if df[col].isna().all() or 'Unnamed' in col])
    return df

heart_df = load_and_clean("/content/heart.csv")
diabetes_df = load_and_clean("/content/diabetes.csv")
breast_df = load_and_clean("/content/data.csv")

def prepare(df, target=None):
    df = df.copy()
    if target is None:
        possible_targets = ["target", "Outcome", "diagnosis", "Class", "class"]
        for t in possible_targets:
            if t in df.columns:
                target = t
                break
        else:
            raise ValueError("Target column not found")
    df = df.dropna(subset=[target])
    if df[target].dtype == "object":
        df[target] = df[target].astype("category").cat.codes
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    X = df.drop(columns=[target])
    y = df[target]
    numeric_features = X.select_dtypes(include=["number"]).columns
    categorical_features = X.select_dtypes(include=["object", "category"]).columns

    numeric_transformer = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ])
    categorical_transformer = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    preprocessor = ColumnTransformer([
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

    X_processed = preprocessor.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)
    print(f"{target.upper()} dataset ready → {X.shape[0]} samples, {X.shape[1]} features (processed: {X_processed.shape[1]})")
    return (X_train, X_test, y_train, y_test), preprocessor

# Prepare datasets
(heart_X_train, _, heart_y_train, _), heart_pre = prepare(heart_df)
(diabetes_X_train, _, diabetes_y_train, _), diab_pre = prepare(diabetes_df)
(breast_X_train, _, breast_y_train, _), breast_pre = prepare(breast_df)

# Train models
def train_all(X_train, y_train):
    models = {
        "SVM": SVC(probability=True, random_state=42),
        "LogisticRegression": LogisticRegression(max_iter=500),
        "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
        "XGBoost": XGBClassifier(eval_metric="logloss")
    }
    for name, model in models.items():
        model.fit(X_train, y_train)
    return models

models = {
    "heart": train_all(heart_X_train, heart_y_train),
    "diabetes": train_all(diabetes_X_train, diabetes_y_train),
    "breast": train_all(breast_X_train, breast_y_train)
}

preprocessors = {
    "heart": heart_pre,
    "diabetes": diab_pre,
    "breast": breast_pre
}

feature_columns = {
    "heart": heart_df.drop(columns=["target"]).columns.tolist(),
    "diabetes": diabetes_df.drop(columns=["Outcome"]).columns.tolist(),
    "breast": breast_df.drop(columns=["diagnosis"]).columns.tolist()
}

print("✅ All models trained successfully!\n")

# ==============================================================
# Create professional input widgets with blank default (no 0)
# ==============================================================
input_widgets = {}

for disease, columns in feature_columns.items():
    disease_widgets = {}
    for col in columns:
        if col in preprocessors[disease].transformers_[0][2]:  # numeric
            widget_obj = widgets.FloatText(description=col, value=None)  # blank by default
        else:  # categorical
            unique_vals = heart_df[col].dropna().unique() if col in heart_df else ["male","female"]
            if len(unique_vals) <= 10:
                widget_obj = widgets.Dropdown(description=col, options=unique_vals)
            else:
                widget_obj = widgets.Text(description=col, value="")
        disease_widgets[col] = widget_obj
    input_widgets[disease] = disease_widgets

# Button to run prediction
predict_button = widgets.Button(description="Predict Diseases", button_style='success')
output = widgets.Output()

# ==============================================================
# Function to convert 0/1 to emojis/colors
def format_prediction(val):
    if val in [0, "0"]:
        return "❌ Negative"
    elif val in [1, "1"]:
        return "✅ Positive"
    else:
        return str(val)

# ==============================================================
# Prediction handler
# ==============================================================
def on_predict_button_clicked(b):
    with output:
        clear_output()
        user_data = {}
        for disease, widgets_dict in input_widgets.items():
            for col, w in widgets_dict.items():
                user_data[col] = w.value
        results = {}
        for disease, model_set in models.items():
            preprocessor = preprocessors[disease]
            X_input = pd.DataFrame([user_data])
            try:
                X_processed = preprocessor.transform(X_input)
                preds = {name: format_prediction(clf.predict(X_processed)[0]) for name, clf in model_set.items()}
                results[disease] = preds
            except Exception as e:
                results[disease] = f"Error: {e}"

        # Display attractive output
        html_str = "<h2 style='color:blue'>🩺 Multi-Disease Prediction Results</h2>"
        for disease, preds in results.items():
            html_str += f"<h3>{disease.capitalize()}</h3><ul>"
            if isinstance(preds, dict):
                for model, val in preds.items():
                    html_str += f"<li><b>{model}:</b> {val}</li>"
            else:
                html_str += f"<li>{preds}</li>"
            html_str += "</ul>"
        display(HTML(html_str))

predict_button.on_click(on_predict_button_clicked)

# ==============================================================
# Display widgets
# ==============================================================
for disease, widgets_dict in input_widgets.items():
    print(f"\n--- {disease.capitalize()} Dataset ---")
    display(widgets.VBox(list(widgets_dict.values())))

display(predict_button, output)


TARGET dataset ready → 1025 samples, 13 features (processed: 13)
OUTCOME dataset ready → 768 samples, 8 features (processed: 8)
DIAGNOSIS dataset ready → 569 samples, 31 features (processed: 31)
✅ All models trained successfully!


--- Heart Dataset ---


VBox(children=(FloatText(value=0.0, description='age'), FloatText(value=0.0, description='sex'), FloatText(val…


--- Diabetes Dataset ---


VBox(children=(FloatText(value=0.0, description='Pregnancies'), FloatText(value=0.0, description='Glucose'), F…


--- Breast Dataset ---


VBox(children=(FloatText(value=0.0, description='id'), FloatText(value=0.0, description='radius_mean'), FloatT…

Button(button_style='success', description='Predict Diseases', style=ButtonStyle())

Output()