In [29]:
import pandas as pd

# Read the heart-disease CSV from the working directory and print a
# column-by-column summary (non-null counts and dtypes) as a sanity check.
DATA_PATH = "heart (1).csv"
df = pd.read_csv(DATA_PATH)
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


In [30]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Separate target from features.
# Guarded so this cell can be re-run safely: after the first run `df` no longer
# contains "HeartDisease", and an unguarded lookup/drop would raise KeyError.
if "HeartDisease" in df.columns:
    y = df["HeartDisease"]
    df = df.drop(columns="HeartDisease")

# Define columns that need to be standardized and encoded
numeric_features = ['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']
categorical_features = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
# FastingBS was previously listed nowhere, so ColumnTransformer's default
# remainder='drop' silently discarded it even though the input form collects
# it. The form restricts it to 0/1, so it is passed through unscaled.
# NOTE(review): confirm the training data only contains 0/1 for FastingBS.
binary_features = ['FastingBS']

# Create the transformers; every feature column is now named explicitly, so
# nothing is dropped implicitly.
transformers = [
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(drop='first'), categorical_features),
    ('bin', 'passthrough', binary_features),
]

# Instantiate and fit the column transformer
column_transformer = ColumnTransformer(transformers)
column_transformer.fit(df)


In [45]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.pipeline import Pipeline

from sklearn.base import clone

# Fixed seed so the tree ensembles (and therefore the averaged risk score)
# are reproducible across kernel restarts.
RANDOM_STATE = 42


def _build_pipeline(classifier):
    """Return a preprocessing + classifier pipeline with its own preprocessor.

    Pipeline does not copy its steps, so handing the same `column_transformer`
    object to several pipelines makes them share (and repeatedly refit) one
    instance. Cloning gives each pipeline an independent, unfitted copy with
    the same configuration.
    """
    return Pipeline([
        ('preprocessor', clone(column_transformer)),
        ('classifier', classifier),
    ])


# Define models
log_reg = _build_pipeline(LogisticRegression())
rf_clf = _build_pipeline(RandomForestClassifier(random_state=RANDOM_STATE))
gb_clf = _build_pipeline(GradientBoostingClassifier(random_state=RANDOM_STATE))

models = {'Logistic Regression': log_reg, 'Random Forest': rf_clf, 'Gradient Boosting': gb_clf}

# Train models on the full feature frame.
# NOTE(review): there is no held-out split, so model quality is not measured here.
for _fitted in models.values():
    _fitted.fit(df, y)


In [46]:
def compute_risk_score(input_data, fitted_models=None):
    """Average the models' predicted heart-disease probability and map it to advice.

    Parameters
    ----------
    input_data
        Feature data accepted by each model's ``predict_proba``. Only the
        first row's prediction is used.
    fitted_models : dict, optional
        Mapping of display name -> fitted model exposing ``predict_proba``.
        Defaults to the notebook-level ``models`` dictionary, so existing
        single-argument calls behave as before.

    Returns
    -------
    tuple
        ``(average_risk, recommendation, analysis)`` where ``average_risk`` is
        the mean of the models' second-column probability expressed as a
        percentage, and the two strings are chosen by the bands
        ``< 30`` (low), ``< 70`` (moderate), otherwise high.

    Raises
    ------
    ValueError
        If there are no models to average over.
    """
    if fitted_models is None:
        fitted_models = models
    if not fitted_models:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError("At least one fitted model is required to compute a risk score.")

    # Column 1 of predict_proba is taken as the positive class; [0] selects the first row.
    probabilities = [
        model.predict_proba(input_data)[:, 1][0]
        for model in fitted_models.values()
    ]

    # Compute the average risk score as a percentage
    average_risk = sum(probabilities) / len(probabilities) * 100

    # Provide a recommendation based on the risk score
    if average_risk < 30:
        recommendation = "Low risk. Maintain a healthy lifestyle."
        analysis = "You have a low probability of heart disease based on the provided data. Continue with regular check-ups."
    elif average_risk < 70:
        recommendation = "Moderate risk. Consult with your doctor."
        analysis = "You have a moderate probability of heart disease. It's recommended to consult with a healthcare professional."
    else:
        recommendation = "High risk. Please see a cardiologist immediately."
        analysis = "You have a high probability of heart disease. Immediate medical attention is advised."

    return average_risk, recommendation, analysis


In [47]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Define input widgets.
# Several descriptions ("Fasting Blood Sugar:", "Chest Pain Type:", ...) are
# longer than the default ipywidgets label width and would be cut off, so every
# widget lets its label grow to fit the text.
LABEL_STYLE = {'description_width': 'initial'}

age_widget = widgets.IntSlider(value=50, min=20, max=100, description="Age:", style=LABEL_STYLE)
sex_widget = widgets.Dropdown(options=['M', 'F'], value='M', description="Sex:", style=LABEL_STYLE)
# Categorical options are taken from the training frame so the form offers
# exactly the categories present in the data.
cp_widget = widgets.Dropdown(options=df["ChestPainType"].unique(), description="Chest Pain Type:", style=LABEL_STYLE)
restbp_widget = widgets.IntSlider(value=120, min=80, max=200, description="Resting BP:", style=LABEL_STYLE)
chol_widget = widgets.IntSlider(value=200, min=100, max=400, description="Cholesterol:", style=LABEL_STYLE)
# Two-position slider: the value is limited to 0 or 1.
fbs_widget = widgets.IntSlider(value=1, min=0, max=1, description="Fasting Blood Sugar:", style=LABEL_STYLE)
restecg_widget = widgets.Dropdown(options=df["RestingECG"].unique(), description="Resting ECG:", style=LABEL_STYLE)
maxhr_widget = widgets.IntSlider(value=150, min=50, max=220, description="Max Heart Rate:", style=LABEL_STYLE)
exang_widget = widgets.Dropdown(options=['Y', 'N'], value='N', description="Exercise Angina:", style=LABEL_STYLE)
oldpeak_widget = widgets.FloatSlider(value=1, min=0, max=5, step=0.1, description="Oldpeak:", style=LABEL_STYLE)
slope_widget = widgets.Dropdown(options=df["ST_Slope"].unique(), description="ST Slope:", style=LABEL_STYLE)

# Button that triggers the computation and the area its results are printed into.
compute_button = widgets.Button(description="Compute Risk")
output = widgets.Output()

def on_compute_button_click(button):
    """Button callback: score the current form values and show the result.

    Reads the current value of every input widget, assembles them into a
    one-row DataFrame, passes it to ``compute_risk_score`` and prints the
    score, recommendation and analysis into the ``output`` widget, replacing
    whatever was shown before. The ``button`` argument is not used.
    """
    # Column name -> widget supplying its value, in the order the frame is built.
    column_sources = {
        'Age': age_widget,
        'Sex': sex_widget,
        'ChestPainType': cp_widget,
        'RestingBP': restbp_widget,
        'Cholesterol': chol_widget,
        'FastingBS': fbs_widget,
        'RestingECG': restecg_widget,
        'MaxHR': maxhr_widget,
        'ExerciseAngina': exang_widget,
        'Oldpeak': oldpeak_widget,
        'ST_Slope': slope_widget,
    }
    # Each value is wrapped in a list so the frame has exactly one row.
    patient_row = pd.DataFrame(
        {column: [source.value] for column, source in column_sources.items()}
    )

    # Compute risk score, recommendation, and analysis
    risk_score, recommendation, analysis = compute_risk_score(patient_row)

    with output:
        # wait=True defers clearing until the new text is ready to be shown.
        clear_output(wait=True)
        print(f"Predicted Risk Score: {risk_score:.2f}%")
        print("\nRecommendation:", recommendation)
        print("\nAnalysis:", analysis)
# Register the callback so clicking the button runs the risk computation.
compute_button.on_click(on_compute_button_click)

# Stack the inputs, the button and the output area vertically. The VBox is the
# cell's last expression, so the notebook renders it.
form_children = [
    age_widget,
    sex_widget,
    cp_widget,
    restbp_widget,
    chol_widget,
    fbs_widget,
    restecg_widget,
    maxhr_widget,
    exang_widget,
    oldpeak_widget,
    slope_widget,
    compute_button,
    output,
]
widgets.VBox(form_children)


VBox(children=(IntSlider(value=50, description='Age:', min=20), Dropdown(description='Sex:', options=('M', 'F'…