<a href="https://colab.research.google.com/github/sandaruwank/Home_Workout_Kotlin_Android_App/blob/master/Convert_Heart_Disease_Risk_probability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
import joblib

# Read the heart-disease risk table from the Colab working directory
df = pd.read_csv('/content/heart_disese_risk.csv')

# Column groups, one per preprocessing treatment
categorical_cols = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
numerical_cols_to_impute = ['RestingBP', 'Cholesterol', 'MaxHR', 'Spo2', 'BMI']
numerical_cols_no_impute = ['Age', 'Oldpeak', 'ECG', 'FastingBS']

# Model inputs (in this column order) and the label column
features = [
    'Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
    'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope', 'Spo2', 'BMI', 'ECG',
]
y = df['HeartDisease']
X = df[features]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# A value of 0 in these columns is treated as missing: it is replaced by the
# column median, then the column is standardized
zero_as_missing_scaler = Pipeline(steps=[
    ('impute', SimpleImputer(missing_values=0, strategy='median')),
    ('scale', StandardScaler()),
])

# Route each column group to its transformer:
#   - categorical columns are one-hot encoded (unseen categories are ignored)
#   - zero-as-missing numeric columns are imputed, then standardized
#   - the remaining numeric columns are standardized only
preprocessor = ColumnTransformer(transformers=[
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ('num_impute_scale', zero_as_missing_scaler, numerical_cols_to_impute),
    ('num_scale', StandardScaler(), numerical_cols_no_impute),
])

# Chain preprocessing and the classifier so both are fitted and saved together
model = Pipeline(steps=[
    ('preprocess', preprocessor),
    ('clf', LogisticRegression()),
])

# Fit on the training split only
model.fit(X_train, y_train)

# Persist the fitted pipeline; as the last expression this also displays the written path
joblib.dump(model, 'heart_disease_model.pkl')

['heart_disease_model.pkl']

In [2]:
# Score every row of the held-out split with the fitted pipeline
probas = model.predict_proba(X_test)

# Keep the second probability column as the heart-disease risk
# (assumes the labels are 0/1 so that column 1 is the positive class — TODO confirm)
positive_col = 1
heart_risk_prob = probas[:, positive_col]

In [5]:
# One example patient, keyed by the same column names the model was trained on
patient_record = {
    'Age': 55,
    'Sex': 'M',
    'ChestPainType': 'ASY',
    'RestingBP': 140,
    'Cholesterol': 200,
    'FastingBS': 0,
    'RestingECG': 'Normal',
    'MaxHR': 150,
    'ExerciseAngina': 'N',
    'Oldpeak': 1.0,
    'ST_Slope': 'Up',
    'Spo2': 98,
    'BMI': 25,
    'ECG': 500,
}

# Wrap each value in a one-element list so the frame has exactly one row
new_patient = pd.DataFrame({column: [value] for column, value in patient_record.items()})

# Row 0, column 1 of the probability matrix: this patient's heart-disease risk, e.g. 0.75 (75%)
patient_probas = model.predict_proba(new_patient)
probability = patient_probas[0, 1]

In [6]:
import joblib

# Write the pipeline held in `model` to disk
# (expects `model` to be the trained pipeline from the earlier cells)
joblib.dump(value=model, filename='heart_disease_model.pkl')

['heart_disease_model.pkl']