In [None]:
# ------------------------------------------
# 1. Import Libraries
# ------------------------------------------
import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import xgboost as xgb

# ------------------------------------------
# 2. Load & Prepare Data
# ------------------------------------------
df = pd.read_csv("heart.csv")

# Separate features and target
X = df.drop("target", axis=1)
y = df["target"] 

# ------------------------------------------
# 3. Train/Test Split
# ------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ------------------------------------------
# 4. Scale Features
# ------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert back to DataFrames (prevents warnings)
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X.columns)

# ------------------------------------------
# 5. Train Models
# ------------------------------------------

# Logistic Regression
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_train_scaled, y_train)

# Random Forest + Grid Search
param_grid = {
    "n_estimators": [100, 200],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5],
}

grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring="f1"
)
grid_search.fit(X_train_scaled, y_train)

best_rand_forest = grid_search.best_estimator_

# XGBoost
xgb_model = xgb.XGBClassifier(
    objective="binary:logistic",
    eval_metric="logloss",
    random_state=42
)
xgb_model.fit(X_train_scaled, y_train)

# ------------------------------------------
# 6. Evaluate Models
# ------------------------------------------
print("\n=== LOGISTIC REGRESSION ===")
print(classification_report(y_test, log_reg.predict(X_test_scaled)))

print("\n=== RANDOM FOREST (Best) ===")
print("Best Parameters:", grid_search.best_params_)
print(classification_report(y_test, best_rand_forest.predict(X_test_scaled)))

print("\n=== XGBOOST ===")
print(classification_report(y_test, xgb_model.predict(X_test_scaled)))

# ------------------------------------------
# 7. Save Model & Scaler
# ------------------------------------------
joblib.dump(best_rand_forest, "heart_disease_model.joblib")
joblib.dump(scaler, "scaler.joblib")
print("\nModel + Scaler saved successfully!")



=== LOGISTIC REGRESSION ===
              precision    recall  f1-score   support

           0       0.85      0.72      0.78       102
           1       0.76      0.87      0.81       103

    accuracy                           0.80       205
   macro avg       0.80      0.79      0.79       205
weighted avg       0.80      0.80      0.79       205


=== RANDOM FOREST (Best) ===
Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205


=== XGBOOST ===
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy            

In [3]:
import pandas as pd
import joblib

# Load saved model & scaler
model = joblib.load("heart_disease_model.joblib")
scaler = joblib.load("scaler.joblib")

# Correct feature order
FEATURE_COLUMNS = [
    "age","sex","cp","trestbps","chol","fbs","restecg",
    "thalach","exang","oldpeak","slope","ca","thal"
]

# -----------------------------
# Categorical Encoding Mappings
# -----------------------------
def encode_inputs(user_input):
    return {
        "sex": 1 if user_input["sex"] == "Male" else 0,

        "cp": {
            "Typical Angina": 0,
            "Atypical Angina": 1,
            "Non-Anginal Pain": 2,
            "Asymptomatic": 3
        }[user_input["cp"]],

        "fbs": 1 if user_input["fbs"] == "Yes" else 0,

        "restecg": {
            "Normal": 0,
            "ST-T Abnormality": 1,
            "Left Ventricular Hypertrophy": 2
        }[user_input["restecg"]],

        "exang": 1 if user_input["exang"] == "Yes" else 0,

        "slope": {
            "Upsloping": 0,
            "Flat": 1,
            "Downsloping": 2
        }[user_input["slope"]],

        "thal": {
            "Normal": 1,
            "Fixed Defect": 2,
            "Reversible Defect": 3
        }[user_input["thal"]],
    }


In [5]:
def predict_heart_disease(user_input):
    # Encode categorical fields
    encoded = encode_inputs(user_input)

    # Build a single-row dataframe in correct order
    data = pd.DataFrame([[
        user_input["age"],
        encoded["sex"],
        encoded["cp"],
        user_input["trestbps"],
        user_input["chol"],
        encoded["fbs"],
        encoded["restecg"],
        user_input["thalach"],
        encoded["exang"],
        user_input["oldpeak"],
        encoded["slope"],
        user_input["ca"],
        encoded["thal"]
    ]], columns=FEATURE_COLUMNS)

    # Scale the input
    data_scaled = scaler.transform(data)

    # Predict
    prediction = model.predict(data_scaled)[0]
    probability = model.predict_proba(data_scaled)[0][1]

    return prediction, probability
