In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

Load the dataset

In [3]:
# Location and column names of the processed Cleveland heart-disease file.
# The file is read without a header row, and "?" entries are parsed as missing.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
columns = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target",
]

# Read the file and discard every row that contains a missing value.
data = pd.read_csv(url, header=None, names=columns, na_values="?").dropna()


Encode the target variable as binary (0 stays 0, any value above 0 becomes 1)

In [5]:
# Collapse the target to a binary label: any value above 0 becomes 1, the rest 0.
# A vectorized comparison replaces the per-element Python lambda.
data['target'] = (data['target'] > 0).astype('int64')

Derive binary risk-factor features by applying thresholds to the raw measurements

In [7]:
# Binary risk flags derived from the raw columns. Each flag is computed with a
# vectorized comparison (instead of a row-wise / element-wise Python lambda)
# and stored as a 0/1 integer column.

# Age counts as high risk above 45 when sex == 1 and above 55 when sex == 0.
data['age_high_risk'] = (
    ((data['age'] > 45) & (data['sex'] == 1))
    | ((data['age'] > 55) & (data['sex'] == 0))
).astype('int64')
data['chol_high'] = (data['chol'] >= 200).astype('int64')
data['bp_high'] = (data['trestbps'] >= 130).astype('int64')
data['thalach_low'] = (data['thalach'] < 150).astype('int64')
# exang is copied unchanged rather than thresholded.
data['exang_positive'] = data['exang']
data['oldpeak_high'] = (data['oldpeak'] > 1.0).astype('int64')
data['ca_high'] = (data['ca'] >= 1).astype('int64')
# thal codes 6 and 7 are flagged; every other code maps to 0.
data['thal_risk'] = data['thal'].isin([6, 7]).astype('int64')

# Feature matrix (only the derived flags, in this column order) and label vector.
X = data[['age_high_risk', 'chol_high', 'bp_high', 'thalach_low',
          'exang_positive', 'oldpeak_high', 'ca_high', 'thal_risk']]
y = data['target']

Standardize the data

In [9]:
# Fit the scaler on the training rows only, so the held-out rows do not
# influence the mean/variance used for standardization (fitting on all of X
# would leak test-set statistics into preprocessing).
# The split arguments here must stay identical to the ones used in the
# train/test split cell below: with the same number of rows, test_size and
# random_state, train_test_split selects the same rows both times, so the
# rows used to fit the scaler are exactly the rows that end up in X_train.
X_fit, _X_holdout = train_test_split(X, test_size=0.2, random_state=42)
scaler = StandardScaler().fit(X_fit)

# All rows are transformed with the training-set statistics; X_scaled keeps
# the same row order as X and y.
X_scaled = scaler.transform(X)

Save the scaler

In [11]:
# Persist the fitted scaler to disk; as the last expression of the cell, the
# list of written file names returned by joblib.dump is shown as the output.
joblib.dump(value=scaler, filename='scaler.pkl')

['scaler.pkl']

Split the dataset

In [13]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

Train the logistic regression model

In [16]:
# Fit a logistic regression with scikit-learn's default settings on the
# training split. fit() returns the estimator itself, so the call can be chained.
model = LogisticRegression().fit(X_train, y_train)
model  # last expression, so the fitted estimator is still the cell's output

Save the model

In [25]:
# Persist the fitted classifier to disk; as the last expression of the cell,
# the list of written file names returned by joblib.dump is shown as the output.
joblib.dump(value=model, filename='logistic_model.pkl')

['logistic_model.pkl']

In [27]:
# Evaluate the model: take the second predict_proba column (the probability of
# class 1, given the 0/1 target encoding) for every test row, then label a row
# 1 when that probability is at least 0.5 and 0 otherwise.
class_probabilities = model.predict_proba(X_test)
y_pred_prob = class_probabilities[:, 1]
is_predicted_positive = y_pred_prob >= 0.5
y_pred = is_predicted_positive.astype(int)

In [29]:
# Report overall accuracy followed by the per-class precision/recall/F1 table
# for the held-out predictions.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(f"\nClassification Report:\n {report}")

Accuracy: 0.85

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.89      0.88        36
           1       0.83      0.79      0.81        24

    accuracy                           0.85        60
   macro avg       0.85      0.84      0.84        60
weighted avg       0.85      0.85      0.85        60



In [31]:
# Load the pre-trained model and scaler back from the files written earlier in
# this notebook (model first, then scaler). joblib.load unpickles its input,
# so it should only ever be pointed at trusted files.
model, scaler = (
    joblib.load(artifact_path)
    for artifact_path in ('logistic_model.pkl', 'scaler.pkl')
)

In [33]:
# Define risk level function
def risk_level(prob):
    """Map a predicted probability to a risk label.

    Args:
        prob: Probability-like number to classify.

    Returns:
        "Low Risk" when ``prob`` is below 0.4, "Moderate Risk" when it is at
        least 0.4 but below 0.7, and "High Risk" in every other case.
    """
    # Exclusive upper bounds in ascending order: the first bound that prob
    # falls under decides the label.
    for upper_bound, label in ((0.4, "Low Risk"), (0.7, "Moderate Risk")):
        if prob < upper_bound:
            return label
    return "High Risk"

In [35]:
# Manual input for testing
def _read_coded_int(prompt, allowed):
    """Read an integer from stdin and check that it is one of the allowed codes.

    Raises:
        ValueError: If the text is not an integer or the integer is not in
            ``allowed``.
    """
    value = int(input(prompt))
    if value not in allowed:
        raise ValueError(f"Expected one of {sorted(allowed)}, got {value}")
    return value


def manual_test():
    """Prompt for one patient's values and print the model's prediction.

    Reads nine values from stdin, converts them into the same eight binary
    risk flags the model was trained on, scales them with the module-level
    ``scaler`` and scores them with the module-level ``model``. Prints whether
    heart disease is predicted (probability >= 0.5), the risk label from
    ``risk_level`` and the probability rounded to two decimals.

    Raises:
        ValueError: If a value cannot be parsed as a number, or if a coded
            field (sex, exercise-induced angina, number of vessels,
            thalassemia) is outside the codes listed in its prompt. Without
            this check an out-of-range code would silently be treated as
            "no risk", or, for exang, be fed to the model as-is.
    """
    print("Enter the following test parameters:")

    age = int(input("Age: "))
    sex = _read_coded_int("Sex (1 = Male, 0 = Female): ", {0, 1})
    chol = int(input("Cholesterol Level: "))
    trestbps = int(input("Resting Blood Pressure: "))
    thalach = int(input("Maximum Heart Rate: "))
    exang = _read_coded_int("Exercise Induced Angina (1 = Yes, 0 = No): ", {0, 1})
    oldpeak = float(input("ST Depression: "))
    ca = _read_coded_int("Number of Major Vessels (0-3): ", {0, 1, 2, 3})
    thal = _read_coded_int(
        "Thalassemia (3 = Normal, 6 = Fixed Defect, 7 = Reversible Defect): ",
        {3, 6, 7},
    )

    # Calculate thresholds. These mirror the rules used to build the training
    # features, and the key order matches the training column order.
    input_data = {
        'age_high_risk': 1 if (age > 45 and sex == 1) or (age > 55 and sex == 0) else 0,
        'chol_high': 1 if chol >= 200 else 0,
        'bp_high': 1 if trestbps >= 130 else 0,
        'thalach_low': 1 if thalach < 150 else 0,
        'exang_positive': exang,
        'oldpeak_high': 1 if oldpeak > 1.0 else 0,
        'ca_high': 1 if ca >= 1 else 0,
        'thal_risk': 1 if thal in [6, 7] else 0
    }

    # Scale input: a one-row frame whose column names come from the dict keys.
    input_df = pd.DataFrame([input_data])
    input_scaled = scaler.transform(input_df)

    # Predict: second predict_proba column of the single row.
    probability = model.predict_proba(input_scaled)[:, 1][0]
    prediction = "Found" if probability >= 0.5 else "Not Found"
    risk = risk_level(probability)

    print(f"\nHeart Disease: {prediction}")
    print(f"Risk Level: {risk}")
    print(f"Probability: {probability:.2f}")

In [41]:
# Call the function for manual testing. It reads its values with input(), so
# this cell waits for keyboard entry and will pause a "Run All" at this point.
manual_test()

Enter the following test parameters:


Age:  54
Sex (1 = Male, 0 = Female):  1
Cholesterol Level:  210
Resting Blood Pressure:  140
Maximum Heart Rate:  145
Exercise Induced Angina (1 = Yes, 0 = No):  0
ST Depression:  2.3
Number of Major Vessels (0-3):  2
Thalassemia (3 = Normal, 6 = Fixed Defect, 7 = Reversible Defect):  7



Heart Disease: Found
Risk Level: High Risk
Probability: 0.91
