In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, confusion_matrix, roc_auc_score

# Read the heart data data set file
df = pd.read_csv('heart_data.csv')

#drop index, id columns                 
df.drop(df.iloc[:,0:2], inplace =True, axis =1)

#Translate age in days to age in years - divide by 365.25 and convert to int
ageInYrs =  df['age'] = (df['age']/ 365.25).astype(int)

#skip negative ap hi and lo values
df.drop(df[df['ap_hi'] < 20].index, inplace=True)
df.drop(df[df['ap_lo'] < 20].index, inplace=True)

df.drop(df[df['ap_hi'] > 900].index, inplace=True)
df.drop(df[df['ap_lo'] > 910].index, inplace=True)

#Standardize data
age_std_scale = StandardScaler()
gender_std_scale = StandardScaler()
height_std_scale = StandardScaler()
weight_std_scale = StandardScaler()
aphi_std_scale = StandardScaler()
aplo_std_scale = StandardScaler()
cholesterol_std_scale = StandardScaler()

df['age'] = age_std_scale.fit_transform(df[['age']])
df['gender'] = gender_std_scale.fit_transform(df[['gender']])
df['height'] = height_std_scale.fit_transform(df[['height']])
df['weight'] = weight_std_scale.fit_transform(df[['weight']])
df['ap_hi'] = aphi_std_scale.fit_transform(df[['ap_hi']])
df['ap_lo'] = aplo_std_scale.fit_transform(df[['ap_lo']])
df['cholesterol'] = cholesterol_std_scale.fit_transform(df[['cholesterol']])

# Modeling - Logistic Regression
X = df[['age','gender','height','weight','ap_hi','ap_lo','cholesterol','gluc','smoke','alco','active']]
y = df['cardio']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

print(model.intercept_, model.coef_)

# Modeling - Test
y_pred = model.predict(X_test)

#Modeling - Performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("AUC score:", roc_auc_score(y_test, y_pred))
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
print(f"Confusion Matrix: [[{tn}, {fp}], [{fn}, {tp}]]")
print(f"True Positives: {tp}")
print(f"False Positives: {fp}")
print(f"True Negatives: {tn}")
print(f"False Negatives: {fn}")

[0.39576892] [[ 0.35495661 -0.00537564 -0.02543488  0.16694067  0.99925189  0.04729565
   0.34705196 -0.12137391 -0.14694369 -0.21181215 -0.22613254]]
Accuracy: 0.7250671896564248
AUC score: 0.7238757884610187
Confusion Matrix: [[5465, 1575], [2210, 4517]]
True Positives: 4517
False Positives: 1575
True Negatives: 5465
False Negatives: 2210
