In [2]:
# Heart Disease Logistic Regression
# Author: Kaushik Kancharla
# Date: 12/28/2021
# Purpose: Build a Logistic Regression model that predicts whether a patient has heart disease (HD)
# given several factors. This model is a form of Machine Learning: it is fit on a set of labeled
# data points, learns from them, and can then predict the dependent variable (in this case, having HD)
# from several independent input variables.

# Acknowledgements: Code modeled on the Logistic Regression video by "Kindson the Genius" on YouTube
# and Krish Naik's video on Multiple Linear Regression on YouTube

In [3]:
# Import the Required Modules
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import pandas as pd

In [4]:
# Load the heart-disease dataset from the CSV file in the working directory
# and report its (rows, columns) shape as a quick sanity check.
csv_path = "Heart_Disease_Prediction.csv"
data = pd.read_csv(csv_path)
data.shape

(270, 14)

In [5]:
# Keep only the five predictors used by the model plus the target column.
# Columns are selected by name rather than by pop/insert/drop and positional
# slicing, so the result does not depend on the CSV's column order and the
# cell can be re-run without raising a KeyError on an already-dropped column.
FEATURE_COLUMNS = ["Age", "Sex", "BP", "Cholesterol", "Max HR"]
TARGET_COLUMN = "Heart Disease"
# The target is placed last because the next cell takes the final column as Y.
data = data.loc[:, FEATURE_COLUMNS + [TARGET_COLUMN]].copy()
data

Unnamed: 0,Age,Sex,BP,Cholesterol,Max HR,Heart Disease
0,70,1,130,322,109,Presence
1,67,0,115,564,160,Absence
2,57,1,124,261,141,Presence
3,64,1,128,263,105,Absence
4,74,0,120,269,121,Absence
...,...,...,...,...,...,...
265,52,1,172,199,162,Absence
266,44,1,120,263,173,Absence
267,56,0,140,294,153,Absence
268,57,1,140,192,148,Absence


In [6]:
# Separate the predictors (every column except the last) from the target (the last column)
X, Y = data.iloc[:, :-1], data.iloc[:, -1]
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)
X

Unnamed: 0,Age,Sex,BP,Cholesterol,Max HR
0,70,1,130,322,109
1,67,0,115,564,160
2,57,1,124,261,141
3,64,1,128,263,105
4,74,0,120,269,121
...,...,...,...,...,...
265,52,1,172,199,162
266,44,1,120,263,173
267,56,0,140,294,153
268,57,1,140,192,148


In [7]:
# Fit a logistic-regression classifier (scikit-learn default settings) on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
log_reg = LogisticRegression().fit(X_train, Y_train)
log_reg

LogisticRegression()

In [8]:
# Predict a class label for every row of the held-out test split using the fitted model
Y_pred = log_reg.predict(X_test)

In [9]:
# Confusion matrix on the test split: rows are the actual labels, columns the predicted labels.
# Keyword arguments make the (true, predicted) order explicit.
CM = confusion_matrix(y_true=Y_test, y_pred=Y_pred)
CM

array([[24,  6],
       [10, 14]], dtype=int64)

In [11]:
# Accuracy helper: share of correct predictions in a confusion matrix
def percentage(cm):
    """Return the fraction of correctly classified samples in a confusion matrix.

    Despite the name, the result is a fraction between 0 and 1 (e.g. 0.70 for
    70%), not a value on a 0-100 scale.

    Args:
        cm: Square confusion matrix (nested lists or a 2-D NumPy array) whose
            diagonal holds the correctly classified counts. Any number of
            classes is supported, not only the binary 2x2 case.

    Returns:
        The sum of the diagonal entries divided by the sum of all entries.

    Raises:
        ValueError: If ``cm`` is not square.
    """
    n_classes = len(cm)
    if any(len(row) != n_classes for row in cm):
        raise ValueError("confusion matrix must be square")
    # Correct predictions sit on the main diagonal (actual label == predicted label).
    correct = sum(cm[i][i] for i in range(n_classes))
    total = sum(sum(row) for row in cm)
    return correct / total
# Share of test-set predictions that were correct, as a fraction (not scaled to 0-100)
percentage(CM)

0.7037037037037037

In [None]:
# Future Directions:
# Neural Nets, Decision Trees, other ML/AI models