In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
heart_data = pd.read_csv('heart_disease_data.csv')

In [3]:
# print first 5 rows of the dataset
heart_data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [4]:
# getting some info about the data
heart_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [5]:
# checking the distribution of Target Variable
# 1 --> Defective Heart
# 0 --> Healthy Heart
heart_data['target'].value_counts()

target
1    165
0    138
Name: count, dtype: int64

In [6]:
X = heart_data.drop(columns='target', axis=1)
Y = heart_data['target']

In [7]:
#Splitting the Data into Training data & Test Data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [8]:
model = LogisticRegression()

In [9]:
# training the LogisticRegression model with Training data
model.fit(X_train, Y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [10]:
# accuracy on training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

In [11]:
print('Accuracy on Training data : ', training_data_accuracy)

Accuracy on Training data :  0.8512396694214877


In [12]:
# accuracy on test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)

In [13]:
print('Accuracy on Test data : ', test_data_accuracy)

Accuracy on Test data :  0.819672131147541


In [16]:
def get_user_input():
    age = int(input("Enter age: "))
    sex = int(input("Enter gender (1 = male, 0 = female): "))
    cp = int(input("Enter chest pain type[0-Typical angina, 1-Atypical angina, 2-Non-anginal pain, 3-Asymptomatic(no pain)]: "))
    trestbps = int(input("Enter resting blood pressure (in mm Hg): "))
    chol = int(input("Enter serum cholesterol in mg/dl: "))
    fbs = int(input("Enter fasting blood sugar > 120 mg/dl (1 = true, 0 = false): "))
    restecg = int(input("Enter resting electrocardiographic results [0-Normal, 1-ST-T wave abnormality, 2-probable/definite left ventricular hypertrophy]: "))
    thalach = int(input("Enter maximum heart rate achieved: "))
    exang = int(input("Enter exercise induced angina (1 = yes, 0 = no): "))
    oldpeak = float(input("Enter ST depression induced by exercise relative to rest: "))
    slope = int(input("Enter the slope of the peak exercise ST segment [0-Upsloping(better heart health), 1-Flat, 2-Downsloping(worse heart health)]: "))
    ca = int(input("Enter number of major vessels (0-3) colored by fluoroscopy: "))
    thal = int(input("Enter thalassemia (1 = normal, 2 = fixed defect, 3 = reversible defect): "))
    
    user_data = (age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal)
    return np.asarray(user_data).reshape(1, -1)

# Get user input
input_data = get_user_input()

# Make a prediction
prediction = model.predict(input_data)

# Output the prediction result
if prediction[0] == 0:
    print('This System Predicts that This Person does not have Heart Disease')
else:
    print('This System Predicts that The Person may have a Heart Disease. Consult your Doctor/Psysician Immediately !!!')

Enter age: 63
Enter gender (1 = male, 0 = female): 0
Enter chest pain type[0-Typical angina, 1-Atypical angina, 2-Non-anginal pain, 3-Asymptomatic(no pain)]: 0
Enter resting blood pressure (in mm Hg): 150
Enter serum cholesterol in mg/dl: 290
Enter fasting blood sugar > 120 mg/dl (1 = true, 0 = false): 0
Enter resting electrocardiographic results [0-Normal, 1-ST-T wave abnormality, 2-probable/definite left ventricular hypertrophy]: 1
Enter maximum heart rate achieved: 145
Enter exercise induced angina (1 = yes, 0 = no): 1
Enter ST depression induced by exercise relative to rest: 3.6
Enter the slope of the peak exercise ST segment [0-Upsloping(better heart health), 1-Flat, 2-Downsloping(worse heart health)]: 2
Enter number of major vessels (0-3) colored by fluoroscopy: 3
Enter thalassemia (1 = normal, 2 = fixed defect, 3 = reversible defect): 3
This System Predicts that This Person does not have Heart Disease


