In [2]:
#importing all the libraries that will be used in Heart Disease Prediction System

import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import KFold,train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [3]:
heartData = pd.read_csv('E:/ML-IDS/datasets/heart-disease/heartData.csv')

#Separating the input and output from heartData dataset - X is input set and y is output set
X = heartData.drop(columns='target')
y = heartData['target']

#saving input and output data as csv files 
X.to_csv('heart_disease_input_data.csv',index=False)
y.to_csv('heart_disease_output_data.csv',index=False)

#reading input and output data from their respective files and then this data will be used in further steps
input_data = pd.read_csv('E:/ML-IDS/datasets/heart-disease/heart_disease_input_data.csv')
output_data = pd.read_csv('E:/ML-IDS/datasets/heart-disease/heart_disease_output_data.csv')

In [3]:
#making folds of dataset to predict model for better accuracy

kf = KFold(n_splits=3,shuffle=False)

training_data = {};
testing_data = {};
input_training_data = {};
output_training_data = {};
input_testing_data = {};
output_testing_data = {};
no_of_folds = 0;


acc_scores = []
for train_index, test_index in kf.split(X):


    #Training Data
    training_data[no_of_folds]=heartData.iloc[train_index]
    input_training_data[no_of_folds]=input_data.iloc[train_index]
    output_training_data[no_of_folds]=output_data.iloc[train_index]


    # Testing Data
    testing_data[no_of_folds]=heartData.iloc[test_index]  
    input_testing_data[no_of_folds]=input_data.iloc[test_index]
    output_testing_data[no_of_folds]=output_data.iloc[test_index]


    #saving folds
    no_of_folds += 1
    training_data[no_of_folds-1].to_csv(r'training-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    input_training_data[no_of_folds-1].to_csv(r'input-training-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    output_training_data[no_of_folds-1].to_csv(r'output-training-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    testing_data[no_of_folds-1].to_csv(r'testing-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    input_testing_data[no_of_folds-1].to_csv(r'input-testing-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    output_testing_data[no_of_folds-1].to_csv(r'output-testing-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)


In [4]:
#saving the trained models which will be trained in bellow cell
def save(svc_model):
    pickle.dump(svc_model, open('svc_trained_model_0'+str(i+1)+'.pkl', 'wb'))

In [None]:
#Training the Support Vector Classifier Model

for i in range(no_of_folds):
    # Train the Models
    # svc_model = SVC(gamma='auto',random_state=0)
    svc_model = LogisticRegression(random_state=0)
    svc_model.fit(input_training_data[i],output_training_data[i])
    # make predictions on new input data
    predictions = svc_model.predict(input_testing_data[i])
    # print the predicted outputs
    print(predictions)

    save(svc_model)
    print(svc_model)


In [6]:
# Loading the Saved Models

svc_trained_model = {}

# Load the Saved Models
for i in range(no_of_folds):
    svc_trained_model[i] = pickle.load(open('svc_trained_model_0'+str(i+1)+'.pkl', 'rb'))

In [None]:
# Evaluate the Performance of Trained Models


# Provide Test data to the Trained Models

accuracy_list = []
for i in range(no_of_folds): 
        
    model_predications = svc_trained_model[i].predict(input_testing_data[i])
    model_predications_data = input_testing_data[i].copy()
    model_predications_data["Have Disease"] = output_testing_data[i]
    model_predications_data["Predictions"] = model_predications
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    print(model_predications_data)
    
    # Save the Predictions into CSV File 
    
    model_predications_data.to_csv(r'model-predictions-iteration-0' + str(i+1) + '.csv', index = False, header = True)


    # Calculate the Accuracy of each Iteration
    
    print("\n\nAccuracy Score:")
    accuracy = accuracy_score(model_predications_data["Have Disease"],model_predications_data["Predictions"])
    accuracy_list.append(accuracy);
    print(accuracy_list)
    for x in accuracy_list:
        print(x)
    print(round(accuracy,2))


In [None]:
# Calculating the Average/Mean Accuracy

print("\n\nAverage Accuracy Score:")
print("=======================")   
accuracy_average = sum(accuracy_list) / len(accuracy_list)
print(round(accuracy_average,2))

In [49]:
# Take Input from User

''' 
*---------------- TAKE_USER_INPUT ----------------*
'''

age_input = input("\nPlease enter Age here:  ").strip()
gender_input = input("\nPlease enter Gender here (((1 if Male, 0 if Female))) : ").strip()
chest_pain_input = input("\nPlease enter Chest Pain here (0,1,2,3) : ").strip()
max_heart_rate_input = input("\nPlease enter Maximum Heart Rate Achivied here:  ").strip()

In [None]:
# Convert User Input into Feature Vector

user_input = pd.DataFrame({ 'age': [age_input],'sex': [gender_input],'cp': [chest_pain_input],'thalach': [max_heart_rate_input]})

print("\n\nUser Input Feature Vector:")
print("==========================\n")
print(user_input)


unseen_data_features = user_input.copy()

In [None]:
# Load the Best Model and make prediction
# svc_trained_model_01 has Highest Accuracy

model = pickle.load(open('svc_trained_model_03.pkl', 'rb'))

# Make a Prediction on Unseen Data

predicted_survival = model.predict(unseen_data_features)

if(predicted_survival == 1): 
    prediction = "Have Disease"
if(predicted_survival == 0):
    prediction = "NOT Have Disease"

print(prediction)

In [10]:
#Training the model with train test split

X_train,y_train,X_test,y_test = train_test_split(X,y,test_size=0.33,random_state=42)

logreg = LogisticRegression(random_state=0)
logreg.fit(X,y)
prediction = logreg.predict(y_train)

#saving the trained models which will be trained in bellow cell
pickle.dump(logreg, open('logreg_train_test_trained_model.pkl', 'wb'))

# Loading the Saved Model
logreg_trained_model = pickle.load(open('logreg_train_test_trained_model.pkl', 'rb'))

#calculating the accuracy of the model
accuracy = accuracy_score(y_pred=prediction,normalize=True,y_true=y_test)
print(accuracy)


# Take Input from User
age_input = input("\nPlease enter Age here:  ").strip()
gender_input = input("\nPlease enter Gender here (((1 if Male, 0 if Female))) : ").strip()
chest_pain_input = input("\nPlease enter Chest Pain here (0,1,2,3) : ").strip()
max_heart_rate_input = input("\nPlease enter Maximum Heart Rate Achivied here:  ").strip()

# Convert User Input into Feature Vector
user_input = pd.DataFrame({ 'age': [age_input],'sex': [gender_input],'cp': [chest_pain_input],'thalach': [max_heart_rate_input]})
print("\n\nUser Input Feature Vector:")
print(user_input)

unseen_data_features = user_input.copy()

# Load the Best Model and make prediction

logreg_trained_model = pickle.load(open('svc_trained_model_03.pkl', 'rb'))

# Make a Prediction on Unseen Data
predicted_survival = logreg_trained_model.predict(unseen_data_features)

if(predicted_survival == 1): 
    prediction = "Have Disease"
if(predicted_survival == 0):
    prediction = "NOT Have Disease"

print(prediction)



0.8529411764705882


User Input Feature Vector:
  age sex cp thalach
0  58   1  2     165
Have Disease
