In [2]:
# Import Libraries

import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn import svm
from sklearn.metrics import accuracy_score

from prettytable import PrettyTable   
from astropy.table import Table, Column

from google.colab import drive
import os

In [3]:
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [4]:
cwd = os.getcwd()
print(cwd)

/content


In [5]:
# path =pd.read_csv(cwd + '/drive/MyDrive/data/Heart_Disease_Prediction.csv')
# print(path)
sample_data =pd.read_csv(cwd + '/drive/MyDrive/data/Heart_Disease.csv')

#  sample_data = pd.read_csv("Heart_Disease_Prediction.csv")
print("\n\nSample Data:")
print("============\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data[:5])




Sample Data:

   ST depression  Slope of ST  Number of vessels fluro  Thallium Heart Disease
0            2.4            2                        3         3      Presence
1            1.6            2                        0         7       Absence
2            0.3            1                        0         7      Presence
3            0.2            2                        1         7       Absence
4            0.2            1                        1         3       Absence


In [6]:
# Understand Sample Data

print("\n\nAttributes in Sample Data:")
print("==========================\n")

print(sample_data.columns)

print("\n\nNumber of Instances in Sample Data:",sample_data["ST depression"].count())
print("========================================\n")



Attributes in Sample Data:

Index(['ST depression', 'Slope of ST', 'Number of vessels fluro', 'Thallium',
       'Heart Disease'],
      dtype='object')


Number of Instances in Sample Data: 270



In [7]:
Heart_Disease = pd.DataFrame({"Heart Disease":["Presence","Absence"]})
Heart_Disease_label_encoder = LabelEncoder()

Heart_Disease_label_encoder.fit(np.ravel(Heart_Disease))

LabelEncoder()

In [8]:
sample_data_encoded = sample_data.copy()
original_sample_data = sample_data.copy()

# Transform Output of into Numerical Representation

print("\n\nSurvived Attribute After Label Encoding:")
print("========================================\n")
sample_data["encoded_Heart_Disease"] = Heart_Disease_label_encoder.transform(sample_data['Heart Disease'])
print(sample_data[["Heart Disease", "encoded_Heart_Disease"]])

# Print Original and Encoded Ouput Sample Data

sample_data_encoded[['ST depression', 'Slope of ST', 'Number of vessels fluro', 'Thallium', 'Heart Disease']] = sample_data[['ST depression', 'Slope of ST', 'Number of vessels fluro', 'Thallium', 'encoded_Heart_Disease']]
pd.set_option("display.max_rows", None, "display.max_columns", None)
print("\n\nOriginal Sample Data:")
print("=====================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(original_sample_data[:5])
print("\n\nSample Data after Label Encoding of Output:")
print("===========================================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded[:5])

# Save the Transformed Features into CSV File 

sample_data_encoded.to_csv(r'sample-data-encoded-output.csv', index = False, header = True)



Survived Attribute After Label Encoding:

    Heart Disease  encoded_Heart_Disease
0        Presence                      1
1         Absence                      0
2        Presence                      1
3         Absence                      0
4         Absence                      0
5         Absence                      0
6        Presence                      1
7        Presence                      1
8        Presence                      1
9        Presence                      1
10        Absence                      0
11        Absence                      0
12        Absence                      0
13       Presence                      1
14        Absence                      0
15        Absence                      0
16       Presence                      1
17       Presence                      1
18        Absence                      0
19        Absence                      0
20       Presence                      1
21        Absence                      0
22        Abs

In [9]:
input_vector_sample_data = sample_data.iloc[: , :4]
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(input_vector_sample_data[:5])



   ST depression  Slope of ST  Number of vessels fluro  Thallium
0            2.4            2                        3         3
1            1.6            2                        0         7
2            0.3            1                        0         7
3            0.2            2                        1         7
4            0.2            1                        1         3


In [10]:
output_label_sample_data = sample_data.iloc[: ,-1]
pd.set_option("display.max_rows", None, "display.max_columns", None)
print("   Heart_Disease")
print(output_label_sample_data[:5]) 


   Heart_Disease
0    1
1    0
2    1
3    0
4    0
Name: encoded_Heart_Disease, dtype: int64


In [11]:
cv = KFold(n_splits=3, random_state=0, shuffle=True)

training_data = {};
testing_data = {};
input_training_data = {};
output_training_data = {};
input_testing_data = {};
output_testing_data = {};
no_of_folds = 0;

for train_index, test_index in cv.split(input_vector_sample_data):
    
    # Training Data
    
    training_data[no_of_folds]=sample_data_encoded.iloc[train_index]
    input_training_data[no_of_folds]=input_vector_sample_data.iloc[train_index]
    output_training_data[no_of_folds]=output_label_sample_data.iloc[train_index]
    
    # Testing Data
    
    testing_data[no_of_folds]=sample_data_encoded.iloc[test_index]  
    input_testing_data[no_of_folds]=input_vector_sample_data.iloc[test_index]
    output_testing_data[no_of_folds]=output_label_sample_data.iloc[test_index]
    
    no_of_folds += 1
    
    # Save To CSV Files
    
    training_data[no_of_folds-1].to_csv(r'training-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    input_training_data[no_of_folds-1].to_csv(r'input-training-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    output_training_data[no_of_folds-1].to_csv(r'output-training-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    testing_data[no_of_folds-1].to_csv(r'testing-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    input_testing_data[no_of_folds-1].to_csv(r'input-testing-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)
    output_testing_data[no_of_folds-1].to_csv(r'output-testing-data-iteration-0'+str(no_of_folds)+'.csv', index = False, header = True)

In [12]:
# Save the Models in a Pkl File

def save(svc_model):
    pickle.dump(svc_model, open('svc_trained_model_0'+str(i+1)+'.pkl', 'wb'))

In [13]:
# Train the Support Vector Classifier


for i in range(no_of_folds):
    
    # Print Training Data of Each Iteration
     
    print("\n\nTraining Data Input Vectors (Feature Vectots) for Iteration 0" + str(i+1) + " :")
    print("================================================================\n")
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    print("\n",(input_training_data[i])[:5])
    print("\n\nTraining Data Outputs/Labels for Iteration 0" + str(i+1) + " :")
    print("=============================================\n")
    print("  Heart Disease")
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    print((output_training_data[i])[:5])
    
    # Train the Models
    
    print("\n\nTraining the Support Vector Classifier for Iteration 0" + str(i+1) + " :")
    print("=========================================================\n")
    print("Parameters and their values:")
    print("============================\n")
    svc_model = svm.SVC(gamma='auto',random_state=0)
    svc_model.fit(input_training_data[i],output_training_data[i])
    save(svc_model)
    print(svc_model)



Training Data Input Vectors (Feature Vectots) for Iteration 01 :


    ST depression  Slope of ST  Number of vessels fluro  Thallium
0            2.4            2                        3         3
1            1.6            2                        0         7
2            0.3            1                        0         7
3            0.2            2                        1         7
4            0.2            1                        1         3


Training Data Outputs/Labels for Iteration 01 :

  Heart Disease
0    1
1    0
2    1
3    0
4    0
Name: encoded_Heart_Disease, dtype: int64


Training the Support Vector Classifier for Iteration 01 :

Parameters and their values:

SVC(gamma='auto', random_state=0)


Training Data Input Vectors (Feature Vectots) for Iteration 02 :


    ST depression  Slope of ST  Number of vessels fluro  Thallium
1            1.6            2                        0         7
5            0.4            1                        0         7
6     

In [14]:

svc_trained_model = {}

# Load the Saved Models

for i in range(no_of_folds):
    svc_trained_model[i] = pickle.load(open('svc_trained_model_0'+str(i+1)+'.pkl', 'rb'))

In [15]:
# Evaluate the Performance of Trained Models



# Provide Test data to the Trained Models

accuracy_list = []
for i in range(no_of_folds): 
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    print("\nPredictions returned by svc_trained_model 0" + str(i+1) + " :")
    print("==============================================\n")
    
    model_predications = svc_trained_model[i].predict(input_testing_data[i])
    model_predications_data = input_testing_data[i].copy()
    model_predications_data["Heart Disease"] = output_testing_data[i]
    model_predications_data["Predictions"] = model_predications
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    print(model_predications_data[:5])
    
    # Save the Predictions into CSV File 
    
    model_predications_data.to_csv(r'model-predictions-iteration-0' + str(i+1) + '.csv', index = False, header = True)
    
    # Calculate the Accuracy Score

  

    # Calculate the Accuracy of each Iteration
    
    print("\n\nAccuracy Score:")
    print("===============")
    accuracy = accuracy_score(model_predications_data["Heart Disease"],model_predications_data["Predictions"])
    accuracy_list.append(accuracy);
    print(round(accuracy,2))



Testing Phase for Iteration 01 :

Predictions returned by svc_trained_model 01 :

    ST depression  Slope of ST  Number of vessels fluro  Thallium  \
5             0.4            1                        0         7   
7             1.2            2                        1         7   
8             1.2            2                        2         7   
12            0.0            1                        0         3   
15            1.6            2                        0         3   

    Heart Disease  Predictions  
5               0            0  
7               1            1  
8               1            1  
12              0            0  
15              0            0  


Accuracy Score:
0.76


Testing Phase for Iteration 02 :

Predictions returned by svc_trained_model 02 :

    ST depression  Slope of ST  Number of vessels fluro  Thallium  \
0             2.4            2                        3         3   
2             0.3            1                        0   

In [16]:
# Calculate the Average Accuracy Score



# Calculate the Average Accuracy

print("\n\nAverage Accuracy Score:")
print("=======================")   
accuracy_average = sum(accuracy_list) / len(accuracy_list)
print(round(accuracy_average,2))



Average Accuracy Score:
0.8


In [20]:
# Take Input from User

''' 
*---------------- TAKE_USER_INPUT ----------------*
'''

ST_depression_input = input("\nPlease enter ST depression here in points (range from 0-4) : ").strip()
Slope_ST_input = input("\nPlease enter your Slope of ST here (1-3 discrete value) : ").strip()
Number_vessels_input = input("\nPlease enter your Number of vessels fluro here (Zero,One,Two,Three) : ").strip()
Thalium_input = input("\nPlease enter Thalium here (3-7 discrete) : ").strip()


Please enter ST depression here in points (range from 0-4) : 3.4

Please enter your Slope of ST here (1-3 discrete value) : 3

Please enter your Number of vessels fluro here (Zero,One,Two,Three) : 3

Please enter Thalium here (3-7 discrete) : 6


In [21]:
# Convert User Input into Feature Vector

user_input = pd.DataFrame({ 'ST depression': [ST_depression_input],'Slope of ST': [Slope_ST_input],'  Number of vessels fluro': [Number_vessels_input],'  Thallium': [Thalium_input]})

print("\n\nUser Input Feature Vector:")
print("==========================\n")
print(user_input)



User Input Feature Vector:

  ST depression Slope of ST   Number of vessels fluro   Thallium
0           3.4           3                         3          6


In [22]:
# Load the Best Model

''' 
*------------------------ LOAD_BEST_MODEL --------------------------*
|         Function: load()                                          |
|             Purpose: Method to Load Previously Saved Model        |
|         Arguments:                                                |
|               Model: Trained Model                                |
|         Return:                                                   |
|               File: Saved Model will be Loaded in Memory          |
*-------------------------------------------------------------------*
'''

# Load the Best Model
# svc_trained_model_01 has Highest Accuracy

model = pickle.load(open('svc_trained_model_01.pkl', 'rb'))

In [24]:
unseen_data_features = user_input.copy()

In [25]:
# Prediction of Unseen Instance


# Make a Prediction on Unseen Data


predicted_survival = model.predict(unseen_data_features)

if(predicted_survival == 1): 
    prediction = "Presence"
if(predicted_survival == 0):
    prediction = "Absence"

# Add the Prediction in a Pretty Table

pretty_table = PrettyTable()
pretty_table.add_column("       ** Prediction **       ",[prediction])
print(pretty_table)

+--------------------------------+
|        ** Prediction **        |
+--------------------------------+
|            Presence            |
+--------------------------------+


Feature names unseen at fit time:
-   Number of vessels fluro
-   Thallium
Feature names seen at fit time, yet now missing:
- Number of vessels fluro
- Thallium

