In [1]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import pickle
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from sklearn.utils import resample
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error

In [2]:
heart_disease_data = pd.read_csv('heart.csv')

In [3]:
# Transform Chest Pain Type
def changeChestPainType(ChestPainType):
    if ChestPainType == "ASY":
        return 0
    elif ChestPainType == "NAP":
        return 1
    elif ChestPainType == "ATA":
        return 2
    else:
        return 3

# Along with replace() and map(), this is another way to encode the gender column into numbers.
heart_disease_data['ChestPainType'] = heart_disease_data['ChestPainType'].apply(changeChestPainType)
heart_disease_data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,2,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,1,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,2,130,283,0,ST,98,N,0.0,Up,0
3,48,F,0,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,1,150,195,0,Normal,122,N,0.0,Up,0


In [4]:
# Transform Resting ECG
def changeRestingECG(RestingECG):
    if RestingECG == "Normal":
        return 0
    elif RestingECG == "LVH":
        return 1
    else:
        return 2

# Along with replace() and map(), this is another way to encode the gender column into numbers.
heart_disease_data['RestingECG'] = heart_disease_data['RestingECG'].apply(changeRestingECG)
heart_disease_data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,2,140,289,0,0,172,N,0.0,Up,0
1,49,F,1,160,180,0,0,156,N,1.0,Flat,1
2,37,M,2,130,283,0,2,98,N,0.0,Up,0
3,48,F,0,138,214,0,0,108,Y,1.5,Flat,1
4,54,M,1,150,195,0,0,122,N,0.0,Up,0


In [5]:
# Transform ST SLope
def changeST_Slope(ST_Slope):
    if ST_Slope == "Flat":
        return 0
    elif ST_Slope == "Up":
        return 1
    else:
        return 2

# Along with replace() and map(), this is another way to encode the gender column into numbers.
heart_disease_data['ST_Slope'] = heart_disease_data['ST_Slope'].apply(changeST_Slope)
heart_disease_data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,2,140,289,0,0,172,N,0.0,1,0
1,49,F,1,160,180,0,0,156,N,1.0,0,1
2,37,M,2,130,283,0,2,98,N,0.0,1,0
3,48,F,0,138,214,0,0,108,Y,1.5,0,1
4,54,M,1,150,195,0,0,122,N,0.0,1,0


In [6]:
# Transform Sex
def changeSex(sex):
    if sex == "M":
        return 0
    
    else:
        return 1

# Along with replace() and map(), this is another way to encode the gender column into numbers.
heart_disease_data['Sex'] = heart_disease_data['Sex'].apply(changeSex)
heart_disease_data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,0,2,140,289,0,0,172,N,0.0,1,0
1,49,1,1,160,180,0,0,156,N,1.0,0,1
2,37,0,2,130,283,0,2,98,N,0.0,1,0
3,48,1,0,138,214,0,0,108,Y,1.5,0,1
4,54,0,1,150,195,0,0,122,N,0.0,1,0


In [7]:
# Transform ExerciseAngina
def changeExerciseAngina(ExerciseAngina):
    if ExerciseAngina == "Y":
        return 0
    
    else:
        return 1

heart_disease_data['ExerciseAngina'] = heart_disease_data['ExerciseAngina'].apply(changeExerciseAngina)
heart_disease_data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,0,2,140,289,0,0,172,1,0.0,1,0
1,49,1,1,160,180,0,0,156,1,1.0,0,1
2,37,0,2,130,283,0,2,98,1,0.0,1,0
3,48,1,0,138,214,0,0,108,0,1.5,0,1
4,54,0,1,150,195,0,0,122,1,0.0,1,0


In [8]:
y = heart_disease_data['HeartDisease']
X = heart_disease_data.drop(['HeartDisease'], axis = 1)
y

0      0
1      1
2      0
3      1
4      0
      ..
913    1
914    1
915    1
916    1
917    0
Name: HeartDisease, Length: 918, dtype: int64

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [10]:
# Create a StandardScater model and fit it to the training data

X_scaler = StandardScaler()
X_scaler.fit(X_train)

StandardScaler()

In [11]:
# Transform the training and testing data by using the X_scaler and y_scaler models

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [39]:
SVC_best_params = SVC(C=1, kernel='linear')
SVC_best_params.fit(X_train, y_train)

SVC(C=1, kernel='linear')

In [22]:
input_data = (38,0,1,138,175,0,0,173,1,0.0,1)
input_data_as_numpy_array = np.asarray(input_data)

input_data_reshape = input_data_as_numpy_array.reshape(1, -1)
input_data_reshape

array([[ 38.,   0.,   1., 138., 175.,   0.,   0., 173.,   1.,   0.,   1.]])

In [35]:
heart_disease_data.head(50)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,0,2,140,289,0,0,172,1,0.0,1,0
1,49,1,1,160,180,0,0,156,1,1.0,0,1
2,37,0,2,130,283,0,2,98,1,0.0,1,0
3,48,1,0,138,214,0,0,108,0,1.5,0,1
4,54,0,1,150,195,0,0,122,1,0.0,1,0
5,39,0,1,120,339,0,0,170,1,0.0,1,0
6,45,1,2,130,237,0,0,170,1,0.0,1,0
7,54,0,2,110,208,0,0,142,1,0.0,1,0
8,37,0,0,140,207,0,0,130,0,1.5,0,1
9,48,1,2,120,284,0,0,120,1,0.0,1,0


In [36]:
X.head(50)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,40,0,2,140,289,0,0,172,1,0.0,1
1,49,1,1,160,180,0,0,156,1,1.0,0
2,37,0,2,130,283,0,2,98,1,0.0,1
3,48,1,0,138,214,0,0,108,0,1.5,0
4,54,0,1,150,195,0,0,122,1,0.0,1
5,39,0,1,120,339,0,0,170,1,0.0,1
6,45,1,2,130,237,0,0,170,1,0.0,1
7,54,0,2,110,208,0,0,142,1,0.0,1
8,37,0,0,140,207,0,0,130,0,1.5,0
9,48,1,2,120,284,0,0,120,1,0.0,1


In [46]:
input_data = (37,0,0,120,223,0,0,168,1,0.0,1)
input_data_as_numpy_array = np.asarray(input_data)

input_data_reshape = input_data_as_numpy_array.reshape(1, -1)
input_data_reshape

array([[ 37.,   0.,   0., 120., 223.,   0.,   0., 168.,   1.,   0.,   1.]])

In [47]:

#standardise the data
std_data = X_scaler.transform(input_data_reshape)
prediction = SVC_best_params.predict(std_data)


if (prediction[0] ==0):
    print("The person does not have heart disease")
else:
    print("The person has heart disease")

The person does not have heart disease


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
