In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pickle
import random

In [2]:
data = pd.read_csv("heart.csv")


In [3]:
global mean_val
mean_val = data["Cholesterol"].mean()

In [4]:
df = pd.DataFrame(data)
df.dropna(inplace=True)

def convert_vals(val):
    global mean_val
    if val < mean_val:
        return "Low"
    elif val == mean_val:
        return "moderate"
    else:
        return "High"


df['Cholesterol'] = df['Cholesterol'].apply(convert_vals)
print(df.head())

   Age Sex ChestPainType  RestingBP Cholesterol  FastingBS RestingECG  MaxHR  \
0   40   M           ATA        140        High          0     Normal    172   
1   49   F           NAP        160         Low          0     Normal    156   
2   37   M           ATA        130        High          0         ST     98   
3   48   F           ASY        138        High          0     Normal    108   
4   54   M           NAP        150         Low          0     Normal    122   

  ExerciseAngina  Oldpeak ST_Slope  HeartDisease  
0              N      0.0       Up             0  
1              N      1.0     Flat             1  
2              N      0.0       Up             0  
3              Y      1.5     Flat             1  
4              N      0.0       Up             0  


In [5]:
# 'ASY' - 0, 'ATA' - 1, 'NAP' - 2, 'TA' - 3
# 'F' - 0, 'M' - 1
# 'High' - 0, 'Low' - 1 (for cholesterol)
# 'LVH' - 0, 'Normal' - 1, 'ST' - 2
cols = ["Sex","ChestPainType", "Cholesterol", "RestingECG","ExerciseAngina"] 
df[cols] = df[cols].apply(LabelEncoder().fit_transform)


In [6]:
df = df[["Sex","ChestPainType", "Cholesterol", "RestingECG","ExerciseAngina","RestingBP","FastingBS","MaxHR", "HeartDisease"]]

In [7]:
df.head()

Unnamed: 0,Sex,ChestPainType,Cholesterol,RestingECG,ExerciseAngina,RestingBP,FastingBS,MaxHR,HeartDisease
0,1,1,0,1,0,140,0,172,0
1,0,2,1,1,0,160,0,156,1
2,1,1,0,2,0,130,0,98,0
3,0,0,0,1,1,138,0,108,1
4,1,2,1,1,0,150,0,122,0


In [8]:
test = df[["Sex","ChestPainType", "Cholesterol", "RestingECG","ExerciseAngina","RestingBP","FastingBS","MaxHR", "HeartDisease"]].head(176).to_dict()
print(test)

{'Sex': {0: 1, 1: 0, 2: 1, 3: 0, 4: 1, 5: 1, 6: 0, 7: 1, 8: 1, 9: 0, 10: 0, 11: 1, 12: 1, 13: 1, 14: 0, 15: 0, 16: 1, 17: 0, 18: 1, 19: 1, 20: 0, 21: 1, 22: 0, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 0, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1, 34: 0, 35: 1, 36: 1, 37: 0, 38: 0, 39: 0, 40: 0, 41: 0, 42: 1, 43: 1, 44: 1, 45: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 0, 52: 1, 53: 0, 54: 0, 55: 0, 56: 1, 57: 1, 58: 1, 59: 1, 60: 1, 61: 0, 62: 1, 63: 1, 64: 0, 65: 0, 66: 0, 67: 1, 68: 1, 69: 1, 70: 1, 71: 1, 72: 1, 73: 0, 74: 1, 75: 1, 76: 1, 77: 0, 78: 1, 79: 1, 80: 1, 81: 1, 82: 1, 83: 1, 84: 1, 85: 1, 86: 1, 87: 0, 88: 1, 89: 1, 90: 0, 91: 1, 92: 0, 93: 1, 94: 0, 95: 1, 96: 1, 97: 1, 98: 1, 99: 1, 100: 1, 101: 1, 102: 0, 103: 1, 104: 1, 105: 1, 106: 0, 107: 1, 108: 1, 109: 1, 110: 0, 111: 1, 112: 1, 113: 1, 114: 0, 115: 0, 116: 1, 117: 0, 118: 0, 119: 1, 120: 0, 121: 0, 122: 1, 123: 0, 124: 1, 125: 1, 126: 0, 127: 0, 128: 0, 129: 1, 130: 1, 131: 1, 132: 1, 133: 1, 134: 0, 135: 1, 136: 0, 137: 

In [9]:
x_train, x_test, y_train,y_test = train_test_split(df[["Sex","ChestPainType", "Cholesterol", "RestingECG","ExerciseAngina","RestingBP","FastingBS","MaxHR"]], df["HeartDisease"], test_size=.2)

In [10]:
lr = LogisticRegression()

In [11]:
lr.fit(x_train, y_train)

In [12]:
pred = lr.predict(x_test)

In [13]:
acc_lr = accuracy_score(pred,y_test)*100

In [14]:
print(acc_lr)

76.08695652173914


In [15]:
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))

In [16]:
clf.fit(x_train , y_train)

In [17]:
pred_svm = clf.predict(x_test)

In [18]:
acc_svm = accuracy_score(pred_svm,y_test)*100

In [19]:
print(acc_svm)

82.06521739130434


In [20]:
filename = "model_"
pickle.dump(lr,open(filename+"lr.sav","wb"))
pickle.dump(clf,open(filename+"clf.sav","wb"))

In [21]:
random_row_num = random.randint(0, df.shape[0] - 1)
print(random_row_num)
df[random_row_num:random_row_num + 1]

327


Unnamed: 0,Sex,ChestPainType,Cholesterol,RestingECG,ExerciseAngina,RestingBP,FastingBS,MaxHR,HeartDisease
327,1,0,1,1,1,125,1,119,1


In [24]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense

In [None]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(8,)),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(32, activation='relu')
    Dense(1, activation='sigmoid'),
])