In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the heart-attack CSV from the Kaggle input directory into a DataFrame.
df=pd.read_csv('/kaggle/input/health-care-data-set-on-heart-attack-possibility/heart.csv')

In [None]:
# Preview the first rows of the raw table.
df.head()

In [None]:
# Summarise column dtypes and non-null counts.
df.info()

In [None]:
# List the distinct values of selected columns. Further down, cp, restecg,
# slope, ca and thal are one-hot encoded, while fbs and exang are used as-is.
df['cp'].unique()

In [None]:
# Distinct values of fbs.
df['fbs'].unique()

In [None]:
# Distinct values of restecg.
df['restecg'].unique()

In [None]:
# Distinct values of exang.
df['exang'].unique()

In [None]:
# Distinct values of slope.
df['slope'].unique()

In [None]:
# Distinct values of ca.
df['ca'].unique()

In [None]:
# Distinct values of thal.
df['thal'].unique()

In [None]:
# One-hot encode each multi-valued column; drop_first removes the first level
# of every column so the remaining indicators are not perfectly collinear.
cp, restecg, slope, ca, thal = (
    pd.get_dummies(df[column], drop_first=True)
    for column in ('cp', 'restecg', 'slope', 'ca', 'thal')
)

In [None]:
# Give every indicator column a readable, unique name. Each list length must
# equal the number of indicator columns in its frame, otherwise pandas raises.
cp.columns, restecg.columns, slope.columns, ca.columns, thal.columns = (
    ['cp1', 'cp2', 'cp3'],
    ['r1', 'r2'],
    ['s1', 's2'],
    ['ca1', 'ca2', 'ca3', 'ca4'],
    ['t1', 't2', 't3'],
)

In [None]:
df=pd.concat([df,cp,restecg,slope,ca,thal],axis=1)

In [None]:
df.drop(['cp','restecg','slope','ca','thal'],axis=1,inplace=True)

In [None]:
# Check the frame after encoding: indicator columns appended, originals removed.
df.head()

In [None]:
# Show the resulting column names (handy when writing out the feature list).
df.columns

In [None]:
X=df[['age', 'sex', 'trestbps', 'chol', 'fbs', 'thalach', 'exang', 'oldpeak',
        'cp1', 'cp2', 'cp3', 'r1', 'r2', 's1', 's2', 'ca1', 'ca2',
       'ca3', 'ca4', 't1', 't2', 't3']]

In [None]:
y=df['target']

In [None]:
# Preview the feature matrix.
X.head()

In [None]:
# Distinct label values present in the target column.
y.unique()

In [None]:
import seaborn as sns
# Bar chart of how many rows fall into each target class.
# The Series must be passed as `x=`: recent seaborn releases interpret the
# first positional argument as `data`, not as the variable to count.
sns.countplot(x=y)

In [None]:
# Re-check dtypes and non-null counts now that the indicator columns are in place.
df.info()

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, test_size=0.30, 
                                                    random_state=101)

In [None]:
X_train.head()

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Baseline logistic regression on the raw (unscaled) features.
# The features are on very different scales, so the default max_iter=100 is
# likely to stop before convergence; allow more iterations so the fit can finish.
logmodel = LogisticRegression(max_iter=1000)
logmodel.fit(X_train,y_train)

In [None]:
# Predict labels for the held-out rows with the fitted logistic model.
predictions = logmodel.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision / recall / F1 on the test split.
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix: rows are true labels, columns are predicted labels.
confusion_matrix(y_test,predictions)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()

In [None]:
scaler.fit(df.drop('target',axis=1))

In [None]:
scaled_features = scaler.transform(df.drop('target',axis=1))

In [None]:
df_feat = pd.DataFrame(scaled_features,columns=['age', 'sex', 'trestbps', 'chol', 'fbs', 'thalach', 'exang', 'oldpeak',
        'cp1', 'cp2', 'cp3', 'r1', 'r2', 's1', 's2', 'ca1', 'ca2',
       'ca3', 'ca4', 't1', 't2', 't3'])
df_feat.head()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(scaled_features,df['target'],
                                                    test_size=0.30,random_state=101)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Start with the simplest nearest-neighbour model: a single neighbour.
knn = KNeighborsClassifier(n_neighbors=1)

In [None]:
# Fit on the current training split.
knn.fit(X_train,y_train)

In [None]:
# Predict labels for the held-out rows.
pred = knn.predict(X_test)

In [None]:
# Confusion matrix: rows are true labels, columns are predicted labels.
print(confusion_matrix(y_test,pred))

In [None]:
# Per-class precision / recall / F1 for K=1.
print(classification_report(y_test,pred))

In [None]:
# Estimate the error rate for K = 1..39 with 5-fold cross-validation on the
# TRAINING split only. Choosing K by its error on the test split would tune a
# hyper-parameter on the same data later used to report final performance.
from sklearn.model_selection import cross_val_score

error_rate = []

# Will take some time
for i in range(1,40):

    knn = KNeighborsClassifier(n_neighbors=i)
    # cross_val_score clones and fits the estimator per fold; the default
    # score for a classifier is accuracy, so error = 1 - mean accuracy.
    fold_accuracy = cross_val_score(knn, X_train, y_train, cv=5)
    error_rate.append(1 - fold_accuracy.mean())

In [None]:
import matplotlib.pyplot as plt
# Line chart of the recorded error rate for each K (1..39), one marker per K.
fig, ax = plt.subplots(figsize=(10,6))
ax.plot(
    range(1,40), error_rate,
    color='blue', linestyle='dashed',
    marker='o', markerfacecolor='red', markersize=10,
)
ax.set_title('Error Rate vs. K Value')
ax.set_xlabel('K')
ax.set_ylabel('Error Rate')

In [None]:
# NOW WITH K=35
# Refit with 35 neighbours and score on the held-out split.
knn = KNeighborsClassifier(n_neighbors=35).fit(X_train,y_train)
pred = knn.predict(X_test)

# One print call; the '\n' items plus the newline separator reproduce the
# blank lines between the heading, the confusion matrix and the report.
print(
    'WITH K=35',
    '\n',
    confusion_matrix(y_test,pred),
    '\n',
    classification_report(y_test,pred),
    sep='\n',
)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with 100 trees. random_state is fixed (same seed as the
# train/test split) so that re-running the notebook reproduces the same forest.
rfc = RandomForestClassifier(n_estimators=100, random_state=101)
rfc.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out rows with the fitted forest.
rfc_pred = rfc.predict(X_test)

In [None]:
# Confusion matrix: rows are true labels, columns are predicted labels.
print(confusion_matrix(y_test,rfc_pred))

In [None]:
# Per-class precision / recall / F1 for the random forest.
print(classification_report(y_test,rfc_pred))

In [None]:
# Refit logistic regression, this time on whatever X_train / y_train currently
# hold (the standardized split created for the KNN section), for comparison
# with the earlier fit on raw features. Note this rebinds `logmodel`.
logmodel = LogisticRegression()
logmodel.fit(X_train,y_train)

In [None]:
# Predict labels for the held-out rows (rebinds `predictions`).
predictions = logmodel.predict(X_test)

In [None]:
# Per-class precision / recall / F1 for this refit.
print(classification_report(y_test,predictions))

In [None]:
# Search space for the RBF-kernel SVM: 5 values of C x 5 values of gamma.
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
    kernel=['rbf'],
)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
from sklearn.svm import SVC

In [None]:
# Exhaustive cross-validated search over param_grid for an SVC.
# refit=True retrains the best combination on the full training split so that
# `grid` itself can be used to predict; verbose=3 logs every fit.
grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=3)

In [None]:
# May take a while!
grid.fit(X_train,y_train)

In [None]:
# Hyper-parameter combination with the best cross-validated score.
grid.best_params_

In [None]:
# Predict the held-out rows with the refitted best estimator.
grid_predictions = grid.predict(X_test)

In [None]:
# Confusion matrix: rows are true labels, columns are predicted labels.
print(confusion_matrix(y_test,grid_predictions))

In [None]:
# Per-class precision / recall / F1 for the tuned SVM.
print(classification_report(y_test,grid_predictions))

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=101)

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
scaler = MinMaxScaler()

In [None]:
scaler.fit(X_train)

In [None]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation,Dropout

In [None]:
# (rows, features) of the scaled training data; the feature count feeds the
# width of the first Dense layer below.
X_train.shape

In [None]:
# Fully connected binary classifier: 22 -> 11 -> 1 units. The first layer width
# equals the number of feature columns selected into X.
# https://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw
model = Sequential([
    Dense(units=22, activation='relu'),
    Dense(units=11, activation='relu'),
    # Single sigmoid unit: output is the predicted probability of class 1.
    Dense(units=1, activation='sigmoid'),
])

# For a binary classification problem
model.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)

In [None]:
model.fit(x=X_train, 
          y=y_train, 
          epochs=600,
          validation_data=(X_test, y_test), verbose=1,
          callbacks=[early_stop]
          )

In [None]:
# Per-epoch metrics recorded by the most recent fit, one column per metric.
model_loss = pd.DataFrame(model.history.history)
# Plot the recorded curves against epoch to look for over-fitting.
model_loss.plot()

In [None]:
from tensorflow.keras.layers import Dropout

In [None]:
# Same 22 -> 11 -> 1 architecture as before, with 50% dropout after each hidden
# layer as regularisation. Rebinds `model`, discarding the previous network.
model = Sequential([
    Dense(units=22, activation='relu'),
    Dropout(0.5),
    Dense(units=11, activation='relu'),
    Dropout(0.5),
    Dense(units=1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
# Train the dropout network. Validation uses the last 20% of the TRAINING rows
# instead of the test split, because early stopping picks the epoch count from
# val_loss and the test set is used for the final report.
# Labels are converted to a NumPy array so Keras can slice them with the features.
model.fit(x=X_train,
          y=np.asarray(y_train),
          epochs=600,
          validation_split=0.2, verbose=1,
          callbacks=[early_stop]
          )

In [None]:
# Per-epoch metrics recorded while fitting the dropout network.
model_loss = pd.DataFrame(model.history.history)
# Plot the recorded curves against epoch.
model_loss.plot()

In [None]:
# Sequential.predict_classes was removed in TensorFlow 2.6. For a single
# sigmoid output the equivalent is thresholding the predicted probability at
# 0.5; ravel() flattens the (n, 1) result into a 1-D label vector.
predictions = (model.predict(X_test) > 0.5).astype("int32").ravel()

In [None]:
# Background on the reported metrics:
# https://en.wikipedia.org/wiki/Precision_and_recall
from sklearn.metrics import classification_report,confusion_matrix
# Per-class precision / recall / F1 of the network on the test split.
print(classification_report(y_test,predictions))

In [None]:
# Confusion matrix: rows are true labels, columns are predicted labels.
print(confusion_matrix(y_test,predictions))