In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the heart-disease table from the read-only Kaggle input mount.
heart_csv_path = '/kaggle/input/heart-disease-uci/heart.csv'
df = pd.read_csv(heart_csv_path)

In [None]:
# Preview the first rows of the loaded table as a quick sanity check.
df.head()

<ol>
    <li>age</li>
    <li>sex</li>
    <li>cp = chest pain type (4 values)</li>
    <li>trestbps = resting blood pressure</li>
    <li>chol = serum cholesterol in mg/dl</li>
    <li>fbs = fasting blood sugar > 120 mg/dl (0 = false, 1 = true)</li>
    <li>restecg = resting electrocardiographic results (values 0,1,2)</li>
    <li>thalach = maximum heart rate achieved</li>
    <li>exang = exercise induced angina</li>
    <li>oldpeak = ST depression induced by exercise relative to rest</li>
    <li>slope = the slope of the peak exercise ST segment (0,1,2)</li>
    <li>ca = number of major vessels (0-3) colored by fluoroscopy</li>
    <li>thal = (3 = normal; 6 = fixed defect; 7 = reversible defect)</li>
    <li>target = (0 = no disease; 1 = has heart disease)</li>
</ol>

In [None]:
# Summarise the frame's columns (dtypes and non-null counts) before preprocessing.
df.info()

In [None]:
# Columns treated as categorical codes.
categorical_col = ['sex', 'cp', 'fbs', 'restecg', 'slope', 'exang', 'ca', 'thal']

# Every remaining column except the label is treated as numeric.
non_numeric = [*categorical_col, 'target']
numer_col = df.columns.drop(non_numeric)

Split the data into categorical and numerical columns for preprocessing.

In [None]:
# Class balance of the label, drawn on an explicit Axes handle.
sns.set_theme(style="darkgrid")
ax = sns.countplot(data=df, x='target')
ax.set_xlabel("Target (0 = False, 1 = True)")
plt.show()

In [None]:
# One histogram (with KDE) per numeric feature, coloured by the target label.
hist_features = ['age', 'trestbps', 'chol', 'thalach']
fig, axes = plt.subplots(1, 4, figsize=(20, 5))
for ax, feature in zip(axes, hist_features):
    sns.histplot(data=df, x=feature, hue='target', kde=True, ax=ax)

Age might be a factor that influences the target label.

In [None]:
# Correlation of every feature column with the label, shown as a bar chart.
feature_frame = df.drop(columns='target')
target_corr = feature_frame.corrwith(df['target'])
target_corr.plot.bar()

In [None]:
# One-hot encode the categorical columns.
# dtype=float keeps the indicator columns numeric: pandas >= 2.0 would
# otherwise emit bool columns, and a frame mixing bool and float columns
# converts to an object-dtype array that TensorFlow cannot turn into a tensor.
data = pd.get_dummies(df, columns=categorical_col, dtype=float)
data.head()

In [None]:
# Standardise the numeric columns using statistics from the training rows only.
# Fitting the scaler on the full table would leak the test rows' mean/variance
# into training.
#
# train_test_split picks rows from the sample count and the seed alone, so
# splitting row positions with the same settings as the real split
# (test_size=0.2, random_state=42) selects exactly the rows that end up in
# X_train. Keep these two calls in sync if the split settings ever change.
train_pos, _ = train_test_split(np.arange(len(data)), test_size=0.2, random_state=42)

scaler = StandardScaler()
scaler.fit(data[numer_col].iloc[train_pos])
data[numer_col] = scaler.transform(data[numer_col])
data.head()

In [None]:
# Hold out 20% of the rows for testing; the seed is fixed for reproducibility.
features = data.drop(columns='target')
labels = data['target']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline: logistic regression using the liblinear solver.
lreg_model = LogisticRegression(solver='liblinear').fit(X_train, y_train)

# Score the fitted model on the held-out split.
y_pred = lreg_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))
print(f'Test Accuracy {test_accuracy:.4f}')

In [None]:
# k-nearest neighbours with scikit-learn's default settings.
knn_model = KNeighborsClassifier().fit(X_train, y_train)

# Score the fitted model on the held-out split.
y_pred = knn_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))
print(f'Test Accuracy {test_accuracy:.4f}')

In [None]:
# Single decision tree; the seed pins any randomness in the tree construction.
decisiontree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Score the fitted model on the held-out split.
y_pred = decisiontree_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))
print(f'Test Accuracy {test_accuracy:.4f}')

In [None]:
# Random forest of 1000 trees with a fixed seed.
randomforest_model = RandomForestClassifier(
    n_estimators=1000,
    random_state=42,
).fit(X_train, y_train)

# Score the fitted model on the held-out split.
y_pred = randomforest_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))
print(f'Test Accuracy {test_accuracy:.4f}')

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

In [None]:
def create_model(input_shape, dropout_rate=0.2):
    """Build the (uncompiled) feed-forward binary classifier.

    The network has three ReLU hidden layers (64, 128 and 64 units), each
    followed by dropout, and a single sigmoid output unit.

    Args:
        input_shape: Number of input features as an integer, or a shape
            tuple that excludes the batch dimension.
        dropout_rate: Fraction of units dropped after each hidden layer.

    Returns:
        A ``Sequential`` model. The caller is responsible for compiling it.
    """
    # Accept a bare feature count (Python or NumPy integer) as well as a tuple.
    try:
        shape = tuple(input_shape)
    except TypeError:
        shape = (int(input_shape),)

    return Sequential([
        # An explicit Input layer replaces the deprecated `input_dim`
        # keyword on the first Dense layer.
        tf.keras.Input(shape=shape),
        Dense(64, activation='relu'),
        Dropout(dropout_rate),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        Dense(64, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid'),
    ])

In [None]:
# Size the network's input to the number of feature columns, then show its layers.
n_features = X_train.shape[1]
tf_model = create_model(n_features)
tf_model.summary()

In [None]:
# Hand Keras plain float32 arrays: validation_split only works on
# arrays/tensors, and a DataFrame mixing bool and float columns would
# convert to an object array that cannot become a tensor.
X_fit = X_train.to_numpy(dtype='float32')
y_fit = y_train.to_numpy()

# Stop once the validation loss has not improved for 8 epochs and roll back
# to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', verbose=0, patience=8, restore_best_weights=True)
tf_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping must not watch the test split, otherwise the weights are
# selected on the very data used for the final score. validation_split
# carves the last 20% of the training samples off as a validation set.
history = tf_model.fit(X_fit, y_fit, batch_size=16, epochs=20,
                       validation_split=0.2,
                       callbacks=[early_stopping],
                       verbose=2)

In [None]:
# Per-epoch metrics recorded by Keras during fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Accuracy and loss side by side; the loss series were previously computed
# but never drawn.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

ax_acc.plot(epochs, acc, 'b', label='Training acc')
ax_acc.plot(epochs, val_acc, 'g', label='Validation acc')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training and validation accuracy')
ax_acc.legend()

ax_loss.plot(epochs, loss, 'b', label='Training loss')
ax_loss.plot(epochs, val_loss, 'g', label='Validation loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and validation loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

In [None]:
# Final evaluation on the held-out test split.
# Plain float32 arrays avoid the object-dtype conversion failure that a
# DataFrame mixing bool and float columns triggers; return_dict labels each
# metric instead of returning an anonymous [loss, accuracy] list.
tf_model.evaluate(
    X_test.to_numpy(dtype='float32'),
    y_test.to_numpy(),
    return_dict=True,
)