# 'Heart Disease Database' Dataset


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import Dense
from sklearn.impute import KNNImputer

In [2]:
# read the Heart Disease Database from a URL
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
data = pd.read_csv(url, header=None)
data = data.replace({'?': np.nan})
# print(data)



Handling missing values by using k_nearest Algo 
We fit the imputer to the data using the fit_transform() method, which replaces missing values with the mean of the k-nearest neighbors. 

In [3]:
imputer = KNNImputer(n_neighbors=5)
data= pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

In [4]:
# Separate the features and labels
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values



Spliting the Data as well as removing the data containing non-floating type values or non - numerical values


In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
sc = StandardScaler()


X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)






# SUPPORT VECTOR MACHINE (SVM)

In [6]:
# Train the SVM model
svm_classifier = SVC(kernel='linear', C=1, random_state=42)
svm_classifier.fit(X_train, y_train)

# Predict the labels of test set
y_pred = svm_classifier.predict(X_test)
# Evaluate the model performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 55.74%

Classification Report:
               precision    recall  f1-score   support

         0.0       0.82      0.93      0.87        29
         1.0       0.38      0.25      0.30        12
         2.0       0.29      0.22      0.25         9
         3.0       0.15      0.29      0.20         7
         4.0       0.00      0.00      0.00         4

    accuracy                           0.56        61
   macro avg       0.33      0.34      0.32        61
weighted avg       0.52      0.56      0.53        61


Confusion Matrix:
 [[27  0  2  0  0]
 [ 3  3  2  4  0]
 [ 2  1  2  4  0]
 [ 1  3  1  2  0]
 [ 0  1  0  3  0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# DECISION TREE

In [None]:
# Define the decision tree classifier
dt = DecisionTreeClassifier()

# Fit the classifier to the training data
dt.fit(X_train, y_train)

# Predict the target variable for the test data
y_pred = dt.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.4918032786885246

Classification Report:
               precision    recall  f1-score   support

         0.0       0.73      0.83      0.77        29
         1.0       0.40      0.33      0.36        12
         2.0       0.25      0.22      0.24         9
         3.0       0.00      0.00      0.00         7
         4.0       0.00      0.00      0.00         4

    accuracy                           0.49        61
   macro avg       0.28      0.28      0.27        61
weighted avg       0.46      0.49      0.47        61


Confusion Matrix:
 [[24  1  3  1  0]
 [ 5  4  2  1  0]
 [ 1  1  2  3  2]
 [ 2  4  1  0  0]
 [ 1  0  0  3  0]]


# NAIVE BAYES

In [None]:
# Define the Naive Bayes classifier
nb = GaussianNB()

# Fit the classifier to the training data
nb.fit(X_train, y_train)

# Predict the target variable for the test data
y_pred = nb.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.3114754098360656

Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      0.52      0.68        29
         1.0       0.00      0.00      0.00        12
         2.0       0.25      0.11      0.15         9
         3.0       0.00      0.00      0.00         7
         4.0       0.08      0.75      0.15         4

    accuracy                           0.31        61
   macro avg       0.27      0.28      0.20        61
weighted avg       0.52      0.31      0.36        61


Confusion Matrix:
 [[15  0  1  0 13]
 [ 0  0  1  2  9]
 [ 0  1  1  1  6]
 [ 0  0  1  0  6]
 [ 0  0  0  1  3]]


# ARTIFICIAL NEURAL NETWORKS

In [None]:
# Define the model
model = Sequential()
model.add(Dense(units=16, activation='relu', input_dim=13))
model.add(Dense(units=16, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

In this example, we're using a neural network with two hidden layers, each containing 16 neurons, and a single output layer with a sigmoid activation function to produce binary classification results. The input layer has 13 neurons, which corresponds to the number of features in the dataset.

In [None]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Train the model
model.fit(X_train, y_train, epochs=120, batch_size=32, verbose=1)


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f84358fff70>

In [None]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {loss:.3f}\nTest accuracy: {accuracy:.3f}')
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Test loss: -748367.875
Test accuracy: 0.525

Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      0.52      0.68        29
         1.0       0.00      0.00      0.00        12
         2.0       0.25      0.11      0.15         9
         3.0       0.00      0.00      0.00         7
         4.0       0.08      0.75      0.15         4

    accuracy                           0.31        61
   macro avg       0.27      0.28      0.20        61
weighted avg       0.52      0.31      0.36        61


Confusion Matrix:
 [[15  0  1  0 13]
 [ 0  0  1  2  9]
 [ 0  1  1  1  6]
 [ 0  0  1  0  6]
 [ 0  0  0  1  3]]
