# In [None]:
# Step 1: Import Libraries

import sys
import pandas as pd
import numpy as np
import sklearn
import matplotlib
import tensorflow as tf
# Report the interpreter and library versions this run is using.
for library_name, library_version in (
    ('Python', sys.version),
    ('Pandas', pd.__version__),
    ('Numpy', np.__version__),
    ('Sklearn', sklearn.__version__),
    ('Matplotlib', matplotlib.__version__),
    ('TensorFlow', tf.__version__),
):
    print(f'{library_name}: {library_version}')
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
import seaborn as sns

# Step 2: Load and Explore Dataset
# read the csv

# Make sure to use the correct path to your dataset
cleveland = pd.read_csv('heart.csv')

print('Shape of DataFrame: {}'.format(cleveland.shape))
print(cleveland.loc[1])
# A bare expression is only rendered when it is the last statement of a
# notebook cell, so the previews below are printed explicitly.
print(cleveland.loc[280:])

# remove missing data (indicated with a "?"): boolean-mask indexing keeps the
# frame's shape and turns every cell equal to "?" into NaN
data = cleveland[~cleveland.isin(['?'])]
print(data.loc[280:])

# drop rows with NaN values from DataFrame
data = data.dropna(axis=0)
print(data.loc[280:])
print(data.shape)
print(data.dtypes)

# transform data to numeric to enable further analysis
data = data.apply(pd.to_numeric)
print(data.dtypes)
print(data.describe())

# plot histograms for each variable

# Histogram of every column.
data.hist(figsize=(12, 12))
plt.show()

# Bar chart of how often each target value occurs at each age.
pd.crosstab(data.age, data.target).plot(kind="bar", figsize=(20, 6))
plt.title('Heart Disease Frequency for Ages')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.show()

# Pairwise correlation between all columns.
plt.figure(figsize=(10, 10))
sns.heatmap(data.corr(), annot=True, fmt='.1f')
plt.show()

# Mean thalach per age. groupby() returns its groups sorted by key, so the
# aggregated values line up with the sorted unique ages; a single grouped mean
# replaces re-filtering the whole frame once per age.
thalach_by_age = data.groupby('age')['thalach']
age_unique = sorted(data.age.unique())
age_thalach_values = thalach_by_age.count().values
mean_thalach = list(thalach_by_age.mean())

plt.figure(figsize=(10, 5))
sns.pointplot(x=age_unique, y=mean_thalach, color='red', alpha=0.8)
plt.xlabel('Age', fontsize=15, color='blue')
plt.xticks(rotation=45)
plt.ylabel('Thalach', fontsize=15, color='blue')
plt.title('Age vs Thalach', fontsize=15, color='blue')
plt.grid()
plt.show()
# Step 3: Create Training and Testing Datasets

from sklearn import model_selection
from tensorflow.keras.utils import to_categorical

# Cast to float up front so the standardization below cannot fail (or
# truncate) when every feature column happens to be integer-typed.
X = np.array(data.drop(['target'], axis=1), dtype=float)
y = np.array(data['target'])

# Split BEFORE computing any scaling statistics so that nothing about the
# held-out rows leaks into the preprocessing applied to the training rows.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, stratify=y, random_state=42, test_size=0.2)

# Standardization statistics come from the training split only and are then
# applied unchanged to the test split (and to the full matrix X).
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
std[std == 0] = 1.0  # a constant column would otherwise divide by zero
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
X = (X - mean) / std

# Fix the one-hot width from the full label vector so the train and test
# encodings always have the same number of columns.
num_classes = int(y.max()) + 1
Y_train = to_categorical(y_train, num_classes=num_classes)
Y_test = to_categorical(y_test, num_classes=num_classes)
print(Y_train.shape)
print(Y_train[:10])

# Step 4: Building and Training the Neural Network

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
def create_model(input_dim=13, learning_rate=0.001):
    """Build and compile the two-output softmax classifier.

    The network is Dense(16, relu) -> Dropout(0.25) -> Dense(8, relu) ->
    Dropout(0.25) -> Dense(2, softmax). Both hidden layers use the 'normal'
    kernel initializer and an L2 kernel regularizer of 0.001.

    Args:
        input_dim: Number of input features fed to the first layer.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        The model, compiled with categorical cross-entropy loss, the Adam
        optimizer and the 'accuracy' metric.
    """
    model = Sequential()
    model.add(Dense(16, input_dim=input_dim, kernel_initializer='normal',
                    kernel_regularizer=regularizers.l2(0.001), activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(8, kernel_initializer='normal',
                    kernel_regularizer=regularizers.l2(0.001), activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(2, activation='softmax'))
    # Pass the configured optimizer instance to compile(); previously it was
    # created but the model was compiled with the string 'rmsprop' instead.
    adam = Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam, metrics=['accuracy'])
    return model
model = create_model()
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would emit an extra "None" line.
model.summary()
history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                    epochs=50, batch_size=10)

# One figure per metric: training curve first, then the curve measured on the
# validation_data passed to fit() (labelled 'test' in the legend).
for train_key, val_key, plot_title, axis_label in (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'loss'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(plot_title)
    plt.ylabel(axis_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'])
    plt.show()
# Step 5: Improving Results - A Binary Classification Problem

# Work on copies so the original label arrays stay untouched, then collapse
# every label greater than zero to 1.
Y_train_binary, Y_test_binary = y_train.copy(), y_test.copy()
for binary_labels in (Y_train_binary, Y_test_binary):
    binary_labels[binary_labels > 0] = 1
def create_binary_model(input_dim=13, learning_rate=0.001):
    """Build and compile the single-output sigmoid classifier.

    The network is Dense(16, relu) -> Dropout(0.25) -> Dense(8, relu) ->
    Dropout(0.25) -> Dense(1, sigmoid). Both hidden layers use the 'normal'
    kernel initializer and an L2 kernel regularizer of 0.001.

    Args:
        input_dim: Number of input features fed to the first layer.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        The model, compiled with binary cross-entropy loss, the Adam
        optimizer and the 'accuracy' metric.
    """
    model = Sequential()
    model.add(Dense(16, input_dim=input_dim, kernel_initializer='normal',
                    kernel_regularizer=regularizers.l2(0.001), activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(8, kernel_initializer='normal',
                    kernel_regularizer=regularizers.l2(0.001), activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(1, activation='sigmoid'))
    # Pass the configured optimizer instance to compile(); previously it was
    # created but the model was compiled with the string 'rmsprop' instead.
    adam = Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model
binary_model = create_binary_model()
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would emit an extra "None" line.
binary_model.summary()
history = binary_model.fit(X_train, Y_train_binary,
                           validation_data=(X_test, Y_test_binary),
                           epochs=50, batch_size=10)

# One figure per metric: training curve first, then the curve measured on the
# validation_data passed to fit() (labelled 'test' in the legend).
for train_key, val_key, plot_title, axis_label in (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'loss'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(plot_title)
    plt.ylabel(axis_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'])
    plt.show()
# Step 6: Results and Metrics

from sklearn.metrics import classification_report, accuracy_score,confusion_matrix, precision_score, recall_score, f1_score

# Categorical model predictions and metrics

# Categorical model: the predicted class is the column holding the largest
# output for each test row.
categorical_outputs = model.predict(X_test)
categorical_pred = np.argmax(categorical_outputs, axis=1)

print('Results for Categorical Model')
for categorical_metric in (accuracy_score, classification_report):
    print(categorical_metric(y_test, categorical_pred))

# Binary model: round the sigmoid output to the nearest integer label.
binary_outputs = binary_model.predict(X_test)
binary_pred = np.round(binary_outputs).astype(int)

print('Results for Binary Model')
for binary_metric in (accuracy_score, classification_report):
    print(binary_metric(Y_test_binary, binary_pred))

# Confusion matrix for the binary model, drawn as an annotated heatmap.
class_tick_labels = ["No Disease", "Disease"]
cm = confusion_matrix(Y_test_binary, binary_pred)
plt.figure(figsize=(6, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_tick_labels, yticklabels=class_tick_labels)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# Precision, recall and F1-score of the binary model, to two decimals.
precision, recall, f1 = (
    scorer(Y_test_binary, binary_pred)
    for scorer in (precision_score, recall_score, f1_score)
)
for score_label, score_value in (
    ("Precision", precision),
    ("Recall", recall),
    ("F1-Score", f1),
):
    print(f"{score_label}: {score_value:.2f}")

binary_report = classification_report(
    Y_test_binary, binary_pred, target_names=["NoDisease", "Disease"])
print("\nClassificationReport:\n", binary_report)