In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
import os

# Location of the cleaned CVD dataset. The original absolute path is kept as
# the default so existing runs are unchanged; set the CVD_DATA_PATH environment
# variable to point the notebook at the file on another machine.
DATA_PATH = os.environ.get('CVD_DATA_PATH', '/Users/karimelzokm/Downloads/CVD_cleaned.csv')

# Raw dataset as loaded from disk; later cells encode columns of `df` in place.
df = pd.read_csv(DATA_PATH)

In [None]:
# Report the (rows, columns) shape of the loaded frame.
dataset_shape = df.shape
print(f'Shape: {dataset_shape}')

In [None]:
# Column groups used by the rest of the notebook.

# Columns treated as numeric predictors.
numeric_features = [
    'Height_(cm)', 'Weight_(kg)', 'BMI',
    'Alcohol_Consumption', 'Fruit_Consumption',
    'Green_Vegetables_Consumption', 'FriedPotato_Consumption',
]

# Columns treated as categorical predictors; these are the ones that get
# label-encoded.
categorical_features = [
    'General_Health', 'Checkup', 'Exercise',
    'Skin_Cancer', 'Other_Cancer',
    'Depression', 'Diabetes', 'Arthritis',
    'Sex', 'Age_Category', 'Smoking_History',
]

# Name of the column to predict.
target = 'Heart_Disease'

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace every categorical column of `df`, in place, with integer codes.
# A single LabelEncoder instance is refit for each column, so once the loop
# finishes it only holds the mapping learned from the last column processed.
label_encoder = LabelEncoder()
for column_name in categorical_features:
    encoded_values = label_encoder.fit_transform(df[column_name])
    df[column_name] = encoded_values

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Encode the target as 0/1 integers ('No' -> 0, 'Yes' -> 1).
# The explicit cast matters: `replace` on a string column only produced an
# integer column through implicit downcasting, which pandas 2.2 deprecates.
# Without the cast the column can stay object-typed, which downstream samplers
# and Keras may not accept as a label array.
# Re-running this cell is safe: `replace` leaves already-encoded 0/1 values alone.
df['Heart_Disease'] = (
    df['Heart_Disease']
    .replace({'Yes': 1, 'No': 0})
    .astype('int64')
)

In [None]:
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

In [None]:
# Separate the label column from the predictors.
# y: the 'Heart_Disease' column; X: every other column of `df`.
y = df['Heart_Disease']
X = df.drop('Heart_Disease', axis='columns')


In [None]:
# Two samplers with fixed seeds: one targets the minority class, the other
# the majority class.
ros = RandomOverSampler(random_state=42, sampling_strategy='minority')
rus = RandomUnderSampler(random_state=42, sampling_strategy='majority')

# Step 1: randomly oversample the minority class of (X, y).
oversampled = ros.fit_resample(X, y)
X_resampled_over, y_resampled_over = oversampled

# Step 2: randomly undersample the majority class of the oversampled data.
# NOTE(review): after step 1 the classes should already be the same size, so
# this pass presumably removes nothing - confirm it is still needed.
X_resampled, y_resampled = rus.fit_resample(*oversampled)

In [None]:
# Stitch the resampled predictors and labels back into one frame, side by side.
balanced_parts = [X_resampled, y_resampled]
df_balanced = pd.concat(balanced_parts, axis='columns')

In [None]:
# Predictor / label views of the balanced frame.
y_balanced = df_balanced['Heart_Disease']
X_balanced = df_balanced.drop('Heart_Disease', axis='columns')

In [None]:
# Show the (rows, columns) shape of the balanced frame as this cell's output.
df_balanced.shape

In [None]:
from sklearn.model_selection import train_test_split


In [None]:
# Hold out the test set from the ORIGINAL (un-resampled) data.
# Splitting after random oversampling lets copies of the same minority row end
# up in both the training and the test portion, which leaks information and
# inflates the test metrics. `stratify=y` keeps the class proportions of `y`
# the same in both portions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Balance the classes in the training portion only. The test portion keeps the
# original class distribution, so read plain accuracy together with the
# confusion matrix rather than on its own.
X_train, y_train = RandomOverSampler(
    sampling_strategy='minority', random_state=42
).fit_resample(X_train, y_train)

In [None]:
# Learn per-feature min/max from the training data only, then apply that same
# scaling to both the training and the test features.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Make sure both feature matrices are float64 before they go to the network.
X_train, X_test = (
    features.astype(np.float64) for features in (X_train, X_test)
)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score
import keras
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling start from the same state on every run.
tf.keras.utils.set_random_seed(42)

# Small fully-connected binary classifier: 32 -> 8 -> 1 (sigmoid).
# The input width is declared with an explicit Input object instead of the
# deprecated `input_dim=` argument on the first Dense layer.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Multiply the learning rate by 0.2 whenever val_loss has not improved for
# 5 epochs, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)

# Take the optimizer from the same Keras implementation as the model
# (tf.keras); mixing in the standalone `keras` package can fail when the two
# installed versions differ.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Train, using 10% of the training data as the validation set that drives the
# learning-rate schedule.
model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.1, verbose=1, callbacks=[reduce_lr])

# The single sigmoid unit outputs one probability per row, shape (n, 1).
# Threshold at 0.5 and flatten to shape (n,) so the predictions line up with
# the 1-D y_test.
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int).ravel()

accuracy = accuracy_score(y_test, y_pred_binary)
print("Test Accuracy:", accuracy)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Confusion matrix of the true test labels (rows) against the thresholded MLP
# predictions (columns). `labels=[0, 1]` pins the row/column order and forces a
# 2x2 result so the hard-coded tick labels below always match the cells.
conf_matrix_mlp = confusion_matrix(y_test, y_pred_binary, labels=[0, 1])

# Draw the matrix as an annotated heatmap on an explicit Axes.
fig, ax = plt.subplots()
sns.heatmap(conf_matrix_mlp, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=['Predicted 0', 'Predicted 1'],
            yticklabels=['Actual 0', 'Actual 1'],
            ax=ax)

ax.set_title('Confusion Matrix for MLP')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()