Load the data (it is separated into the objective and the characteristics further below)

In [9]:
import pandas as pd

# Read the heart-disease table into a DataFrame. The path is relative to the
# directory the notebook is run from; the file name suggests the CSV has
# already been cleaned upstream (not verified here).
data = pd.read_csv(filepath_or_buffer='../data/heart_2020_cleaned.csv')

Normalize the float-valued variables

In [10]:
from sklearn.preprocessing import MinMaxScaler

# Only float64 columns are min-max scaled; columns of any other dtype are
# left exactly as they were loaded.
# NOTE(review): the scaler is fitted on the whole table, i.e. before the
# train/test split performed later in the notebook, so test rows contribute
# to the learned min/max.
columns_to_normalize = data.select_dtypes(include=['float64']).columns
scaler = MinMaxScaler()
scaler.fit(data[columns_to_normalize])

# Overwrite the selected columns of `data` with their scaled values.
data[columns_to_normalize] = scaler.transform(data[columns_to_normalize])

Separate the objective (target) from the independent variables (features)

In [11]:
# Objective (target): the HeartDisease column.
obj = data['HeartDisease']

# Characteristics (features): every column except the target.
char = data.drop(columns='HeartDisease')

Correlation-based feature selection

In [12]:
# Minimum absolute correlation with the target for a feature to be kept
correlation_threshold = 0.12

# Correlation of every feature column with the target. Despite the name this
# is a one-dimensional Series indexed by feature name, not a square matrix.
# NOTE(review): computed on the full dataset, before the train/test split
# performed later in the notebook, so test rows influence which features
# are selected.
correlation_matrix = char.corrwith(obj)

# Names of the features whose |correlation| is strictly above the threshold
relevant_features = correlation_matrix[correlation_matrix.abs() > correlation_threshold].index
print(relevant_features)

# Restrict the feature table to the selected columns
char_relevant = char.loc[:, relevant_features]

Index(['Smoking', 'Stroke', 'PhysicalHealth', 'DiffWalking', 'Sex',
       'AgeCategory', 'Diabetic', 'PhysicalActivity', 'GenHealth',
       'KidneyDisease', 'SkinCancer'],
      dtype='object')


Split the data into training and test sets

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split reproducible across runs.
char_train, char_test, obj_train, obj_test = train_test_split(
    char_relevant,
    obj,
    test_size=0.2,
    random_state=42,
)


Configure the neural network

In [14]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron classifier with three hidden layers of
# 200, 100 and 50 units. The model is only configured here; it is trained
# in a separate cell.
mlp_model = MLPClassifier(
    random_state=42,                    # fixed seed for reproducible runs
    max_iter=30,                        # upper bound on training iterations
    solver='adam',                      # weight optimizer
    activation='relu',                  # hidden-layer activation function
    hidden_layer_sizes=(200, 100, 50),  # units per hidden layer, in order
)


Train the neural network

In [15]:
# Fit the network on the training split. The call is left as the cell's last
# expression so the fitted estimator is displayed as the cell output.
mlp_model.fit(X=char_train, y=obj_train)



Predictions and evaluation

In [16]:
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Predict labels for the held-out test features
predictions = mlp_model.predict(X=char_test)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_true=obj_test, y_pred=predictions)
recall = recall_score(y_true=obj_test, y_pred=predictions)
f1 = f1_score(y_true=obj_test, y_pred=predictions)

# Report each metric rounded to two decimals
print(f'Accuracy: {accuracy:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-Score: {f1:.2f}')


Accuracy: 0.77
Recall: 0.87
F1-Score: 0.80
