## Design of optimal ANN architecture.

The optimal architecture was determined through cross-validation and classification metrics on the test subset, **considering all the medical data in order to have the better performance in the distributed learning architectures.**

In [1]:
# Importing libraries and modules
import numpy as np  # Numerical operations and array handling
import matplotlib.pylab as plt  # Data visualization
import os  # Operating system functionalities
import pandas as pd  # Data manipulation and analysis
import random as rn  # Random number generator

from sklearn.model_selection import train_test_split  # Splitting data into training and testing sets
from sklearn.preprocessing import MinMaxScaler  # Scaling features to a range
from sklearn.model_selection import GridSearchCV  # Grid search for hyperparameter tuning

from tensorflow.keras.models import Sequential # For linear stacking of layers
# For creating densely-connected neural network layers, dropout regularization, and normalizing inputs in layers:
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam # For efficient gradient descent optimization
from tensorflow.random import set_seed # To set global random seed in tensorflow
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier  # Keras adapter for use in Scikit-learn

2024-01-28 19:32:51.814053: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-28 19:32:51.984665: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Set seed:
np.random.seed(0)
rn.seed(0)
set_seed(0)

In [3]:
# Loading processed and curated dataset:
dfHeart = pd.read_csv("/srv/heart_ConditionalMeanImputation.csv")
dfHeart

Unnamed: 0,Sex_F,Sex_M,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,ExerciseAngina_N,...,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,...,0.0,1.0,0.0,40,140.0,289.0,0,172,0.0,0
1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,1.0,0.0,49,160.0,180.0,0,156,1.0,1
2,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,1.0,37,130.0,283.0,0,98,0.0,0
3,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,48,138.0,214.0,0,108,1.5,1
4,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,...,0.0,1.0,0.0,54,150.0,195.0,0,122,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,...,0.0,1.0,0.0,45,110.0,264.0,0,132,1.2,1
914,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,68,144.0,193.0,1,141,3.4,1
915,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,57,130.0,131.0,0,115,1.2,1
916,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,0.0,0.0,57,130.0,236.0,0,174,0.0,1


In [4]:
X = dfHeart.drop('HeartDisease', axis=1)
y = dfHeart['HeartDisease']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
# Scaling the data
scaler = MinMaxScaler()
scaler = scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
# Function to create the KerasClassifier model
def create_model():
    shape = X.shape[1]
    model = Sequential()
    model.add(Dense(units=256, input_shape=(shape,), activation="relu"))
    #, kernel_regularizer=regularizers.l1_l2(0.01)
    # model.add(BatchNormalization())
    # model.add(Dropout(0.2))
    model.add(Dense(units=256, activation="relu"))
    # model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(units=128, activation="relu"))
    # model.add(BatchNormalization())
    # model.add(Dropout(0.2))
    model.add(Dense(units=128, activation="relu"))
    # model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(units=64, activation="relu"))
    # model.add(BatchNormalization())
    # model.add(Dropout(0.5))
    model.add(Dense(units=64, activation="relu"))
    # model.add(BatchNormalization())
    # model.add(Dropout(0.5))
    model.add(Dense(units=1, activation="sigmoid"))
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss="binary_crossentropy",
                  metrics=["accuracy", "AUC"])
    return model

# Create the KerasClassifier model
model = KerasClassifier(build_fn=create_model)

# Define the parameters to search
param_grid = {
    'batch_size': [8, 14, 16, 18, 20],
    'epochs': [25, 50, 75, 85, 100]
}

# Create the GridSearchCV object
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)

# Train and fine-tune the hyperparameters
grid_result = grid.fit(X_train_scaled, y_train, verbose=0)

# Print the results
print("Best hyperparameters:", grid_result.best_params_)
print("Best mean accuracy:", grid_result.best_score_)

  model = KerasClassifier(build_fn=create_model)
2024-01-28 19:32:53.623024: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2024-01-28 19:32:53.623072: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:163] no NVIDIA GPU device is present: /dev/nvidia0 does not exist
2024-01-28 19:32:53.623450: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Best hyperparameters: {'batch_size': 18, 'epochs': 25}
Best mean accuracy: 0.8473957578341166


In [6]:
shape = X.shape[1]
model = Sequential()
model.add(Dense(units=128, input_shape=(shape,), activation="relu"))
# , kernel_regularizer=regularizers.l1_l2(0.01)
# model.add(BatchNormalization())
# model.add(Dropout(0.2))
model.add(Dense(units=128, activation="relu"))
# model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(units=128, activation="relu"))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))
model.add(Dense(units=64, activation="relu"))
# model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(units=64, activation="relu"))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))
model.add(Dense(units=64, activation="relu"))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))

model.add(Dense(units=1, activation="sigmoid"))
model.compile(optimizer=Adam(learning_rate=0.001),
              loss="binary_crossentropy",
              metrics=["accuracy", "AUC"])

In [7]:
history = model.fit(X_train_scaled,y_train, epochs=grid_result.best_params_['epochs'], batch_size=grid_result.best_params_['batch_size'], verbose=0)
print('Train loss:', history.history['loss'][-1])
print('Train accuracy:', history.history['accuracy'][-1])
print('Train AUC:', history.history['auc'][-1])

Train loss: 0.24566791951656342
Train accuracy: 0.8982558250427246
Train AUC: 0.9602286219596863


In [8]:
score = model.evaluate(X_test_scaled, y_test)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
print('Test AUC:', score[2])

Test loss: 0.5183084011077881
Test accuracy: 0.8260869383811951
Test AUC: 0.8924464583396912


## SEARCHING OPTIMAL ARCHITECTURE

-  100-100-1 <br>
  - Best hyperparameters: {'batch_size': 14, 'epochs': 35} <br>
  - Best mean accuracy: 0.82 <br>
    - Train loss: 0.51 <br>
    - Train accuracy: 0.84 <br>
    - Train AUC: 0.91 <br>
    - Test loss: 3.13 <br>
    - Test accuracy: 0.60 <br>
    - Test AUC: 0.605 <br>

-  128-64-32-1 <br>
  - Best hyperparameters: {'batch_size': 16, 'epochs': 42} <br>
  - Best mean accuracy: 0.79 <br>
    - Train loss: 0.49 <br>
    - Train accuracy: 0.85 <br>
    - Train AUC: 0.92 <br>
    - Test loss: 6.75 <br>
    - Test accuracy: 0.40 <br>
    - Test AUC: 0.51 <br>

-  256-128-64-32-16-1 <br>
  - Best hyperparameters: {'batch_size': 12, 'epochs': 42} <br>
  - Best mean accuracy: 0.71 <br>
    - Train loss: 0.73 <br>
    - Train accuracy: 0.69 <br>
    - Train AUC: 0.77 <br>
    - Test loss: 2.58 <br>
    - Test accuracy: 0.61 <br>
    - Test AUC: 0.67 <br>

-  256-256-128-64 <br>
  - Best hyperparameters: {'batch_size': 18, 'epochs': 42} <br>
  - Best mean accuracy: 0.73 <br>
    - Train loss: 0.56 <br>
    - Train accuracy: 0.82 <br>
    - Train AUC: 0.90 <br>
    - Test loss: 0.97 <br>
    - Test accuracy: 0.69 <br>
    - Test AUC: 0.83 <br>

-  256-256-256-128-32 <br>
  - Best hyperparameters: {'batch_size': 12, 'epochs': 35} <br>
  - Best mean accuracy: 0.71 <br>
    - Train loss: 0.53 <br>
    - Train accuracy: 0.76 <br>
    - Train AUC: 0.82 <br>
    - Test loss: 1.54 <br>
    - Test accuracy: 0.60 <br>
    - Test AUC: 0.62 <br>

-  256-256-256-32-32 <br>
  - Best hyperparameters: {'batch_size': 12, 'epochs': 40} <br>
  - Best mean accuracy: 0.73 <br>
    - Train loss: 0.56 <br>
    - Train accuracy: 0.81 <br>
    - Train AUC: 0.87 <br>
    - Test loss: 1.66 <br>
    - Test accuracy: 0.6 <br>
    - Test AUC: 0.84 <br>

-  256-256-256-128-128 <br>
  - Best hyperparameters: {'batch_size': 16, 'epochs': 38} <br>
  - Best mean accuracy: 0.7 <br>
    - Train loss: 0.47 <br>
    - Train accuracy: 0.86 <br>
    - Train AUC: 0.92 <br>
    - Test loss: 0.89 <br>
    - Test accuracy: 0.67 <br>
    - Test AUC: 0.88 <br>

-  256-256-256-128-64 <br>
  - Best hyperparameters: {'batch_size': 10, 'epochs': 44} <br>
  - Best mean accuracy: 0.80 <br>
    - Train loss: 0.63 <br>
    - Train accuracy: 0.77 <br>
    - Train AUC: 0.84 <br>
    - Test loss: 1.06 <br>
    - Test accuracy: 0.6 <br>
    - Test AUC: 0.76 <br>

-  256-256-256-128-64-32 <br>
  - Best hyperparameters: {'batch_size': 12, 'epochs': 42} <br>
  - Best mean accuracy: 0.76 <br>
    - Train loss: 0.67 <br>
    - Train accuracy: 0.77 <br>
    - Train AUC: 0.83 <br>
    - Test loss: 1.02 <br>
    - Test accuracy: 0.62 <br>
    - Test AUC: 0.72 <br>

-  256-256-128-128-64-32 <br>
  - Best hyperparameters: {'batch_size': 10, 'epochs': 30} <br>
  - Best mean accuracy: 0.69 <br>
    - Train loss: 0.87 <br>
    - Train accuracy: 0.65 <br>
    - Train AUC: 0.7 <br>
    - Test loss: 0.84 <br>
    - Test accuracy: 0.65 <br>
    - Test AUC: 0.72 <br>

-  **128-128-128-64-64-64** <br>
  - Best hyperparameters: {'batch_size': 18, 'epochs': 50} <br>
  - Best mean accuracy: 0.87 <br>
    - Train loss: 0.36 <br>
    - Train accuracy: 0.85 <br>
    - Train AUC: 0.92 <br>
    - **Test loss: 0.40** <br>
    - **Test accuracy: 0.82** <br>
    - **Test AUC: 0.89** <br>