In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Define the entropy calculation function
def calculate_entropy(probabilities):
    """Return the mean Shannon entropy (in nats) of a batch of distributions.

    Args:
        probabilities: Array-like of shape (n_samples, n_classes) where each
            row is one probability distribution.  Lists/tuples are accepted,
            and a 1-D input is treated as a single distribution.

    Returns:
        The mean over rows of ``-sum(p * log(p))`` using the natural
        logarithm, computed in float64.
    """
    epsilon = 1e-10  # floor applied before log() so p == 0 does not give -inf
    # Convert up front: a plain list cannot be added to / clipped like an array.
    probs = np.atleast_2d(np.asarray(probabilities, dtype=float))
    # Clip instead of adding epsilon: log(1 + epsilon) is positive, which made
    # the entropy of a fully confident row come out slightly negative.
    log_probs = np.log(np.clip(probs, epsilon, None))
    return -np.mean(np.sum(probs * log_probs, axis=1))

def create_model(input_dim):
    """Build the uncompiled feed-forward binary classifier.

    The network is an input of width ``input_dim`` followed by five hidden
    stages of 512, 256, 128, 64 and 32 units.  Each stage is a ReLU Dense
    layer, then BatchNormalization, then Dropout(0.5).  The output is a
    single sigmoid unit.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A ``Sequential`` model; compiling it is left to the caller.
    """
    hidden_units = (512, 256, 128, 64, 32)
    dropout_rate = 0.5

    network = Sequential()
    network.add(Input(shape=(input_dim,)))
    # Every hidden stage has the same Dense -> BatchNorm -> Dropout shape.
    for units in hidden_units:
        network.add(Dense(units, activation='relu'))
        network.add(BatchNormalization())
        network.add(Dropout(dropout_rate))
    network.add(Dense(1, activation='sigmoid'))
    return network

# Stratified 10-fold splitter used by the cross-validation loop
kf = StratifiedKFold(n_splits=10)

# X and y are defined earlier in the session; copy them into numpy arrays
# so they can be indexed with the integer fold indices.
X, y = np.array(X), np.array(y)

# Per-fold result accumulators (the loop appends one entry per fold)
accuracies, precisions, recalls, f1s = [], [], [], []
entropies = []  # mean prediction entropy of each fold
risk_weighted_accuracies = []
conf_matrices, reports = [], []

# Cross-validation loop: train a fresh model on each fold's training rows and
# score it on that fold's held-out rows.
for fold, (train_index, test_index) in enumerate(kf.split(X, y)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Keras takes validation_split from the *tail* of the arrays before any
    # shuffling, and the fold's rows are not in random order.  Permute the
    # training rows (seeded per fold, so reruns pick the same rows) so that
    # the validation slice is a random sample rather than whatever happens to
    # sit at the end of the data.
    row_order = np.random.default_rng(fold).permutation(len(y_train))
    X_train, y_train = X_train[row_order], y_train[row_order]

    # Standardize the data; the scaler is fitted on training rows only so no
    # test-fold statistics leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Create and compile the model
    model = create_model(input_dim=X_train.shape[1])
    optimizer = Adam(learning_rate=0.001)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # Define class weights.
    # NOTE(review): positives are weighted n_total / n_pos while negatives stay
    # at 1; a "balanced" scheme would use n_neg / n_pos. Confirm this is intended.
    class_weights = {0: 1, 1: len(y_train) / np.sum(y_train)}

    # Train the model
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    # min_lr must be below the initial learning rate (0.001); when the two are
    # equal the callback can never lower the rate.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)
    model.fit(X_train, y_train, epochs=7000, batch_size=32, validation_split=0.2, class_weight=class_weights, callbacks=[early_stopping, reduce_lr], verbose=0)

    # Predict with the model (silenced, like fit, so the fold report stays readable)
    y_pred_probs = model.predict(X_test, verbose=0).ravel()
    y_pred_opt = (y_pred_probs >= 0.5).astype(int)

    # Calculate and store metrics
    acc = accuracy_score(y_test, y_pred_opt)
    precision = precision_score(y_test, y_pred_opt)
    recall = recall_score(y_test, y_pred_opt)
    f1 = f1_score(y_test, y_pred_opt)
    conf_matrix = confusion_matrix(y_test, y_pred_opt)

    # Calculate entropy over the two-column [P(class 0), P(class 1)] matrix.
    entropy = calculate_entropy(np.vstack((1 - y_pred_probs, y_pred_probs)).T)
    # NOTE(review): the entropy uses the natural log, so for two classes it
    # tops out at ln 2 (about 0.693) and the factor (1 - entropy) never reaches
    # 0. Confirm whether a normalized entropy was intended.
    risk_weighted_accuracy = acc * (1 - entropy)

    accuracies.append(acc)
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)
    entropies.append(entropy)
    risk_weighted_accuracies.append(risk_weighted_accuracy)
    conf_matrices.append(conf_matrix)

    class_report = classification_report(y_test, y_pred_opt)
    reports.append(class_report)

    print(f'Fold: {fold+1}')
    print('Accuracy:', acc)
    print('Confusion Matrix:\n', conf_matrix)
    print('Classification Report:\n', class_report)
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1-score:', f1)
    print('Entropy:', entropy)
    print('Risk-Weighted Accuracy:', risk_weighted_accuracy)
    print('\n')

# Print average metrics (mean ± standard deviation across folds)
print(f'Average Accuracy: {np.mean(accuracies)} ± {np.std(accuracies)}')
print(f'Average Precision: {np.mean(precisions)} ± {np.std(precisions)}')
print(f'Average Recall: {np.mean(recalls)} ± {np.std(recalls)}')
print(f'Average F1-score: {np.mean(f1s)} ± {np.std(f1s)}')
print(f'Average Entropy: {np.mean(entropies)} ± {np.std(entropies)}')
print(f'Average Risk-Weighted Accuracy: {np.mean(risk_weighted_accuracies)} ± {np.std(risk_weighted_accuracies)}')

# Calculate and print the mean confusion matrix.
# Round to the nearest count before converting: a bare astype(int) truncates,
# so a mean of 4.9 would be reported as 4.
mean_conf_matrix = np.rint(np.mean(conf_matrices, axis=0)).astype(int)
print('Mean Confusion Matrix:\n', mean_conf_matrix)
