In [35]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.metrics import (
    roc_curve, auc,
    precision_recall_curve, average_precision_score,
    confusion_matrix, f1_score, roc_auc_score
)
from viz import csv_best_plots, plot_and_save_metrics
import glob

In [15]:
os.makedirs("robust_viz", exist_ok=True)
os.makedirs("robust_log", exist_ok=True)

Plotting Functions NEED TO MOVE TO SEPERATE FILE LATER

In [16]:
def plot_avg_history(epochs, avg_metrics, std_metrics):
    plt.figure(figsize=(12, 6))

    # Loss
    plt.subplot(1, 2, 1)
    plt.plot(epochs, avg_metrics['loss'], label='Train Loss')
    plt.fill_between(epochs, 
                     np.array(avg_metrics['loss']) - np.array(std_metrics['loss']),
                     np.array(avg_metrics['loss']) + np.array(std_metrics['loss']),
                     alpha=0.2)
    plt.plot(epochs, avg_metrics['val_loss'], label='Val Loss')
    plt.fill_between(epochs, 
                     np.array(avg_metrics['val_loss']) - np.array(std_metrics['val_loss']),
                     np.array(avg_metrics['val_loss']) + np.array(std_metrics['val_loss']),
                     alpha=0.2)
    plt.title("Loss over Epochs")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()

    # Accuracy
    plt.subplot(1, 2, 2)
    plt.plot(epochs, avg_metrics['accuracy'], label='Train Accuracy')
    plt.fill_between(epochs, 
                     np.array(avg_metrics['accuracy']) - np.array(std_metrics['accuracy']),
                     np.array(avg_metrics['accuracy']) + np.array(std_metrics['accuracy']),
                     alpha=0.2)
    plt.plot(epochs, avg_metrics['val_accuracy'], label='Val Accuracy')
    plt.fill_between(epochs, 
                     np.array(avg_metrics['val_accuracy']) - np.array(std_metrics['val_accuracy']),
                     np.array(avg_metrics['val_accuracy']) + np.array(std_metrics['val_accuracy']),
                     alpha=0.2)
    plt.title("Accuracy over Epochs")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()

    plt.tight_layout()
    plt.savefig("robust_viz/history_curves.png")
    plt.close()

In [17]:
def plot_confusion_matrix_with_se(conf_matrix, annotations):
    plt.figure(figsize=(6, 5))
    sns.heatmap(conf_matrix, annot=annotations, fmt='', cmap="Blues", cbar=False, square=True,
                xticklabels=['Predicted 0', 'Predicted 1'],
                yticklabels=['Actual 0', 'Actual 1'])

    plt.title('Average Confusion Matrix with SE')
    plt.xlabel('Prediction')
    plt.ylabel('Actual')
    plt.tight_layout()
    plt.savefig("robust_viz/avg_confusion_matrix.png")
    plt.close()

In [18]:
def plot_metric_distribution(values, metric_name, filename):
    mean, ci_lower, ci_upper = np_95ci(values)

    plt.figure(figsize=(8, 5))
    plt.hist(values, bins=15, color='skyblue', edgecolor='black', alpha=0.7)
    plt.axvline(mean, color='red', linestyle='--', label=f'Mean = {mean:.3f}')
    plt.axvline(ci_lower, color='green', linestyle=':', label=f'95% CI Lower = {ci_lower:.3f}')
    plt.axvline(ci_upper, color='green', linestyle=':', label=f'95% CI Upper = {ci_upper:.3f}')
    
    plt.title(f'{metric_name} Distribution with 95% CI')
    plt.xlabel(metric_name)
    plt.ylabel('Frequency')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f"robust_viz/{filename}")
    plt.close()

STATS FUNCTIONS MOVE TO SEPERATE FILE

In [19]:
def np_95ci(data):
    mean = np.mean(data)
    std = np.std(data, ddof=1)  # sample standard deviation
    se = std / np.sqrt(len(data))
    ci_lower = mean - 1.96 * se
    ci_upper = mean + 1.96 * se
    return mean, ci_lower, ci_upper

In [20]:
def mean_se(values):
    values = np.array(values)
    return np.mean(values), np.std(values, ddof=1) / np.sqrt(len(values))

In [21]:
def bootstrap(x_train, y_train):
    x_train = pd.DataFrame(x_train)
    y_train = pd.DataFrame(y_train)
    k = len(x_train)
    idx = np.random.choice(k, size = k,  replace = True)
    return x_train.iloc[idx], y_train.iloc[idx]

PREPROCESSING

In [22]:
data = pd.read_csv("../data/endometriosis_dataset.csv")

X = data.drop(columns=['disease']).values
y = data['disease'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [23]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

TRAINING SCRIPTS

In [24]:
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [25]:
for i in range(50):

    X_boot, y_boot = bootstrap(X_train, y_train)

    model.compile(optimizer=Adam(learning_rate=1e-3),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    csv_logger = tf.keras.callbacks.CSVLogger(f"robust_log/training_log_original_{i}.csv", append=True)
    
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath = f"models_robust/model_original_{i}_epoch{{epoch:02d}}.keras",
        save_weights_only = False,
        save_best_only=False, # save every epoch, not just best
        verbose=1
    )
    
    history = model.fit(
        X_boot,
        y_boot,
        epochs=50,
        batch_size=32,
        validation_data= (X_val, y_val),
        callbacks = [csv_logger, checkpoint]
    )

Epoch 1/50


2025-05-06 13:14:56.780894: I external/local_xla/xla/service/service.cc:168] XLA service 0x1503c0696900 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-05-06 13:14:56.780926: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA L4, Compute Capability 8.9
2025-05-06 13:14:56.785573: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-05-06 13:14:56.805926: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907
I0000 00:00:1746551696.872764   34220 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 1: saving model to models_robust/model_original_0_epoch01.keras
Epoch 2/50
1/8 [==>...........................] - ETA: 0s - loss: 0.8582 - accuracy: 0.9375
Epoch 2: saving model to models_robust/model_original_0_epoch02.keras
Epoch 3/50
1/8 [==>...........................] - ETA: 0s - loss: 1.5278 - accuracy: 0.9062
Epoch 3: saving model to models_robust/model_original_0_epoch03.keras
Epoch 4/50
1/8 [==>...........................] - ETA: 0s - loss: 0.9870 - accuracy: 0.9062
Epoch 4: saving model to models_robust/model_original_0_epoch04.keras
Epoch 5/50
1/8 [==>...........................] - ETA: 0s - loss: 0.2600 - accuracy: 0.9688
Epoch 5: saving model to models_robust/model_original_0_epoch05.keras
Epoch 6/50
1/8 [==>...........................] - ETA: 0s - loss: 1.4371 - accuracy: 0.9375
Epoch 6: saving model to models_robust/model_original_0_epoch06.keras
Epoch 7/50
1/8 [==>...........................] - ETA: 0s - loss: 0.0160 - accuracy: 1.0000
Epoch 7: saving model to mod

STATS SCRIPTS

In [26]:
# Step 1: Load all CSVs into a list of DataFrames
csv_files = glob.glob("robust_log/training_log_original_*.csv")
histories = [pd.read_csv(f) for f in csv_files]

# Step 2: Stack the metrics for each epoch
metrics = ['loss', 'accuracy', 'val_loss', 'val_accuracy']
avg_metrics = {m: [] for m in metrics}
std_metrics = {m: [] for m in metrics}
epochs = histories[0]['epoch']  # Assuming all runs have the same epoch range

In [42]:
for epoch in epochs:
    for metric in metrics:
        values = [h.loc[epoch, metric] for h in histories]
        avg_metrics[metric].append(np.mean(values))
        std_metrics[metric].append(np.std(values))

In [37]:
model_paths = glob.glob("models_robust/*_epoch50.keras")
model_paths.sort()

tp_list = []
fp_list = []
tn_list = []
fn_list = []
f1_scores = []
auc_scores = []


for model_path in model_paths:
    model = tf.keras.models.load_model(model_path)
    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob > 0.5).astype(int)

    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred_prob)
    
    f1_scores.append(f1)
    auc_scores.append(auc)
    
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    
    tp_list.append(tp)
    fp_list.append(fp)
    tn_list.append(tn)
    fn_list.append(fn)
    
# Compute mean and SE for each confusion matrix component
tp_mean, tp_se = mean_se(tp_list)
fp_mean, fp_se = mean_se(fp_list)
tn_mean, tn_se = mean_se(tn_list)
fn_mean, fn_se = mean_se(fn_list)

# Construct the matrix and annotation array
conf_matrix = np.array([[tn_mean, fp_mean],
                        [fn_mean, tp_mean]])

annotations = np.array([[f"{tn_mean:.1f}\n±{tn_se:.1f}", f"{fp_mean:.1f}\n±{fp_se:.1f}"],
                        [f"{fn_mean:.1f}\n±{fn_se:.1f}", f"{tp_mean:.1f}\n±{tp_se:.1f}"]])



PLOTTING SCRIPTS

In [43]:
plot_metric_distribution(f1_scores, "F1 Score", "f1_score_robust")
plot_metric_distribution(auc_scores, "AUC Score", "AUC_score_robust")
plot_confusion_matrix_with_se(conf_matrix, annotations)
plot_avg_history(epochs, avg_metrics, std_metrics)

In [45]:
print(mean_se(f1_scores),mean_se(auc_scores))

(0.9659922307568748, 0.002160392362343558) (0.896936507936508, 0.00458851871947309)
