# OS Status: advanced NN architectures

In [None]:
#Importing necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
#Importing to split the dataset and preprocess the data
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
#Importing to build the models
from tensorflow.keras import layers, regularizers
from tensorflow.keras.layers import LeakyReLU
#Importing to evaluate the models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
#Importing to explain the models
import shap

# Global seed for reproducible runs; 42 is an arbitrary, conventional value.
# set_random_seed seeds the Python, NumPy and TensorFlow generators in one call.
SEED = 42
tf.keras.utils.set_random_seed(seed=SEED)

In [None]:
# Read the dataset into a DataFrame.
# The file is not shipped with the notebook: request it from the author.
data = pd.read_csv(
    'dataset_b',
    sep=',',
    encoding='latin-1',
)

In [3]:
# Columns kept for modelling. "os_status" is the binary target; the rest are predictors.
relevant_columns = [
    'age', 'dcr', 'dnlr', 'histology', 'immuno_line', 'iorr',
    'ldhpre', 'leucotpre', 'nb_meta_beforeimmuno', 'neuttpre',
    'ps_befimmuno', 'sex', 'smoking_history', 'os_status',
]
# Columns cast to int once the rows with missing values have been removed.
integer_columns = ['dcr', 'age', 'iorr', 'ps_befimmuno']

# Keep the relevant columns, drop every row containing a missing value, then cast.
data = (
    data[relevant_columns]
    .dropna(axis=0)
    .astype({column: int for column in integer_columns})
)

In [4]:
# Lower-case the text columns so labels that differ only by case become one category.
for text_column in ('histology', 'sex', 'smoking_history'):
    data[text_column] = data[text_column].str.lower()

In [5]:
# Shuffle the rows; random_state makes the permutation reproducible.
data = data.sample(frac=1, random_state=SEED)

# One-hot encode the three categorical predictors.
one_hot_data = pd.get_dummies(data, columns=['histology', 'sex', 'smoking_history'])

# Rename capitalised dummy columns to their lower-case form
# (every target name is simply the source name lower-cased).
# NOTE(review): the categorical values are lower-cased in an earlier cell, so these
# capitalised names may never exist; rename() then leaves the frame unchanged - confirm.
capitalised_dummies = [
    'histology_Adenocarcinoma',
    'histology_Squamous',
    'histology_Nsclc_other',
    'histology_Large_cells',
    'sex_Male',
    'sex_Female',
    'smoking_history_Non_smoker',
    'smoking_history_Former',
    'smoking_history_Current',
    'smoking_history_Unk',
]
one_hot_data = one_hot_data.rename(
    columns={name: name.lower() for name in capitalised_dummies}
)

In [None]:
# Convert the boolean dummy columns created by the one-hot encoding into 0/1 integers.
# The columns are found by dtype instead of being listed by name, so the cell does not
# raise a KeyError when a label is spelled "nsclc_other" rather than "nsclc other",
# and it does nothing when get_dummies already produced integer columns.
# astype(int) also avoids the downcasting FutureWarning raised by replace({False: 0, True: 1}).
bool_columns = one_hot_data.select_dtypes(include='bool').columns
one_hot_data = one_hot_data.astype({column: int for column in bool_columns})

In [None]:
# Features: every column except the target (Index.difference returns them sorted by name).
feature_columns = one_hot_data.columns.difference(['os_status'])
X = one_hot_data[feature_columns]
# Target: read from the un-encoded frame, which holds the same rows in the same order.
y = data['os_status']

# Step 1: hold out 20% of the rows as the test set.
# stratify keeps the class proportions identical in both parts.
X_temp, X_test, y_temp, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Step 2: split the remaining 80% into training (75%) and validation (25%),
# which gives 60% / 20% / 20% of the whole dataset for train / validation / test.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.25,
    stratify=y_temp,
    random_state=42,
)

In [None]:
# Standardise the numerical features (zero mean, unit variance) so that they are on
# comparable scales and contribute equally to the model.
numerical_features = ['age', 'dcr', 'dnlr', 'ldhpre', 'leucotpre',
                      'nb_meta_beforeimmuno', 'neuttpre', 'ps_befimmuno']

# Every other feature column is left unscaled.
binary_features = [col for col in X.columns if col not in numerical_features]

scaler = StandardScaler()
X_train_scaled = X_train.copy()
X_val_scaled = X_val.copy()
X_test_scaled = X_test.copy()
X_scaled = X.copy()

# Fit the scaler on the training split ONLY, then reuse its statistics for every
# other set. Fitting on the full dataset would leak validation/test information
# and would scale X_scaled differently from the data the models are trained on.
X_train_scaled[numerical_features] = scaler.fit_transform(X_train_scaled[numerical_features])
X_val_scaled[numerical_features] = scaler.transform(X_val_scaled[numerical_features])
X_test_scaled[numerical_features] = scaler.transform(X_test_scaled[numerical_features])
X_scaled[numerical_features] = scaler.transform(X_scaled[numerical_features])


## Funnel MLP

In [None]:
# 1. Funnel MLP (deep funnel network): layer widths shrink towards the output.
#    Input → Dense(512, ReLU) → BN → DO(0.3)
#          → Dense(256, ReLU) → BN → DO(0.3)
#          → Dense(128, ReLU) → BN → DO(0.2)
#          → Dense( 64, ReLU) → BN → DO(0.2)
#          → Dense(1, sigmoid)
# Note: ReLU is applied inside each Dense layer, i.e. before batch normalisation.

# (units, dropout rate) of each hidden stage, from input to output.
funnel_spec = [(512, 0.3), (256, 0.3), (128, 0.2), (64, 0.2)]

funnel_layers = []
for position, (units, drop_rate) in enumerate(funnel_spec):
    # Only the first layer declares the input size (one value per feature column).
    first_layer_kwargs = {'input_shape': (X.shape[1],)} if position == 0 else {}
    funnel_layers.append(layers.Dense(units, activation='relu', **first_layer_kwargs))
    funnel_layers.append(layers.BatchNormalization())
    funnel_layers.append(layers.Dropout(drop_rate))
# Single sigmoid unit: probability of class 1.
funnel_layers.append(layers.Dense(1, activation='sigmoid'))

mlp_model = tf.keras.Sequential(funnel_layers)

# Binary classification: cross-entropy loss, Adam optimiser, accuracy as monitored metric.
mlp_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training split; the validation split is only evaluated after each epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    epochs=500,
    batch_size=16,
    validation_data=(X_val_scaled, y_val),
)

# X axis: one point per completed epoch, numbered from 1.
epochs = range(1, len(history.history['loss']) + 1)

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs, history.history['loss'], 'b', label='Training Loss')
loss_ax.plot(epochs, history.history['val_loss'], 'r', label='Validation Loss')
loss_ax.set(xlabel='Epochs', ylabel='Loss', title='Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs, history.history['accuracy'], 'b', label='Training Accuracy')
acc_ax.plot(epochs, history.history['val_accuracy'], 'r', label='Validation Accuracy')
acc_ax.set(xlabel='Epochs', ylabel='Accuracy', title='Training and Validation accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Sigmoid outputs of the model for the held-out test set.
test_output = mlp_model.predict(X_test_scaled)

# Drop the singleton dimension left by the single output unit.
prob_predictions = test_output.squeeze()

# Threshold at 0.5: probabilities at or above it become class 1, the others class 0.
class_predictions = np.greater_equal(prob_predictions, 0.5).astype(int)

In [11]:
# Scale the full dataset with the statistics learned from the training split.
# fit_transform would refit the shared scaler on all rows, so the models would be
# asked to predict on inputs scaled differently from the ones they were trained on.
X_scaled = X.copy()
X_scaled[numerical_features] = scaler.transform(X_scaled[numerical_features])

In [None]:
# Score the model on the held-out test set, using the predictions from the previous cell.
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions)
recall = recall_score(y_test, class_predictions)
f1 = f1_score(y_test, class_predictions)
# AUC is threshold-free, so it is computed from the probabilities, not the 0/1 labels.
roc_auc = roc_auc_score(y_test, prob_predictions)

# Print the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, class_predictions)
print("Confusion Matrix:")
print(cm)

# Predict every row of the dataset and store the label next to the original data.
# Separate variable names keep the test-set predictions above intact, so this cell
# can be re-run without the metrics failing on arrays whose length differs from y_test.
# NOTE: these rows include the training data, so 'Predicted' is not an out-of-sample estimate.
all_prob_predictions = np.squeeze(mlp_model.predict(X_scaled))
all_class_predictions = (all_prob_predictions >= 0.5).astype(int)
data['Predicted'] = all_class_predictions

print(data.tail(10))

In [None]:
# DataFrame views of the scaled training and test features, with the feature
# names as column labels (used by the SHAP cells).
X_train_scaled_df, X_test_scaled_df = (
    pd.DataFrame(scaled, columns=unscaled.columns)
    for scaled, unscaled in ((X_train_scaled, X_train), (X_test_scaled, X_test))
)

In [None]:
# Explain the trained model with KernelSHAP.
# Background distribution: the scaled training set summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 silences the progress bar Keras would print on each of the many predict() calls.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)

# Explain at most 50 test rows; the cap avoids the ValueError raised by
# DataFrame.sample when the test set holds fewer rows than requested.
X_subset = X_test_scaled_df.sample(min(50, len(X_test_scaled_df)), random_state=42)
shap_values = explainer.shap_values(X_subset, nsamples=50, silent=True)

# The result may be a list or a 3-D array; reduce it to a 2-D array
# by keeping the entry for the model's single output.
if isinstance(shap_values, list):
    shap_values = shap_values[0]
if shap_values.ndim == 3:
    shap_values = shap_values[:, :, 0]

print(f"SHAP values shape: {shap_values.shape}, X_subset shape: {X_subset.shape}")

plt.figure(figsize=(10, 8))
shap.summary_plot(
    shap_values,
    features=X_subset,
    feature_names=X_subset.columns,
    plot_type='dot',
    # Show every feature instead of SHAP's default top-N.
    max_display=len(binary_features + numerical_features),
    show=False
)
plt.title("SHAP Summary Plot – Class 1")
plt.tight_layout()
# One file per architecture, so later models do not overwrite this figure.
plt.savefig("shap_summary_funnel_mlp.png")
plt.show()


## WideRes MLP

In [None]:
# 2. WideRes MLP: wide-but-shallow MLP with one residual (skip) connection
#    Input → Dense(1024, ReLU) → DO(0.3) ─┐
#    Input → Dense(1024, linear) ─────────┴→ Add → ReLU
#          → Dense(1024, ReLU) → DO(0.3)
#          → Dense(1024, ReLU) → DO(0.3)
#          → Dense(1, sigmoid)

def build_model_residual_wide(input_dim):
    """Build the (uncompiled) wide residual MLP.

    Args:
        input_dim: number of input features.

    Returns:
        A tf.keras.Model mapping a feature vector to one sigmoid output.
    """
    inputs = layers.Input(shape=(input_dim,))

    # Main path: non-linear projection of the input, regularised with dropout.
    main_path = layers.Dense(1024, activation='relu')(inputs)
    main_path = layers.Dropout(0.3)(main_path)

    # Shortcut: linear projection of the input to the same width, so both can be added.
    shortcut = layers.Dense(1024)(inputs)
    merged = layers.Add()([main_path, shortcut])
    hidden = layers.Activation('relu')(merged)

    # Two further identical wide stages.
    for _ in range(2):
        hidden = layers.Dense(1024, activation='relu')(hidden)
        hidden = layers.Dropout(0.3)(hidden)

    outputs = layers.Dense(1, activation='sigmoid')(hidden)
    return tf.keras.Model(inputs, outputs)

# One input unit per feature column.
n_features = X.shape[1]
mlp_model = build_model_residual_wide(n_features)

# Binary classification: cross-entropy loss, Adam optimiser, accuracy as monitored metric.
mlp_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training split; the validation split is only evaluated after each epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    epochs=500,
    batch_size=16,
    validation_data=(X_val_scaled, y_val),
)

# X axis: one point per completed epoch, numbered from 1.
epochs = range(1, len(history.history['loss']) + 1)

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs, history.history['loss'], 'b', label='Training Loss')
loss_ax.plot(epochs, history.history['val_loss'], 'r', label='Validation Loss')
loss_ax.set(xlabel='Epochs', ylabel='Loss', title='Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs, history.history['accuracy'], 'b', label='Training Accuracy')
acc_ax.plot(epochs, history.history['val_accuracy'], 'r', label='Validation Accuracy')
acc_ax.set(xlabel='Epochs', ylabel='Accuracy', title='Training and Validation accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Sigmoid outputs of the model for the held-out test set.
test_output = mlp_model.predict(X_test_scaled)

# Drop the singleton dimension left by the single output unit.
prob_predictions = test_output.squeeze()

# Threshold at 0.5: probabilities at or above it become class 1, the others class 0.
class_predictions = np.greater_equal(prob_predictions, 0.5).astype(int)

In [None]:
# Score the model on the held-out test set, using the predictions from the previous cell.
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions)
recall = recall_score(y_test, class_predictions)
f1 = f1_score(y_test, class_predictions)
# AUC is threshold-free, so it is computed from the probabilities, not the 0/1 labels.
roc_auc = roc_auc_score(y_test, prob_predictions)

# Print the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, class_predictions)
print("Confusion Matrix:")
print(cm)

# Predict every row of the dataset and store the label next to the original data.
# Separate variable names keep the test-set predictions above intact, so this cell
# can be re-run without the metrics failing on arrays whose length differs from y_test.
# NOTE: these rows include the training data, so 'Predicted' is not an out-of-sample estimate.
all_prob_predictions = np.squeeze(mlp_model.predict(X_scaled))
all_class_predictions = (all_prob_predictions >= 0.5).astype(int)
data['Predicted'] = all_class_predictions

print(data.tail(10))

In [None]:
# Explain the trained model with KernelSHAP.
# Background distribution: the scaled training set summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 silences the progress bar Keras would print on each of the many predict() calls.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)

# Explain at most 50 test rows; the cap avoids the ValueError raised by
# DataFrame.sample when the test set holds fewer rows than requested.
X_subset = X_test_scaled_df.sample(min(50, len(X_test_scaled_df)), random_state=42)
shap_values = explainer.shap_values(X_subset, nsamples=50, silent=True)

# The result may be a list or a 3-D array; reduce it to a 2-D array
# by keeping the entry for the model's single output.
if isinstance(shap_values, list):
    shap_values = shap_values[0]
if shap_values.ndim == 3:
    shap_values = shap_values[:, :, 0]

print(f"SHAP values shape: {shap_values.shape}, X_subset shape: {X_subset.shape}")

plt.figure(figsize=(10, 8))
shap.summary_plot(
    shap_values,
    features=X_subset,
    feature_names=X_subset.columns,
    plot_type='dot',
    # Show every feature instead of SHAP's default top-N.
    max_display=len(binary_features + numerical_features),
    show=False
)
plt.title("SHAP Summary Plot – Class 1")
plt.tight_layout()
# One file per architecture, so the models do not overwrite each other's figure.
plt.savefig("shap_summary_wideres_mlp.png")
plt.show()


## Self‑Norm MLP

In [None]:
# 3. Self-Norm MLP: self-normalizing network (SELU + AlphaDropout)
#    Input → Dense(512, SELU) → AlphaDropout(0.1)
#          → Dense(512, SELU) → AlphaDropout(0.1)
#          → Dense(256, SELU) → AlphaDropout(0.1)
#          → Dense(1, sigmoid)
# The layers are stacked sequentially: there are no residual additions.
#
# SELU only keeps activations self-normalized when the weights are drawn with the
# LeCun-normal initializer (see the Keras SELU documentation), so every SELU layer
# sets kernel_initializer='lecun_normal' instead of the default Glorot-uniform.

mlp_model = tf.keras.Sequential([
        layers.Dense(512, activation='selu', kernel_initializer='lecun_normal',
                     input_shape=(X.shape[1],)),
        layers.AlphaDropout(0.1),
        layers.Dense(512, activation='selu', kernel_initializer='lecun_normal'),
        layers.AlphaDropout(0.1),
        layers.Dense(256, activation='selu', kernel_initializer='lecun_normal'),
        layers.AlphaDropout(0.1),
        # Single sigmoid unit: probability of class 1.
        layers.Dense(1, activation='sigmoid')
    ])

# Binary classification: cross-entropy loss, Adam optimiser, accuracy as monitored metric.
mlp_model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

# Train on the training split; the validation split is only evaluated after each epoch.
# This model is trained for 200 epochs (the other architectures use 500).
history = mlp_model.fit(X_train_scaled, y_train,
                         validation_data=(X_val_scaled, y_val),
                         epochs=200, batch_size=16)

# X axis: one point per completed epoch, numbered from 1.
epochs = range(1, len(history.history['loss']) + 1)

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs, history.history['loss'], 'b', label='Training Loss')
loss_ax.plot(epochs, history.history['val_loss'], 'r', label='Validation Loss')
loss_ax.set(xlabel='Epochs', ylabel='Loss', title='Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs, history.history['accuracy'], 'b', label='Training Accuracy')
acc_ax.plot(epochs, history.history['val_accuracy'], 'r', label='Validation Accuracy')
acc_ax.set(xlabel='Epochs', ylabel='Accuracy', title='Training and Validation accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Sigmoid outputs of the model for the held-out test set.
test_output = mlp_model.predict(X_test_scaled)

# Drop the singleton dimension left by the single output unit.
prob_predictions = test_output.squeeze()

# Threshold at 0.5: probabilities at or above it become class 1, the others class 0.
class_predictions = np.greater_equal(prob_predictions, 0.5).astype(int)

In [None]:
# Score the model on the held-out test set, using the predictions from the previous cell.
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions)
recall = recall_score(y_test, class_predictions)
f1 = f1_score(y_test, class_predictions)
# AUC is threshold-free, so it is computed from the probabilities, not the 0/1 labels.
roc_auc = roc_auc_score(y_test, prob_predictions)

# Print the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, class_predictions)
print("Confusion Matrix:")
print(cm)

# Predict every row of the dataset and store the label next to the original data.
# Separate variable names keep the test-set predictions above intact, so this cell
# can be re-run without the metrics failing on arrays whose length differs from y_test.
# NOTE: these rows include the training data, so 'Predicted' is not an out-of-sample estimate.
all_prob_predictions = np.squeeze(mlp_model.predict(X_scaled))
all_class_predictions = (all_prob_predictions >= 0.5).astype(int)
data['Predicted'] = all_class_predictions

print(data.tail(10))

In [None]:
# Explain the trained model with KernelSHAP.
# Background distribution: the scaled training set summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 silences the progress bar Keras would print on each of the many predict() calls.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)

# Explain at most 50 test rows; the cap avoids the ValueError raised by
# DataFrame.sample when the test set holds fewer rows than requested.
X_subset = X_test_scaled_df.sample(min(50, len(X_test_scaled_df)), random_state=42)
shap_values = explainer.shap_values(X_subset, nsamples=50, silent=True)

# The result may be a list or a 3-D array; reduce it to a 2-D array
# by keeping the entry for the model's single output.
if isinstance(shap_values, list):
    shap_values = shap_values[0]
if shap_values.ndim == 3:
    shap_values = shap_values[:, :, 0]

print(f"SHAP values shape: {shap_values.shape}, X_subset shape: {X_subset.shape}")

plt.figure(figsize=(10, 8))
shap.summary_plot(
    shap_values,
    features=X_subset,
    feature_names=X_subset.columns,
    plot_type='dot',
    # Show every feature instead of SHAP's default top-N.
    max_display=len(binary_features + numerical_features),
    show=False
)
plt.title("SHAP Summary Plot – Class 1")
plt.tight_layout()
# One file per architecture, so the models do not overwrite each other's figure.
plt.savefig("shap_summary_selfnorm_mlp.png")
plt.show()


## Stacked Narrow MLP

In [None]:
# 4. Stacked Narrow MLP: deep stack of narrow layers
#    Input → Dense(256) → LeakyReLU → DO(0.25)
#          → Dense(128) → LeakyReLU → DO(0.25)
#          → Dense(128) → LeakyReLU → DO(0.2)
#          → Dense( 64) → LeakyReLU → DO(0.2)
#          → Dense( 32) → LeakyReLU → DO(0.2)
#          → Dense( 16) → LeakyReLU → DO(0.15)
#          → Dense(1, sigmoid)
# NOTE(review): an earlier description of this model mentioned residual additions,
# but the Sequential model below contains none - confirm which one is intended.

# (units, dropout rate) of each hidden stage, from input to output.
narrow_spec = [(256, 0.25), (128, 0.25), (128, 0.2), (64, 0.2), (32, 0.2), (16, 0.15)]

narrow_layers = []
for position, (units, drop_rate) in enumerate(narrow_spec):
    # Only the first layer declares the input size (one value per feature column).
    first_layer_kwargs = {'input_shape': (X.shape[1],)} if position == 0 else {}
    # Dense is linear here; the non-linearity is the separate LeakyReLU layer.
    narrow_layers.append(layers.Dense(units, **first_layer_kwargs))
    narrow_layers.append(LeakyReLU())
    narrow_layers.append(layers.Dropout(drop_rate))
# Single sigmoid unit: probability of class 1.
narrow_layers.append(layers.Dense(1, activation='sigmoid'))

mlp_model = tf.keras.Sequential(narrow_layers)

# Binary classification: cross-entropy loss, Adam optimiser, accuracy as monitored metric.
mlp_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training split; the validation split is only evaluated after each epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    epochs=500,
    batch_size=16,
    validation_data=(X_val_scaled, y_val),
)

# X axis: one point per completed epoch, numbered from 1.
epochs = range(1, len(history.history['loss']) + 1)

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs, history.history['loss'], 'b', label='Training Loss')
loss_ax.plot(epochs, history.history['val_loss'], 'r', label='Validation Loss')
loss_ax.set(xlabel='Epochs', ylabel='Loss', title='Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs, history.history['accuracy'], 'b', label='Training Accuracy')
acc_ax.plot(epochs, history.history['val_accuracy'], 'r', label='Validation Accuracy')
acc_ax.set(xlabel='Epochs', ylabel='Accuracy', title='Training and Validation accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Sigmoid outputs of the model for the held-out test set.
test_output = mlp_model.predict(X_test_scaled)

# Drop the singleton dimension left by the single output unit.
prob_predictions = test_output.squeeze()

# Threshold at 0.5: probabilities at or above it become class 1, the others class 0.
class_predictions = np.greater_equal(prob_predictions, 0.5).astype(int)

In [None]:
# Score the model on the held-out test set, using the predictions from the previous cell.
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions)
recall = recall_score(y_test, class_predictions)
f1 = f1_score(y_test, class_predictions)
# AUC is threshold-free, so it is computed from the probabilities, not the 0/1 labels.
roc_auc = roc_auc_score(y_test, prob_predictions)

# Print the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, class_predictions)
print("Confusion Matrix:")
print(cm)

# Predict every row of the dataset and store the label next to the original data.
# Separate variable names keep the test-set predictions above intact, so this cell
# can be re-run without the metrics failing on arrays whose length differs from y_test.
# NOTE: these rows include the training data, so 'Predicted' is not an out-of-sample estimate.
all_prob_predictions = np.squeeze(mlp_model.predict(X_scaled))
all_class_predictions = (all_prob_predictions >= 0.5).astype(int)
data['Predicted'] = all_class_predictions

print(data.tail(10))

In [None]:
# Explain the trained model with KernelSHAP.
# Background distribution: the scaled training set summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 silences the progress bar Keras would print on each of the many predict() calls.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)

# Explain at most 50 test rows; the cap avoids the ValueError raised by
# DataFrame.sample when the test set holds fewer rows than requested.
X_subset = X_test_scaled_df.sample(min(50, len(X_test_scaled_df)), random_state=42)
shap_values = explainer.shap_values(X_subset, nsamples=50, silent=True)

# The result may be a list or a 3-D array; reduce it to a 2-D array
# by keeping the entry for the model's single output.
if isinstance(shap_values, list):
    shap_values = shap_values[0]
if shap_values.ndim == 3:
    shap_values = shap_values[:, :, 0]

print(f"SHAP values shape: {shap_values.shape}, X_subset shape: {X_subset.shape}")

plt.figure(figsize=(10, 8))
shap.summary_plot(
    shap_values,
    features=X_subset,
    feature_names=X_subset.columns,
    plot_type='dot',
    # Show every feature instead of SHAP's default top-N.
    max_display=len(binary_features + numerical_features),
    show=False
)
plt.title("SHAP Summary Plot – Class 1")
plt.tight_layout()
# One file per architecture, so the models do not overwrite each other's figure.
plt.savefig("shap_summary_stacked_narrow_mlp.png")
plt.show()


## NoisyWide MLP

In [None]:
# 5. NoisyWide MLP: wide MLP fed with Gaussian-noise-augmented inputs
#    Input → GaussianNoise(0.1)
#          → Dense(1024, ReLU) → DO(0.3)
#          → Dense( 512, ReLU) → DO(0.3)
#          → Dense( 256, ReLU) → DO(0.2)
#          → Dense(1, sigmoid)

# (units, dropout rate) of each hidden stage, from input to output.
noisywide_spec = [(1024, 0.3), (512, 0.3), (256, 0.2)]

# The noise layer comes first and therefore declares the input size
# (one value per feature column).
noisywide_layers = [layers.GaussianNoise(0.1, input_shape=(X.shape[1],))]
for units, drop_rate in noisywide_spec:
    noisywide_layers.append(layers.Dense(units, activation='relu'))
    noisywide_layers.append(layers.Dropout(drop_rate))
# Single sigmoid unit: probability of class 1.
noisywide_layers.append(layers.Dense(1, activation='sigmoid'))

mlp_model = tf.keras.Sequential(noisywide_layers)

# Binary classification: cross-entropy loss, Adam optimiser, accuracy as monitored metric.
mlp_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training split; the validation split is only evaluated after each epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    epochs=500,
    batch_size=16,
    validation_data=(X_val_scaled, y_val),
)

# X axis: one point per completed epoch, numbered from 1.
epochs = range(1, len(history.history['loss']) + 1)

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs, history.history['loss'], 'b', label='Training Loss')
loss_ax.plot(epochs, history.history['val_loss'], 'r', label='Validation Loss')
loss_ax.set(xlabel='Epochs', ylabel='Loss', title='Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs, history.history['accuracy'], 'b', label='Training Accuracy')
acc_ax.plot(epochs, history.history['val_accuracy'], 'r', label='Validation Accuracy')
acc_ax.set(xlabel='Epochs', ylabel='Accuracy', title='Training and Validation accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Sigmoid outputs of the model for the held-out test set.
test_output = mlp_model.predict(X_test_scaled)

# Drop the singleton dimension left by the single output unit.
prob_predictions = test_output.squeeze()

# Threshold at 0.5: probabilities at or above it become class 1, the others class 0.
class_predictions = np.greater_equal(prob_predictions, 0.5).astype(int)

In [None]:
# Score the model on the held-out test set, using the predictions from the previous cell.
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions)
recall = recall_score(y_test, class_predictions)
f1 = f1_score(y_test, class_predictions)
# AUC is threshold-free, so it is computed from the probabilities, not the 0/1 labels.
roc_auc = roc_auc_score(y_test, prob_predictions)

# Print the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, class_predictions)
print("Confusion Matrix:")
print(cm)

# Predict every row of the dataset and store the label next to the original data.
# Separate variable names keep the test-set predictions above intact, so this cell
# can be re-run without the metrics failing on arrays whose length differs from y_test.
# NOTE: these rows include the training data, so 'Predicted' is not an out-of-sample estimate.
all_prob_predictions = np.squeeze(mlp_model.predict(X_scaled))
all_class_predictions = (all_prob_predictions >= 0.5).astype(int)
data['Predicted'] = all_class_predictions

print(data.tail(10))

In [None]:
# Explain the trained model with KernelSHAP.
# Background distribution: the scaled training set summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 silences the progress bar Keras would print on each of the many predict() calls.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)

# Explain at most 50 test rows; the cap avoids the ValueError raised by
# DataFrame.sample when the test set holds fewer rows than requested.
X_subset = X_test_scaled_df.sample(min(50, len(X_test_scaled_df)), random_state=42)
shap_values = explainer.shap_values(X_subset, nsamples=50, silent=True)

# The result may be a list or a 3-D array; reduce it to a 2-D array
# by keeping the entry for the model's single output.
if isinstance(shap_values, list):
    shap_values = shap_values[0]
if shap_values.ndim == 3:
    shap_values = shap_values[:, :, 0]

print(f"SHAP values shape: {shap_values.shape}, X_subset shape: {X_subset.shape}")

plt.figure(figsize=(10, 8))
shap.summary_plot(
    shap_values,
    features=X_subset,
    feature_names=X_subset.columns,
    plot_type='dot',
    # Show every feature instead of SHAP's default top-N.
    max_display=len(binary_features + numerical_features),
    show=False
)
plt.title("SHAP Summary Plot – Class 1")
plt.tight_layout()
# One file per architecture, so the models do not overwrite each other's figure.
plt.savefig("shap_summary_noisywide_mlp.png")
plt.show()


## ParallelBranch MLP

In [None]:
# 6. ParallelBranch MLP: two identical branches read the same input and are merged
# Input
# ├─ Branch A: Dense(512, ReLU) → Dropout(0.3) → Dense(256, ReLU)
# └─ Branch B: Dense(512, ReLU) → Dropout(0.3) → Dense(256, ReLU)
# Concat → Dense(128, ReLU) → Dropout(0.2) → Dense(1, sigmoid)

def build_model_multibranch(input_dim):
    """Build the (uncompiled) two-branch MLP.

    Args:
        input_dim: number of input features.

    Returns:
        A tf.keras.Model mapping a feature vector to one sigmoid output.
    """
    def _branch(tensor):
        # One branch: Dense(512) → Dropout(0.3) → Dense(256), with its own weights.
        tensor = layers.Dense(512, activation='relu')(tensor)
        tensor = layers.Dropout(0.3)(tensor)
        return layers.Dense(256, activation='relu')(tensor)

    inputs = layers.Input(shape=(input_dim,))
    # The branches share the architecture but not the parameters.
    branch_a = _branch(inputs)
    branch_b = _branch(inputs)

    # Merge the branches and map the joint representation to one probability.
    merged = layers.Concatenate()([branch_a, branch_b])
    head = layers.Dense(128, activation='relu')(merged)
    head = layers.Dropout(0.2)(head)
    outputs = layers.Dense(1, activation='sigmoid')(head)
    return tf.keras.Model(inputs, outputs)

# One input unit per feature column.
n_features = X.shape[1]
mlp_model = build_model_multibranch(n_features)

# Binary classification: cross-entropy loss, Adam optimiser, accuracy as monitored metric.
mlp_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training split; the validation split is only evaluated after each epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    epochs=500,
    batch_size=16,
    validation_data=(X_val_scaled, y_val),
)

# X axis: one point per completed epoch, numbered from 1.
epochs = range(1, len(history.history['loss']) + 1)

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs, history.history['loss'], 'b', label='Training Loss')
loss_ax.plot(epochs, history.history['val_loss'], 'r', label='Validation Loss')
loss_ax.set(xlabel='Epochs', ylabel='Loss', title='Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs, history.history['accuracy'], 'b', label='Training Accuracy')
acc_ax.plot(epochs, history.history['val_accuracy'], 'r', label='Validation Accuracy')
acc_ax.set(xlabel='Epochs', ylabel='Accuracy', title='Training and Validation accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Sigmoid outputs of the model for the held-out test set.
test_output = mlp_model.predict(X_test_scaled)

# Drop the singleton dimension left by the single output unit.
prob_predictions = test_output.squeeze()

# Threshold at 0.5: probabilities at or above it become class 1, the others class 0.
class_predictions = np.greater_equal(prob_predictions, 0.5).astype(int)

In [None]:
# Score the model on the held-out test set, using the predictions from the previous cell.
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions)
recall = recall_score(y_test, class_predictions)
f1 = f1_score(y_test, class_predictions)
# AUC is threshold-free, so it is computed from the probabilities, not the 0/1 labels.
roc_auc = roc_auc_score(y_test, prob_predictions)

# Print the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, class_predictions)
print("Confusion Matrix:")
print(cm)

# Predict every row of the dataset and store the label next to the original data.
# Separate variable names keep the test-set predictions above intact, so this cell
# can be re-run without the metrics failing on arrays whose length differs from y_test.
# NOTE: these rows include the training data, so 'Predicted' is not an out-of-sample estimate.
all_prob_predictions = np.squeeze(mlp_model.predict(X_scaled))
all_class_predictions = (all_prob_predictions >= 0.5).astype(int)
data['Predicted'] = all_class_predictions

print(data.tail(10))

In [None]:
# Explain the trained model with KernelSHAP.
# Background distribution: the scaled training set summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 silences the progress bar Keras would print on each of the many predict() calls.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)

# Explain at most 50 test rows; the cap avoids the ValueError raised by
# DataFrame.sample when the test set holds fewer rows than requested.
X_subset = X_test_scaled_df.sample(min(50, len(X_test_scaled_df)), random_state=42)
shap_values = explainer.shap_values(X_subset, nsamples=50, silent=True)

# The result may be a list or a 3-D array; reduce it to a 2-D array
# by keeping the entry for the model's single output.
if isinstance(shap_values, list):
    shap_values = shap_values[0]
if shap_values.ndim == 3:
    shap_values = shap_values[:, :, 0]

print(f"SHAP values shape: {shap_values.shape}, X_subset shape: {X_subset.shape}")

plt.figure(figsize=(10, 8))
shap.summary_plot(
    shap_values,
    features=X_subset,
    feature_names=X_subset.columns,
    plot_type='dot',
    # Show every feature instead of SHAP's default top-N.
    max_display=len(binary_features + numerical_features),
    show=False
)
plt.title("SHAP Summary Plot – Class 1")
plt.tight_layout()
# One file per architecture, so the models do not overwrite each other's figure.
plt.savefig("shap_summary_parallelbranch_mlp.png")
plt.show()


## L2Light MLP

In [None]:
# 7. L2Light MLP: lightweight MLP with L2 weight penalties on every hidden layer
#    Input → Dense(512, ReLU, L2=0.01)  → DO(0.3)
#          → Dense(256, ReLU, L2=0.01)  → DO(0.3)
#          → Dense(128, ReLU, L2=0.005) → DO(0.2)
#          → Dense(1, sigmoid)

# (units, L2 penalty on the kernel, dropout rate) of each hidden stage.
l2light_spec = [(512, 0.01, 0.3), (256, 0.01, 0.3), (128, 0.005, 0.2)]

l2light_layers = []
for position, (units, l2_weight, drop_rate) in enumerate(l2light_spec):
    # Only the first layer declares the input size (one value per feature column).
    first_layer_kwargs = {'input_shape': (X.shape[1],)} if position == 0 else {}
    l2light_layers.append(
        layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=regularizers.l2(l2_weight),
            **first_layer_kwargs,
        )
    )
    l2light_layers.append(layers.Dropout(drop_rate))
# Single sigmoid unit: probability of class 1 (not regularised).
l2light_layers.append(layers.Dense(1, activation='sigmoid'))

mlp_model = tf.keras.Sequential(l2light_layers)

# Binary classification: cross-entropy loss, Adam optimiser, accuracy as monitored metric.
# The L2 penalties are added to the reported loss by Keras.
mlp_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training split; the validation split is only evaluated after each epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    epochs=500,
    batch_size=16,
    validation_data=(X_val_scaled, y_val),
)

# X axis: one point per completed epoch, numbered from 1.
epochs = range(1, len(history.history['loss']) + 1)

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs, history.history['loss'], 'b', label='Training Loss')
loss_ax.plot(epochs, history.history['val_loss'], 'r', label='Validation Loss')
loss_ax.set(xlabel='Epochs', ylabel='Loss', title='Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs, history.history['accuracy'], 'b', label='Training Accuracy')
acc_ax.plot(epochs, history.history['val_accuracy'], 'r', label='Validation Accuracy')
acc_ax.set(xlabel='Epochs', ylabel='Accuracy', title='Training and Validation accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Sigmoid outputs of the model for the held-out test set.
test_output = mlp_model.predict(X_test_scaled)

# Drop the singleton dimension left by the single output unit.
prob_predictions = test_output.squeeze()

# Threshold at 0.5: probabilities at or above it become class 1, the others class 0.
class_predictions = np.greater_equal(prob_predictions, 0.5).astype(int)

In [None]:
# Score the model on the held-out test set, using the predictions from the previous cell.
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions)
recall = recall_score(y_test, class_predictions)
f1 = f1_score(y_test, class_predictions)
# AUC is threshold-free, so it is computed from the probabilities, not the 0/1 labels.
roc_auc = roc_auc_score(y_test, prob_predictions)

# Print the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, class_predictions)
print("Confusion Matrix:")
print(cm)

# Predict every row of the dataset and store the label next to the original data.
# Separate variable names keep the test-set predictions above intact, so this cell
# can be re-run without the metrics failing on arrays whose length differs from y_test.
# NOTE: these rows include the training data, so 'Predicted' is not an out-of-sample estimate.
all_prob_predictions = np.squeeze(mlp_model.predict(X_scaled))
all_class_predictions = (all_prob_predictions >= 0.5).astype(int)
data['Predicted'] = all_class_predictions

print(data.tail(10))

In [None]:
# Explain the trained model with KernelSHAP.
# Background distribution: the scaled training set summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 silences the progress bar Keras would print on each of the many predict() calls.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)

# Explain at most 50 test rows; the cap avoids the ValueError raised by
# DataFrame.sample when the test set holds fewer rows than requested.
X_subset = X_test_scaled_df.sample(min(50, len(X_test_scaled_df)), random_state=42)
shap_values = explainer.shap_values(X_subset, nsamples=50, silent=True)

# The result may be a list or a 3-D array; reduce it to a 2-D array
# by keeping the entry for the model's single output.
if isinstance(shap_values, list):
    shap_values = shap_values[0]
if shap_values.ndim == 3:
    shap_values = shap_values[:, :, 0]

print(f"SHAP values shape: {shap_values.shape}, X_subset shape: {X_subset.shape}")

plt.figure(figsize=(10, 8))
shap.summary_plot(
    shap_values,
    features=X_subset,
    feature_names=X_subset.columns,
    plot_type='dot',
    # Show every feature instead of SHAP's default top-N.
    max_display=len(binary_features + numerical_features),
    show=False
)
plt.title("SHAP Summary Plot – Class 1")
plt.tight_layout()
# One file per architecture, so the models do not overwrite each other's figure.
plt.savefig("shap_summary_l2light_mlp.png")
plt.show()
