# RespGroup_fromTTP: advanced NN architectures 

In [None]:
#Importing the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
#Importing to preprocess the data
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler, LabelEncoder 
from sklearn.preprocessing import label_binarize
#Importing to build the models
from tensorflow.keras import layers, regularizers, models
from tensorflow.keras.layers import LeakyReLU
#Importing to evaluate the models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
#Importing to explain the models
import shap

# Fix the random seed for reproducibility; 42 is an arbitrary, conventional choice.
SEED = 42
# Seeds Keras' random sources in one call (per the Keras docs this covers
# Python's `random`, NumPy and the TensorFlow backend).
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Read the study dataset (comma-separated, Latin-1 encoded).
# The CSV is not shipped with this notebook: request the dataset to the author.
data = pd.read_csv(
    'dataset_d.csv',
    sep=',',
    encoding='latin-1',
)

In [None]:
# Multi-class classification problem; the target column is "progression_cat"
# (listed last). Only the columns below are kept for modelling.
relevant_columns = [
    'age', 'sex', 'smoking', 'ps_at_diagnosis_ad', 'n#_mets_sites',
    'lung_only_m1', 'pleural', 'pericard', 'lymph_nodes_only_m1', 'soft_tissue',
    'leptomingeal', 'skin', 'peritoneal', 'renal', 'pancreas',
    'brain', 'liver', 'bone', 'adrenal', 'histology',
    'hbbaselineio', 'leucotbaselineio',
    'neut_abs...143', 'linfo_abs...144', 'baso_abs...145', 'mono_abs...147',
    'plaqtbaselineio', 'progression_cat',
]

# Work on an independent copy so the raw `data` frame stays untouched.
df = data[relevant_columns].copy()
df.shape

In [None]:
# Discard every row that has a missing value in any of the selected columns.
df = df.dropna(axis='index', how='any')
df.shape

In [None]:
# Shuffle the rows reproducibly.
df = df.sample(frac=1, random_state=SEED)

# Columns holding whole-number values (performance status, number of
# metastatic sites and the per-site flags) are cast to int.
var_int = ['ps_at_diagnosis_ad', 'n#_mets_sites', 'lung_only_m1', 'pleural', 'pericard', 'lymph_nodes_only_m1', 'soft_tissue',
           'leptomingeal','skin','peritoneal','renal','pancreas', 'brain', 'liver', 'bone', 'adrenal']
df = df.astype({column: int for column in var_int})

# Normalise the casing of 'sex' before one-hot encoding it.
df['sex'] = df['sex'].str.lower()

# dtype=int makes every dummy column an integer straight away. The previous
# hard-coded conversion list raised KeyError when a category was absent and
# left any unlisted dummy (e.g. an extra histology level) as bool, which
# yields a mixed-dtype feature matrix.
sex_dummies = pd.get_dummies(df['sex'], prefix='sex', drop_first=True, dtype=int)
other_dummies = pd.get_dummies(df[['histology', 'smoking']], dtype=int)

df_encoded = pd.concat([df.drop(columns=['sex', 'histology', 'smoking']),
                        sex_dummies, other_dummies], axis=1)

# Kept for backward compatibility: the names of all generated dummy columns
# (already integer-typed, so no further conversion is required).
cols_to_convert = [*other_dummies.columns, *sex_dummies.columns]

In [None]:
# Features are all encoded columns except the target; the target is kept apart.
feature_columns = df_encoded.columns.difference(['progression_cat'])
X = df_encoded[feature_columns]
y = df_encoded['progression_cat']

# Encode the target categories as integer class indices
# (le.inverse_transform maps them back later on).
le = LabelEncoder()
y_encoded = le.fit(y).transform(y)

In [None]:
# Step 1: Find the indices of each class
y = np.array(y_encoded)
idx_class_0 = np.where(y == 0)[0]
idx_class_1 = np.where(y == 1)[0]
idx_class_2 = np.where(y == 2)[0]

# Total test set size (20% of the dataset)
test_size = int(0.2 * len(y))

# Step 2: Randomly force up to 5 samples of class 1 into the test set.
# The count is capped by the class size and by the test budget so that very
# small datasets do not make np.random.choice raise.
np.random.seed(SEED)  # same seed constant as the rest of the notebook
n_class_1_test = min(5, len(idx_class_1), test_size)
idx_class_1_test = np.random.choice(idx_class_1, size=n_class_1_test, replace=False)

# Step 3: Build the rest of the test indices
# First, remove the already selected class 1 samples from the full index list
remaining_idx = np.setdiff1d(np.arange(len(y)), idx_class_1_test)

# How many more samples are needed to reach the desired test size
other_needed = max(test_size - len(idx_class_1_test), 0)

# Randomly select the remaining test samples (any class)
idx_remaining_test = np.random.choice(remaining_idx, size=other_needed, replace=False)

# Step 4: Combine all test indices and define the train+validation indices
idx_test = np.concatenate([idx_class_1_test, idx_remaining_test])
idx_train_val = np.setdiff1d(np.arange(len(y)), idx_test)

# Step 5: Create the corresponding test and train+validation sets.
# The indices are positional, hence .iloc on the DataFrame.
X_test = X.iloc[idx_test]
y_test = y[idx_test]
X_train_val = X.iloc[idx_train_val]
y_train_val = y[idx_train_val]

# Step 6: Split train and validation sets with stratification
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.2,
    random_state=SEED,
    stratify=y_train_val
)


In [None]:
# Report the shape of every feature split.
for caption, split in (
    ("Shape of X_train_val:", X_train_val),
    ("Shape of X_train:", X_train),
    ("Shape of X_val:", X_val),
    ("Shape of X_test:", X_test),
):
    print(caption, split.shape)

In [None]:
# 0/1 indicator columns: left unscaled.
binary_features = [
    'lung_only_m1', 'pleural', 'pericard', 'lymph_nodes_only_m1', 'soft_tissue',
    'leptomingeal', 'skin', 'peritoneal', 'renal', 'pancreas',
    'brain', 'liver', 'bone', 'adrenal',
    'histology_adenocarcinoma', 'histology_nsclc', 'histology_squamous',
    'sex_male', 'smoking_current', 'smoking_former', 'smoking_non-smoker',
]
# Columns that get standardised.
numeric_features = [
    'neut_abs...143', 'linfo_abs...144', 'plaqtbaselineio', 'age',
    'ps_at_diagnosis_ad', 'n#_mets_sites', 'leucotbaselineio',
    'hbbaselineio', 'baso_abs...145', 'mono_abs...147',
]

# The train+validation frame is standardised with its own statistics.
X_train_val_scaled = X_train_val.copy()
X_train_val_scaled[numeric_features] = StandardScaler().fit_transform(
    X_train_val[numeric_features]
)

# `scaler` is fitted on the training split only and then applied, unchanged,
# to the validation and test splits.
scaler = StandardScaler()

X_train_scaled = X_train.copy()
X_train_scaled[numeric_features] = scaler.fit_transform(X_train[numeric_features])

X_val_scaled = X_val.copy()
X_val_scaled[numeric_features] = scaler.transform(X_val[numeric_features])

X_test_scaled = X_test.copy()
X_test_scaled[numeric_features] = scaler.transform(X_test[numeric_features])

## Funnel MLP

In [None]:
# 1. Funnel MLP (Deep Funnel Network)
#    Layer order as built below (ReLU is applied inside each Dense layer,
#    i.e. before batch normalisation):
#    Input → Dense(512, ReLU) → BN → DO(0.3)
#          → Dense(256, ReLU) → BN → DO(0.3)
#          → Dense(128, ReLU) → BN → DO(0.2)
#          → Dense(64,  ReLU) → BN → DO(0.2)
#          → Dense(3, softmax)

n_features = X.shape[1]

mlp_model = tf.keras.Sequential([
    layers.Dense(512, activation='relu', input_shape=(n_features,)),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(3, activation='softmax'),
])

# Labels are integer class indices, hence the sparse cross-entropy loss.
mlp_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split; the validation split is evaluated every epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    batch_size=16,
    epochs=500,
    validation_data=(X_val_scaled, y_val),
)

curves = history.history
epochs = range(1, len(curves['loss']) + 1)

# Learning curves: loss on the left panel, accuracy on the right panel.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

ax_loss.plot(epochs, curves['loss'], 'b', label='Training Loss')
ax_loss.plot(epochs, curves['val_loss'], 'r', label='Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.legend()

ax_acc.plot(epochs, curves['accuracy'], 'b', label='Training Accuracy')
ax_acc.plot(epochs, curves['val_accuracy'], 'r', label='Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training and Validation accuracy')
ax_acc.legend()

plt.show()

In [None]:
# Class probabilities predicted for the test split.
prob_predictions = mlp_model.predict(X_test_scaled)

# Index of the most probable class per sample, then decoded to the original labels.
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)


In [None]:
# Standardise the full feature matrix with the scaler that was fitted on the
# training split, so the inputs match the scaling the models are trained on.
# Re-fitting here (fit_transform) would change that scaling and fold
# validation/test statistics into the shared `scaler`.
X_scaled = X.copy()
X_scaled[numeric_features] = scaler.transform(X_scaled[numeric_features])

In [None]:
# Test-set metrics; precision, recall and F1 use sklearn's 'weighted' averaging.
weighted_avg = {'average': 'weighted'}
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions, **weighted_avg)
recall = recall_score(y_test, class_predictions, **weighted_avg)
f1 = f1_score(y_test, class_predictions, **weighted_avg)

# Macro one-vs-rest AUC, computed against the binarised test labels.
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
roc_auc = roc_auc_score(
    y_test_bin,
    prob_predictions,
    multi_class='ovr',
    average='macro',
)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

print(classification_report(y_test, class_predictions))
print(confusion_matrix(y_test, class_predictions))

# Predict every row of X_scaled and store the decoded label next to the data.
# NOTE: this overwrites the test-set prob_predictions / class_predictions.
prob_predictions = mlp_model.predict(X_scaled)
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)
df['Predicted'] = original_predictions

print(df.tail(10))

In [None]:
# DataFrame views of the scaled splits, labelled with the feature names
# (X_test_scaled_df is what the SHAP cells explain and plot).
X_train_scaled_df = pd.DataFrame(data=X_train_scaled, columns=X_train.columns)
X_train_val_scaled_df = pd.DataFrame(data=X_train_val_scaled, columns=X_train_val.columns)
X_test_scaled_df = pd.DataFrame(data=X_test_scaled, columns=X_test.columns)

In [None]:
# Explain the current mlp_model on the test split with KernelSHAP.
# The background set is X_train_scaled summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 stops Keras from printing a progress bar for each of the many
# predict() calls KernelExplainer issues.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)
shap_values = explainer.shap_values(X_test_scaled_df, nsamples=100, silent=True)

class_names = le.classes_
for i, class_name in enumerate(class_names):
    # Older SHAP releases return a list with one (samples, features) array per
    # class; newer ones return a single (samples, features, classes) array.
    if isinstance(shap_values, list):
        class_shap_values = shap_values[i]
    else:
        class_shap_values = shap_values[:, :, i]

    shap.summary_plot(
        class_shap_values,
        features=X_test_scaled_df,
        feature_names=X_test_scaled_df.columns,
        plot_type='dot',
        max_display=len(binary_features + numeric_features),
        show=False
    )
    plt.title(f"Class {class_name}")
    # bbox_inches='tight' keeps long feature labels from being cropped.
    plt.savefig(f"shap_plot_class_{class_name}.png", bbox_inches='tight')
    plt.close()


# Reload the saved per-class plots and show them side by side, one panel per
# class (squeeze=False keeps `axs` an array even for a single class).
fig, axs = plt.subplots(1, len(class_names), figsize=(16, 12), squeeze=False)
axs = axs.flatten()

for i, class_name in enumerate(class_names):
    img = plt.imread(f"shap_plot_class_{class_name}.png")
    axs[i].imshow(img)
    axs[i].axis('off')
    axs[i].set_title(f"Class {class_name}")

plt.tight_layout()
plt.show()

## WideRes MLP

In [None]:
# 2. WideRes MLP, Wide‑but‑Shallow Residual MLP
#    Input → Dense(1024, ReLU) → DO(0.3) ─┐
#    Input → Dense(1024, linear) ─────────┴→ Add → ReLU
#          → Dense(1024, ReLU) → DO(0.3)
#          → Dense(1024, ReLU) → DO(0.3)
#          → Dense(3, softmax)

def build_model_residual_wide(input_dim):
    """Build the wide residual MLP.

    The input feeds a Dense(1024, ReLU) + Dropout(0.3) path and, in parallel,
    a linear Dense(1024) projection; both are summed and passed through a
    ReLU. Two further Dense(1024, ReLU) + Dropout(0.3) stages precede the
    3-unit softmax output.

    Args:
        input_dim: Length of the 1-D input vector.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    features = layers.Input(shape=(input_dim,))

    # Main path: first wide layer.
    hidden = layers.Dense(1024, activation='relu')(features)
    hidden = layers.Dropout(0.3)(hidden)

    # Shortcut: linear projection of the raw input to the same width.
    shortcut = layers.Dense(1024)(features)
    merged = layers.Add()([hidden, shortcut])
    hidden = layers.Activation('relu')(merged)

    # Two identical wide stages on top of the merged representation.
    for _ in range(2):
        hidden = layers.Dense(1024, activation='relu')(hidden)
        hidden = layers.Dropout(0.3)(hidden)

    probabilities = layers.Dense(3, activation='softmax')(hidden)
    return tf.keras.Model(features, probabilities)

n_features = X.shape[1]
mlp_model = build_model_residual_wide(n_features)

# Labels are integer class indices, hence the sparse cross-entropy loss.
mlp_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split; the validation split is evaluated every epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    batch_size=16,
    epochs=500,
    validation_data=(X_val_scaled, y_val),
)

curves = history.history
epochs = range(1, len(curves['loss']) + 1)

# Learning curves: loss on the left panel, accuracy on the right panel.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

ax_loss.plot(epochs, curves['loss'], 'b', label='Training Loss')
ax_loss.plot(epochs, curves['val_loss'], 'r', label='Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.legend()

ax_acc.plot(epochs, curves['accuracy'], 'b', label='Training Accuracy')
ax_acc.plot(epochs, curves['val_accuracy'], 'r', label='Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training and Validation accuracy')
ax_acc.legend()

plt.show()

In [None]:
# Class probabilities predicted for the test split.
prob_predictions = mlp_model.predict(X_test_scaled)

# Index of the most probable class per sample, then decoded to the original labels.
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)


In [None]:
# Test-set metrics; precision, recall and F1 use sklearn's 'weighted' averaging.
weighted_avg = {'average': 'weighted'}
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions, **weighted_avg)
recall = recall_score(y_test, class_predictions, **weighted_avg)
f1 = f1_score(y_test, class_predictions, **weighted_avg)

# Macro one-vs-rest AUC, computed against the binarised test labels.
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
roc_auc = roc_auc_score(
    y_test_bin,
    prob_predictions,
    multi_class='ovr',
    average='macro',
)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

print(classification_report(y_test, class_predictions))
print(confusion_matrix(y_test, class_predictions))

# Predict every row of X_scaled and store the decoded label next to the data.
# NOTE: this overwrites the test-set prob_predictions / class_predictions.
prob_predictions = mlp_model.predict(X_scaled)
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)
df['Predicted'] = original_predictions

print(df.tail(10))

In [None]:
# Explain the current mlp_model on the test split with KernelSHAP.
# The background set is X_train_scaled summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 stops Keras from printing a progress bar for each of the many
# predict() calls KernelExplainer issues.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)
shap_values = explainer.shap_values(X_test_scaled_df, nsamples=100, silent=True)

class_names = le.classes_
for i, class_name in enumerate(class_names):
    # Older SHAP releases return a list with one (samples, features) array per
    # class; newer ones return a single (samples, features, classes) array.
    if isinstance(shap_values, list):
        class_shap_values = shap_values[i]
    else:
        class_shap_values = shap_values[:, :, i]

    shap.summary_plot(
        class_shap_values,
        features=X_test_scaled_df,
        feature_names=X_test_scaled_df.columns,
        plot_type='dot',
        max_display=len(binary_features + numeric_features),
        show=False
    )
    plt.title(f"Class {class_name}")
    # bbox_inches='tight' keeps long feature labels from being cropped.
    plt.savefig(f"shap_plot_class_{class_name}.png", bbox_inches='tight')
    plt.close()


# Reload the saved per-class plots and show them side by side, one panel per
# class (squeeze=False keeps `axs` an array even for a single class).
fig, axs = plt.subplots(1, len(class_names), figsize=(16, 12), squeeze=False)
axs = axs.flatten()

for i, class_name in enumerate(class_names):
    img = plt.imread(f"shap_plot_class_{class_name}.png")
    axs[i].imshow(img)
    axs[i].axis('off')
    axs[i].set_title(f"Class {class_name}")

plt.tight_layout()
plt.show()

## Self‑Norm MLP

In [None]:
# 3. Self‑Norm MLP, Self‑Normalizing Network with SELU & AlphaDropout
#    Plain sequential stack (there are no residual connections):
#    Input → Dense(512, SELU) → AD(0.1)
#          → Dense(512, SELU) → AD(0.1)
#          → Dense(256, SELU) → AD(0.1)
#          → Dense(3, softmax)
#    The SELU layers use the 'lecun_normal' initializer: the Keras SELU
#    documentation states SELU is to be used with LecunNormal initialisation
#    and AlphaDropout; the default Glorot initialisation does not satisfy that.

mlp_model = tf.keras.Sequential([
        layers.Dense(512, activation='selu', kernel_initializer='lecun_normal',
                     input_shape=(X.shape[1],)),
        layers.AlphaDropout(0.1),
        layers.Dense(512, activation='selu', kernel_initializer='lecun_normal'),
        layers.AlphaDropout(0.1),
        layers.Dense(256, activation='selu', kernel_initializer='lecun_normal'),
        layers.AlphaDropout(0.1),
        layers.Dense(3, activation='softmax')
    ])

# Labels are integer class indices, hence the sparse cross-entropy loss.
mlp_model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# Fit on the training split; the validation split is evaluated every epoch.
history = mlp_model.fit(X_train_scaled, y_train,
                         validation_data=(X_val_scaled, y_val),
                         epochs=200, batch_size=16)

epochs = range(1, len(history.history['loss']) + 1)

# Learning curves: loss on the left panel, accuracy on the right panel.
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs, history.history['loss'], 'b', label='Training Loss')
plt.plot(epochs, history.history['val_loss'], 'r', label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs, history.history['accuracy'], 'b', label='Training Accuracy')
plt.plot(epochs, history.history['val_accuracy'], 'r', label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation accuracy')
plt.legend()

plt.show()

In [None]:
# Class probabilities predicted for the test split.
prob_predictions = mlp_model.predict(X_test_scaled)

# Index of the most probable class per sample, then decoded to the original labels.
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)


In [None]:
# Test-set metrics; precision, recall and F1 use sklearn's 'weighted' averaging.
weighted_avg = {'average': 'weighted'}
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions, **weighted_avg)
recall = recall_score(y_test, class_predictions, **weighted_avg)
f1 = f1_score(y_test, class_predictions, **weighted_avg)

# Macro one-vs-rest AUC, computed against the binarised test labels.
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
roc_auc = roc_auc_score(
    y_test_bin,
    prob_predictions,
    multi_class='ovr',
    average='macro',
)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

print(classification_report(y_test, class_predictions))
print(confusion_matrix(y_test, class_predictions))

# Predict every row of X_scaled and store the decoded label next to the data.
# NOTE: this overwrites the test-set prob_predictions / class_predictions.
prob_predictions = mlp_model.predict(X_scaled)
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)
df['Predicted'] = original_predictions

print(df.tail(10))

In [None]:
# Explain the current mlp_model on the test split with KernelSHAP.
# The background set is X_train_scaled summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 stops Keras from printing a progress bar for each of the many
# predict() calls KernelExplainer issues.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)
shap_values = explainer.shap_values(X_test_scaled_df, nsamples=100, silent=True)

class_names = le.classes_
for i, class_name in enumerate(class_names):
    # Older SHAP releases return a list with one (samples, features) array per
    # class; newer ones return a single (samples, features, classes) array.
    if isinstance(shap_values, list):
        class_shap_values = shap_values[i]
    else:
        class_shap_values = shap_values[:, :, i]

    shap.summary_plot(
        class_shap_values,
        features=X_test_scaled_df,
        feature_names=X_test_scaled_df.columns,
        plot_type='dot',
        max_display=len(binary_features + numeric_features),
        show=False
    )
    plt.title(f"Class {class_name}")
    # bbox_inches='tight' keeps long feature labels from being cropped.
    plt.savefig(f"shap_plot_class_{class_name}.png", bbox_inches='tight')
    plt.close()


# Reload the saved per-class plots and show them side by side, one panel per
# class (squeeze=False keeps `axs` an array even for a single class).
fig, axs = plt.subplots(1, len(class_names), figsize=(16, 12), squeeze=False)
axs = axs.flatten()

for i, class_name in enumerate(class_names):
    img = plt.imread(f"shap_plot_class_{class_name}.png")
    axs[i].imshow(img)
    axs[i].axis('off')
    axs[i].set_title(f"Class {class_name}")

plt.tight_layout()
plt.show()

## Stacked Narrow MLP

In [None]:
# 4. Stacked Narrow MLP, Deep Narrow‑and‑Deep MLP
#    Plain sequential stack (there are no residual connections):
#    Input → Dense(256) → LeakyReLU → DO(0.25)
#          → Dense(128) → LeakyReLU → DO(0.25)
#          → Dense(128) → LeakyReLU → DO(0.2)
#          → Dense(64)  → LeakyReLU → DO(0.2)
#          → Dense(32)  → LeakyReLU → DO(0.2)
#          → Dense(16)  → LeakyReLU → DO(0.15)
#          → Dense(3, softmax)

n_features = X.shape[1]

mlp_model = tf.keras.Sequential([
    layers.Dense(256, input_shape=(n_features,)),
    layers.LeakyReLU(),
    layers.Dropout(0.25),
    layers.Dense(128),
    layers.LeakyReLU(),
    layers.Dropout(0.25),
    layers.Dense(128),
    layers.LeakyReLU(),
    layers.Dropout(0.2),
    layers.Dense(64),
    layers.LeakyReLU(),
    layers.Dropout(0.2),
    layers.Dense(32),
    layers.LeakyReLU(),
    layers.Dropout(0.2),
    layers.Dense(16),
    layers.LeakyReLU(),
    layers.Dropout(0.15),
    layers.Dense(3, activation='softmax'),
])

# Labels are integer class indices, hence the sparse cross-entropy loss.
mlp_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split; the validation split is evaluated every epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    batch_size=16,
    epochs=500,
    validation_data=(X_val_scaled, y_val),
)

curves = history.history
epochs = range(1, len(curves['loss']) + 1)

# Learning curves: loss on the left panel, accuracy on the right panel.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

ax_loss.plot(epochs, curves['loss'], 'b', label='Training Loss')
ax_loss.plot(epochs, curves['val_loss'], 'r', label='Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.legend()

ax_acc.plot(epochs, curves['accuracy'], 'b', label='Training Accuracy')
ax_acc.plot(epochs, curves['val_accuracy'], 'r', label='Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training and Validation accuracy')
ax_acc.legend()

plt.show()

In [None]:
# Class probabilities predicted for the test split.
prob_predictions = mlp_model.predict(X_test_scaled)

# Index of the most probable class per sample, then decoded to the original labels.
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)


In [None]:
# Test-set metrics; precision, recall and F1 use sklearn's 'weighted' averaging.
weighted_avg = {'average': 'weighted'}
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions, **weighted_avg)
recall = recall_score(y_test, class_predictions, **weighted_avg)
f1 = f1_score(y_test, class_predictions, **weighted_avg)

# Macro one-vs-rest AUC, computed against the binarised test labels.
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
roc_auc = roc_auc_score(
    y_test_bin,
    prob_predictions,
    multi_class='ovr',
    average='macro',
)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

print(classification_report(y_test, class_predictions))
print(confusion_matrix(y_test, class_predictions))

# Predict every row of X_scaled and store the decoded label next to the data.
# NOTE: this overwrites the test-set prob_predictions / class_predictions.
prob_predictions = mlp_model.predict(X_scaled)
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)
df['Predicted'] = original_predictions

print(df.tail(10))

In [None]:
# Explain the current mlp_model on the test split with KernelSHAP.
# The background set is X_train_scaled summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 stops Keras from printing a progress bar for each of the many
# predict() calls KernelExplainer issues.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)
shap_values = explainer.shap_values(X_test_scaled_df, nsamples=100, silent=True)

class_names = le.classes_
for i, class_name in enumerate(class_names):
    # Older SHAP releases return a list with one (samples, features) array per
    # class; newer ones return a single (samples, features, classes) array.
    if isinstance(shap_values, list):
        class_shap_values = shap_values[i]
    else:
        class_shap_values = shap_values[:, :, i]

    shap.summary_plot(
        class_shap_values,
        features=X_test_scaled_df,
        feature_names=X_test_scaled_df.columns,
        plot_type='dot',
        max_display=len(binary_features + numeric_features),
        show=False
    )
    plt.title(f"Class {class_name}")
    # bbox_inches='tight' keeps long feature labels from being cropped.
    plt.savefig(f"shap_plot_class_{class_name}.png", bbox_inches='tight')
    plt.close()


# Reload the saved per-class plots and show them side by side, one panel per
# class (squeeze=False keeps `axs` an array even for a single class).
fig, axs = plt.subplots(1, len(class_names), figsize=(16, 12), squeeze=False)
axs = axs.flatten()

for i, class_name in enumerate(class_names):
    img = plt.imread(f"shap_plot_class_{class_name}.png")
    axs[i].imshow(img)
    axs[i].axis('off')
    axs[i].set_title(f"Class {class_name}")

plt.tight_layout()
plt.show()

## NoisyWide MLP

In [None]:
# 5. NoisyWide MLP, Gaussian Noise‑Augmented Wide MLP
#    Input → GaussianNoise(0.1)
#          → Dense(1024, ReLU) → DO(0.3)
#          → Dense(512,  ReLU) → DO(0.3)
#          → Dense(256,  ReLU) → DO(0.2)
#          → Dense(3, softmax)

n_features = X.shape[1]

mlp_model = tf.keras.Sequential([
    layers.GaussianNoise(0.1, input_shape=(n_features,)),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(3, activation='softmax'),
])

# Labels are integer class indices, hence the sparse cross-entropy loss.
mlp_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split; the validation split is evaluated every epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    batch_size=16,
    epochs=500,
    validation_data=(X_val_scaled, y_val),
)

curves = history.history
epochs = range(1, len(curves['loss']) + 1)

# Learning curves: loss on the left panel, accuracy on the right panel.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

ax_loss.plot(epochs, curves['loss'], 'b', label='Training Loss')
ax_loss.plot(epochs, curves['val_loss'], 'r', label='Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.legend()

ax_acc.plot(epochs, curves['accuracy'], 'b', label='Training Accuracy')
ax_acc.plot(epochs, curves['val_accuracy'], 'r', label='Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training and Validation accuracy')
ax_acc.legend()

plt.show()

In [None]:
# Class probabilities predicted for the test split.
prob_predictions = mlp_model.predict(X_test_scaled)

# Index of the most probable class per sample, then decoded to the original labels.
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)


In [None]:
# Test-set metrics; precision, recall and F1 use sklearn's 'weighted' averaging.
weighted_avg = {'average': 'weighted'}
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions, **weighted_avg)
recall = recall_score(y_test, class_predictions, **weighted_avg)
f1 = f1_score(y_test, class_predictions, **weighted_avg)

# Macro one-vs-rest AUC, computed against the binarised test labels.
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
roc_auc = roc_auc_score(
    y_test_bin,
    prob_predictions,
    multi_class='ovr',
    average='macro',
)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

print(classification_report(y_test, class_predictions))
print(confusion_matrix(y_test, class_predictions))

# Predict every row of X_scaled and store the decoded label next to the data.
# NOTE: this overwrites the test-set prob_predictions / class_predictions.
prob_predictions = mlp_model.predict(X_scaled)
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)
df['Predicted'] = original_predictions

print(df.tail(10))

In [None]:
# Explain the current mlp_model on the test split with KernelSHAP.
# The background set is X_train_scaled summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 stops Keras from printing a progress bar for each of the many
# predict() calls KernelExplainer issues.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)
shap_values = explainer.shap_values(X_test_scaled_df, nsamples=100, silent=True)

class_names = le.classes_
for i, class_name in enumerate(class_names):
    # Older SHAP releases return a list with one (samples, features) array per
    # class; newer ones return a single (samples, features, classes) array.
    if isinstance(shap_values, list):
        class_shap_values = shap_values[i]
    else:
        class_shap_values = shap_values[:, :, i]

    shap.summary_plot(
        class_shap_values,
        features=X_test_scaled_df,
        feature_names=X_test_scaled_df.columns,
        plot_type='dot',
        max_display=len(binary_features + numeric_features),
        show=False
    )
    plt.title(f"Class {class_name}")
    # bbox_inches='tight' keeps long feature labels from being cropped.
    plt.savefig(f"shap_plot_class_{class_name}.png", bbox_inches='tight')
    plt.close()


# Reload the saved per-class plots and show them side by side, one panel per
# class (squeeze=False keeps `axs` an array even for a single class).
fig, axs = plt.subplots(1, len(class_names), figsize=(16, 12), squeeze=False)
axs = axs.flatten()

for i, class_name in enumerate(class_names):
    img = plt.imread(f"shap_plot_class_{class_name}.png")
    axs[i].imshow(img)
    axs[i].axis('off')
    axs[i].set_title(f"Class {class_name}")

plt.tight_layout()
plt.show()

## ParallelBranch MLP

In [None]:
# 6. ParallelBranch MLP, Multi‑Branch Ensemble MLP
# Input
# ├─ Branch A: Dense(512)→ReLU→Dropout(0.3)→Dense(256)→ReLU
# └─ Branch B: Dense(512)→ReLU→Dropout(0.3)→Dense(256)→ReLU
# Concat → Dense(128)→ReLU→Dropout(0.2) → Dense(3, softmax)

def build_model_multibranch(input_dim):
    """Build the two-branch MLP.

    Two structurally identical branches (each with its own weights) read the
    same input; their outputs are concatenated and passed through a
    Dense(128, ReLU) + Dropout(0.2) head before the 3-unit softmax output.

    Args:
        input_dim: Length of the 1-D input vector.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    features = layers.Input(shape=(input_dim,))

    def _branch(source):
        # Dense(512, ReLU) → Dropout(0.3) → Dense(256, ReLU)
        hidden = layers.Dense(512, activation='relu')(source)
        hidden = layers.Dropout(0.3)(hidden)
        return layers.Dense(256, activation='relu')(hidden)

    # Branch A is built first, then branch B.
    branches = [_branch(features) for _ in range(2)]

    # Merge the branches and classify.
    merged = layers.Concatenate()(branches)
    merged = layers.Dense(128, activation='relu')(merged)
    merged = layers.Dropout(0.2)(merged)
    probabilities = layers.Dense(3, activation='softmax')(merged)
    return tf.keras.Model(features, probabilities)

n_features = X.shape[1]
mlp_model = build_model_multibranch(n_features)

# Labels are integer class indices, hence the sparse cross-entropy loss.
mlp_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split; the validation split is evaluated every epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    batch_size=16,
    epochs=500,
    validation_data=(X_val_scaled, y_val),
)

curves = history.history
epochs = range(1, len(curves['loss']) + 1)

# Learning curves: loss on the left panel, accuracy on the right panel.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

ax_loss.plot(epochs, curves['loss'], 'b', label='Training Loss')
ax_loss.plot(epochs, curves['val_loss'], 'r', label='Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.legend()

ax_acc.plot(epochs, curves['accuracy'], 'b', label='Training Accuracy')
ax_acc.plot(epochs, curves['val_accuracy'], 'r', label='Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training and Validation accuracy')
ax_acc.legend()

plt.show()

In [None]:
# Class probabilities predicted for the test split.
prob_predictions = mlp_model.predict(X_test_scaled)

# Index of the most probable class per sample, then decoded to the original labels.
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)


In [None]:
# Test-set metrics; precision, recall and F1 use sklearn's 'weighted' averaging.
weighted_avg = {'average': 'weighted'}
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions, **weighted_avg)
recall = recall_score(y_test, class_predictions, **weighted_avg)
f1 = f1_score(y_test, class_predictions, **weighted_avg)

# Macro one-vs-rest AUC, computed against the binarised test labels.
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
roc_auc = roc_auc_score(
    y_test_bin,
    prob_predictions,
    multi_class='ovr',
    average='macro',
)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

print(classification_report(y_test, class_predictions))
print(confusion_matrix(y_test, class_predictions))

# Predict every row of X_scaled and store the decoded label next to the data.
# NOTE: this overwrites the test-set prob_predictions / class_predictions.
prob_predictions = mlp_model.predict(X_scaled)
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)
df['Predicted'] = original_predictions

print(df.tail(10))

In [None]:
# Explain the current mlp_model on the test split with KernelSHAP.
# The background set is X_train_scaled summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 stops Keras from printing a progress bar for each of the many
# predict() calls KernelExplainer issues.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)
shap_values = explainer.shap_values(X_test_scaled_df, nsamples=100, silent=True)

class_names = le.classes_
for i, class_name in enumerate(class_names):
    # Older SHAP releases return a list with one (samples, features) array per
    # class; newer ones return a single (samples, features, classes) array.
    if isinstance(shap_values, list):
        class_shap_values = shap_values[i]
    else:
        class_shap_values = shap_values[:, :, i]

    shap.summary_plot(
        class_shap_values,
        features=X_test_scaled_df,
        feature_names=X_test_scaled_df.columns,
        plot_type='dot',
        max_display=len(binary_features + numeric_features),
        show=False
    )
    plt.title(f"Class {class_name}")
    # bbox_inches='tight' keeps long feature labels from being cropped.
    plt.savefig(f"shap_plot_class_{class_name}.png", bbox_inches='tight')
    plt.close()


# Reload the saved per-class plots and show them side by side, one panel per
# class (squeeze=False keeps `axs` an array even for a single class).
fig, axs = plt.subplots(1, len(class_names), figsize=(16, 12), squeeze=False)
axs = axs.flatten()

for i, class_name in enumerate(class_names):
    img = plt.imread(f"shap_plot_class_{class_name}.png")
    axs[i].imshow(img)
    axs[i].axis('off')
    axs[i].set_title(f"Class {class_name}")

plt.tight_layout()
plt.show()

## L2Light MLP

In [None]:
# 7. L2Light MLP, Lightweight L2‑Heavy Regularized MLP
#    Input → Dense(512, ReLU, L2=0.01)  → DO(0.3)
#          → Dense(256, ReLU, L2=0.01)  → DO(0.3)
#          → Dense(128, ReLU, L2=0.005) → DO(0.2)
#          → Dense(3, softmax)

n_features = X.shape[1]

mlp_model = tf.keras.Sequential([
    layers.Dense(512, activation='relu', input_shape=(n_features,),
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.3),
    layers.Dense(256, activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.3),
    layers.Dense(128, activation='relu',
                 kernel_regularizer=regularizers.l2(0.005)),
    layers.Dropout(0.2),
    layers.Dense(3, activation='softmax'),
])

# Labels are integer class indices, hence the sparse cross-entropy loss.
mlp_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split; the validation split is evaluated every epoch.
history = mlp_model.fit(
    X_train_scaled,
    y_train,
    batch_size=16,
    epochs=500,
    validation_data=(X_val_scaled, y_val),
)

curves = history.history
epochs = range(1, len(curves['loss']) + 1)

# Learning curves: loss on the left panel, accuracy on the right panel.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

ax_loss.plot(epochs, curves['loss'], 'b', label='Training Loss')
ax_loss.plot(epochs, curves['val_loss'], 'r', label='Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.legend()

ax_acc.plot(epochs, curves['accuracy'], 'b', label='Training Accuracy')
ax_acc.plot(epochs, curves['val_accuracy'], 'r', label='Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training and Validation accuracy')
ax_acc.legend()

plt.show()

In [None]:
# Class probabilities predicted for the test split.
prob_predictions = mlp_model.predict(X_test_scaled)

# Index of the most probable class per sample, then decoded to the original labels.
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)


In [None]:
# Test-set metrics; precision, recall and F1 use sklearn's 'weighted' averaging.
weighted_avg = {'average': 'weighted'}
accuracy = accuracy_score(y_test, class_predictions)
precision = precision_score(y_test, class_predictions, **weighted_avg)
recall = recall_score(y_test, class_predictions, **weighted_avg)
f1 = f1_score(y_test, class_predictions, **weighted_avg)

# Macro one-vs-rest AUC, computed against the binarised test labels.
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
roc_auc = roc_auc_score(
    y_test_bin,
    prob_predictions,
    multi_class='ovr',
    average='macro',
)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

print(classification_report(y_test, class_predictions))
print(confusion_matrix(y_test, class_predictions))

# Predict every row of X_scaled and store the decoded label next to the data.
# NOTE: this overwrites the test-set prob_predictions / class_predictions.
prob_predictions = mlp_model.predict(X_scaled)
class_predictions = prob_predictions.argmax(axis=1)
original_predictions = le.inverse_transform(class_predictions)
df['Predicted'] = original_predictions

print(df.tail(10))

In [None]:
# Explain the current mlp_model on the test split with KernelSHAP.
# The background set is X_train_scaled summarised by 10 k-means centroids.
background = shap.kmeans(X_train_scaled, 10)
# verbose=0 stops Keras from printing a progress bar for each of the many
# predict() calls KernelExplainer issues.
explainer = shap.KernelExplainer(lambda x: mlp_model.predict(x, verbose=0), background)
shap_values = explainer.shap_values(X_test_scaled_df, nsamples=100, silent=True)

class_names = le.classes_
for i, class_name in enumerate(class_names):
    # Older SHAP releases return a list with one (samples, features) array per
    # class; newer ones return a single (samples, features, classes) array.
    if isinstance(shap_values, list):
        class_shap_values = shap_values[i]
    else:
        class_shap_values = shap_values[:, :, i]

    shap.summary_plot(
        class_shap_values,
        features=X_test_scaled_df,
        feature_names=X_test_scaled_df.columns,
        plot_type='dot',
        max_display=len(binary_features + numeric_features),
        show=False
    )
    plt.title(f"Class {class_name}")
    # bbox_inches='tight' keeps long feature labels from being cropped.
    plt.savefig(f"shap_plot_class_{class_name}.png", bbox_inches='tight')
    plt.close()


# Reload the saved per-class plots and show them side by side, one panel per
# class (squeeze=False keeps `axs` an array even for a single class).
fig, axs = plt.subplots(1, len(class_names), figsize=(16, 12), squeeze=False)
axs = axs.flatten()

for i, class_name in enumerate(class_names):
    img = plt.imread(f"shap_plot_class_{class_name}.png")
    axs[i].imshow(img)
    axs[i].axis('off')
    axs[i].set_title(f"Class {class_name}")

plt.tight_layout()
plt.show()