# TTP: conventional NN models

In [None]:
#Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
#Import sklearn libraries for data preprocessing
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
#Import keras libraries for building the models
from tensorflow.keras import layers, regularizers
#Import libraries for model evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error
#Import SHAP library for model interpretability
import shap

# Reproducibility: a single seed shared by the notebook (42 is an arbitrary, conventional choice).
# It is reused below as the `random_state` of the row shuffle.
SEED = 42
# Per the TensorFlow docs this seeds the Python `random`, NumPy and TensorFlow generators at once.
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Read the raw dataset (comma-separated, latin-1 encoded).
# The CSV is not shipped with the notebook: request the dataset from the author.
dataset_path = 'dataset_d.csv'
data = pd.read_csv(dataset_path, sep=',', encoding='latin-1')

In [None]:
# Regression task: the target column is "progression".
# The columns kept for modelling are assembled group by group; the concatenation
# order below is the column order of `df`.
patient_columns = ['age', 'sex', 'smoking', 'ps_at_diagnosis_ad', 'n#_mets_sites']
metastasis_columns = [
    'lung_only_m1', 'pleural', 'pericard', 'lymph_nodes_only_m1', 'soft_tissue',
    'leptomingeal', 'skin', 'peritoneal', 'renal', 'pancreas',
    'brain', 'liver', 'bone', 'adrenal',
]
baseline_lab_columns = [
    'hbbaselineio', 'leucotbaselineio',
    'neut_abs...143', 'linfo_abs...144', 'baso_abs...145', 'mono_abs...147',
    'plaqtbaselineio',
]

relevant_columns = (
    patient_columns
    + metastasis_columns
    + ['histology']
    + baseline_lab_columns
    + ['progression']
)

df = data[relevant_columns]
df.shape

In [None]:
# Complete-case analysis: discard every row that has at least one missing value,
# then display the remaining (rows, columns).
df = df.dropna(axis='index', how='any')
df.shape

In [None]:
# Shuffle the rows: frac=1 keeps every row, the fixed seed makes the order repeatable.
df = df.sample(frac=1, random_state=SEED)

# Columns that are cast to integer dtype in one pass.
var_int = [
    'ps_at_diagnosis_ad', 'n#_mets_sites',
    'lung_only_m1', 'pleural', 'pericard', 'lymph_nodes_only_m1', 'soft_tissue',
    'leptomingeal', 'skin', 'peritoneal', 'renal', 'pancreas',
    'brain', 'liver', 'bone', 'adrenal',
]
df = df.astype({column: int for column in var_int})

# Lower-case the sex labels so that spelling variants collapse into the same category.
df['sex'] = df['sex'].str.lower()

# One-hot encode the categorical columns.
categorical_columns = ['histology', 'sex', 'smoking']
one_hot_data = pd.get_dummies(df, columns=categorical_columns)

# Dummy columns expected from the encoding; they are converted to 0/1 integers.
# A KeyError here means one of the expected categories is absent from the data.
cols_to_convert = [
    'histology_adenocarcinoma', 'histology_nsclc', 'histology_squamous',
    'sex_female', 'sex_male',
    'smoking_current', 'smoking_former', 'smoking_non-smoker',
]
one_hot_data = one_hot_data.astype({column: int for column in cols_to_convert})

In [None]:
# Separate the predictors from the regression target.
# Index.difference returns the remaining labels in sorted order, so the feature
# columns of X are sorted rather than in their original order.
target_column = 'progression'
feature_columns = one_hot_data.columns.difference([target_column])

X = one_hot_data[feature_columns]
y = df[target_column]

In [None]:
# Split the data into training, validation and test sets.
# Step 1 holds out 20% of the rows as the test set; step 2 takes 20% of the remaining
# rows as the validation set (overall roughly 64% / 16% / 20%).
# Both splits use the notebook-level SEED instead of a repeated literal, so the whole
# pipeline is controlled by a single seed.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.2, random_state=SEED
)

X_train.shape, X_val.shape, X_test.shape

In [None]:
# Feature groups. Only the columns in `numeric_features` are standardised below;
# the columns in `binary_features` are left as they are.
binary_features = ['lung_only_m1', 'pleural', 'pericard', 'lymph_nodes_only_m1', 'soft_tissue', 'leptomingeal','skin','peritoneal','renal',
                   'pancreas', 'brain', 'liver', 'bone', 'adrenal','histology_adenocarcinoma', 'histology_nsclc',
                   'histology_squamous', 'sex_female', 'sex_male','smoking_current', 'smoking_former', 'smoking_non-smoker']
numeric_features = ['hbbaselineio','leucotbaselineio', 'neut_abs...143','linfo_abs...144','baso_abs...145', 'mono_abs...147',
                    'plaqtbaselineio', 'age', 'ps_at_diagnosis_ad', 'n#_mets_sites']

# Work on copies so the unscaled splits stay available.
X_train_scaled = X_train.copy()
X_val_scaled = X_val.copy()
X_test_scaled = X_test.copy()
X_train_val_scaled = X_train_val.copy()

# The pooled train+validation frame gets its own scaler. Re-using one StandardScaler
# for two fits would silently discard the first set of statistics, leaving
# X_train_val_scaled without a matching fitted scaler.
scaler_train_val = StandardScaler()
X_train_val_scaled[numeric_features] = scaler_train_val.fit_transform(X_train_val_scaled[numeric_features])

# `scaler` is fit on the training split only and then applied to validation and test,
# so no statistics from held-out data leak into the features.
scaler = StandardScaler()
X_train_scaled[numeric_features] = scaler.fit_transform(X_train_scaled[numeric_features])
X_val_scaled[numeric_features] = scaler.transform(X_val_scaled[numeric_features])
X_test_scaled[numeric_features] = scaler.transform(X_test_scaled[numeric_features])

In [None]:
def smape_f(y_true, y_pred):
    """Symmetric mean absolute percentage error (SMAPE), in percent.

    Computes ``100 * mean(|y_true - y_pred| / d)`` where
    ``d = max((|y_true| + |y_pred| + eps) / 2, eps)`` and
    ``eps = tf.keras.backend.epsilon()``.

    Args:
        y_true: Ground-truth values (tensor or array-like).
        y_pred: Predicted values (tensor or array-like).

    Returns:
        A scalar tensor: the SMAPE averaged over all elements.
    """
    # Align dtypes so a direct call (e.g. float64 targets with float32 predictions)
    # does not fail on a dtype mismatch; this is a no-op when they already agree.
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    epsilon = tf.keras.backend.epsilon()
    # The denominator is floored at epsilon so the ratio stays finite when both
    # the target and the prediction are (close to) zero.
    denominator = tf.maximum(
        (tf.abs(y_true) + tf.abs(y_pred) + epsilon) / 2.0,
        epsilon
    )
    diff = tf.abs(y_true - y_pred)
    return 100 * tf.reduce_mean(diff / denominator)

In [None]:
# Model 1: one 512-unit ReLU hidden layer, dropout 0.4, single linear output unit.
n_features = X_train_scaled.shape[1]
hidden_units = 512
dropout_rate = 0.4

mlp_model = tf.keras.models.Sequential([
    layers.Dense(hidden_units, activation='relu', input_shape=(n_features,)),
    layers.Dropout(dropout_rate),
    layers.Dense(1, activation='linear'),
])

# MAE is the optimised loss; MAE, MSE and the custom SMAPE are tracked as metrics.
mlp_model.compile(
    optimizer='adam',
    loss='mae',
    metrics=['mae', 'mse', smape_f],
)

# Fit on the scaled training split while monitoring the scaled validation split.
fit_options = dict(epochs=500, batch_size=16, validation_data=(X_val_scaled, y_val))
history = mlp_model.fit(X_train_scaled, y_train, **fit_options)

# Learning curves: one panel per quantity, training in blue and validation in red.
curves = history.history
epochs = range(1, len(curves['loss']) + 1)
panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
    ('mae', 'val_mae', 'Training MAE', 'Validation MAE', 'MAE', 'Training and Validation MAE'),
]

plt.figure(figsize=(12, 5))
for position, (train_key, val_key, train_label, val_label, y_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs, curves[train_key], 'b', label=train_label)
    plt.plot(epochs, curves[val_key], 'r', label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Predict on the scaled test split and flatten the model output to a 1-D array.
test_predictions = mlp_model.predict(X_test_scaled)
y_pred = test_predictions.flatten()

# Absolute-scale metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Percentage metrics; the 1e-8 terms keep the denominators away from zero.
signed_error = y_test - y_pred
mape = np.mean(np.abs(signed_error / (y_test + 1e-8))) * 100
symmetric_scale = np.abs(y_test) + np.abs(y_pred) + 1e-8
smape = 100/len(y_test) * np.sum(2 * np.abs(y_pred - y_test) / symmetric_scale)

# Report the results.
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"SMAPE: {smape:.2f}%")

# Side-by-side table of true and predicted targets; show the last ten rows.
results = pd.DataFrame({'ttp_true': y_test, 'ttp_pred': y_pred})

print(results.tail(10))

In [None]:
# Predicted vs. true targets on the test split.
# The dashed red diagonal spans the range of the true values and marks the ideal y = x line.
diagonal = [y_test.min(), y_test.max()]

plt.figure(figsize=(6, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot(diagonal, diagonal, 'r--')
plt.xlabel("True Values")
plt.ylabel("Predicted Values")
plt.title("Predicted vs True Values")
plt.show()


In [None]:
# Wrap the scaled train/test features as DataFrames that carry the column labels of the
# corresponding unscaled split; these frames are the inputs to the SHAP cells.
X_train_scaled_df, X_test_scaled_df = (
    pd.DataFrame(scaled, columns=unscaled.columns)
    for scaled, unscaled in ((X_train_scaled, X_train), (X_test_scaled, X_test))
)

In [None]:
# Build a SHAP explainer for the current model; the scaled training features are passed
# as the explainer's second argument. Attributions are computed for the scaled test features.
explainer = shap.Explainer(mlp_model, X_train_scaled_df)
shap_values = explainer(X_test_scaled_df)

# max_display is set to the number of listed features (binary + numeric).
n_listed_features = len(binary_features) + len(numeric_features)

print("SHAP summary plot:")
shap.plots.beeswarm(shap_values, max_display=n_listed_features)


In [None]:
# Model 2: one 512-unit ReLU hidden layer with L2 weight regularisation (0.01),
# dropout 0.4, single linear output unit.
n_features = X_train_scaled.shape[1]
hidden_units = 512
l2_strength = 0.01
dropout_rate = 0.4

mlp_model = tf.keras.models.Sequential([
    layers.Dense(
        hidden_units,
        activation='relu',
        input_shape=(n_features,),
        kernel_regularizer=regularizers.l2(l2_strength),
    ),
    layers.Dropout(dropout_rate),
    layers.Dense(1, activation='linear'),
])

# MAE is the optimised loss; MAE, MSE and the custom SMAPE are tracked as metrics.
mlp_model.compile(
    optimizer='adam',
    loss='mae',
    metrics=['mae', 'mse', smape_f],
)

# Fit on the scaled training split while monitoring the scaled validation split.
fit_options = dict(epochs=500, batch_size=16, validation_data=(X_val_scaled, y_val))
history = mlp_model.fit(X_train_scaled, y_train, **fit_options)

# Learning curves: one panel per quantity, training in blue and validation in red.
curves = history.history
epochs = range(1, len(curves['loss']) + 1)
panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
    ('mae', 'val_mae', 'Training MAE', 'Validation MAE', 'MAE', 'Training and Validation MAE'),
]

plt.figure(figsize=(12, 5))
for position, (train_key, val_key, train_label, val_label, y_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs, curves[train_key], 'b', label=train_label)
    plt.plot(epochs, curves[val_key], 'r', label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Predict on the scaled test split and flatten the model output to a 1-D array.
test_predictions = mlp_model.predict(X_test_scaled)
y_pred = test_predictions.flatten()

# Absolute-scale metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Percentage metrics; the 1e-8 terms keep the denominators away from zero.
signed_error = y_test - y_pred
mape = np.mean(np.abs(signed_error / (y_test + 1e-8))) * 100
symmetric_scale = np.abs(y_test) + np.abs(y_pred) + 1e-8
smape = 100/len(y_test) * np.sum(2 * np.abs(y_pred - y_test) / symmetric_scale)

# Report the results.
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"SMAPE: {smape:.2f}%")

# Side-by-side table of true and predicted targets; show the last ten rows.
results = pd.DataFrame({'ttp_true': y_test, 'ttp_pred': y_pred})

print(results.tail(10))

In [None]:
# Predicted vs. true targets on the test split.
# The dashed red diagonal spans the range of the true values and marks the ideal y = x line.
plt.figure(figsize=(6, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
# The x-axis label was missing in this cell only; restored to match the other scatter plots.
plt.xlabel("True Values")
plt.ylabel("Predicted Values")
plt.title("Predicted vs True Values")
plt.show()


In [None]:
# Build a SHAP explainer for the current model; the scaled training features are passed
# as the explainer's second argument. Attributions are computed for the scaled test features.
explainer = shap.Explainer(mlp_model, X_train_scaled_df)
shap_values = explainer(X_test_scaled_df)

# max_display is set to the number of listed features (binary + numeric).
n_listed_features = len(binary_features) + len(numeric_features)

print("SHAP summary plot:")
shap.plots.beeswarm(shap_values, max_display=n_listed_features)


In [None]:
# Model 3: one 512-unit ReLU hidden layer with L2 weight regularisation (0.01),
# followed by layer normalisation and dropout 0.4, then a single linear output unit.
n_features = X_train_scaled.shape[1]
hidden_units = 512
l2_strength = 0.01
dropout_rate = 0.4

mlp_model = tf.keras.models.Sequential([
    layers.Dense(
        hidden_units,
        activation='relu',
        input_shape=(n_features,),
        kernel_regularizer=regularizers.l2(l2_strength),
    ),
    layers.LayerNormalization(),
    layers.Dropout(dropout_rate),
    layers.Dense(1, activation='linear'),
])

# MAE is the optimised loss; MAE, MSE and the custom SMAPE are tracked as metrics.
mlp_model.compile(
    optimizer='adam',
    loss='mae',
    metrics=['mae', 'mse', smape_f],
)

# Fit on the scaled training split while monitoring the scaled validation split.
fit_options = dict(epochs=500, batch_size=16, validation_data=(X_val_scaled, y_val))
history = mlp_model.fit(X_train_scaled, y_train, **fit_options)

# Learning curves: one panel per quantity, training in blue and validation in red.
curves = history.history
epochs = range(1, len(curves['loss']) + 1)
panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
    ('mae', 'val_mae', 'Training MAE', 'Validation MAE', 'MAE', 'Training and Validation MAE'),
]

plt.figure(figsize=(12, 5))
for position, (train_key, val_key, train_label, val_label, y_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs, curves[train_key], 'b', label=train_label)
    plt.plot(epochs, curves[val_key], 'r', label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Predict on the scaled test split and flatten the model output to a 1-D array.
test_predictions = mlp_model.predict(X_test_scaled)
y_pred = test_predictions.flatten()

# Absolute-scale metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Percentage metrics; the 1e-8 terms keep the denominators away from zero.
signed_error = y_test - y_pred
mape = np.mean(np.abs(signed_error / (y_test + 1e-8))) * 100
symmetric_scale = np.abs(y_test) + np.abs(y_pred) + 1e-8
smape = 100/len(y_test) * np.sum(2 * np.abs(y_pred - y_test) / symmetric_scale)

# Report the results.
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"SMAPE: {smape:.2f}%")

# Side-by-side table of true and predicted targets; show the last ten rows.
results = pd.DataFrame({'ttp_true': y_test, 'ttp_pred': y_pred})

print(results.tail(10))

In [None]:
# Predicted vs. true targets on the test split.
# The dashed red diagonal spans the range of the true values and marks the ideal y = x line.
diagonal = [y_test.min(), y_test.max()]

plt.figure(figsize=(6, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot(diagonal, diagonal, 'r--')
plt.xlabel("True Values")
plt.ylabel("Predicted Values")
plt.title("Predicted vs True Values")
plt.show()


In [None]:
# Build a SHAP explainer for the current model; the scaled training features are passed
# as the explainer's second argument. Attributions are computed for the scaled test features.
explainer = shap.Explainer(mlp_model, X_train_scaled_df)
shap_values = explainer(X_test_scaled_df)

# max_display is set to the number of listed features (binary + numeric).
n_listed_features = len(binary_features) + len(numeric_features)

print("SHAP summary plot:")
shap.plots.beeswarm(shap_values, max_display=n_listed_features)


In [None]:
# Model 4: one 512-unit ReLU hidden layer with L2 weight regularisation (0.01),
# followed by batch normalisation and dropout 0.4, then a single linear output unit.
n_features = X_train_scaled.shape[1]
hidden_units = 512
l2_strength = 0.01
dropout_rate = 0.4

mlp_model = tf.keras.models.Sequential([
    layers.Dense(
        hidden_units,
        activation='relu',
        input_shape=(n_features,),
        kernel_regularizer=regularizers.l2(l2_strength),
    ),
    layers.BatchNormalization(),
    layers.Dropout(dropout_rate),
    layers.Dense(1, activation='linear'),
])

# MAE is the optimised loss; MAE, MSE and the custom SMAPE are tracked as metrics.
mlp_model.compile(
    optimizer='adam',
    loss='mae',
    metrics=['mae', 'mse', smape_f],
)

# Fit on the scaled training split while monitoring the scaled validation split.
fit_options = dict(epochs=500, batch_size=16, validation_data=(X_val_scaled, y_val))
history = mlp_model.fit(X_train_scaled, y_train, **fit_options)

# Learning curves: one panel per quantity, training in blue and validation in red.
curves = history.history
epochs = range(1, len(curves['loss']) + 1)
panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
    ('mae', 'val_mae', 'Training MAE', 'Validation MAE', 'MAE', 'Training and Validation MAE'),
]

plt.figure(figsize=(12, 5))
for position, (train_key, val_key, train_label, val_label, y_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs, curves[train_key], 'b', label=train_label)
    plt.plot(epochs, curves[val_key], 'r', label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Predict on the scaled test split and flatten the model output to a 1-D array.
test_predictions = mlp_model.predict(X_test_scaled)
y_pred = test_predictions.flatten()

# Absolute-scale metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Percentage metrics; the 1e-8 terms keep the denominators away from zero.
signed_error = y_test - y_pred
mape = np.mean(np.abs(signed_error / (y_test + 1e-8))) * 100
symmetric_scale = np.abs(y_test) + np.abs(y_pred) + 1e-8
smape = 100/len(y_test) * np.sum(2 * np.abs(y_pred - y_test) / symmetric_scale)

# Report the results.
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"SMAPE: {smape:.2f}%")

# Side-by-side table of true and predicted targets; show the last ten rows.
results = pd.DataFrame({'ttp_true': y_test, 'ttp_pred': y_pred})

print(results.tail(10))

In [None]:
# Predicted vs. true targets on the test split.
# The dashed red diagonal spans the range of the true values and marks the ideal y = x line.
diagonal = [y_test.min(), y_test.max()]

plt.figure(figsize=(6, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot(diagonal, diagonal, 'r--')
plt.xlabel("True Values")
plt.ylabel("Predicted Values")
plt.title("Predicted vs True Values")
plt.show()


In [None]:
# Build a SHAP explainer for the current model; the scaled training features are passed
# as the explainer's second argument. Attributions are computed for the scaled test features.
explainer = shap.Explainer(mlp_model, X_train_scaled_df)
shap_values = explainer(X_test_scaled_df)

# max_display is set to the number of listed features (binary + numeric).
n_listed_features = len(binary_features) + len(numeric_features)

print("SHAP summary plot:")
shap.plots.beeswarm(shap_values, max_display=n_listed_features)


In [None]:
# Model 5: two 512-unit ReLU hidden layers, each with L2 weight regularisation (0.01)
# and followed by dropout 0.4, then a single linear output unit.
n_features = X_train_scaled.shape[1]
hidden_units = 512
l2_strength = 0.01
dropout_rate = 0.4

mlp_model = tf.keras.models.Sequential([
    layers.Dense(
        hidden_units,
        activation='relu',
        input_shape=(n_features,),
        kernel_regularizer=regularizers.l2(l2_strength),
    ),
    layers.Dropout(dropout_rate),
    layers.Dense(
        hidden_units,
        activation='relu',
        kernel_regularizer=regularizers.l2(l2_strength),
    ),
    layers.Dropout(dropout_rate),
    layers.Dense(1, activation='linear'),
])

# MAE is the optimised loss; MAE, MSE and the custom SMAPE are tracked as metrics.
mlp_model.compile(
    optimizer='adam',
    loss='mae',
    metrics=['mae', 'mse', smape_f],
)

# Fit on the scaled training split while monitoring the scaled validation split.
fit_options = dict(epochs=500, batch_size=16, validation_data=(X_val_scaled, y_val))
history = mlp_model.fit(X_train_scaled, y_train, **fit_options)

# Learning curves: one panel per quantity, training in blue and validation in red.
curves = history.history
epochs = range(1, len(curves['loss']) + 1)
panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
    ('mae', 'val_mae', 'Training MAE', 'Validation MAE', 'MAE', 'Training and Validation MAE'),
]

plt.figure(figsize=(12, 5))
for position, (train_key, val_key, train_label, val_label, y_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs, curves[train_key], 'b', label=train_label)
    plt.plot(epochs, curves[val_key], 'r', label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Predict on the scaled test split and flatten the model output to a 1-D array.
test_predictions = mlp_model.predict(X_test_scaled)
y_pred = test_predictions.flatten()

# Absolute-scale metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Percentage metrics; the 1e-8 terms keep the denominators away from zero.
signed_error = y_test - y_pred
mape = np.mean(np.abs(signed_error / (y_test + 1e-8))) * 100
symmetric_scale = np.abs(y_test) + np.abs(y_pred) + 1e-8
smape = 100/len(y_test) * np.sum(2 * np.abs(y_pred - y_test) / symmetric_scale)

# Report the results.
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"SMAPE: {smape:.2f}%")

# Side-by-side table of true and predicted targets; show the last ten rows.
results = pd.DataFrame({'ttp_true': y_test, 'ttp_pred': y_pred})

print(results.tail(10))

In [None]:
# Predicted vs. true targets on the test split.
# The dashed red diagonal spans the range of the true values and marks the ideal y = x line.
diagonal = [y_test.min(), y_test.max()]

plt.figure(figsize=(6, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot(diagonal, diagonal, 'r--')
plt.xlabel("True Values")
plt.ylabel("Predicted Values")
plt.title("Predicted vs True Values")
plt.show()


In [None]:
# Build a SHAP explainer for the current model; the scaled training features are passed
# as the explainer's second argument. Attributions are computed for the scaled test features.
explainer = shap.Explainer(mlp_model, X_train_scaled_df)
shap_values = explainer(X_test_scaled_df)

# max_display is set to the number of listed features (binary + numeric).
n_listed_features = len(binary_features) + len(numeric_features)

print("SHAP summary plot:")
shap.plots.beeswarm(shap_values, max_display=n_listed_features)


In [None]:
# Model 6: two 512-unit ReLU hidden layers with L2 weight regularisation (0.01).
# Layer normalisation is applied after the first hidden layer only; each hidden
# layer is followed by dropout 0.4, and the output is a single linear unit.
n_features = X_train_scaled.shape[1]
hidden_units = 512
l2_strength = 0.01
dropout_rate = 0.4

mlp_model = tf.keras.models.Sequential([
    layers.Dense(
        hidden_units,
        activation='relu',
        input_shape=(n_features,),
        kernel_regularizer=regularizers.l2(l2_strength),
    ),
    layers.LayerNormalization(center=True, scale=True),
    layers.Dropout(dropout_rate),
    layers.Dense(
        hidden_units,
        activation='relu',
        kernel_regularizer=regularizers.l2(l2_strength),
    ),
    layers.Dropout(dropout_rate),
    layers.Dense(1, activation='linear'),
])

# MAE is the optimised loss; MAE, MSE and the custom SMAPE are tracked as metrics.
mlp_model.compile(
    optimizer='adam',
    loss='mae',
    metrics=['mae', 'mse', smape_f],
)

# Fit on the scaled training split while monitoring the scaled validation split.
fit_options = dict(epochs=500, batch_size=16, validation_data=(X_val_scaled, y_val))
history = mlp_model.fit(X_train_scaled, y_train, **fit_options)

# Learning curves: one panel per quantity, training in blue and validation in red.
curves = history.history
epochs = range(1, len(curves['loss']) + 1)
panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
    ('mae', 'val_mae', 'Training MAE', 'Validation MAE', 'MAE', 'Training and Validation MAE'),
]

plt.figure(figsize=(12, 5))
for position, (train_key, val_key, train_label, val_label, y_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs, curves[train_key], 'b', label=train_label)
    plt.plot(epochs, curves[val_key], 'r', label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Predict on the scaled test split and flatten the model output to a 1-D array.
test_predictions = mlp_model.predict(X_test_scaled)
y_pred = test_predictions.flatten()

# Absolute-scale metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Percentage metrics; the 1e-8 terms keep the denominators away from zero.
signed_error = y_test - y_pred
mape = np.mean(np.abs(signed_error / (y_test + 1e-8))) * 100
symmetric_scale = np.abs(y_test) + np.abs(y_pred) + 1e-8
smape = 100/len(y_test) * np.sum(2 * np.abs(y_pred - y_test) / symmetric_scale)

# Report the results.
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"SMAPE: {smape:.2f}%")

# Side-by-side table of true and predicted targets; show the last ten rows.
results = pd.DataFrame({'ttp_true': y_test, 'ttp_pred': y_pred})

print(results.tail(10))

In [None]:
# Predicted vs. true targets on the test split.
# The dashed red diagonal spans the range of the true values and marks the ideal y = x line.
diagonal = [y_test.min(), y_test.max()]

plt.figure(figsize=(6, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot(diagonal, diagonal, 'r--')
plt.xlabel("True Values")
plt.ylabel("Predicted Values")
plt.title("Predicted vs True Values")
plt.show()


In [None]:
# Build a SHAP explainer for the current model; the scaled training features are passed
# as the explainer's second argument. Attributions are computed for the scaled test features.
explainer = shap.Explainer(mlp_model, X_train_scaled_df)
shap_values = explainer(X_test_scaled_df)

# max_display is set to the number of listed features (binary + numeric).
n_listed_features = len(binary_features) + len(numeric_features)

print("SHAP summary plot:")
shap.plots.beeswarm(shap_values, max_display=n_listed_features)


In [None]:
# Model 7: two 512-unit ReLU hidden layers with elastic-net weight regularisation
# (L1 = 0.001, L2 = 0.01). Layer normalisation is applied after the first hidden layer
# only; each hidden layer is followed by dropout 0.2, and the output is a single linear unit.
n_features = X_train_scaled.shape[1]
hidden_units = 512
l1_strength = 0.001
l2_strength = 0.01
dropout_rate = 0.2

mlp_model = tf.keras.models.Sequential([
    layers.Dense(
        hidden_units,
        activation='relu',
        input_shape=(n_features,),
        kernel_regularizer=regularizers.l1_l2(l1=l1_strength, l2=l2_strength),
    ),
    layers.LayerNormalization(center=True, scale=True),
    layers.Dropout(dropout_rate),
    layers.Dense(
        hidden_units,
        activation='relu',
        kernel_regularizer=regularizers.l1_l2(l1=l1_strength, l2=l2_strength),
    ),
    layers.Dropout(dropout_rate),
    layers.Dense(1, activation='linear'),
])

# MAE is the optimised loss; MAE, MSE and the custom SMAPE are tracked as metrics.
mlp_model.compile(
    optimizer='adam',
    loss='mae',
    metrics=['mae', 'mse', smape_f],
)

# Fit on the scaled training split while monitoring the scaled validation split.
fit_options = dict(epochs=500, batch_size=16, validation_data=(X_val_scaled, y_val))
history = mlp_model.fit(X_train_scaled, y_train, **fit_options)

# Learning curves: one panel per quantity, training in blue and validation in red.
curves = history.history
epochs = range(1, len(curves['loss']) + 1)
panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
    ('mae', 'val_mae', 'Training MAE', 'Validation MAE', 'MAE', 'Training and Validation MAE'),
]

plt.figure(figsize=(12, 5))
for position, (train_key, val_key, train_label, val_label, y_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs, curves[train_key], 'b', label=train_label)
    plt.plot(epochs, curves[val_key], 'r', label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Predict on the scaled test split and flatten the model output to a 1-D array.
test_predictions = mlp_model.predict(X_test_scaled)
y_pred = test_predictions.flatten()

# Absolute-scale metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Percentage metrics; the 1e-8 terms keep the denominators away from zero.
signed_error = y_test - y_pred
mape = np.mean(np.abs(signed_error / (y_test + 1e-8))) * 100
symmetric_scale = np.abs(y_test) + np.abs(y_pred) + 1e-8
smape = 100/len(y_test) * np.sum(2 * np.abs(y_pred - y_test) / symmetric_scale)

# Report the results.
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"SMAPE: {smape:.2f}%")

# Side-by-side table of true and predicted targets; show the last ten rows.
results = pd.DataFrame({'ttp_true': y_test, 'ttp_pred': y_pred})

print(results.tail(10))

In [None]:
# Predicted vs. true targets on the test split.
# The dashed red diagonal spans the range of the true values and marks the ideal y = x line.
diagonal = [y_test.min(), y_test.max()]

plt.figure(figsize=(6, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot(diagonal, diagonal, 'r--')
plt.xlabel("True Values")
plt.ylabel("Predicted Values")
plt.title("Predicted vs True Values")
plt.show()


In [None]:
# Build a SHAP explainer for the current model; the scaled training features are passed
# as the explainer's second argument. Attributions are computed for the scaled test features.
explainer = shap.Explainer(mlp_model, X_train_scaled_df)
shap_values = explainer(X_test_scaled_df)

# max_display is set to the number of listed features (binary + numeric).
n_listed_features = len(binary_features) + len(numeric_features)

print("SHAP summary plot:")
shap.plots.beeswarm(shap_values, max_display=n_listed_features)
