In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, mean_absolute_error, accuracy_score
import tensorflow as tf
from tensorflow.keras import Model, Input, layers, losses, metrics, optimizers, callbacks
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.base import BaseEstimator

In [2]:
# 1. Chargement et Prétraitement
from google.colab import drive
drive.mount('/content/drive')
df = pd.read_excel("/content/drive/MyDrive/GastricCancerData.xlsx", skiprows=1)
#df = pd.read_excel("D:/UADB/MEMOIRE/Data/GastricCancerData.xlsx")

In [3]:
# Encodage des variables catégorielles
cat_cols = df.select_dtypes(include=['object']).columns.tolist()
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

# Renommer la colonne pour éviter les espaces et caractères spéciaux
df.rename(columns={"Tempsdesuivi (Mois)": "Tempsdesuivi"}, inplace=True)

In [5]:
# Pour la variable 'Deces'
print(df['Deces'].value_counts(normalize=True).round(2))
# Pour la variable 'Traitement'
print(df['Traitement'].value_counts(normalize=True).round(2))

Deces
1    0.66
0    0.34
Name: proportion, dtype: float64
Traitement
1    0.55
0    0.45
Name: proportion, dtype: float64


In [6]:
# Séparation des caractéristiques et des cibles
X = df.drop(['Traitement', 'Deces', 'Tempsdesuivi'], axis=1)
y_traitement = df['Traitement']
y_deces = df['Deces']
y_survie = df['Tempsdesuivi']

# Normalisation
num_scaler = StandardScaler()
surv_scaler = StandardScaler()

X[['AGE']] = num_scaler.fit_transform(X[['AGE']])
y_survie = surv_scaler.fit_transform(y_survie.values.reshape(-1, 1))

# Split stratifié sur Deces
X_train, X_test, yt_train, yt_test, yd_train, yd_test, ys_train, ys_test = train_test_split(
    X, y_traitement, y_deces, y_survie, 
    test_size=0.2, 
    stratify=y_deces,  
    random_state=42
)

In [7]:
# 2. Combiner les sorties dans une seule variable cible
# On combine les 3 sorties en une matrice de forme (n_samples, 3)
# Pour les tâches de classification, nous utilisons des valeurs telles qu'elles sont
y_train_combined = np.column_stack([yt_train.values, yd_train.values, ys_train])
y_test_combined  = np.column_stack([yt_test.values, yd_test.values, ys_test])

# ---------------------------
# 3. Définition de l'architecture du modèle
# ---------------------------
def build_advanced_model(input_shape):
    inputs = Input(shape=(input_shape,))
    
    x = layers.Dense(512, activation='swish', kernel_initializer='he_normal')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.6)(x)
    
    x = layers.Dense(256, activation='swish', kernel_initializer='he_normal')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    
    x = layers.Dense(128, activation='swish', kernel_initializer='he_normal')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    
    # Branche 1 : Traitement (classification binaire ou multiclasse)
    t = layers.Dense(64, activation='swish')(x)
    t = layers.Dense(1, activation='sigmoid', name='Traitement')(t)
    
    # Branche 2 : Décès (classification binaire)
    d = layers.Dense(64, activation='swish')(x)
    d = layers.Dense(1, activation='sigmoid', name='Deces')(d)
    
    # Branche 3 : Temps de survie (régression)
    s = layers.Dense(64, activation='swish')(x)
    s = layers.Dense(1, activation='linear', name='Tempsdesuivi')(s)
    
    return Model(inputs=inputs, outputs=[t, d, s])

In [9]:
# ---------------------------
# 4. Wrapper personnalisé pour GridSearchCV
# ---------------------------
class MultiOutputKerasModel(BaseEstimator):
    def __init__(self, learning_rate=1e-3, epochs=100, batch_size=64, verbose=0):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.model_ = None
    
    def build_model(self):
        model = build_advanced_model(X_train.shape[1])
        model.compile(
            optimizer=optimizers.Adam(learning_rate=self.learning_rate),
            loss={
                'Traitement': losses.BinaryCrossentropy(),
                'Deces': losses.BinaryCrossentropy(),
                'Tempsdesuivi': losses.Huber()
            },
            loss_weights=[0.25, 0.45, 0.3],
            metrics={
                'Traitement': metrics.BinaryAccuracy(),
                'Deces': [metrics.BinaryAccuracy(), metrics.AUC(name='auc')],
                'Tempsdesuivi': metrics.MeanAbsoluteError()
            }
        )
        return model
    
    def fit(self, X, y):
        # y est une matrice de forme (n_samples, 3)
        yt_true = y[:, 0]
        yd_true = y[:, 1]
        ys_true = y[:, 2].reshape(-1, 1)
        
        self.model_ = self.build_model()
        self.history_ = self.model_.fit(
            X,
            {'Traitement': yt_true, 'Deces': yd_true, 'Tempsdesuivi': ys_true},
            epochs=self.epochs,
            batch_size=self.batch_size,
            validation_split=0.2,
            verbose=self.verbose
        )
        return self
    
    def predict(self, X):
        return self.model_.predict(X)
    
    def score(self, X, y):
        # On définit le score comme la moyenne des BinaryAccuracy pour Traitement et Décès
        yt_true = y[:, 0]
        yd_true = y[:, 1]
        ys_true = y[:, 2].reshape(-1, 1)
        results = self.model_.evaluate(
            X, 
            {'Traitement': yt_true, 'Deces': yd_true, 'Tempsdesuivi': ys_true},
            verbose=0
        )
        # On suppose que results[4] = Traitement BinaryAccuracy et results[5] = Décès BinaryAccuracy
        return (results[4] + results[5]) / 2.

In [10]:
# ---------------------------
# 5. Optimisation des hyperparamètres via GridSearchCV
# ---------------------------
param_grid = {
    'epochs': [100, 150],
    'batch_size': [32, 64],
    'learning_rate': [1e-3, 1e-4]
}

estimator = MultiOutputKerasModel(verbose=1)
grid_search = GridSearchCV(estimator=estimator, param_grid=param_grid, n_jobs=1, cv=3, scoring='accuracy')

grid_search.fit(X_train.values, y_train_combined)
print(f"Meilleurs hyperparamètres : {grid_search.best_params_}")

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 402ms/step - Deces_auc: 0.5409 - Deces_binary_accuracy: 0.5697 - Deces_loss: 0.7317 - Tempsdesuivi_loss: 0.8279 - Tempsdesuivi_mean_absolute_error: 1.2427 - Traitement_binary_accuracy: 0.4951 - Traitement_loss: 0.8030 - loss: 0.7790 - val_Deces_auc: 0.8600 - val_Deces_binary_accuracy: 0.7500 - val_Deces_loss: 0.5917 - val_Tempsdesuivi_loss: 0.4601 - val_Tempsdesuivi_mean_absolute_error: 0.8374 - val_Traitement_binary_accuracy: 0.4167 - val_Traitement_loss: 0.7164 - val_loss: 0.5791
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - Deces_auc: 0.6177 - Deces_binary_accuracy: 0.6160 - Deces_loss: 0.6758 - Tempsdesuivi_loss: 0.7776 - Tempsdesuivi_mean_absolute_error: 1.2110 - Traitement_binary_accuracy: 0.4864 - Traitement_loss: 0.8548 - loss: 0.7557 - val_Deces_auc: 0.9618 - val_Deces_binary_accuracy: 0.7778 - val_Deces_loss: 0.5297 - val_Tempsdesuivi_loss: 0.4020 - val_Tempsdesu

Traceback (most recent call last):
  File "C:\Users\Sefdine\AppData\Local\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Sefdine\AppData\Local\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Sefdine\AppData\Local\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Sefdine\AppData\Local\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 400ms/step - Deces_auc: 0.4997 - Deces_binary_accuracy: 0.5511 - Deces_loss: 0.8675 - Tempsdesuivi_loss: 0.8604 - Tempsdesuivi_mean_absolute_error: 1.2877 - Traitement_binary_accuracy: 0.5236 - Traitement_loss: 0.7946 - loss: 0.8538 - val_Deces_auc: 0.8309 - val_Deces_binary_accuracy: 0.8056 - val_Deces_loss: 0.5674 - val_Tempsdesuivi_loss: 0.3977 - val_Tempsdesuivi_mean_absolute_error: 0.7265 - val_Traitement_binary_accuracy: 0.6389 - val_Traitement_loss: 0.7170 - val_loss: 0.5435
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - Deces_auc: 0.6007 - Deces_binary_accuracy: 0.6091 - Deces_loss: 0.7739 - Tempsdesuivi_loss: 0.6209 - Tempsdesuivi_mean_absolute_error: 1.0311 - Traitement_binary_accuracy: 0.5593 - Traitement_loss: 0.7762 - loss: 0.7279 - val_Deces_auc: 0.9145 - val_Deces_binary_accuracy: 0.8333 - val_Deces_loss: 0.5106 - val_Tempsdesuivi_loss: 0.3671 - val_Tempsdesu

Traceback (most recent call last):
  File "C:\Users\Sefdine\AppData\Local\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Sefdine\AppData\Local\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Sefdine\AppData\Local\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Sefdine\AppData\Local\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 367ms/step - Deces_auc: 0.3754 - Deces_binary_accuracy: 0.3836 - Deces_loss: 1.0954 - Tempsdesuivi_loss: 0.9586 - Tempsdesuivi_mean_absolute_error: 1.3974 - Traitement_binary_accuracy: 0.4933 - Traitement_loss: 0.9261 - loss: 1.0160 - val_Deces_auc: 0.4327 - val_Deces_binary_accuracy: 0.3333 - val_Deces_loss: 0.7861 - val_Tempsdesuivi_loss: 0.5056 - val_Tempsdesuivi_mean_absolute_error: 0.8581 - val_Traitement_binary_accuracy: 0.4722 - val_Traitement_loss: 0.7014 - val_loss: 0.6469
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - Deces_auc: 0.4929 - Deces_binary_accuracy: 0.4449 - Deces_loss: 1.0065 - Tempsdesuivi_loss: 0.8351 - Tempsdesuivi_mean_absolute_error: 1.2499 - Traitement_binary_accuracy: 0.6195 - Traitement_loss: 0.9279 - loss: 0.9329 - val_Deces_auc: 0.4818 - val_Deces_binary_accuracy: 0.3333 - val_Deces_loss: 0.7550 - val_Tempsdesuivi_loss: 0.4967 - val_Tempsdesu



Epoch 1/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 468ms/step - Deces_auc: 0.3517 - Deces_binary_accuracy: 0.4171 - Deces_loss: 0.9723 - Tempsdesuivi_loss: 0.9496 - Tempsdesuivi_mean_absolute_error: 1.3714 - Traitement_binary_accuracy: 0.5070 - Traitement_loss: 0.7735 - loss: 0.9167 - val_Deces_auc: 0.8603 - val_Deces_binary_accuracy: 0.7778 - val_Deces_loss: 0.6135 - val_Tempsdesuivi_loss: 0.3792 - val_Tempsdesuivi_mean_absolute_error: 0.8263 - val_Traitement_binary_accuracy: 0.6481 - val_Traitement_loss: 0.6486 - val_loss: 0.5554
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - Deces_auc: 0.7013 - Deces_binary_accuracy: 0.6633 - Deces_loss: 0.6460 - Tempsdesuivi_loss: 0.7929 - Tempsdesuivi_mean_absolute_error: 1.1862 - Traitement_binary_accuracy: 0.4646 - Traitement_loss: 0.9153 - loss: 0.7579 - val_Deces_auc: 0.8904 - val_Deces_binary_accuracy: 0.8704 - val_Deces_loss: 0.5374 - val_Tempsdesuivi_loss: 0.2708 - val_Tempsdesu

In [11]:
# ---------------------------
# 6. Entraînement final avec les meilleurs hyperparamètres
# ---------------------------
best_estimator = grid_search.best_estimator_

# Définition des callbacks
cb = [
    EarlyStopping(patience=10, restore_best_weights=True),
    ReduceLROnPlateau(factor=0.5, patience=5)
]

history = best_estimator.model_.fit(
    X_train,
    {'Traitement': yt_train, 'Deces': yd_train, 'Tempsdesuivi': ys_train},
    validation_data=(X_test, {'Traitement': yt_test, 'Deces': yd_test, 'Tempsdesuivi': ys_test}),
    epochs=best_estimator.epochs,
    batch_size=best_estimator.batch_size,
    callbacks=cb,
    verbose=1
)

Epoch 1/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 226ms/step - Deces_auc: 0.9552 - Deces_binary_accuracy: 0.8746 - Deces_loss: 0.2500 - Tempsdesuivi_loss: 0.1728 - Tempsdesuivi_mean_absolute_error: 0.4469 - Traitement_binary_accuracy: 0.6721 - Traitement_loss: 0.6026 - loss: 0.3154 - val_Deces_auc: 0.8686 - val_Deces_binary_accuracy: 0.8088 - val_Deces_loss: 0.8775 - val_Tempsdesuivi_loss: 0.1972 - val_Tempsdesuivi_mean_absolute_error: 0.3690 - val_Traitement_binary_accuracy: 0.6176 - val_Traitement_loss: 0.7188 - val_loss: 0.5234 - learning_rate: 0.0010
Epoch 2/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step - Deces_auc: 0.9588 - Deces_binary_accuracy: 0.8953 - Deces_loss: 0.2353 - Tempsdesuivi_loss: 0.1994 - Tempsdesuivi_mean_absolute_error: 0.4834 - Traitement_binary_accuracy: 0.6608 - Traitement_loss: 0.6024 - loss: 0.3157 - val_Deces_auc: 0.8696 - val_Deces_binary_accuracy: 0.8088 - val_Deces_loss: 0.8957 - val_Tempsdesuivi_loss

In [19]:
# ---------------------------
# 7. Évaluation du modèle
# ---------------------------
def evaluate_model(model, X_test, yt_test, yd_test, ys_test):
    results = model.evaluate(X_test, {'Traitement': yt_test, 'Deces': yd_test, 'Tempsdesuivi': ys_test}, verbose=0)
    print(f"Test Loss: {results[0]:.2f}")
    print(f"Traitement - Accuracy: {results[4]:.2%}")
    print(f"Décès - Accuracy: {results[5]:.2%}  AUC: {results[6]:.2f}")
    print(f"Survie - MAE (scaled): {results[7]:.2f}")
    
    preds = model.predict(X_test)
    yt_pred = (preds[0] > 0.5).astype(int)
    yd_pred = (preds[1] > 0.5).astype(int)
    ys_pred = surv_scaler.inverse_transform(preds[2])
    
    print("\nMatrice de confusion (Décès):")
    print(confusion_matrix(yd_test, yd_pred))
    
    mae_real = mean_absolute_error(surv_scaler.inverse_transform(ys_test.reshape(-1, 1)), ys_pred)
    print(f"\nMAE survie réelle : {mae_real:.2f}")

evaluate_model(best_estimator.model_, X_test, yt_test, yd_test, ys_test)

Test Loss: 0.52
Traitement - Accuracy: 86.86%
Décès - Accuracy: 80.88%  AUC: 0.37
Survie - MAE (scaled): 0.62
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step

Matrice de confusion (Décès):
[[17  6]
 [ 7 38]]

MAE survie réelle : 8.81


In [20]:
# ---------------------------
# 8. Exemple de prédiction
# ---------------------------
sample = X_test.iloc[:1]
pred = best_estimator.model_.predict(sample)
print(f"\nExemple de prédiction:")
print(f"Prob Traitement: {pred[0][0][0]:.2%}")
print(f"Prob Décès: {pred[1][0][0]:.2%}")
print(f"Temps de survie prédit: {surv_scaler.inverse_transform(pred[2])[0][0]:.0f} mois")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step

Exemple de prédiction:
Prob Traitement: 70.77%
Prob Décès: 97.21%
Temps de survie prédit: 7 mois
