In [2]:
import numpy as np
import pandas as pd
import optuna
import tensorflow as tf

from tensorflow.keras.layers import (
    Input, Dense, Dropout, BatchNormalization, Conv1D, MaxPooling1D,
    Flatten, SimpleRNN, LSTM, MultiHeadAttention, LayerNormalization,
    GlobalAveragePooling1D
)
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss

print("TensorFlow:", tf.__version__)
print("Keras:", tf.keras.__version__)
print("Optuna:", optuna.__version__)


  from .autonotebook import tqdm as notebook_tqdm


TensorFlow: 2.20.0
Keras: 3.12.0
Optuna: 4.6.0


In [4]:
df = pd.read_csv("processed_churn.csv")

X = df.drop(columns=["Churn"])
y = df["Churn"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

input_dim = X_train_scaled.shape[1]
seq_len   = input_dim

X_train_seq = np.expand_dims(X_train_scaled, axis=-1)
X_test_seq  = np.expand_dims(X_test_scaled, axis=-1)

print("Train:", X_train_scaled.shape)
print("Test:", X_test_scaled.shape)


Train: (5634, 26)
Test: (1409, 26)


In [11]:
def build_dl_model(trial, model_type, input_dim, seq_len):
    lr      = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
    l2_reg  = trial.suggest_float("l2_reg", 1e-6, 1e-3, log=True)
    dropout = trial.suggest_float("dropout", 0.0, 0.4)
    reg     = regularizers.l2(l2_reg)

    # =========================
    # MLP
    # =========================
    if model_type == "mlp":
        u1 = trial.suggest_int("mlp_u1", 64, 256, step=32)
        u2 = trial.suggest_int("mlp_u2", 32, 128, step=32)

        model = Sequential([
            Dense(u1, activation="relu", kernel_regularizer=reg, input_dim=input_dim),
            BatchNormalization(),
            Dropout(dropout),

            Dense(u2, activation="relu", kernel_regularizer=reg),
            BatchNormalization(),
            Dropout(dropout),

            Dense(1, activation="sigmoid")
        ])

    # =========================
    # CNN 1D
    # =========================
    elif model_type == "cnn":
        f1 = trial.suggest_int("cnn_f1", 32, 128, step=32)
        f2 = trial.suggest_int("cnn_f2", 16, 64, step=16)
        k  = trial.suggest_int("cnn_k", 2, 5)

        inputs = Input(shape=(seq_len, 1))
        x = Conv1D(f1, k, activation="relu", kernel_regularizer=reg)(inputs)
        x = BatchNormalization()(x)
        x = MaxPooling1D(2)(x)
        x = Dropout(dropout)(x)

        x = Conv1D(f2, k, activation="relu", kernel_regularizer=reg)(x)
        x = BatchNormalization()(x)
        x = MaxPooling1D(2)(x)
        x = Dropout(dropout)(x)

        x = Flatten()(x)
        x = Dense(64, activation="relu", kernel_regularizer=reg)(x)
        x = Dropout(dropout)(x)
        outputs = Dense(1, activation="sigmoid")(x)

        model = Model(inputs, outputs)

    # =========================
    # RNN
    # =========================
    elif model_type == "rnn":
        units = trial.suggest_int("rnn_units", 32, 128, step=32)

        model = Sequential([
            SimpleRNN(units, input_shape=(seq_len,1),
                      kernel_regularizer=reg, recurrent_regularizer=reg),
            Dropout(dropout),
            Dense(32, activation="relu", kernel_regularizer=reg),
            BatchNormalization(),
            Dense(1, activation="sigmoid")
        ])

    # =========================
    # LSTM
    # =========================
    elif model_type == "lstm":
        units = trial.suggest_int("lstm_units", 32, 128, step=32)

        model = Sequential([
            LSTM(units, input_shape=(seq_len,1),
                 kernel_regularizer=reg, recurrent_regularizer=reg),
            Dropout(dropout),
            Dense(32, activation="relu", kernel_regularizer=reg),
            BatchNormalization(),
            Dense(1, activation="sigmoid")
        ])

    # =========================
    # Transformer Encoder
    # =========================
    elif model_type == "transformer":
        d_model = trial.suggest_int("t_dmodel", 32, 96, step=32)
        heads   = trial.suggest_int("t_heads", 2, 4)
        ff_dim  = trial.suggest_int("t_ff", 64, 256, step=64)

        inputs = Input(shape=(seq_len,1))
        x = Dense(d_model, kernel_regularizer=reg)(inputs)

        att = MultiHeadAttention(num_heads=heads, key_dim=d_model//heads)(x, x)
        x = LayerNormalization(epsilon=1e-6)(x + att)

        ff = Dense(ff_dim, activation="relu")(x)
        ff = Dense(d_model)(ff)
        x = LayerNormalization(epsilon=1e-6)(x + ff)

        x = GlobalAveragePooling1D()(x)
        x = Dropout(dropout)(x)

        x = Dense(64, activation="relu")(x)
        x = Dropout(dropout)(x)

        outputs = Dense(1, activation="sigmoid")(x)

        model = Model(inputs, outputs)

    else:
        raise ValueError("Invalid model type.")

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    return model


In [15]:
def objective(trial):
    tf.keras.backend.clear_session()

    model_type = trial.suggest_categorical(
        "model_type",
        ["mlp", "cnn", "rnn", "lstm", "transformer"]
    )

    X_tr, X_val, y_tr, y_val = train_test_split(
        X_train_scaled, y_train,
        test_size=0.2,
        random_state=42,
        stratify=y_train
    )

    if model_type == "mlp":
        X_tr_in = X_tr
        X_val_in = X_val
    else:
        X_tr_in = np.expand_dims(X_tr, axis=-1)
        X_val_in = np.expand_dims(X_val, axis=-1)

    model = build_dl_model(trial, model_type, input_dim, seq_len)

    early = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

    model.fit(
        X_tr_in, y_tr,
        validation_data=(X_val_in, y_val),
        epochs=15,
        batch_size=trial.suggest_categorical("batch", [32,64,128]),
        verbose=0,
        callbacks=[early]
    )

    # =====================
    # TRAIN RESULT
    # =====================
    train_proba = model.predict(X_tr_in, verbose=0).flatten()
    train_pred  = (train_proba >= 0.5).astype(int)

    train_acc  = accuracy_score(y_tr, train_pred)
    train_auc  = roc_auc_score(y_tr, train_proba)
    train_loss = log_loss(y_tr, train_proba)

    # =====================
    # VALIDATION RESULT
    # =====================
    val_proba = model.predict(X_val_in, verbose=0).flatten()
    val_pred  = (val_proba >= 0.5).astype(int)

    val_acc  = accuracy_score(y_val, val_pred)
    val_auc  = roc_auc_score(y_val, val_proba)
    val_loss = log_loss(y_val, val_proba)

    # save them
    trial.set_user_attr("model_type", model_type)
    trial.set_user_attr("train_acc", float(train_acc))
    trial.set_user_attr("train_auc", float(train_auc))
    trial.set_user_attr("train_loss", float(train_loss))
    trial.set_user_attr("val_acc", float(val_acc))
    trial.set_user_attr("val_loss", float(val_loss))

    return val_auc


In [16]:
N_TRIALS = 20

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=N_TRIALS)

print("Best AUC:", study.best_value)
print("Best Params:", study.best_trial.params)
print("Best Model:", study.best_trial.user_attrs["model_type"])


[I 2025-12-06 21:20:43,611] A new study created in memory with name: no-name-2eb9c8b4-3d63-4703-97c8-76e0a60e9c0a


  super().__init__(**kwargs)
[I 2025-12-06 21:20:59,768] Trial 0 finished with value: 0.8289628875640218 and parameters: {'model_type': 'lstm', 'lr': 0.001507423277447287, 'l2_reg': 1.945916438398404e-06, 'dropout': 0.13661754251554234, 'lstm_units': 32, 'batch': 32}. Best is trial 0 with value: 0.8289628875640218.
[I 2025-12-06 21:21:04,329] Trial 1 finished with value: 0.8297343803014882 and parameters: {'model_type': 'cnn', 'lr': 0.0005884454493730873, 'l2_reg': 7.3723389208437366e-06, 'dropout': 0.2700734744217837, 'cnn_f1': 96, 'cnn_f2': 64, 'cnn_k': 3, 'batch': 128}. Best is trial 1 with value: 0.8297343803014882.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
[I 2025-12-06 21:21:08,497] Trial 2 finished with value: 0.8343431405813259 and parameters: {'model_type': 'mlp', 'lr': 0.00010965498492198123, 'l2_reg': 0.0005113627253088087, 'dropout': 0.09768300898496399, 'mlp_u1': 64, 'mlp_u2': 32, 'batch': 128}. Best is trial 2 with value: 0.8343431405813259.


Best AUC: 0.8482704021456383
Best Params: {'model_type': 'rnn', 'lr': 0.0003557479919953826, 'l2_reg': 1.581789284287251e-05, 'dropout': 0.3484681157034343, 'rnn_units': 128, 'batch': 64}
Best Model: rnn


In [18]:
rows = []
for t in study.trials:
    if t.state.name != "COMPLETE":
        continue

    rows.append({
        "model_type": t.user_attrs["model_type"],
        "train_acc":  t.user_attrs["train_acc"],
        "val_acc":    t.user_attrs["val_acc"],
        "train_auc":  t.user_attrs["train_auc"],
        "val_auc":    t.value,
        "train_loss": t.user_attrs["train_loss"],
        "val_loss":   t.user_attrs["val_loss"]
    })

df_dl = pd.DataFrame(rows)

best_per_model = (
    df_dl.sort_values("val_auc", ascending=False)
         .groupby("model_type")
         .first()
         .reset_index()
)

print("\n=== Final Deep Learning Comparison Table ===\n")
display(best_per_model)



=== Final Deep Learning Comparison Table ===



Unnamed: 0,model_type,train_acc,val_acc,train_auc,val_auc,train_loss,val_loss
0,cnn,0.795207,0.80213,0.845896,0.842175,0.435847,0.439106
1,lstm,0.799201,0.800355,0.838617,0.828963,0.425759,0.434288
2,mlp,0.81562,0.813665,0.868553,0.842555,0.403816,0.427278
3,rnn,0.797204,0.801242,0.853521,0.84827,0.427354,0.430395
4,transformer,0.745729,0.737356,0.693464,0.685461,0.5339,0.539155


In [19]:
best_lr = 0.0005694339909914094
best_l2 = 4.982806891517478e-06
best_dropout = 0.3179818384226337
best_u1 = 256
best_u2 = 128
best_batch = 32

reg = tf.keras.regularizers.l2(best_l2)

In [20]:
def build_best_mlp():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(best_u1, activation="relu", kernel_regularizer=reg, input_dim=input_dim),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(best_dropout),

        tf.keras.layers.Dense(best_u2, activation="relu", kernel_regularizer=reg),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(best_dropout),

        tf.keras.layers.Dense(1, activation="sigmoid")
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=best_lr)
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"]
    )
    return model

In [21]:
best_mlp_model = build_best_mlp()
best_mlp_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
early = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=4,
    restore_best_weights=True
)

In [23]:
history = best_mlp_model.fit(
    X_train_scaled, 
    y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=best_batch,
    verbose=1,
    callbacks=[early]
)

Epoch 1/20
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.7124 - loss: 0.6189 - val_accuracy: 0.7391 - val_loss: 0.5035
Epoch 2/20
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7659 - loss: 0.5111 - val_accuracy: 0.7737 - val_loss: 0.4699
Epoch 3/20
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7797 - loss: 0.4774 - val_accuracy: 0.7959 - val_loss: 0.4422
Epoch 4/20
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7854 - loss: 0.4620 - val_accuracy: 0.7933 - val_loss: 0.4477
Epoch 5/20
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7945 - loss: 0.4480 - val_accuracy: 0.7870 - val_loss: 0.4522
Epoch 6/20
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7934 - loss: 0.4416 - val_accuracy: 0.7773 - val_loss: 0.4533
Epoch 7/20
[1m141/141[0m 

In [24]:
test_proba = best_mlp_model.predict(X_test_scaled).flatten()
test_pred = (test_proba >= 0.5).astype(int)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [25]:
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss, classification_report

test_acc  = accuracy_score(y_test, test_pred)
test_auc  = roc_auc_score(y_test, test_proba)
test_loss = log_loss(y_test, test_proba)

print("===== FINAL MLP TEST RESULTS =====")
print("Test Accuracy:", test_acc)
print("Test AUC:", test_auc)
print("Test Log Loss:", test_loss)
print("\nClassification Report:\n", classification_report(y_test, test_pred))


===== FINAL MLP TEST RESULTS =====
Test Accuracy: 0.7934705464868701
Test AUC: 0.8429900023250407
Test Log Loss: 0.4226578774074473

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.92      0.87      1035
           1       0.66      0.45      0.54       374

    accuracy                           0.79      1409
   macro avg       0.74      0.68      0.70      1409
weighted avg       0.78      0.79      0.78      1409



In [26]:
best_mlp_model.save("best_deep_learning_model.h5")
print("\nModel saved as best_deep_learning_model.h5")




Model saved as best_deep_learning_model.h5


In [28]:
import joblib
joblib.dump(scaler, "scaler.pkl")
print("Scaler saved as scaler.pkl")


Scaler saved as scaler.pkl
