In [1]:
# Install dependencies (add imbalanced-learn)
%pip install pandas matplotlib scikit-learn imbalanced-learn --quiet

from features import get_train_test_data

# Load the feature matrices and label vectors for the train/test split
# produced by the project helper from the training CSV.
x_train, x_test, y_train, y_test = get_train_test_data(
    "data/bank_data_train.csv"
)

# Report the shape of every split, one per line.
print(
    f"x_train shape: {x_train.shape}",
    f"x_test shape: {x_test.shape}",
    f"y_train shape: {y_train.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

# Cell output: the first 5 rows of x_train.
x_train.head()

Note: you may need to restart the kernel to use updated packages.
x_train shape: (284152, 32)
x_test shape: (71038, 32)
y_train shape: (284152,)
y_test shape: (71038,)


Unnamed: 0,CR_PROD_CNT_IL,TURNOVER_DYNAMIC_IL_1M,REST_DYNAMIC_FDEP_1M,REST_DYNAMIC_SAVE_3M,CR_PROD_CNT_VCU,REST_AVG_CUR,CR_PROD_CNT_TOVR,CR_PROD_CNT_PIL,TURNOVER_CC,TURNOVER_PAYM,AGE,CR_PROD_CNT_CC,REST_DYNAMIC_FDEP_3M,REST_DYNAMIC_IL_1M,CR_PROD_CNT_CCFP,REST_DYNAMIC_CUR_1M,REST_AVG_PAYM,LDEAL_GRACE_DAYS_PCT_MED,REST_DYNAMIC_CUR_3M,TURNOVER_DYNAMIC_CUR_1M,REST_DYNAMIC_PAYM_3M,REST_DYNAMIC_IL_3M,TURNOVER_DYNAMIC_IL_3M,REST_DYNAMIC_PAYM_1M,TURNOVER_DYNAMIC_CUR_3M,CLNT_SETUP_TENOR,TURNOVER_DYNAMIC_PAYM_3M,TURNOVER_DYNAMIC_PAYM_1M,REST_DYNAMIC_CC_1M,TURNOVER_DYNAMIC_CC_1M,REST_DYNAMIC_CC_3M,TURNOVER_DYNAMIC_CC_3M
0,2.061893,-0.044935,-0.052273,-0.312231,5.244695,-0.110851,1.176112,-0.192559,-0.038441,-0.093385,-0.976263,-0.242002,-0.086736,-0.068473,-0.064618,0.25893,-0.151018,-0.045033,0.283547,0.190178,-0.375206,-0.099585,-0.075343,-0.284607,0.326273,-0.610612,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
1,-0.244365,-0.044935,-0.052273,2.151389,-0.169621,0.192015,-0.526426,-0.192559,-0.038441,-0.093385,-1.064184,-0.242002,-0.086736,-0.068473,-0.064618,-0.286218,-0.151018,-0.045033,-0.22763,0.140643,-0.375206,-0.099585,-0.075343,-0.284607,-0.316017,1.144931,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
2,-0.244365,-0.044935,-0.052273,-0.312231,-0.169621,-0.260938,-0.526426,-0.192559,-0.038441,-0.093385,1.837201,-0.242002,-0.086736,-0.068473,-0.064618,-0.433742,-0.151018,-0.045033,-0.448334,-0.392441,-0.375206,-0.099585,-0.075343,-0.284607,-0.661855,-1.081618,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
3,2.061893,-0.044935,-0.052273,-0.312231,-0.169621,-0.332548,1.176112,-0.192559,-0.038441,-0.093385,-0.184976,-0.242002,-0.086736,-0.068473,-0.064618,0.039323,-0.151018,-0.045033,0.836577,0.003751,-0.375206,-0.099585,-0.075343,-0.284607,0.89938,-0.425565,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
4,-0.244365,-0.044935,-0.052273,-0.312231,-0.169621,-0.29102,1.176112,-0.192559,-0.038441,-0.093385,-1.327946,-0.242002,-0.086736,-0.068473,-0.064618,0.799208,-0.151018,-0.045033,1.675035,1.528313,-0.375206,-0.099585,-0.075343,-0.284607,1.55469,-0.719649,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104


In [2]:
# Print the column names of x_train, i.e. the features the model is trained on.
print(f"Features used for training: {x_train.columns.tolist()}")

Features used for training: ['CR_PROD_CNT_IL', 'TURNOVER_DYNAMIC_IL_1M', 'REST_DYNAMIC_FDEP_1M', 'REST_DYNAMIC_SAVE_3M', 'CR_PROD_CNT_VCU', 'REST_AVG_CUR', 'CR_PROD_CNT_TOVR', 'CR_PROD_CNT_PIL', 'TURNOVER_CC', 'TURNOVER_PAYM', 'AGE', 'CR_PROD_CNT_CC', 'REST_DYNAMIC_FDEP_3M', 'REST_DYNAMIC_IL_1M', 'CR_PROD_CNT_CCFP', 'REST_DYNAMIC_CUR_1M', 'REST_AVG_PAYM', 'LDEAL_GRACE_DAYS_PCT_MED', 'REST_DYNAMIC_CUR_3M', 'TURNOVER_DYNAMIC_CUR_1M', 'REST_DYNAMIC_PAYM_3M', 'REST_DYNAMIC_IL_3M', 'TURNOVER_DYNAMIC_IL_3M', 'REST_DYNAMIC_PAYM_1M', 'TURNOVER_DYNAMIC_CUR_3M', 'CLNT_SETUP_TENOR', 'TURNOVER_DYNAMIC_PAYM_3M', 'TURNOVER_DYNAMIC_PAYM_1M', 'REST_DYNAMIC_CC_1M', 'TURNOVER_DYNAMIC_CC_1M', 'REST_DYNAMIC_CC_3M', 'TURNOVER_DYNAMIC_CC_3M']


In [3]:
# keras neural network classifier
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

# Seed the Python, NumPy and backend RNGs so that weight initialisation and
# batch shuffling are repeatable across "Restart Kernel -> Run All".
SEED = 42
keras.utils.set_random_seed(SEED)

# Binary classifier: two 64-unit ReLU hidden layers feeding one sigmoid unit.
# The input width is declared with an explicit keras.Input layer instead of
# passing `input_dim` to the first Dense layer; that argument is deprecated
# in Keras 3 and emits a UserWarning when the layer is constructed.
model = Sequential([
    keras.Input(shape=(x_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

import keras
from keras.callbacks import Callback

class StopAtAUC(Callback):
    """Stop training once a monitored AUC value reaches a target.

    At the end of every epoch the callback looks up ``monitor`` in the epoch
    ``logs`` and, if the value is greater than or equal to ``target_auc``,
    sets ``self.model.stop_training = True``.

    Args:
        target_auc: Threshold the monitored value must reach (inclusive).
        monitor: Key to read from the epoch ``logs`` dict, e.g. ``'auc'`` or
            ``'val_auc'``. The key must match a metric name that is actually
            logged; otherwise the callback can never trigger.
    """

    def __init__(self, target_auc=0.85, monitor='val_auc'):
        super().__init__()
        self.target_auc = target_auc
        self.monitor = monitor
        # Ensures the "metric not found" warning is emitted only once per instance.
        self._warned_missing = False

    def on_epoch_end(self, _, logs=None):
        """Check the monitored metric and request a stop when the target is met.

        Args:
            _: Epoch index (unused).
            logs: Mapping of metric name to value for the finished epoch.
                May be ``None``, in which case it is treated as empty.
        """
        # `logs` defaults to None; guard so `.get` cannot raise AttributeError.
        logs = logs or {}
        current_auc = logs.get(self.monitor)

        if current_auc is None:
            # A missing key would otherwise silently disable the callback
            # (training would just run for the full epoch budget).
            if not self._warned_missing:
                import warnings

                warnings.warn(
                    f"StopAtAUC: metric '{self.monitor}' is not available in logs. "
                    f"Available metrics are: {', '.join(sorted(logs))}",
                    RuntimeWarning,
                    stacklevel=2,
                )
                self._warned_missing = True
            return

        if float(current_auc) >= self.target_auc:
            print(f"\n\nReached target AUC of {self.target_auc:.4f}! Stopping training.")
            self.model.stop_training = True

# Training configuration. These constants are also read by the results
# table built in a later cell.
LOSS_FUNCTION = 'binary_crossentropy'
LEARNING_RATE = 0.001
EPOCHS = 300
BATCH_SIZE = 1024

# Stop early once the logged 'auc' metric reaches 0.85. The metric compiled
# below is named 'auc' and fit() receives no validation data, so the value
# being monitored is the one computed on the training data.
stop_at_auc = StopAtAUC(monitor='auc', target_auc=0.85)

model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss=LOSS_FUNCTION,
    metrics=[keras.metrics.AUC(name='auc')],
)

# EPOCHS is an upper bound; the callback may end training sooner.
model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    callbacks=[stop_at_auc],
)

2025-12-29 21:05:15.663583: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-29 21:05:15.663823: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-29 21:05:15.689137: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-29 21:05:16.496351: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off,

Epoch 1/300


  if not hasattr(np, "object"):
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
E0000 00:00:1767038716.889810  302647 cuda_executor.cc:1309] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1767038716.894339  302647 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - auc: 0.6370 - loss: 0.2845
Epoch 2/300
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7371 - loss: 0.2554
Epoch 3/300
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7567 - loss: 0.2497
Epoch 4/300
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7693 - loss: 0.2457
Epoch 5/300
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7792 - loss: 0.2427
Epoch 6/300
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7853 - loss: 0.2408
Epoch 7/300
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7899 - loss: 0.2393
Epoch 8/300
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7947 - loss: 0.2377
Epoch 9/300
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/st

<keras.src.callbacks.history.History at 0x7cefb12ad940>

In [4]:
import pandas as pd

LIBRARYNAME = "keras"
ALGORITHMNAME = "neural_network"

# Metric values currently held by the model. fit() was called without
# validation data, so these are the training-set AUC and loss; the held-out
# x_test / y_test split is not evaluated here.
metrics = model.get_metrics_result()
AUC = metrics['auc']
LOSS = metrics['loss']

# summary() prints the architecture itself and returns None, so it must not be
# wrapped in print() (doing so emitted a stray "None" line in the output).
model.summary()

# One-row record of this run's configuration and metrics.
# NOTE: 'Epochs' is the configured maximum (EPOCHS); the early-stopping
# callback may have ended training after fewer epochs.
pd.DataFrame({
    'Library': [LIBRARYNAME],
    'Algorithm': [ALGORITHMNAME],
    'LossFunction': [LOSS_FUNCTION],
    'LearningRate': [LEARNING_RATE],
    'Epochs': [EPOCHS],
    'BatchSize': [BATCH_SIZE],
    'AUC': [AUC],
    'Loss': [LOSS]
})

None


Unnamed: 0,Library,Algorithm,LossFunction,LearningRate,Epochs,BatchSize,AUC,Loss
0,keras,neural_network,binary_crossentropy,0.001,300,1024,0.85034,0.213116


In [None]:
from features import get_test_data

# Reuse the training column list so the prediction input has the same
# columns, in the same order, as the data the model was fitted on.
selected_features = x_train.columns.tolist()
df = get_test_data("data/bank_data_test.csv", selected_features)

# Keep only the model inputs for prediction. `df` is left untouched because
# the export cell below still reads df['ID'].
test_df = df.loc[:, selected_features]

# Sigmoid outputs, one row per test record; shown as the cell output.
predictions = model.predict(test_df)
predictions

[1m2775/2775[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 396us/step


array([[0.43752006],
       [0.00520549],
       [0.01614418],
       ...,
       [0.04137262],
       [0.00052582],
       [0.37889782]], shape=(88798, 1), dtype=float32)

In [6]:
# Build the export table: the ID column from the test frame plus a TARGET
# column holding the predicted probabilities (the (n, 1) prediction array
# flattened to 1-D), then write it to CSV without the index.
output_df = df[['ID']].assign(TARGET=predictions.flatten())
output_df.to_csv("churn_keras_predictions.csv", index=False)

# Preview the first rows of what was written.
output_df.head()

Unnamed: 0,ID,TARGET
0,400980,0.43752
1,525062,0.005205
2,280316,0.016144
3,496066,0.021359
4,375031,0.000891
