In [1]:
# Install dependencies (add imbalanced-learn)
%pip install pandas matplotlib scikit-learn imbalanced-learn --quiet

from features import get_train_test_data

# Load the train/test feature and label splits produced by the project helper.
x_train, x_test, y_train, y_test = get_train_test_data("data/bank_data_train.csv")

# Report the shape of each split, one per line, as a quick sanity check.
print(
    f"x_train shape: {x_train.shape}",
    f"x_test shape: {x_test.shape}",
    f"y_train shape: {y_train.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

# Preview the first five rows of the training features (rendered as cell output).
x_train.head(5)

Note: you may need to restart the kernel to use updated packages.
x_train shape: (284152, 32)
x_test shape: (71038, 32)
y_train shape: (284152,)
y_test shape: (71038,)


Unnamed: 0,CR_PROD_CNT_IL,TURNOVER_DYNAMIC_IL_1M,REST_DYNAMIC_FDEP_1M,REST_DYNAMIC_SAVE_3M,CR_PROD_CNT_VCU,REST_AVG_CUR,CR_PROD_CNT_TOVR,CR_PROD_CNT_PIL,TURNOVER_CC,TURNOVER_PAYM,AGE,CR_PROD_CNT_CC,REST_DYNAMIC_FDEP_3M,REST_DYNAMIC_IL_1M,CR_PROD_CNT_CCFP,REST_DYNAMIC_CUR_1M,REST_AVG_PAYM,LDEAL_GRACE_DAYS_PCT_MED,REST_DYNAMIC_CUR_3M,TURNOVER_DYNAMIC_CUR_1M,REST_DYNAMIC_PAYM_3M,REST_DYNAMIC_IL_3M,TURNOVER_DYNAMIC_IL_3M,REST_DYNAMIC_PAYM_1M,TURNOVER_DYNAMIC_CUR_3M,CLNT_SETUP_TENOR,TURNOVER_DYNAMIC_PAYM_3M,TURNOVER_DYNAMIC_PAYM_1M,REST_DYNAMIC_CC_1M,TURNOVER_DYNAMIC_CC_1M,REST_DYNAMIC_CC_3M,TURNOVER_DYNAMIC_CC_3M
0,2.061893,-0.044935,-0.052273,-0.312231,5.244695,-0.110851,1.176112,-0.192559,-0.038441,-0.093385,-0.976263,-0.242002,-0.086736,-0.068473,-0.064618,0.25893,-0.151018,-0.045033,0.283547,0.190178,-0.375206,-0.099585,-0.075343,-0.284607,0.326273,-0.610612,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
1,-0.244365,-0.044935,-0.052273,2.151389,-0.169621,0.192015,-0.526426,-0.192559,-0.038441,-0.093385,-1.064184,-0.242002,-0.086736,-0.068473,-0.064618,-0.286218,-0.151018,-0.045033,-0.22763,0.140643,-0.375206,-0.099585,-0.075343,-0.284607,-0.316017,1.144931,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
2,-0.244365,-0.044935,-0.052273,-0.312231,-0.169621,-0.260938,-0.526426,-0.192559,-0.038441,-0.093385,1.837201,-0.242002,-0.086736,-0.068473,-0.064618,-0.433742,-0.151018,-0.045033,-0.448334,-0.392441,-0.375206,-0.099585,-0.075343,-0.284607,-0.661855,-1.081618,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
3,2.061893,-0.044935,-0.052273,-0.312231,-0.169621,-0.332548,1.176112,-0.192559,-0.038441,-0.093385,-0.184976,-0.242002,-0.086736,-0.068473,-0.064618,0.039323,-0.151018,-0.045033,0.836577,0.003751,-0.375206,-0.099585,-0.075343,-0.284607,0.89938,-0.425565,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
4,-0.244365,-0.044935,-0.052273,-0.312231,-0.169621,-0.29102,1.176112,-0.192559,-0.038441,-0.093385,-1.327946,-0.242002,-0.086736,-0.068473,-0.064618,0.799208,-0.151018,-0.045033,1.675035,1.528313,-0.375206,-0.099585,-0.075343,-0.284607,1.55469,-0.719649,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104


In [2]:
# Print the column names of x_train, i.e. the features the model is trained on.
print(f"Features used for training: {x_train.columns.tolist()}")

Features used for training: ['CR_PROD_CNT_IL', 'TURNOVER_DYNAMIC_IL_1M', 'REST_DYNAMIC_FDEP_1M', 'REST_DYNAMIC_SAVE_3M', 'CR_PROD_CNT_VCU', 'REST_AVG_CUR', 'CR_PROD_CNT_TOVR', 'CR_PROD_CNT_PIL', 'TURNOVER_CC', 'TURNOVER_PAYM', 'AGE', 'CR_PROD_CNT_CC', 'REST_DYNAMIC_FDEP_3M', 'REST_DYNAMIC_IL_1M', 'CR_PROD_CNT_CCFP', 'REST_DYNAMIC_CUR_1M', 'REST_AVG_PAYM', 'LDEAL_GRACE_DAYS_PCT_MED', 'REST_DYNAMIC_CUR_3M', 'TURNOVER_DYNAMIC_CUR_1M', 'REST_DYNAMIC_PAYM_3M', 'REST_DYNAMIC_IL_3M', 'TURNOVER_DYNAMIC_IL_3M', 'REST_DYNAMIC_PAYM_1M', 'TURNOVER_DYNAMIC_CUR_3M', 'CLNT_SETUP_TENOR', 'TURNOVER_DYNAMIC_PAYM_3M', 'TURNOVER_DYNAMIC_PAYM_1M', 'REST_DYNAMIC_CC_1M', 'TURNOVER_DYNAMIC_CC_1M', 'REST_DYNAMIC_CC_3M', 'TURNOVER_DYNAMIC_CC_3M']


In [3]:
import tensorflow as tf

# Define model architecture
def create_model(input_dim):
    """Build an uncompiled feed-forward binary classifier.

    The network is: input of width ``input_dim`` -> Dense(64, relu) ->
    Dense(64, relu) -> Dense(1, sigmoid).

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A ``tf.keras.Model`` mapping the input tensor to a single sigmoid output.
    """
    dense = tf.keras.layers.Dense

    features = tf.keras.Input(shape=(input_dim,))

    # Stack the two identical hidden layers in order.
    hidden = features
    for units in (64, 64):
        hidden = dense(units, activation='relu')(hidden)

    # Single sigmoid unit for the binary target.
    probability = dense(1, activation='sigmoid')(hidden)

    return tf.keras.Model(inputs=features, outputs=probability)

# Build the network with one input per feature column of x_train.
model = create_model(x_train.shape[1])

# Compile model: Adam optimizer, binary cross-entropy loss, and an AUC metric
# reported under the log key 'auc' (the key the StopAtTargetAUC callback reads).
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC(name='auc')]
)

# Custom callback to stop at target AUC
class StopAtTargetAUC(tf.keras.callbacks.Callback):
    """Stop training once a monitored metric reaches a target value.

    At the end of every epoch the callback looks up ``monitor`` in the epoch
    logs and, if the value is at least ``target_auc``, sets
    ``self.model.stop_training = True``.

    Args:
        target_auc: Threshold the monitored value must reach (inclusive).
        monitor: Key to read from the epoch ``logs`` dict. Defaults to
            ``'auc'``, the name given to the AUC metric in ``model.compile``.
            That key is the metric computed on the training data; pass
            ``'val_auc'`` when validation data is supplied to ``fit`` to stop
            on the validation metric instead.
    """

    def __init__(self, target_auc=0.85, monitor='auc'):
        super().__init__()
        self.target_auc = target_auc
        self.monitor = monitor

    def on_epoch_end(self, epoch, logs=None):
        """Check the monitored metric and request a stop if the target is met.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Dict of metric results for the epoch; may be None.
        """
        # `logs` defaults to None, so guard before calling .get() on it.
        current = (logs or {}).get(self.monitor)
        if current is None:
            # Metric not reported this epoch; nothing to compare against.
            return

        if float(current) >= self.target_auc:
            print(f"\nReached target AUC of {self.target_auc}! Stopping training.")
            self.model.stop_training = True

# Train model for up to 500 epochs in batches of 1024 samples; the callback
# ends training early once the logged 'auc' reaches 0.85.
# NOTE(review): no validation_data / validation_split is passed here, so the
# 'auc' value driving the early stop appears to be the training-set AUC, not a
# held-out metric — confirm this is intended.
history = model.fit(
    x_train,
    y_train,
    epochs=500,
    batch_size=1024,
    callbacks=[StopAtTargetAUC(target_auc=0.85)],
    verbose=1
)

2025-12-28 19:10:17.856068: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-28 19:10:17.856324: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-28 19:10:17.882011: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-28 19:10:18.641864: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off,

Epoch 1/500
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - auc: 0.6115 - loss: 0.2953
Epoch 2/500
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - auc: 0.7268 - loss: 0.2582
Epoch 3/500
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7496 - loss: 0.2523
Epoch 4/500
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7626 - loss: 0.2482
Epoch 5/500
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7737 - loss: 0.2446
Epoch 6/500
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7820 - loss: 0.2419
Epoch 7/500
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7876 - loss: 0.2400
Epoch 8/500
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - auc: 0.7925 - loss: 0.2386
Epoch 9/500
[1m278/278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0