In [9]:
# Install dependencies (add imbalanced-learn)
%pip install pandas matplotlib scikit-learn imbalanced-learn --quiet

from features import get_train_test_data

x_train, x_test, y_train, y_test = get_train_test_data("data/bank_data_train.csv")

print(f"x_train shape: {x_train.shape}")
print(f"x_test shape: {x_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

# Display first 5 rows of x_train
x_train.head()

Note: you may need to restart the kernel to use updated packages.
x_train shape: (284152, 32)
x_test shape: (71038, 32)
y_train shape: (284152,)
y_test shape: (71038,)


Unnamed: 0,CR_PROD_CNT_IL,TURNOVER_DYNAMIC_IL_1M,REST_DYNAMIC_FDEP_1M,REST_DYNAMIC_SAVE_3M,CR_PROD_CNT_VCU,REST_AVG_CUR,CR_PROD_CNT_TOVR,CR_PROD_CNT_PIL,TURNOVER_CC,TURNOVER_PAYM,AGE,CR_PROD_CNT_CC,REST_DYNAMIC_FDEP_3M,REST_DYNAMIC_IL_1M,CR_PROD_CNT_CCFP,REST_DYNAMIC_CUR_1M,REST_AVG_PAYM,LDEAL_GRACE_DAYS_PCT_MED,REST_DYNAMIC_CUR_3M,TURNOVER_DYNAMIC_CUR_1M,REST_DYNAMIC_PAYM_3M,REST_DYNAMIC_IL_3M,TURNOVER_DYNAMIC_IL_3M,REST_DYNAMIC_PAYM_1M,TURNOVER_DYNAMIC_CUR_3M,CLNT_SETUP_TENOR,TURNOVER_DYNAMIC_PAYM_3M,TURNOVER_DYNAMIC_PAYM_1M,REST_DYNAMIC_CC_1M,TURNOVER_DYNAMIC_CC_1M,REST_DYNAMIC_CC_3M,TURNOVER_DYNAMIC_CC_3M
0,2.061893,-0.044935,-0.052273,-0.312231,5.244695,-0.110851,1.176112,-0.192559,-0.038441,-0.093385,-0.976263,-0.242002,-0.086736,-0.068473,-0.064618,0.25893,-0.151018,-0.045033,0.283547,0.190178,-0.375206,-0.099585,-0.075343,-0.284607,0.326273,-0.610612,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
1,-0.244365,-0.044935,-0.052273,2.151389,-0.169621,0.192015,-0.526426,-0.192559,-0.038441,-0.093385,-1.064184,-0.242002,-0.086736,-0.068473,-0.064618,-0.286218,-0.151018,-0.045033,-0.22763,0.140643,-0.375206,-0.099585,-0.075343,-0.284607,-0.316017,1.144931,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
2,-0.244365,-0.044935,-0.052273,-0.312231,-0.169621,-0.260938,-0.526426,-0.192559,-0.038441,-0.093385,1.837201,-0.242002,-0.086736,-0.068473,-0.064618,-0.433742,-0.151018,-0.045033,-0.448334,-0.392441,-0.375206,-0.099585,-0.075343,-0.284607,-0.661855,-1.081618,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
3,2.061893,-0.044935,-0.052273,-0.312231,-0.169621,-0.332548,1.176112,-0.192559,-0.038441,-0.093385,-0.184976,-0.242002,-0.086736,-0.068473,-0.064618,0.039323,-0.151018,-0.045033,0.836577,0.003751,-0.375206,-0.099585,-0.075343,-0.284607,0.89938,-0.425565,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104
4,-0.244365,-0.044935,-0.052273,-0.312231,-0.169621,-0.29102,1.176112,-0.192559,-0.038441,-0.093385,-1.327946,-0.242002,-0.086736,-0.068473,-0.064618,0.799208,-0.151018,-0.045033,1.675035,1.528313,-0.375206,-0.099585,-0.075343,-0.284607,1.55469,-0.719649,-0.347538,-0.24017,-0.083058,-0.031616,-0.108653,-0.07104


In [10]:
# print name of features used for training
print(f"Features used for training: {x_train.columns.tolist()}")

Features used for training: ['CR_PROD_CNT_IL', 'TURNOVER_DYNAMIC_IL_1M', 'REST_DYNAMIC_FDEP_1M', 'REST_DYNAMIC_SAVE_3M', 'CR_PROD_CNT_VCU', 'REST_AVG_CUR', 'CR_PROD_CNT_TOVR', 'CR_PROD_CNT_PIL', 'TURNOVER_CC', 'TURNOVER_PAYM', 'AGE', 'CR_PROD_CNT_CC', 'REST_DYNAMIC_FDEP_3M', 'REST_DYNAMIC_IL_1M', 'CR_PROD_CNT_CCFP', 'REST_DYNAMIC_CUR_1M', 'REST_AVG_PAYM', 'LDEAL_GRACE_DAYS_PCT_MED', 'REST_DYNAMIC_CUR_3M', 'TURNOVER_DYNAMIC_CUR_1M', 'REST_DYNAMIC_PAYM_3M', 'REST_DYNAMIC_IL_3M', 'TURNOVER_DYNAMIC_IL_3M', 'REST_DYNAMIC_PAYM_1M', 'TURNOVER_DYNAMIC_CUR_3M', 'CLNT_SETUP_TENOR', 'TURNOVER_DYNAMIC_PAYM_3M', 'TURNOVER_DYNAMIC_PAYM_1M', 'REST_DYNAMIC_CC_1M', 'TURNOVER_DYNAMIC_CC_1M', 'REST_DYNAMIC_CC_3M', 'TURNOVER_DYNAMIC_CC_3M']


In [None]:
# keras neural network classifier
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

model = Sequential([
    Dense(64, activation='relu', input_dim=x_train.shape[1]),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

import keras
from keras.callbacks import Callback

class StopAtAUC(Callback):
    def __init__(self, target_auc=0.85, monitor='val_auc'):
        super().__init__()
        self.target_auc = target_auc
        self.monitor = monitor
    
    def on_epoch_end(self, epoch, logs=None):
        current_auc = logs.get(self.monitor)
        if current_auc is not None and current_auc >= self.target_auc:
            print(f"\n\nReached target AUC of {self.target_auc:.4f}! Stopping training.")
            self.model.stop_training = True

stop_at_auc = StopAtAUC(target_auc=0.85, monitor='val_auc')

model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=[keras.metrics.AUC(name='auc')])

model.fit(x_train, y_train, epochs=500, batch_size=1024, validation_split=0.2, callbacks=[stop_at_auc])

Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - auc: 0.6075 - loss: 0.2954 - val_auc: 0.6925 - val_loss: 0.2614
Epoch 2/500
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - auc: 0.7201 - loss: 0.2605 - val_auc: 0.7345 - val_loss: 0.2530
Epoch 3/500
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - auc: 0.7443 - loss: 0.2547 - val_auc: 0.7516 - val_loss: 0.2487
Epoch 4/500
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - auc: 0.7588 - loss: 0.2504 - val_auc: 0.7611 - val_loss: 0.2456
Epoch 5/500
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - auc: 0.7702 - loss: 0.2468 - val_auc: 0.7707 - val_loss: 0.2421
Epoch 6/500
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - auc: 0.7775 - loss: 0.2443 - val_auc: 0.7740 - val_loss: 0.2410
Epoch 7/500
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - auc: 

<keras.src.callbacks.history.History at 0x726a81988e00>