In [1]:
from datetime import datetime as dt
from preprocessing import Preprocessing
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.utils import resample

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report, roc_auc_score, RocCurveDisplay

In [2]:
import tensorflow as tf
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [None]:
from keras import Sequential
from keras.layers import Dense
from keras.metrics import AUC
from keras.optimizers import Adam

# Preprocessing

In [4]:
col_drop = ['CustomerId','Surname']
types = {
    'HasCrCard':bool,
    'IsActiveMember':bool
}
train = pd.read_csv('data/train.csv', index_col='id', dtype=types).drop(columns=col_drop)
test = pd.read_csv('data/test.csv', index_col='id', dtype=types).drop(columns=col_drop)

In [5]:
train.shape

(165034, 11)

In [6]:
X = train.drop(columns='Exited')
y = train.Exited

In [7]:
X_train,X_test,y_train,y_test = train_test_split(X,y, stratify=y, test_size=0.15)

In [8]:
prepro = Preprocessing()

In [9]:
X_train_scld = prepro.fit_transform(X_train).astype(float)
X_test_scld = prepro.transform(X_test).astype(float)

In [10]:
X_train_scld

Unnamed: 0_level_0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
77660,1.035398,1.0,-0.5,-0.75,0.000000,0.0,1.0,1.0,0.733255,1.0,0.0,0.0
97534,0.831858,1.0,-0.8,0.00,1.022958,0.0,1.0,1.0,-0.436728,1.0,0.0,0.0
41159,0.070796,1.0,0.0,1.00,0.885652,-1.0,1.0,1.0,0.184350,0.0,0.0,1.0
101119,0.654867,1.0,0.1,1.25,0.000000,0.0,1.0,0.0,-0.690745,1.0,0.0,0.0
83401,1.017699,1.0,-0.7,-1.25,0.482963,-1.0,0.0,0.0,0.646400,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
77262,0.477876,1.0,-0.3,-1.00,0.000000,0.0,1.0,1.0,0.658913,1.0,0.0,0.0
104886,0.407080,1.0,-0.2,-0.25,0.000000,0.0,1.0,0.0,0.520938,1.0,0.0,0.0
13483,-0.336283,1.0,-0.1,0.50,1.195644,-1.0,1.0,0.0,0.315208,0.0,0.0,1.0
158903,-1.026549,1.0,-0.2,0.00,0.000000,0.0,0.0,1.0,-1.275484,1.0,0.0,0.0


# Model

In [11]:
X_train_scld.shape

(140278, 12)

In [12]:
model = Sequential(name="BankChurn")
model.add(Dense(units=64, activation='relu', input_dim=12, name="Dense64"))
model.add(Dense(units=32, activation='relu', input_dim=12, name="Dense64"))
model.add(Dense(units=16, activation='relu', input_dim=12, name="Dense64"))
model.add(Dense(units=1, activation='sigmoid', name="Dense1"))


model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=[AUC()])


2024-01-08 11:46:59.668375: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Max
2024-01-08 11:46:59.668394: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-01-08 11:46:59.668402: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-01-08 11:46:59.668742: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-01-08 11:46:59.669082: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [13]:
model.summary()

Model: "BankChurn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense64 (Dense)             (None, 100)               1300      
                                                                 
 Dense1 (Dense)              (None, 1)                 101       
                                                                 
Total params: 1401 (5.47 KB)
Trainable params: 1401 (5.47 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
history = model.fit(X_train_scld.values, y_train.values, epochs=20, batch_size=32, validation_split=0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
pred = model.predict(X_test_scld.values)



In [None]:
pred[:,0]

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

# Score

In [None]:
RocScore  = roc_auc_score(y_test, pred[:,0])
print(f'Roc Auc Score : {RocScore:.6f}')

Roc Auc Score : 0.591435


In [None]:
RocCurveDisplay.from_predictions(y_test,pred).plot()

# Submission

In [None]:
X_val_scld = prepro.transform(test)

In [None]:
submission = pd.read_csv("data/sample_submission.csv", index_col='id')

In [None]:
submission.loc[:,'Exited'] = nn.predict_proba(X_val_scld)[:,1]



In [None]:
submission

Unnamed: 0_level_0,Exited
id,Unnamed: 1_level_1
165034,0.085936
165035,0.947197
165036,0.079534
165037,0.519489
165038,0.638603
...,...
275052,0.132850
275053,0.217605
275054,0.071276
275055,0.381622


In [None]:
name = dt.now().strftime("%Y%m%d_%H%M")

In [None]:
#submission.to_csv(f"submission/{name}.csv")