In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow import keras
import os
import tempfile
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the churn CSV, keep every column except the final two, and preview the result.
# NOTE(review): the two dropped trailing columns are presumably precomputed
# classifier outputs shipped with the dataset -- confirm against the CSV header.
csv_path = r'/kaggle/input/credit-card-customers/BankChurners.csv'
raw_df = pd.read_csv(csv_path).pipe(lambda frame: frame[frame.columns[:-2]])
raw_df.head()

In [None]:
# Show the dtype of every column to see which are numeric and which hold strings.
raw_df.dtypes

In [None]:
# Convert every numeric column to float32.
#
# The cast is done with DataFrame.astype and the result is re-bound to raw_df.
# Assigning through `raw_df.loc[:, cols] = ...` writes values *in place* on
# pandas >= 2.0, which keeps the original int64/float64 dtypes and would leave
# the later `raw_df.dtypes == np.float32` feature selection empty.
# select_dtypes('number') is also independent of the platform's default int width.
numeric_cols = raw_df.select_dtypes(include='number').columns
raw_df = raw_df.astype({col: np.float32 for col in numeric_cols})


In [None]:
# Count the missing (null/NaN) values in each column of raw_df.
raw_df.isnull().sum()

In [None]:
# Encode the target column: 1 = existing customer, 0 = attrited (churned) customer.
dict_cate = {'Existing Customer': 1, 'Attrited Customer': 0}

# Only map while the column still holds the raw string labels. Without this
# guard, re-running the cell raises KeyError because the values are already 0/1.
# An unknown label still raises KeyError, exactly as before.
if not raw_df['Attrition_Flag'].isin(list(dict_cate.values())).all():
    raw_df['Attrition_Flag'] = raw_df['Attrition_Flag'].map(lambda label: dict_cate[label])

# minlength=2 guarantees two counts even if one class is missing from the data,
# so the tuple unpacking cannot fail. Index 0 = churned, index 1 = existing.
churned, existed = np.bincount(raw_df['Attrition_Flag'], minlength=2)
print('Existed Customer : {}, Churned Customer : {}'.format(existed, churned))

In [None]:
# Fixed seed so the split, and every metric computed downstream, is reproducible
# under Restart & Run All.
SPLIT_SEED = 42

# Hold out 20% of the rows for testing, then 20% of the remainder for validation.
# Both splits are stratified on the target so that each subset keeps the same
# churned/existing ratio as the full (imbalanced) dataset.
train, test = train_test_split(
    raw_df,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=raw_df['Attrition_Flag'],
)
train, val = train_test_split(
    train,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=train['Attrition_Flag'],
)
print('Train number of rows : {}'.format(train.shape[0]))
print('Test number of rows : {}'.format(test.shape[0]))
print('Validation number of rows : {}'.format(val.shape[0]))

In [None]:
def tf_dataset(df, shuffle=False, batch_size=32):
    """Turn a DataFrame into a batched ``tf.data.Dataset`` of (features, label).

    Args:
        df: DataFrame containing the feature columns plus an
            ``'Attrition_Flag'`` column, which is used as the label.
        shuffle: When True, shuffle the examples with a buffer covering the
            whole frame.
        batch_size: Number of examples per batch; also passed to ``prefetch``.

    Returns:
        A dataset yielding ``(dict of column name -> values, label)`` batches.

    Raises:
        KeyError: If ``df`` has no ``'Attrition_Flag'`` column.
    """
    # Work on a copy so the caller's DataFrame keeps its label column; popping
    # from the original would make a second call on the same frame fail.
    features = df.copy()
    label = features.pop('Attrition_Flag')
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), label))
    if shuffle:
        # Size the buffer from the frame actually passed in rather than from a
        # notebook global; shuffle() requires a buffer size of at least 1.
        dataset = dataset.shuffle(buffer_size=max(len(features), 1))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(batch_size)
    return dataset
# Replace each DataFrame split with its tf.data pipeline; only the training
# split is shuffled.
train, test, val = (
    tf_dataset(train, shuffle=True),
    tf_dataset(test),
    tf_dataset(val),
)

In [None]:
def normalization(df, col):
    """Return a Normalization layer adapted to a single feature of a dataset.

    Args:
        df: Dataset whose elements are ``(features, label)`` pairs, where
            ``features`` can be indexed by ``col``.
        col: Key of the feature to normalize.

    Returns:
        The Normalization layer after ``adapt`` has been run on that feature.
    """
    # Project the dataset down to just the requested feature; the label is unused.
    column_values = df.map(lambda features, _label: features[col])
    layer = tf.keras.layers.experimental.preprocessing.Normalization()
    layer.adapt(column_values)
    return layer

In [None]:
def category_encoding(df, col):
    """Build a string-to-encoded-vector transform for one categorical feature.

    Args:
        df: Dataset whose elements are ``(features, label)`` pairs, where
            ``features`` can be indexed by ``col``.
        col: Key of the categorical (string) feature.

    Returns:
        A callable that applies the adapted StringLookup layer followed by the
        adapted CategoryEncoding layer to its argument.
    """
    preprocessing = tf.keras.layers.experimental.preprocessing

    # Keep only the requested feature; the label is unused here.
    raw_values = df.map(lambda features, _label: features[col])

    # Learn the vocabulary that maps each string to an integer index.
    lookup = preprocessing.StringLookup()
    lookup.adapt(raw_values)

    # Size the encoder from the learned vocabulary, then adapt it on the indices.
    encoder = preprocessing.CategoryEncoding(max_tokens=lookup.vocab_size())
    encoder.adapt(raw_values.map(lookup))

    def encode(feature):
        return encoder(lookup(feature))

    return encode

In [None]:
# Start the model's input list and the parallel list of preprocessed tensors,
# then add one normalized input per float32 column of raw_df.
# NOTE(review): this selects *every* float32 column, which may include an
# identifier-like column -- confirm that all of them are meant to be features.
encoded_features = []
inputs = []
float_col = raw_df.dtypes == np.float32
numeric_names = [name for name, is_float32 in float_col.items() if is_float32]
for col in numeric_names:
    numerical_input = tf.keras.Input(shape=(1,), name=col)
    # The normalizer is adapted on the training dataset only.
    encoded_col = normalization(train, col)(numerical_input)
    inputs.append(numerical_input)
    encoded_features.append(encoded_col)

In [None]:
# Add one string input per categorical column, together with its encoded tensor.
category_col = [
    'Card_Category',
    'Education_Level',
    'Gender',
    'Income_Category',
    'Marital_Status',
]
for col in category_col:
    categorical_input = tf.keras.Input(shape=(1,), name=col, dtype='string')
    # The lookup vocabulary and encoder are adapted on the training dataset only.
    encoded_col = category_encoding(train, col)(categorical_input)
    inputs.append(categorical_input)
    encoded_features.append(encoded_col)
    

In [None]:
# Display the Keras Input tensors collected so far (numeric, then categorical).
inputs

In [None]:
# Display the preprocessed tensors, in the same order as `inputs`.
encoded_features

In [None]:
# Concatenate all preprocessed features and feed them through a small MLP:
# Dense(64, relu) -> Dropout(0.2) -> Dense(1).
features = tf.keras.layers.concatenate(encoded_features)
x = tf.keras.layers.Dense(64, activation='relu')(features)
x = tf.keras.layers.Dropout(0.2)(x)
# The final layer has no activation, so the model outputs raw logits.
output = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs, output)
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    # Because the output is a logit, the decision boundary is 0.0 (probability
    # 0.5). The plain 'accuracy' string would threshold the logit at 0.5 and
    # report a skewed accuracy. The metric keeps the name 'accuracy' so history
    # and log keys are unchanged.
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)

In [None]:
# Render the model graph with tensor shapes shown, laid out left-to-right.
tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

In [None]:
# Weight each class inversely to its frequency so the minority (churned, label 0)
# class is not drowned out by the majority (existing, label 1) class. The
# previous weights were proportional to frequency, which amplified the imbalance.
# `churned` and `existed` are the class counts computed in the label-encoding
# cell. The factor 2 keeps the average weight per example close to 1.
total_customers = churned + existed
class_weight = {
    0: float(total_customers / (2.0 * churned)),
    1: float(total_customers / (2.0 * existed)),
}
model.fit(train, validation_data=val, epochs=5, class_weight=class_weight)