In [47]:
import tensorflow as tf
import numpy as np
import pandas as pd
import category_encoders as ce
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE


In [48]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [49]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(histogram_freq=1)

In [50]:
df_churn = pd.read_csv('Churn_Modelling.csv')

In [51]:
df_churn

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [52]:
df_churn['Exited'].value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [53]:
df_churn.describe()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,2886.89568,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,1.0,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,2500.75,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,5000.5,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,7500.25,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,10000.0,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [54]:
df_churn.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [55]:
df_churn = df_churn.drop(['RowNumber','CustomerId','Surname'],axis=1)

In [56]:
df_churn['Tenure'].unique()

array([ 2,  1,  8,  7,  4,  6,  3, 10,  5,  9,  0], dtype=int64)

In [57]:
oneh = ce.OneHotEncoder(cols=['Geography', 'Gender','NumOfProducts','Tenure'])

In [58]:
oneh.fit(df_churn)

OneHotEncoder(cols=['Geography', 'Gender', 'NumOfProducts', 'Tenure'])

In [59]:
X_cleaned = oneh.transform(df_churn)

In [60]:
X_cleaned['Exited'].value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [61]:
X_cleaned.describe()

Unnamed: 0,CreditScore,Geography_1,Geography_2,Geography_3,Gender_1,Gender_2,Age,Tenure_1,Tenure_2,Tenure_3,...,Tenure_11,Balance,NumOfProducts_1,NumOfProducts_2,NumOfProducts_3,NumOfProducts_4,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,...,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,650.5288,0.5014,0.2477,0.2509,0.4543,0.5457,38.9218,0.1048,0.1035,0.1025,...,0.0413,76485.889288,0.5084,0.0266,0.459,0.006,0.7055,0.5151,100090.239881,0.2037
std,96.653299,0.500023,0.431698,0.433553,0.497932,0.497932,10.487806,0.306311,0.304626,0.30332,...,0.198993,62397.405202,0.499954,0.160919,0.498341,0.077231,0.45584,0.499797,57510.492818,0.402769
min,350.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.58,0.0
25%,584.0,0.0,0.0,0.0,0.0,0.0,32.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51002.11,0.0
50%,652.0,1.0,0.0,0.0,0.0,1.0,37.0,0.0,0.0,0.0,...,0.0,97198.54,1.0,0.0,0.0,0.0,1.0,1.0,100193.915,0.0
75%,718.0,1.0,0.0,1.0,1.0,1.0,44.0,0.0,0.0,0.0,...,0.0,127644.24,1.0,0.0,1.0,0.0,1.0,1.0,149388.2475,0.0
max,850.0,1.0,1.0,1.0,1.0,1.0,92.0,1.0,1.0,1.0,...,1.0,250898.09,1.0,1.0,1.0,1.0,1.0,1.0,199992.48,1.0


In [62]:
X_resampled, y_resampled = SMOTE().fit_resample(X_cleaned[X_cleaned.drop('Exited',axis=1).columns], X_cleaned['Exited'])


In [63]:
y_resampled.value_counts()

1    7963
0    7963
Name: Exited, dtype: int64

In [64]:
standard_transformer = Pipeline(steps=[
        ('standard', StandardScaler())])

minmax_transformer = Pipeline(steps=[
        ('minmax', MinMaxScaler())])


preprocessor = ColumnTransformer(
        remainder='passthrough', #passthough features not listed
        transformers=[
        #     ('std', standard_transformer , ['']),
            ('mm', minmax_transformer , ['CreditScore','Age','Balance'])
        ])

In [65]:
preprocessor.fit(X_resampled)

ColumnTransformer(remainder='passthrough',
                  transformers=[('mm',
                                 Pipeline(steps=[('minmax', MinMaxScaler())]),
                                 ['CreditScore', 'Age', 'Balance'])])

In [66]:
df_final = preprocessor.transform(X_resampled)
df_final

array([[5.38000000e-01, 3.24324324e-01, 0.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.01348880e+05],
       [5.16000000e-01, 3.10810811e-01, 3.34031479e-01, ...,
        0.00000000e+00, 1.00000000e+00, 1.12542580e+05],
       [3.04000000e-01, 3.24324324e-01, 6.36357176e-01, ...,
        1.00000000e+00, 0.00000000e+00, 1.13931570e+05],
       ...,
       [6.52000000e-01, 3.91891892e-01, 5.28488148e-01, ...,
        1.00000000e+00, 0.00000000e+00, 6.33282422e+03],
       [7.30000000e-01, 2.56756757e-01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 4.22874904e+04],
       [3.40000000e-01, 3.78378378e-01, 4.72029416e-01, ...,
        1.00000000e+00, 0.00000000e+00, 8.28243599e+04]])

In [67]:
df_final.shape

(15926, 26)

In [68]:
X_train, X_test, y_train, y_test = train_test_split(df_final, y_resampled, test_size=0.2, random_state=42)

In [69]:
model=tf.keras.Sequential([
                           tf.keras.layers.Dense(256,activation='tanh',input_shape=(26,)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Dense(108,activation='relu'),
                           tf.keras.layers.Dropout(0.3), 
                           tf.keras.layers.Dense(56,activation='tanh'),
                           tf.keras.layers.Dropout(0.125),
                           tf.keras.layers.Dense(24,activation='exponential'),
                           tf.keras.layers.Dropout(0.3),
                           tf.keras.layers.Dense(1,activation='sigmoid')
])

In [70]:
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-1,
    decay_steps=100,
    decay_rate=0.9)
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=["accuracy"])

In [71]:
y_train.value_counts()

1    6410
0    6330
Name: Exited, dtype: int64

In [72]:
model.fit(X_train,y_train, epochs=100,verbose=2,validation_data=[X_test,y_test],batch_size=64,callbacks=[tensorboard_callback])

Epoch 1/100


  return dispatch_target(*args, **kwargs)


200/200 - 1s - loss: 0.8848 - accuracy: 0.5091 - val_loss: 0.6956 - val_accuracy: 0.4874 - 1s/epoch - 6ms/step
Epoch 2/100
200/200 - 1s - loss: 0.7840 - accuracy: 0.5036 - val_loss: 0.6934 - val_accuracy: 0.4874 - 621ms/epoch - 3ms/step
Epoch 3/100
200/200 - 1s - loss: 0.7600 - accuracy: 0.4999 - val_loss: 0.6931 - val_accuracy: 0.5126 - 578ms/epoch - 3ms/step
Epoch 4/100
200/200 - 1s - loss: 0.7459 - accuracy: 0.4983 - val_loss: 0.6935 - val_accuracy: 0.4874 - 580ms/epoch - 3ms/step
Epoch 5/100
200/200 - 1s - loss: 0.7334 - accuracy: 0.4945 - val_loss: 0.6930 - val_accuracy: 0.5129 - 523ms/epoch - 3ms/step
Epoch 6/100
200/200 - 1s - loss: 0.7272 - accuracy: 0.5033 - val_loss: 0.6931 - val_accuracy: 0.5119 - 551ms/epoch - 3ms/step
Epoch 7/100
200/200 - 1s - loss: 0.7238 - accuracy: 0.4975 - val_loss: 0.6936 - val_accuracy: 0.4874 - 546ms/epoch - 3ms/step
Epoch 8/100
200/200 - 1s - loss: 0.7258 - accuracy: 0.4956 - val_loss: 0.6939 - val_accuracy: 0.4874 - 553ms/epoch - 3ms/step
Epoch 9

<keras.callbacks.History at 0x18db4fb4040>

In [73]:
y_pred = model.predict(X_test)
y_pred = np.where(y_pred>0.5,1,0)
tf.math.confusion_matrix(y_test,y_pred)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1633,    0],
       [1552,    1]])>

In [74]:
report = pd.DataFrame(classification_report(y_test,y_pred,output_dict=True)).transpose()

In [75]:
report

Unnamed: 0,precision,recall,f1-score,support
0,0.512716,1.0,0.677875,1633.0
1,1.0,0.000644,0.001287,1553.0
accuracy,0.512869,0.512869,0.512869,0.512869
macro avg,0.756358,0.500322,0.339581,3186.0
weighted avg,0.75024,0.512869,0.348075,3186.0
