In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
df = pd.read_csv("Churn_Modelling.csv")

In [None]:
df.head(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
df = df.drop(columns=["CustomerId", "Surname", "RowNumber"])
df.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
df.dtypes

Unnamed: 0,0
CreditScore,int64
Geography,object
Gender,object
Age,int64
Tenure,int64
Balance,float64
NumOfProducts,int64
HasCrCard,int64
IsActiveMember,int64
EstimatedSalary,float64


In [None]:
df["Gender"].replace({'Male': 1,'Female': 0} ,inplace=True)
df.dtypes

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Gender"].replace({'Male': 1,'Female': 0} ,inplace=True)
  df["Gender"].replace({'Male': 1,'Female': 0} ,inplace=True)


Unnamed: 0,0
CreditScore,int64
Geography,object
Gender,int64
Age,int64
Tenure,int64
Balance,float64
NumOfProducts,int64
HasCrCard,int64
IsActiveMember,int64
EstimatedSalary,float64


In [None]:
df = pd.get_dummies(data=df, columns=['Geography'])
df.columns

Index(['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited',
       'Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype='object')

In [None]:
df = df.astype(int)
df.dtypes
df.head(10)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0,1,1,1,101348,1,1,0,0
1,608,0,41,1,83807,1,0,1,112542,0,0,0,1
2,502,0,42,8,159660,3,1,0,113931,1,1,0,0
3,699,0,39,1,0,2,0,0,93826,0,1,0,0
4,850,0,43,2,125510,1,1,1,79084,0,0,0,1
5,645,1,44,8,113755,2,1,0,149756,1,0,0,1
6,822,1,50,7,0,2,1,1,10062,0,1,0,0
7,376,0,29,4,115046,4,1,0,119346,1,0,1,0
8,501,1,44,4,142051,2,0,1,74940,0,1,0,0
9,684,1,27,2,134603,1,1,1,71725,0,1,0,0


In [None]:
cols_to_scale = ['CreditScore','Age','Tenure', 'Balance','EstimatedSalary']

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df[cols_to_scale] = scaler.fit_transform(df[cols_to_scale])
df.head(10)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,0.538,0,0.324324,0.2,0.0,1,1,1,0.506733,1,1,0,0
1,0.516,0,0.310811,0.1,0.334028,1,0,1,0.562708,0,0,0,1
2,0.304,0,0.324324,0.8,0.636354,3,1,0,0.569654,1,1,0,0
3,0.698,0,0.283784,0.1,0.0,2,0,0,0.46912,0,1,0,0
4,1.0,0,0.337838,0.2,0.500243,1,1,1,0.395403,0,0,0,1
5,0.59,1,0.351351,0.8,0.453391,2,1,0,0.748796,1,0,0,1
6,0.944,1,0.432432,0.7,0.0,2,1,1,0.05026,0,1,0,0
7,0.052,0,0.148649,0.4,0.458537,4,1,0,0.596732,1,0,1,0
8,0.302,1,0.351351,0.4,0.56617,2,0,1,0.374681,0,1,0,0
9,0.668,1,0.121622,0.2,0.536485,1,1,1,0.358604,0,1,0,0


In [None]:
X = df.drop('Exited',axis='columns')
y = df['Exited']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=5)

In [None]:
X.shape

(10000, 12)

In [None]:
def ANN(X_train, y_train, X_test, y_test, loss, weights):
    model = keras.Sequential([
        keras.layers.Dense(10000, input_dim=12, activation='relu'),
        keras.layers.Dense(5000, activation='relu'),
        keras.layers.Dense(2500, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

    if weights == -1:
        model.fit(X_train, y_train, epochs=84)
    else:
        model.fit(X_train, y_train, epochs=8, class_weight = weights)

    print(model.evaluate(X_test, y_test))

    y_preds = model.predict(X_test)
    y_preds = np.round(y_preds)

    print("Classification Report: \n", classification_report(y_test, y_preds))

    return y_preds

In [None]:
from sklearn.metrics import confusion_matrix , classification_report

In [None]:
ANN(X_train, y_train, X_test, y_test, "binary_crossentropy", -1)

Epoch 1/84


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.8081 - loss: 0.5154
Epoch 2/84
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.8444 - loss: 0.3888
Epoch 3/84
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.8467 - loss: 0.3732
Epoch 4/84
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.8483 - loss: 0.3758
Epoch 5/84
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.8544 - loss: 0.3522
Epoch 6/84
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.8610 - loss: 0.3461
Epoch 7/84
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.8653 - loss: 0.3380
Epoch 8/84
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.8650 - loss: 0.3421
Epoch 9/84
[1m250/250[0m [32m━━━━━━━━━━━

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)

Method - 1

In [None]:
zeros, ones = df["Exited"].value_counts()

df_class_0 = df[df["Exited"] == 0]
df_class_1 = df[df["Exited"] == 1]

In [None]:
df_class_0_under = df_class_0.sample(ones)
df_test_under = pd.concat([df_class_0_under, df_class_1], axis=0)

In [None]:
X = df_test_under.drop('Exited',axis='columns')
y = df_test_under['Exited']

In [None]:
df_test_under['Exited'].value_counts()

Unnamed: 0_level_0,count
Exited,Unnamed: 1_level_1
0,2037
1,2037


In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15, stratify=y)

In [None]:
y_pred = ANN(X_train, y_train, X_test, y_test, "binary_crossentropy", -1)

Epoch 1/84


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step - accuracy: 0.6075 - loss: 0.7209
Epoch 2/84
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.7231 - loss: 0.5337
Epoch 3/84
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.7424 - loss: 0.5176
Epoch 4/84
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.7578 - loss: 0.4913
Epoch 5/84
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.7615 - loss: 0.4827
Epoch 6/84
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.7704 - loss: 0.4903
Epoch 7/84
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.7639 - loss: 0.4848
Epoch 8/84
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.7832 - loss: 0.4572
Epoch 9/84
[1m102/102[0m [32m━━━━━━━━━━━

Method-2

In [None]:
df_class_1_above = df_class_1.sample(zeros, replace=True)
df_test_above = pd.concat([df_class_0, df_class_1_above], axis=0)

In [None]:
X = df_test_above.drop('Exited',axis='columns')
y = df_test_above['Exited']
df_test_above['Exited'].value_counts()

Unnamed: 0_level_0,count
Exited,Unnamed: 1_level_1
0,7963
1,7963


In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15, stratify=y)

In [None]:
y_pred = ANN(X_train, y_train, X_test, y_test, "binary_crossentropy", -1)

Epoch 1/84


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - accuracy: 0.6896 - loss: 0.6002
Epoch 2/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.7575 - loss: 0.4894
Epoch 3/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.7674 - loss: 0.4851
Epoch 4/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.7654 - loss: 0.4715
Epoch 5/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.7755 - loss: 0.4679
Epoch 6/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.7839 - loss: 0.4567
Epoch 7/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.7831 - loss: 0.4541
Epoch 8/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.7770 - loss: 0.4574
Epoch 9/84
[1m399/399[0m [32m━━━━━━━━

Method-3

In [None]:
pip install imbalanced-learn



In [None]:
X = df.drop('Exited',axis='columns')
y = df['Exited']

In [None]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(sampling_strategy='minority')
X_sm, y_sm = smote.fit_resample(X, y)

y_sm.value_counts()

Unnamed: 0_level_0,count
Exited,Unnamed: 1_level_1
1,7963
0,7963


In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_sm, y_sm, test_size=0.2, random_state=15, stratify=y_sm)

In [None]:
y_pred = ANN(X_train, y_train, X_test, y_test, "binary_crossentropy", -1)

Epoch 1/84


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - accuracy: 0.6497 - loss: 0.6302
Epoch 2/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.7693 - loss: 0.4824
Epoch 3/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - accuracy: 0.7847 - loss: 0.4656
Epoch 4/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.7840 - loss: 0.4554
Epoch 5/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.7891 - loss: 0.4464
Epoch 6/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.7938 - loss: 0.4425
Epoch 7/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.7905 - loss: 0.4459
Epoch 8/84
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.8049 - loss: 0.4219
Epoch 9/84
[1m399/399[0m [32m━━━━━━━━━

Method - 4

In [None]:
X = df.drop('Exited',axis='columns')
y = df['Exited']

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15, stratify=y)

In [None]:
df = X_train.copy()
df['Exited'] = y_train

In [None]:
y_train.value_counts()

Unnamed: 0_level_0,count
Exited,Unnamed: 1_level_1
0,6370
1,1630
