In [31]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# **Data** **Pre** **Processing**

Importing the data

In [32]:
data= pd.read_excel("ae_user_exit.xlsx")

In [33]:
data.head(10)

Unnamed: 0,Credit Score,Geography,Gender,Age,Customer Since,Current Account,Num of products,UPI Enabled,Estimated Yearly Income,Closed
0,553,Delhi,Female,45,4,0.0,4,1,274150,0
1,447,Bengaluru,Male,31,7,0.0,4,1,519360,0
2,501,Delhi,Female,32,2,0.0,4,1,545501,0
3,428,Delhi,Male,51,3,0.0,4,1,86868,0
4,492,Delhi,Female,57,6,1912681.501,2,1,518680,0
5,649,Mumbai,Male,61,4,1739005.126,2,1,86816,1
6,624,Mumbai,Male,54,6,1741981.923,4,1,72959,1
7,510,Bengaluru,Male,74,8,0.0,4,1,295115,0
8,573,Mumbai,Male,45,8,1245561.866,4,1,471174,1
9,517,Delhi,Female,62,6,2210750.266,4,0,149488,1


In [34]:
data.tail()

Unnamed: 0,Credit Score,Geography,Gender,Age,Customer Since,Current Account,Num of products,UPI Enabled,Estimated Yearly Income,Closed
9922,594,Bengaluru,Male,28,6,0.0,4,1,394810,0
9923,557,Bengaluru,Male,59,3,805049.0,2,0,58163,1
9924,627,Mumbai,Female,42,4,1893594.0,4,0,494067,0
9925,600,Bengaluru,Female,51,0,903177.8,2,1,109375,1
9926,553,Delhi,Male,75,7,0.0,4,1,180031,0


In [35]:
x= data.iloc[:, :-1].values
y= data.iloc[:, -1].values

In [36]:
x

array([[553, 'Delhi', 'Female', ..., 4, 1, 274150],
       [447, 'Bengaluru', 'Male', ..., 4, 1, 519360],
       [501, 'Delhi', 'Female', ..., 4, 1, 545501],
       ...,
       [627, 'Mumbai', 'Female', ..., 4, 0, 494067],
       [600, 'Bengaluru', 'Female', ..., 2, 1, 109375],
       [553, 'Delhi', 'Male', ..., 4, 1, 180031]], dtype=object)

In [37]:
y

array([0, 0, 0, ..., 0, 1, 0])

Handling missing data

In [38]:
from sklearn.impute import SimpleImputer

In [39]:
imputer= SimpleImputer(missing_values= np.nan, strategy= 'mean')

In [40]:
imputer.fit(x[:, 0:1])
#handling missing values in the credit column only as it is our main column (also data doesn't contain any missing values)

In [41]:
x[:, 0:1]= imputer.fit_transform(x[:, 0:1])

In [42]:
x

array([[553.0, 'Delhi', 'Female', ..., 4, 1, 274150],
       [447.0, 'Bengaluru', 'Male', ..., 4, 1, 519360],
       [501.0, 'Delhi', 'Female', ..., 4, 1, 545501],
       ...,
       [627.0, 'Mumbai', 'Female', ..., 4, 0, 494067],
       [600.0, 'Bengaluru', 'Female', ..., 2, 1, 109375],
       [553.0, 'Delhi', 'Male', ..., 4, 1, 180031]], dtype=object)

encoding categorical data into numeric data

In [43]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

In [44]:
x[0,:]

array([553.0, 'Delhi', 'Female', 45, 4, 0.0, 4, 1, 274150], dtype=object)

In [45]:
le= LabelEncoder() #label encoder to convert gender col into 0/1 form (as it was binary categorical data)

In [46]:
x[:, 2]= le.fit_transform(x[:, 2])

In [47]:
x[0,:]

array([553.0, 'Delhi', 0, 45, 4, 0.0, 4, 1, 274150], dtype=object)

In [48]:
ct= ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder= 'passthrough') #one hot encoding on city as it is categorical data (more than 2 categories)

In [49]:
x= np.array(ct.fit_transform(x))

In [50]:
x

array([[0.0, 1.0, 0.0, ..., 4, 1, 274150],
       [1.0, 0.0, 0.0, ..., 4, 1, 519360],
       [0.0, 1.0, 0.0, ..., 4, 1, 545501],
       ...,
       [0.0, 0.0, 1.0, ..., 4, 0, 494067],
       [1.0, 0.0, 0.0, ..., 2, 1, 109375],
       [0.0, 1.0, 0.0, ..., 4, 1, 180031]], dtype=object)

In [51]:
x.shape

(9927, 11)

In [52]:
x[0,:]

array([0.0, 1.0, 0.0, 553.0, 0, 45, 4, 0.0, 4, 1, 274150], dtype=object)

data splitting

In [53]:
from sklearn.model_selection import train_test_split

In [55]:
x_train, x_test, y_train, y_test= train_test_split(x, y,test_size=0.2, random_state=0)

In [56]:
x_train

array([[1.0, 0.0, 0.0, ..., 4, 1, 264201],
       [0.0, 0.0, 1.0, ..., 4, 1, 328575],
       [1.0, 0.0, 0.0, ..., 2, 0, 168958],
       ...,
       [0.0, 0.0, 1.0, ..., 2, 1, 248115],
       [0.0, 0.0, 1.0, ..., 2, 1, 499155],
       [1.0, 0.0, 0.0, ..., 2, 0, 10454]], dtype=object)

In [57]:
x_test

array([[0.0, 0.0, 1.0, ..., 2, 1, 502959],
       [0.0, 0.0, 1.0, ..., 4, 1, 80970],
       [1.0, 0.0, 0.0, ..., 4, 1, 2568],
       ...,
       [1.0, 0.0, 0.0, ..., 4, 0, 294987],
       [1.0, 0.0, 0.0, ..., 2, 0, 180942],
       [1.0, 0.0, 0.0, ..., 2, 1, 538471]], dtype=object)

In [58]:
y_train

array([0, 0, 0, ..., 1, 1, 0])

In [59]:
y_test

array([1, 0, 0, ..., 1, 0, 1])

In [61]:
print(f'train shape: {x_train.shape}\n test shape: {x_test.shape}')

train shape: (7941, 11)
 test shape: (1986, 11)


feature scaling

In [64]:
#scaling all the data of the data in a comparable range
from sklearn.preprocessing import StandardScaler
scale= StandardScaler()

In [65]:
x_train = scale.fit_transform(x_train) #train data is first fit and then transformed
x_test = scale.transform(x_test) #test data is directly transformed

In [66]:
x_train #scaled into a comparable ranges

array([[ 0.99434917, -0.57109746, -0.5792406 , ...,  0.90580857,
         0.65086753, -0.06854301],
       [-1.00568294, -0.57109746,  1.72639831, ...,  0.90580857,
         0.65086753,  0.34018176],
       [ 0.99434917, -0.57109746, -0.5792406 , ..., -0.951175  ,
        -1.53641095, -0.67326188],
       ...,
       [-1.00568294, -0.57109746,  1.72639831, ..., -0.951175  ,
         0.65086753, -0.17067658],
       [-1.00568294, -0.57109746,  1.72639831, ..., -0.951175  ,
         0.65086753,  1.4232319 ],
       [ 0.99434917, -0.57109746, -0.5792406 , ..., -0.951175  ,
        -1.53641095, -1.67963883]])

In [85]:
y_train = y_train.reshape(-1, 1)

In [87]:
y_train.shape

(7941, 1)

# **ANN implementation**

Initialization

In [88]:
#initializing neural network
ann= tf.keras.models.Sequential()

adding layers to the neural network

In [90]:
ann.add(tf.keras.layers.Dense(units=5, activation='relu')) #for first layer we will use 5 nodes and relu as the activation function

In [91]:
ann.add(tf.keras.layers.Dense(units=5, activation='relu')) #for the next hidden layer we will use the same metrics

In [92]:
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid')) #for the final output layer we will use single node but sigmoid activation function as it helps us to classify the y-hat in range of 0-1

compiling the nn

In [93]:
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [94]:
ann.fit(x_train, y_train, batch_size=32, epochs=120)

Epoch 1/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.5985 - loss: 0.6846
Epoch 2/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7930 - loss: 0.5173
Epoch 3/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7929 - loss: 0.4738
Epoch 4/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7953 - loss: 0.4392
Epoch 5/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7916 - loss: 0.4340
Epoch 6/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8165 - loss: 0.4129
Epoch 7/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8196 - loss: 0.4032
Epoch 8/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8279 - loss: 0.3974
Epoch 9/120
[1m249/249[0m [32

<keras.src.callbacks.history.History at 0x783a306fef10>

**prediction**

In [101]:
y_pred= ann.predict(x_test)
y_pred= (y_pred > 0.5).astype(int)
y_pred.shape

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


(1986, 1)

In [102]:
y_test.reshape(-1, 1)

array([[1],
       [0],
       [0],
       ...,
       [1],
       [0],
       [1]])

In [103]:
m,_= y_pred.shape
for i in range(m):
  print(f'y_pred: {y_pred[i]} y_test: {y_test[i]}')

y_pred: [0] y_test: 1
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [1] y_test: 1
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [1] y_test: 1
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 1
y_pred: [0] y_test: 0
y_pred: [0] y_test: 1
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 1
y_pred: [0] y_test: 0
y_pred: [0] y_test: 1
y_pred: [0] y_test: 1
y_pred: [0] y_test: 0
y_pred: [0] y_test: 1
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [1] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0] y_test: 1
y_pred: [0] y_test: 0
y_pred: [0] y_test: 0
y_pred: [0

In [104]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm= confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[1499   74]
 [ 222  191]]


0.850956696878147