## Bank Customer Churn Prediction

This project predicts whether a bank customer is likely to **churn** (leave the bank) using machine learning. The goal is to help financial institutions identify at-risk customers and take proactive measures to improve retention.

### Importing the libraries

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [2]:
# Display the installed TensorFlow version so the run environment is recorded with the notebook.
tf.__version__

'2.20.0'

### Importing the dataset

In [3]:
# Read the raw customer table from the CSV file in the working directory,
# then show it as the cell's output.
data = pd.read_csv(filepath_or_buffer="Churn_Modelling.csv")
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [4]:
# Print column names, dtypes and non-null counts to check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [5]:
# Count the customers per Geography value to see which categories exist and how balanced they are.
data["Geography"].value_counts()

Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

In [6]:
# Feature matrix: skip the first three columns (RowNumber, CustomerId, Surname)
# and the last one (Exited, the target), keeping everything in between.
# The columns have mixed types, so the resulting array has dtype=object.
X = data.iloc[:, 3:-1].to_numpy()
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [7]:
# Target vector: the last column of the table (Exited), as a NumPy array of 0/1 labels.
y = data.iloc[:, -1].to_numpy()
y

array([1, 0, 1, ..., 1, 1, 0])

### Encoding categorical data

In [8]:
from sklearn.preprocessing import LabelEncoder

# Replace the Gender strings in column 2 of X with integer codes, in place.
# In the displayed result the 'Female' rows show 0 and the 'Male' row shows 1.
le = LabelEncoder()
le.fit(X[:, 2])
X[:, 2] = le.transform(X[:, 2])

In [9]:
# Inspect X after label encoding: column 2 (Gender) now holds integer codes.
X

array([[619, 'France', 0, ..., 1, 1, 101348.88],
       [608, 'Spain', 0, ..., 0, 1, 112542.58],
       [502, 'France', 0, ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 0, ..., 0, 1, 42085.58],
       [772, 'Germany', 1, ..., 1, 0, 92888.52],
       [792, 'France', 0, ..., 1, 0, 38190.78]], dtype=object)

In [10]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 1 of X (Geography) and pass every other column
# through unchanged. The encoded indicator columns come first in the result,
# followed by the passthrough columns.
geography_encoder = OneHotEncoder()
ct = ColumnTransformer(
    transformers=[('encoder', geography_encoder, [1])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

In [11]:
# Inspect X after one-hot encoding: the leading columns are the Geography indicators.
X

array([[1.0, 0.0, 0.0, ..., 1, 1, 101348.88],
       [0.0, 0.0, 1.0, ..., 0, 1, 112542.58],
       [1.0, 0.0, 0.0, ..., 1, 0, 113931.57],
       ...,
       [1.0, 0.0, 0.0, ..., 0, 1, 42085.58],
       [0.0, 1.0, 0.0, ..., 1, 0, 92888.52],
       [1.0, 0.0, 0.0, ..., 1, 0, 38190.78]], dtype=object)

### Splitting data into training and test sets

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set. The fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [13]:
# Sanity-check the split sizes: one shape per line, features first, then targets.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(8000, 12)
(2000, 12)
(8000,)
(2000,)


### Feature Scaling

In [14]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column mean and standard deviation from the training rows
# only, then apply those same statistics to both splits so the test set
# does not influence the scaling.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [15]:
# Inspect the standardised training features (now a float array).
X_train

array([[-1.00551521,  1.73436329, -0.57388614, ...,  0.64007158,
         0.97530483, -0.56087335],
       [-1.00551521, -0.57658047,  1.74250594, ...,  0.64007158,
         0.97530483, -0.156221  ],
       [-1.00551521, -0.57658047,  1.74250594, ...,  0.64007158,
        -1.02532046,  1.56921791],
       ...,
       [-1.00551521,  1.73436329, -0.57388614, ...,  0.64007158,
        -1.02532046, -0.14906952],
       [ 0.99451504, -0.57658047, -0.57388614, ...,  0.64007158,
        -1.02532046, -0.7496959 ],
       [-1.00551521,  1.73436329, -0.57388614, ...,  0.64007158,
        -1.02532046, -1.71929584]])

In [16]:
# Inspect the test features after scaling with the statistics fitted on the training set.
X_test

array([[-1.00551521,  1.73436329, -0.57388614, ..., -1.56232526,
         0.97530483,  1.23435994],
       [-1.00551521, -0.57658047,  1.74250594, ...,  0.64007158,
        -1.02532046,  1.16364195],
       [-1.00551521, -0.57658047,  1.74250594, ...,  0.64007158,
        -1.02532046,  1.6989113 ],
       ...,
       [-1.00551521, -0.57658047,  1.74250594, ...,  0.64007158,
        -1.02532046,  0.03772612],
       [ 0.99451504, -0.57658047, -0.57388614, ...,  0.64007158,
         0.97530483, -1.63596523],
       [-1.00551521, -0.57658047,  1.74250594, ..., -1.56232526,
         0.97530483, -0.33182023]])

### Building the ANN (Artificial Neural Network)

In [17]:
# Seed Python, NumPy and TensorFlow before any weights are created, so that
# weight initialisation and batch shuffling repeat across Restart & Run All
# (on the same software/hardware setup). The value 10 mirrors the
# random_state used for the train/test split.
tf.keras.utils.set_random_seed(10)

# Small fully connected binary classifier: two hidden ReLU layers of 6 units
# each, followed by a single sigmoid unit whose output lies in (0, 1) and is
# read as the probability that the customer exits.
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

### Training the ANN

In [18]:
# Configure training: Adam optimiser, binary cross-entropy loss (the usual
# pairing with a single sigmoid output), and accuracy as the reported metric.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# Train for 30 epochs on the scaled training set in mini-batches of 32 rows.
# The History object returned by fit() is the cell's displayed value.
ann.fit(x=X_train, y=y_train, batch_size=32, epochs=30)

Epoch 1/30
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 795us/step - accuracy: 0.7589 - loss: 0.5395
Epoch 2/30
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 765us/step - accuracy: 0.8065 - loss: 0.4459
Epoch 3/30
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 921us/step - accuracy: 0.8156 - loss: 0.4231
Epoch 4/30
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 796us/step - accuracy: 0.8286 - loss: 0.4042
Epoch 5/30
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 746us/step - accuracy: 0.8370 - loss: 0.3896
Epoch 6/30
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 777us/step - accuracy: 0.8435 - loss: 0.3769
Epoch 7/30
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 775us/step - accuracy: 0.8482 - loss: 0.3663
Epoch 8/30
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 793us/step - accuracy: 0.8537 - loss: 0.3584
Epoch 9/30
250/250

<keras.src.callbacks.history.History at 0x1a32d054830>

### Evaluating the model

In [23]:
# Score the trained network on the held-out test rows; the call returns
# [loss, accuracy]. verbose=2 prints one summary line instead of a progress bar.
ann.evaluate(x=X_test, y=y_test, batch_size=32, verbose=2)

63/63 - 0s - 1ms/step - accuracy: 0.8525 - loss: 0.3618


[0.36175453662872314, 0.8525000214576721]