Banking Customer Churn Prediction

In [123]:
# import warnings
# warnings.filterwarnings('ignore')

In [124]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import KNNImputer

# Read the churn dataset from the working directory and preview the first five rows.
csv_path = 'Churn_Modelling.csv'
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619.0,France,Female,42,2,0.0,1,1,1,101348.88,1.0
1,2,15647311,Hill,608.0,Spain,Female,41,1,83807.86,1,0,1,112542.58,0.0
2,3,15619304,Onio,502.0,France,Female,42,8,159660.8,3,1,0,113931.57,1.0
3,4,15701354,Boni,699.0,France,Female,39,1,0.0,2,0,0,93826.63,0.0
4,5,15737888,Mitchell,850.0,Spain,Female,43,2,125510.82,1,1,1,79084.1,0.0


In [125]:
# Print each column's dtype and non-null count to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      9997 non-null   float64
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  9999 non-null   float64
 13  Exited           9965 non-null   float64
dtypes: float64(4), int64(7), object(3)
memory usage: 1.1+ MB


In [126]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,9997.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,9999.0,9965.0
mean,5000.5,15690940.0,650.530459,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100080.320515,0.203713
std,2886.89568,71936.19,96.655346,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57504.813358,0.402778
min,1.0,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,2500.75,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,50992.93,0.0
50%,5000.5,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100187.43,0.0
75%,7500.25,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149382.875,0.0
max,10000.0,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0



**Data Cleaning & Preprocessing**
---

In [127]:
# Count the missing values in every column.
df.isna().sum()

RowNumber           0
CustomerId          0
Surname             0
CreditScore         3
Geography           0
Gender              0
Age                 0
Tenure              0
Balance             0
NumOfProducts       0
HasCrCard           0
IsActiveMember      0
EstimatedSalary     1
Exited             35
dtype: int64

In [128]:
# Drop the rows whose `Exited` value is missing (presumably the churn label we
# want to predict -- confirm). `subset` is passed as a list because a bare string
# label is only accepted by pandas >= 1.5; the list form works on every version.
df.dropna(subset=['Exited'], inplace=True)

In [129]:
# Show the rows that are still missing a CreditScore.
df.loc[df['CreditScore'].isna()]

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
433,434,15595039,Manna,,Germany,Female,37,8,114754.08,1,1,0,136050.44,1.0
435,436,15581197,Ricci,,France,Female,51,3,99286.98,1,0,1,85578.63,0.0


In [130]:
# Show the rows that are still missing an EstimatedSalary.
df.loc[df['EstimatedSalary'].isna()]

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
458,459,15707362,Yin,514.0,Germany,Male,43,1,95556.31,1,0,1,,1.0


In [131]:
# Numeric columns that define the neighbourhood used for KNN imputation.
features = [
    'CreditScore',
    'EstimatedSalary',
    'Age',
    'Tenure',
    'Balance',
    'NumOfProducts',
    'HasCrCard',
    'IsActiveMember',
]

# Standardise before imputing: KNNImputer ranks neighbours by (NaN-aware)
# Euclidean distance, so on raw values the large-magnitude columns
# (Balance, EstimatedSalary ~ 1e5) would swamp Age, Tenure and the 0/1 flags.
# pandas' mean/std skip NaN by default, so the missing cells do not bias them.
feature_mean = df[features].mean()
feature_std = df[features].std().replace(0, 1)  # guard against constant columns
features_scaled = (df[features] - feature_mean) / feature_std

# NOTE(review): the imputer is fitted on every remaining row; if a train/test
# split happens later, consider fitting on the training rows only.
imputer = KNNImputer(n_neighbors=5)
imputed_scaled = imputer.fit_transform(features_scaled)

# Map the result back to the original units so it is directly comparable to df.
imputed_values = imputed_scaled * feature_std.to_numpy() + feature_mean.to_numpy()
df_imputed = pd.DataFrame(imputed_values, columns=features, index=df.index)

# Fill only the cells that were actually missing. Overwriting whole columns with
# the float64 imputer output would silently turn the integer columns (Age,
# Tenure, NumOfProducts, HasCrCard, IsActiveMember) into floats and could add
# floating-point round-trip noise to values that were never missing.
for col in features:
    if df[col].isna().any():
        df[col] = df[col].fillna(df_imputed[col])

In [132]:
# Re-count missing values per column to confirm none remain.
df.isna().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64