In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import matplotlib.pyplot as plt
import seaborn as sns # for data visualization
sns.set(style="whitegrid", color_codes=True) # set a seaborn style

import tensorflow as tf # for model preparing
from tensorflow import keras 
%matplotlib inline #With this backend, the output of plotting commands is displayed inline within frontends like the Jupyter notebook

import os
# Recursively list every file under the Kaggle input directory, one full path per line.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the churn CSV into a DataFrame, then show five randomly chosen rows as a sanity check.
DATA_PATH = '../input/bank-customer-churn-modeling/Churn_Modelling.csv'
df = pd.read_csv(DATA_PATH)
df.sample(5)

## Data preprocessing and cleaning

In [None]:
# Dimensions of the freshly loaded frame as (rows, columns), before any column is dropped.
df.shape

In [None]:
# Remove the columns that are not needed for churn prediction.
# The result is re-assigned (no inplace=True) and errors='ignore' is passed so that the cell
# can be executed again without raising KeyError once the columns are already gone.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

The 'RowNumber', 'CustomerId' and 'Surname' columns are not necessary for churn prediction, so we delete them using the `drop` function.

In [None]:
# Preview the first rows to confirm the frame no longer carries the dropped columns.
df.head()

In [None]:
# Re-check the (rows, columns) dimensions now that three columns have been removed.
df.shape

In [None]:
# Print the per-column summary (dtype and non-null count) for the remaining columns.
df.info()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

## Data Visualization

In [None]:
# Distribution of customer age: density-normalised histogram with a KDE curve on top.
# `histplot(..., kde=True, stat='density')` replaces `distplot`, which seaborn has deprecated.
ax = sns.histplot(data=df, x='Age', bins=20, kde=True, stat='density')
ax.set_title('Age distribution in Bank dataset')
ax.set_xlabel('Age')
ax.set_ylabel('Density');  # trailing ';' hides the Text(...) repr in the cell output

In [None]:
# check object type column unique values
for i in df:
  if df[i].dtype == 'object':

    print(i +':',df[i].unique())

In [None]:
# Number of customers per country.
# The column is named through `x=`: recent seaborn releases accept the plotted variable only
# as a keyword, so passing the Series positionally together with `data=` raises a TypeError.
ax = sns.countplot(x='Geography', data=df, palette='mako')
ax.set_title('Customers per country');

In [None]:
# Customers per country, split by the Exited flag.
# `x=` is given by keyword because recent seaborn releases reject a positional data vector
# when `data=` is also supplied.
ax = sns.countplot(x='Geography', hue='Exited', data=df, palette='mako', saturation=1.0)
ax.set_title('Exited vs. retained customers per country');

In [None]:
# Customers per number of products held, split by the Exited flag.
# `x=` is given by keyword because recent seaborn releases reject a positional data vector
# when `data=` is also supplied.
ax = sns.countplot(x='NumOfProducts', data=df, hue='Exited', palette='mako', saturation=1.00)
ax.set_title('Exited vs. retained customers by number of products');

In [None]:
# Customers per IsActiveMember value, split by the Exited flag.
# `x=` is given by keyword because recent seaborn releases reject a positional data vector
# when `data=` is also supplied.
ax = sns.countplot(x='IsActiveMember', data=df, hue='Exited', palette='mako', saturation=1.00)
ax.set_title('Exited vs. retained customers by activity status');

## Data Transformation

In [None]:
# Encode Gender numerically: Male -> 1, Female -> 0.
# The result is assigned back to the column instead of calling replace(..., inplace=True) on
# `df['Gender']`: that chained form operates on an intermediate Series and does not update
# the frame when pandas copy-on-write is active. `replace` leaves already-encoded values
# untouched, so the cell stays safe to re-run; astype(int) guarantees a numeric dtype
# (and fails loudly if an unmapped label is still present).
df['Gender'] = df['Gender'].replace({'Male': 1, 'Female': 0}).astype(int)

In [None]:
# Preview the frame to check the Gender column after the replacement above.
df.head()

In [None]:
# One-hot encode Geography into one indicator column per category.
# dtype=int keeps the indicators as 0/1 integers. Newer pandas versions default to boolean
# indicators, which would leave the frame with mixed bool/float columns (an object array once
# converted) when it is later passed to the Keras model.
df1 = pd.get_dummies(data=df, columns=['Geography'], dtype=int)
df1.head()

In [None]:
# Drop one Geography indicator: it is redundant, because that category is implied whenever
# all remaining Geography indicators are 0.
# Re-assigning with errors='ignore' (instead of inplace=True) lets the cell be re-run without
# a KeyError once the column has already been removed.
df1 = df1.drop(columns=['Geography_Spain'], errors='ignore')
df1.head()

In [None]:
# Data scaling with scikit-learn: min-max scale the continuous columns of df1 in place.
# NOTE(review): the scaler is fit on the full frame before the train/test split further down,
# so test-set statistics influence the scaling — consider fitting on the training rows only.
from sklearn.preprocessing import MinMaxScaler

col_to_scale = ['CreditScore', 'Age', 'Tenure', 'Balance', 'EstimatedSalary']
scaler = MinMaxScaler()
scaler.fit(df1[col_to_scale])
df1[col_to_scale] = scaler.transform(df1[col_to_scale])

In [None]:
# Preview the frame after the scaling step.
df1.head()

## Train Test Split

In [None]:
# Separate the target column (Exited) from the feature matrix.
TARGET = 'Exited'
y = df1[TARGET]
X = df1.drop(columns=[TARGET])

In [None]:
# Hold out 20% of the rows as a test set; random_state fixes the shuffle so the split is repeatable.
from sklearn.model_selection import train_test_split

splits = train_test_split(X, y, test_size=0.2, random_state=32)
X_train, x_test, y_train, y_test = splits
X_train.shape

## ANN Model

In [None]:
# Seed TensorFlow's global random generator so weight initialisation is repeatable between
# runs (32 mirrors the random_state used for the train/test split).
tf.random.set_seed(32)

# Take the input width from the training data instead of hard-coding 11, so the model keeps
# working if a feature column is added or removed upstream.
n_features = X_train.shape[1]

# Fully connected binary classifier: two ReLU hidden layers (100 and 75 units) followed by a
# single sigmoid unit.
model = keras.Sequential([
    keras.layers.Dense(100, input_shape=(n_features,), activation='relu'),
    keras.layers.Dense(75, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy pairs with the sigmoid output; accuracy is tracked during training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Keep the returned History object so the per-epoch metrics stay available for inspection
# and its repr is not echoed as the cell output.
history = model.fit(X_train, y_train, epochs=150)

In [None]:
# Score the trained model on the held-out test split (returns the loss and the compiled metrics).
model.evaluate(x=x_test, y=y_test)

Model accuracy on the test set: 83.70%

In [None]:
# Predict on the held-out test features. The model's last layer is a single sigmoid unit,
# so each prediction is a score between 0 and 1 rather than a hard class label.
y_predicted= model.predict(x_test)

In [None]:
# First ten raw model outputs, for comparison with the true labels.
y_predicted[:10]

In [None]:
# First ten true Exited labels from the test split.
y_test[:10]

In [None]:
# Turn the predicted scores into binary labels: 1 when the score is at least 0.5, otherwise 0.
y_pred = [1 if score >= 0.5 else 0 for score in y_predicted]

In [None]:
# First ten thresholded predictions (0 or 1).
y_pred[:10]

In [None]:
# Per-class precision, recall and F1 for the thresholded test predictions.
from sklearn.metrics import classification_report

report = classification_report(y_test, y_pred)
print(report)

The confusion matrix for the model test prediction results is shown below

In [None]:
# Build the confusion matrix for the test predictions and draw it as an annotated heatmap
# on an explicitly created axes (rows: true labels, columns: predicted labels).
data = tf.math.confusion_matrix(labels=y_test, predictions=y_pred)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(data, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')