In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, files_here in os.walk('/kaggle/input'):
    for file_name in files_here:
        full_path = os.path.join(root, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's default theme to every figure drawn after this point.
sns.set()

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import resample

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Load the raw customer table from the Kaggle dataset mount.
csv_path = '/kaggle/input/credit-card-customers/BankChurners.csv'
df = pd.read_csv(csv_path)

In [None]:
# Print column names, non-null counts and dtypes of the raw frame.
df.info()

In [None]:
# List the columns stored as Python objects (the string/categorical features).
df.select_dtypes(include=[object]).columns

In [None]:
# Show the distinct labels present in the target column.
df.loc[:, 'Attrition_Flag'].unique()

In [None]:
# Remove the first 'Naive_Bayes_Classifier_...' column from the frame.
# NOTE(review): presumably a precomputed classifier output that would leak the
# target into the features - confirm against the dataset description.
del df['Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1']

In [None]:
# Remove the second 'Naive_Bayes_Classifier_...' column from the frame.
# NOTE(review): presumably a precomputed classifier output that would leak the
# target into the features - confirm against the dataset description.
del df['Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2']

In [None]:
# Remove the CLIENTNUM column.
# NOTE(review): looks like a per-customer identifier with no predictive meaning - confirm.
del df['CLIENTNUM']

In [None]:
def perc_chur_cust(feature, data=None):
    '''
    Plot and return the share of churned customers for each value of one feature.

    Parameters
    ----------
    feature : str
        Name of the column to group by.
    data : pandas.DataFrame, optional
        Frame to analyse. Defaults to the notebook-level ``df``. The frame must
        still hold the raw string labels in ``Attrition_Flag`` (a customer is
        counted as churned when the label equals ``'Attrited Customer'``).

    Returns
    -------
    pandas.DataFrame
        One row per value of ``feature`` with the columns ``feature``,
        ``Total_Customers``, ``Total_Churned_Customers`` and ``perc_churned``
        (a fraction between 0 and 1). Values with no churned customers are
        kept with a count of 0.
    '''
    frame = df if data is None else data

    # Work on a derived boolean Series so the source frame is never mutated
    # (no scratch columns are added to and removed from it).
    is_churned = frame['Attrition_Flag'].eq('Attrited Customer')

    # A single aggregation yields both counts, so categories without any
    # churned customer stay in the result instead of being dropped by a merge.
    c = (
        is_churned.groupby(frame[feature])
        .agg(Total_Customers='size', Total_Churned_Customers='sum')
        .reset_index()
    )
    c['perc_churned'] = c['Total_Churned_Customers'] / c['Total_Customers']

    # x/y must be passed by keyword: seaborn 0.12 no longer accepts them positionally.
    sns.barplot(x=feature, y='perc_churned', data=c)
    # Rotate after plotting so the rotation applies to the labels seaborn created.
    plt.xticks(rotation=90)
    return c

In [None]:
# Churn share per education level.
perc_chur_cust(feature='Education_Level')

Well, this is an interesting one: the more educated a customer is, the more likely they are to raise an Attrition Flag.

In [None]:
# Churn share per gender.
perc_chur_cust(feature='Gender')

So the bank loses more female customers than male.
The bank has 5.8% more female account holders than male,
and it loses 2.7% more female accounts than male.

In [None]:
# Churn share per marital status.
perc_chur_cust(feature='Marital_Status')

In [None]:
# Churn share per income category.
perc_chur_cust(feature='Income_Category')

In [None]:
# Churn share per card category.
perc_chur_cust(feature='Card_Category')

In [None]:
# List the remaining object-typed columns before encoding them.
df.select_dtypes(include=['object']).columns

In [None]:
# One-hot encode every categorical feature; the target column is left untouched.
categorical_cols = [
    'Gender',
    'Education_Level',
    'Marital_Status',
    'Income_Category',
    'Card_Category',
]
df = pd.get_dummies(data=df, columns=categorical_cols)

In [None]:
# Feature matrix: every column except the target.
X = df.drop('Attrition_Flag', axis=1)

In [None]:
# Display the names of the feature columns after one-hot encoding.
X.columns

In [None]:
# Encode the target as 1 for churned customers and 0 for everyone else.
# Including 1 in the lookup keeps this cell idempotent: re-running it on the
# already-encoded column leaves the labels unchanged, whereas comparing against
# the string alone would silently reset every label to 0.
df['Attrition_Flag'] = df['Attrition_Flag'].isin(['Attrited Customer', 1]).astype(int)

In [None]:
# Target vector: the encoded churn flag.
y = df.loc[:, 'Attrition_Flag']

In [None]:
# Hold out 30% of the rows for testing; the seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=101,
    test_size=0.3,
)

In [None]:
# Fit the min-max scaler on the training features only, then apply the same
# scaling to the test features.
scaler = MinMaxScaler()
scaled_x_train = scaler.fit_transform(x_train)
scaled_x_test = scaler.transform(x_test)

In [None]:
# Display (rows, features) of the scaled training matrix.
scaled_x_train.shape

In [None]:
# Random forest with 1000 trees and a fixed seed.
forest = RandomForestClassifier(
    random_state=1,
    n_estimators=1000,
)

In [None]:
# Train the forest on the unscaled training features.
forest.fit(X=x_train, y=y_train)

In [None]:
# Class predictions of the forest on the held-out test features.
forest_pred_y = forest.predict(X=x_test)

In [None]:
# Feed-forward binary classifier: ReLU hidden layers of 37, 20, 10 and 5 units,
# each followed by 20% dropout, then a single sigmoid output unit.
hidden_units = (37, 20, 10, 5)

model = Sequential()
for units in hidden_units:
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam')

# Stop training once the validation loss has not improved for 25 epochs.
earlystop = EarlyStopping(monitor='val_loss', mode='min', patience=25, verbose=1)

In [None]:
# Train for at most 550 epochs in batches of 128, with early stopping.
# NOTE(review): validation_data is the test split, so early stopping is tuned on
# the same rows that the classification report is later computed on.
model.fit(
    x=scaled_x_train,
    y=y_train,
    validation_data=(scaled_x_test, y_test),
    epochs=550,
    batch_size=128,
    callbacks=[earlystop],
)

In [None]:
# Plot the per-epoch metrics recorded during the last fit.
training_curves = pd.DataFrame(model.history.history)
training_curves.plot()

In [None]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. Threshold the
# sigmoid output at 0.5 instead, which is what it did for a single-unit output.
ANN_pred_y = (model.predict(scaled_x_test) > 0.5).astype('int32')

In [None]:
# Print the per-class precision/recall/F1 report for both models on the test split.
reports = (
    ('Classification Report of RandomForestClassifier', forest_pred_y),
    ('Classification Report of ANN', ANN_pred_y),
)
for heading, predictions in reports:
    print(heading)
    print(classification_report(y_test, predictions))

This is an unbalanced dataset, which is why the forest algorithm is giving us a better result.

Let's balance this dataset and train another ANN.

# Resample and TensorFlow

In [None]:
# Reload the raw customer table so the resampling experiment starts from clean data.
csv_path = '/kaggle/input/credit-card-customers/BankChurners.csv'
df = pd.read_csv(csv_path)

In [None]:
# Remove the two 'Naive_Bayes_Classifier_...' columns and CLIENTNUM in one call.
unused_columns = [
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1',
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2',
    'CLIENTNUM',
]
df.drop(columns=unused_columns, inplace=True)

In [None]:
# Number of customers in each class (non-null Customer_Age entries per label).
df.groupby('Attrition_Flag')['Customer_Age'].count()

In [None]:
df_minority = df.loc[df['Attrition_Flag'] == 'Attrited Customer']
df_majority = df.loc[df['Attrition_Flag'] == 'Existing Customer']

df_minoriry_upscaled = resample(df_minority,replace=True,
                               n_samples=8500,
                               random_state=123)

df1 = pd.concat([df_minoriry_upscaled,df_majority]).reset_index(drop=True)

df1.info()

In [None]:
df1.select_dtypes('object').columns

In [None]:
df1 = pd.get_dummies(df1,columns=['Gender', 'Education_Level', 'Marital_Status',
                                  'Income_Category', 'Card_Category'])

df1['Attrition_Flag'] = df1['Attrition_Flag'].apply(lambda x : 1 if x == 'Attrited Customer' else 0)

In [None]:
X = df1.drop(columns=['Attrition_Flag'])
y = df1['Attrition_Flag']
xtrain,xtest,ytrain,ytest = train_test_split(X,y,test_size=0.3,random_state=101)

In [None]:
# Fresh min-max scaler for the resampled experiment (replaces the earlier one).
scaler = MinMaxScaler()

In [None]:
# Learn the scaling from the training features only, then reuse it for the test features.
xtrain_s = scaler.fit_transform(xtrain)
xtest_s = scaler.transform(xtest)

In [None]:
# Same architecture as the first network: ReLU hidden layers of 37, 20, 10 and
# 5 units, each followed by 20% dropout, then a single sigmoid output unit.
model = Sequential()
for layer in (
    Dense(37, activation='relu'),
    Dropout(0.2),
    Dense(20, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='relu'),
    Dropout(0.2),
    Dense(5, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
):
    model.add(layer)

model.compile(loss='binary_crossentropy', optimizer='adam')

# Stop training once the validation loss has not improved for 25 epochs.
earlystop = EarlyStopping(monitor='val_loss', mode='min', patience=25, verbose=1)

In [None]:
# Train for at most 600 epochs in batches of 128, with early stopping.
# NOTE(review): validation_data is the test split, so early stopping is tuned on
# the same rows that the classification report is later computed on.
model.fit(
    x=xtrain_s,
    y=ytrain,
    validation_data=(xtest_s, ytest),
    epochs=600,
    batch_size=128,
    callbacks=[earlystop],
)

In [None]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. Threshold the
# sigmoid output at 0.5 instead, which is what it did for a single-unit output.
pred_y = (model.predict(xtest_s) > 0.5).astype('int32')

In [None]:
# Per-class precision/recall/F1 of the second network on the test split.
report_text = classification_report(y_true=ytest, y_pred=pred_y)
print(report_text)

This artificial neural network result is still not better than the forest algorithm, but it's better than the previous ANN.