In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Read the churn dataset from the Kaggle input directory and preview the first rows.
csv_path = '../input/churn-predictions-personal/Churn_Predictions.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Pie chart of the target: share of customers who exited vs. were retained.
labels = ('Exited', 'Retained')
n_exited = df.loc[df['Exited'] == 1, 'Exited'].count()
n_retained = df.loc[df['Exited'] == 0, 'Exited'].count()
sizes = [n_exited, n_retained]
explode = (0, 0.1)  # offset the second ('Retained') wedge slightly
fig1, ax1 = plt.subplots(figsize=(10, 8))
ax1.pie(
    sizes,
    labels=labels,
    explode=explode,
    autopct='%1.1f%%',
    startangle=90,
    shadow=True,
)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.title("Proportion of customer churned and retained", size = 20)
plt.show()

About 20% of the customers have churned, so a baseline model could simply predict that 20% of the customers will churn. Because 20% is a small share, we need to make sure the chosen model predicts this group accurately: it is more valuable to the bank to identify and keep the customers who are about to leave than to accurately predict the customers who are retained.



In [None]:
# Churn ('Exited') counts broken down by each categorical variable
fig, axarr = plt.subplots(2, 2, figsize=(20, 12))
categorical_cols = ['Geography', 'Gender', 'HasCrCard', 'IsActiveMember']
# axarr.flat walks the 2x2 grid row by row, which matches the panel order above.
for col_name, panel in zip(categorical_cols, axarr.flat):
    sns.countplot(x=col_name, hue='Exited', data=df, ax=panel)

We note the following:

- The majority of the data comes from customers in France & Germany. However, the proportion of churned customers is inversely related to the number of customers in each region, which suggests the bank may have a problem (perhaps not enough customer-service resources allocated) in the areas where it has fewer clients.

- The proportion of female customers churning is also greater than that of male customers.

- Interestingly, the majority of the customers that churned are those with credit cards. However, since the majority of all customers have credit cards, this may be just a coincidence.

- Unsurprisingly, the inactive members have a greater churn. Worryingly, the overall proportion of inactive members is quite high, suggesting that the bank may need a program to turn this group into active customers, as this will definitely have a positive impact on customer churn.

In [None]:
# Distribution of each numeric attribute, split by churn outcome
fig, axarr = plt.subplots(3, 2, figsize=(20, 12))
numeric_cols = [
    'CreditScore', 'Age',
    'Tenure', 'Balance',
    'NumOfProducts', 'EstimatedSalary',
]
# axarr.flat walks the 3x2 grid row by row, which matches the layout listed above.
for col_name, panel in zip(numeric_cols, axarr.flat):
    sns.boxplot(y=col_name, x='Exited', hue='Exited', data=df, ax=panel)


We note the following:

- There is no significant difference in the credit score distribution between retained and churned customers.
- Older customers are churning more than younger ones, alluding to a difference in service preference between the age categories. The bank may need to review its target market or its retention strategy for the different age groups.
- With regard to the tenure, the clients on either extreme end (spent little time with the bank or a lot of time with the bank) are more likely to churn compared to those that are of average tenure.
- Worryingly, the bank is losing customers with significant bank balances which is likely to hit their available capital for lending.
- Neither the product nor the salary has a significant effect on the likelihood to churn.

In [None]:
# Remove the identifier-like columns; they are not used as model features.
# Rebinding `df` with errors='ignore' (instead of inplace=True) keeps this cell
# safe to re-run: a second execution no longer raises KeyError.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [None]:
df[df.Exited==0].shape

In [None]:
df[df.Exited==1].shape

In [None]:
# Encode Gender as 0/1 and assign the result back to the column.
# Calling replace(..., inplace=True) on `df['Gender']` mutates an intermediate
# Series: pandas 2.x warns about it, and under Copy-on-Write `df` is left
# unchanged. astype(int) raises if anything other than Female/Male is present,
# rather than letting strings reach the network.
df['Gender'] = df['Gender'].replace({'Female': 0, 'Male': 1}).astype(int)

In [None]:
# One-hot encode Geography. dtype=int keeps the dummy columns numeric:
# pandas >= 2.0 produces bool dummies by default, and a frame mixing bool and
# numeric columns converts to an object array, which Keras cannot turn into a tensor.
df = pd.get_dummies(df, columns=['Geography'], dtype=int)

In [None]:
df.head()

In [None]:
# Drop the Spain dummy so Spain becomes the implicit reference level
# (a row with all remaining Geography dummies equal to 0).
df = df.drop(columns=['Geography_Spain'])

In [None]:
df.head()

In [None]:
# Columns that the MinMaxScaler cell below rescales in place on `df`.
cols_to_scale=['CreditScore','Age','Balance','EstimatedSalary']

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the selected columns with MinMaxScaler and write them back into df.
# NOTE(review): the scaler is fitted on the full dataset before any train/test
# split, so held-out rows influence the min/max used for scaling.
scaler = MinMaxScaler()
scaled_values = scaler.fit(df[cols_to_scale]).transform(df[cols_to_scale])
df[cols_to_scale] = scaled_values

In [None]:
df.head()

In [None]:
# Separate the churn target from the feature matrix.
target_col = 'Exited'
y = df[target_col]
X = df.drop(columns=[target_col])

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Hold out 20% for testing. Stratify on the target so the minority (churn)
# share is the same in both partitions, consistent with the later splits in
# this notebook, which all pass stratify.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y
)

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
def ANN(X_train,y_train,X_test,y_test,loss,weights,epochs=100):
    """Train a fully connected binary classifier and report its test performance.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features (2-D, numeric) and 0/1 labels.
    X_test, y_test : array-like
        Held-out features and labels used for evaluation.
    loss : str or callable
        Loss handed to ``model.compile`` (e.g. ``'binary_crossentropy'``).
    weights : dict or sentinel
        A non-empty ``{class_label: weight}`` dict is forwarded to ``model.fit``
        as ``class_weight``. Any other value (the notebook passes ``-1`` and
        ``0``) means "no class weighting".
    epochs : int, default 100
        Number of training epochs.

    Returns
    -------
    numpy.ndarray
        Rounded sigmoid outputs (0.0 or 1.0) for ``X_test``, one row per
        sample with a single column.

    Side effects: prints the ``[loss, accuracy]`` from ``model.evaluate`` and a
    scikit-learn classification report for the test set.
    """
    # Coerce to float arrays up front: a DataFrame with bool/object columns
    # otherwise reaches TensorFlow as an object array and fails with an opaque
    # conversion error. Non-numeric data raises here with a clearer message.
    X_train_arr = np.asarray(X_train, dtype='float32')
    X_test_arr = np.asarray(X_test, dtype='float32')
    y_train_arr = np.asarray(y_train)
    y_test_arr = np.asarray(y_test)

    # Size the input layer from the data instead of hard-coding 11 features.
    n_features = X_train_arr.shape[1]

    model=keras.Sequential([
        keras.Input(shape=(n_features,)),
        keras.layers.Dense(20,activation='relu'),
        keras.layers.Dense(40,activation='relu'),
        keras.layers.Dense(80,activation='relu'),
        keras.layers.Dense(40,activation='relu'),
        keras.layers.Dense(20,activation='relu'),
        keras.layers.Dense(1,activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss=loss,
                  metrics=['accuracy'])

    # Only a real mapping is a usable class_weight; sentinels such as -1 or 0
    # must not be forwarded to Keras.
    class_weight = weights if isinstance(weights, dict) and weights else None
    model.fit(X_train_arr,y_train_arr,epochs=epochs,class_weight=class_weight)

    print(model.evaluate(X_test_arr,y_test_arr))

    # Threshold the sigmoid probabilities by rounding to 0.0 / 1.0.
    y_pred=np.round(model.predict(X_test_arr))

    print('Classification Report: \n',classification_report(y_test,y_pred))

    return y_pred


In [None]:
# Baseline run on the imbalanced data without class weights.
# -1 is the sentinel ANN checks for "no class weights"; the previous 0 fell
# through to the branch that passes class_weight=0 to model.fit.
y_pred=ANN(X_train,y_train,X_test,y_test,'binary_crossentropy',-1)

In [None]:
y_pred[:5]

In [None]:
y_test[:5]

In [None]:
from sklearn.metrics import confusion_matrix,classification_report

# Per-class precision / recall / F1 of the baseline predictions.
baseline_report = classification_report(y_test, y_pred)
print(baseline_report)

### Method 1 : Under Sampling

In [None]:
# Look each count up by its label. Tuple-unpacking value_counts() relies on its
# most-frequent-first ordering and would silently swap the two names if class 1
# were ever the larger class.
class_counts = df['Exited'].value_counts()
count_class_0 = int(class_counts.loc[0])
count_class_1 = int(class_counts.loc[1])


In [None]:
count_class_0

In [None]:
count_class_1

In [None]:
# Partition the rows by outcome: retained (0) vs. churned (1).
retained_mask = df['Exited'] == 0
churned_mask = df['Exited'] == 1
df_class_0 = df.loc[retained_mask]
df_class_1 = df.loc[churned_mask]

In [None]:
df_class_0.shape

In [None]:
# Randomly keep only as many class-0 rows as there are class-1 rows.
# A fixed random_state makes the sample, and every metric computed from it,
# reproducible across kernel restarts.
df_class_0_under = df_class_0.sample(n=count_class_1, random_state=15)

df_test_under = pd.concat([df_class_0_under, df_class_1], axis=0)
print(df_test_under.Exited.value_counts())

In [None]:
from sklearn.model_selection import train_test_split

# Features / target for the under-sampled frame, then a stratified 80/20 split.
y = df_test_under['Exited']
X = df_test_under.drop(columns=['Exited'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=15
)

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
y_train.value_counts()

In [None]:
# Train and evaluate on the under-sampled data without class weights.
# -1 is the sentinel ANN checks for "no class weights"; the previous 0 fell
# through to the branch that passes class_weight=0 to model.fit.
y_preds=ANN(X_train,y_train,X_test,y_test,'binary_crossentropy',-1)

### Method 2 : Over Sampling

In [None]:
count_class_0,count_class_1

In [None]:
# Draw class-1 rows with replacement until there are as many as class-0 rows.
# A fixed random_state makes the duplicated rows, and the metrics computed from
# them, reproducible across kernel restarts.
df_class_1_over = df_class_1.sample(n=count_class_0, replace=True, random_state=15)

In [None]:
df_class_1_over.Exited.value_counts()

In [None]:
df_class_0.Exited.value_counts()

In [None]:
# Balanced frame: every class-0 row stacked on the up-sampled class-1 rows.
oversampled_parts = [df_class_0, df_class_1_over]
df_test_over = pd.concat(oversampled_parts)
df_test_over['Exited'].value_counts()

In [None]:
# Features / target for the over-sampled frame.
y = df_test_over['Exited']
X = df_test_over.drop(columns=['Exited'])

In [None]:
# Stratified 80/20 split of the over-sampled data.
# NOTE(review): the minority rows were duplicated (sample(..., replace=True))
# before this split, so copies of the same customer can land in both the
# training and the test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=15, test_size=0.2
)

In [None]:
X_train.shape

In [None]:
y_test.shape

In [None]:
y_train.value_counts()

In [None]:
# Train and evaluate on the over-sampled data (-1: no class weights).
y_preds = ANN(
    X_train, y_train, X_test, y_test,
    loss='binary_crossentropy', weights=-1,
)

### Method 3 : SMOTE

In [None]:
# Start again from the full (imbalanced) frame for SMOTE.
y = df['Exited']
X = df.drop(columns=['Exited'])

In [None]:
y.value_counts()

In [None]:
from imblearn.over_sampling import SMOTE

# Over-sample only the minority class with SMOTE.
# random_state pins the synthetic samples so re-runs produce the same data.
# NOTE(review): resampling happens before the train/test split in the next
# cell, so the test partition also contains synthetic rows.
smote = SMOTE(sampling_strategy='minority', random_state=15)
X_sm, y_sm = smote.fit_resample(X, y)

y_sm.value_counts()

In [None]:
# Stratified 80/20 split of the SMOTE-resampled data.
X_train, X_test, y_train, y_test = train_test_split(
    X_sm, y_sm, test_size=0.2, stratify=y_sm, random_state=15
)

In [None]:
y_train.value_counts()

In [None]:
# Train and evaluate on the SMOTE-resampled data (-1: no class weights).
y_preds = ANN(
    X_train, y_train, X_test, y_test,
    loss='binary_crossentropy', weights=-1,
)

### Method 4: use of Ensemble with undersampling

In [None]:
df.Exited.value_counts()

In [None]:
# Class-wise frames used to build the ensemble batches.
df3_class0 = df.loc[df['Exited'] == 0]
df3_class1 = df.loc[df['Exited'] == 1]

In [None]:
df3_class0.shape

In [None]:
# Majority rows per ensemble member when class 0 is cut into 4 chunks.
# Computed from the data instead of the hard-coded 7963 (presumably the
# class-0 row count shown above) so it stays correct if the data changes.
len(df3_class0) / 4

In [None]:
df3_class1.shape

In [None]:
def get_train_batch(df_majority,df_minority,start,end):
    """Build (X, y) from one positional slice of the majority class plus all minority rows.

    Rows ``start`` (inclusive) to ``end`` (exclusive) of ``df_majority`` are
    stacked on top of the whole of ``df_minority``.

    Returns
    -------
    tuple
        ``(X, y)``: every column except ``'Exited'``, and the ``'Exited'`` column.
    """
    majority_chunk = df_majority.iloc[start:end]
    batch = pd.concat([majority_chunk, df_minority])
    return batch.drop(columns='Exited'), batch['Exited']

In [None]:
# Ensemble member 1: majority rows [0, 1990) plus every class-1 row.
X_en, y_en = get_train_batch(df3_class0, df3_class1, start=0, end=1990)
y_en.value_counts()

In [None]:
# Stratified 80/20 split of the first ensemble batch.
X_train, X_test, y_train, y_test = train_test_split(
    X_en, y_en, test_size=0.2, stratify=y_en, random_state=15
)

In [None]:
# Ensemble member 1 (-1: no class weights).
y_pred1 = ANN(
    X_train, y_train, X_test, y_test,
    loss='binary_crossentropy', weights=-1,
)

In [None]:
# Ensemble member 2: majority rows [1990, 3981) plus every class-1 row.
# The slice end is exclusive, so the first batch stopped at row 1989; starting
# at 1991 left row 1990 out of every batch.
X_en_1,y_en_1=get_train_batch(df3_class0,df3_class1,1990,3981)
y_en_1.value_counts()

In [None]:
# Stratified 80/20 split of the second ensemble batch.
X_train, X_test, y_train, y_test = train_test_split(
    X_en_1, y_en_1, test_size=0.2, stratify=y_en_1, random_state=15
)

In [None]:
# Ensemble member 2 (-1: no class weights).
y_pred2 = ANN(
    X_train, y_train, X_test, y_test,
    loss='binary_crossentropy', weights=-1,
)

In [None]:
# Ensemble member 3: majority rows [3981, 5971) plus every class-1 row.
# The slice end is exclusive, so the previous batch stopped at row 3980;
# starting at 3982 left row 3981 out of every batch.
X_en_2,y_en_2=get_train_batch(df3_class0,df3_class1,3981,5971)
y_en_2.value_counts()

In [None]:
# Stratified 80/20 split of the third ensemble batch.
X_train, X_test, y_train, y_test = train_test_split(
    X_en_2, y_en_2, test_size=0.2, stratify=y_en_2, random_state=15
)

In [None]:
# Ensemble member 3 (-1: no class weights).
y_pred3 = ANN(
    X_train, y_train, X_test, y_test,
    loss='binary_crossentropy', weights=-1,
)

In [None]:
# Ensemble member 4: majority rows from 5971 to the end, plus every class-1 row.
# The slice end is exclusive, so the previous batch stopped at row 5970;
# starting at 5972 left row 5971 out of every batch. The upper bound is taken
# from the frame itself rather than a hard-coded row count.
X_en_3,y_en_3=get_train_batch(df3_class0,df3_class1,5971,len(df3_class0))
y_en_3.value_counts()

In [None]:
# Stratified split of the fourth ensemble batch.
# NOTE(review): this split holds out 21% while the three earlier batches hold
# out 20% -- confirm the difference is intentional.
X_train, X_test, y_train, y_test = train_test_split(
    X_en_3, y_en_3, test_size=0.21, stratify=y_en_3, random_state=15
)
X_test.shape

In [None]:
# Ensemble member 4 (-1: no class weights).
y_pred4 = ANN(
    X_train, y_train, X_test, y_test,
    loss='binary_crossentropy', weights=-1,
)

In [None]:
len(y_test)

In [None]:
y_pred3.shape


In [None]:
y_pred_final=y_pred1.copy()

for i in range(len(y_pred1)):
    n_ones=y_pred1[i]+y_pred2[i]+y_pred3[i]
    if n_ones>1:
        y_pred_final[i]=1
    else:
        y_pred_final[i]=0

y_test.shape 

In [None]:
print(classification_report(y_test[:806],y_pred_final))

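So, of all the techniques tried, "Over Sampling" and "SMOTE" performed the best. Hence, we can use either of these models to predict which customers are likely to churn. Note, however, that in both cases the data was resampled before the train/test split, so the test sets contain duplicated or synthetic minority rows and these scores are likely optimistic; they should be confirmed on a hold-out set that was split off before any resampling.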