## Deep Learning Neural Network to predict which customers will leave (churn)

In [52]:
# Imports

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib

import tensorflow as tf
from tensorflow import keras

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import seaborn as sn

In [2]:
# Load the raw Telco churn export into a DataFrame and preview it
csv_path = "../input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(csv_path)
df

In [3]:
# Dropping the customerID column: it is an identifier, not a model feature.
# Rebinding with errors='ignore' keeps the cell re-runnable; the in-place drop
# raised KeyError the second time it was executed.
df = df.drop(columns='customerID', errors='ignore')

In [4]:
# Inspect each column's dtype to spot columns that still need conversion or encoding
df.dtypes

In [6]:
# Strict conversion of TotalCharges. It fails while the column still holds
# non-numeric text (the blank ' ' entries filtered out further down), so the
# error is caught and reported instead of aborting a Restart & Run All.
try:
    pd.to_numeric(df.TotalCharges)
except ValueError as err:
    print(f'TotalCharges cannot be converted as-is: {err}')

In [5]:
# Rows whose TotalCharges cannot be parsed as a number (coerced to NaN)
total_charges_numeric = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.loc[total_charges_numeric.isna()]

In [7]:
# Keep only the rows whose TotalCharges is not a blank string.
# .copy() gives df1 its own data, so the column edits made on df1 in later
# cells do not write into a filtered slice of df (SettingWithCopyWarning).
df1 = df[df.TotalCharges != ' '].copy()
df1.shape

In [9]:
# Convert the TotalCharges column from text to numeric values.
# `assign` returns a new frame, so nothing is written into a filtered slice
# of df through attribute-style assignment.
df1 = df1.assign(TotalCharges=pd.to_numeric(df1.TotalCharges))
df1.dtypes

#### Visualising customers on churn

In [23]:
# Tenure distribution, split by churn outcome
tenure_churn_yes = df1.loc[df1['Churn'] == 'Yes', 'tenure']
tenure_churn_no = df1.loc[df1['Churn'] == 'No', 'tenure']

fig, ax = plt.subplots()
ax.hist(
    [tenure_churn_yes, tenure_churn_no],
    rwidth=0.95,
    label=['Churn=Yes', 'Churn=No'],
)
ax.set_xlabel("tenure")
ax.set_ylabel("Number Of Customers")
ax.set_title("Tenure Visualiztion")
ax.legend();

In [22]:
# Monthly charges distribution, split by churn outcome
mc_churn_yes = df1.loc[df1['Churn'] == 'Yes', 'MonthlyCharges']
mc_churn_no = df1.loc[df1['Churn'] == 'No', 'MonthlyCharges']

fig, ax = plt.subplots()
ax.hist(
    [mc_churn_yes, mc_churn_no],
    rwidth=0.95,
    color=['green', 'red'],
    label=['Churn=Yes', 'Churn=No'],
)
ax.set_xlabel("Monthly Charges")
ax.set_ylabel("Number Of Customers")
ax.set_title("Monthly Charges Visualiztion")
ax.legend();

In [25]:
# Helper for inspecting the distinct values held by the text columns

def print_unique_col_values(df):
    """Print the unique values of every object-dtype column in ``df``.

    One line is printed per qualifying column, in column order, formatted as
    ``<column> : <array of unique values>``. Columns with any other dtype are
    skipped. Nothing is returned.
    """
    for column in df:
        series = df[column]
        if series.dtypes != 'object':
            continue
        print(f'{column} : {series.unique()}')
            

In [26]:
# List the distinct values of every object-dtype (text) column in df1
print_unique_col_values(df1)

In [27]:
# Collapse the "No internet service" / "No phone service" answers into a plain
# "No" so the affected columns become simple Yes/No columns.
# A non-mutating replace is rebound to df1 instead of replacing in place on a
# frame that may be a filtered slice of df.
df1 = df1.replace(['No internet service', 'No phone service'], 'No')

In [32]:
# Sanity check: no 'No internet service' value should remain, so this should be 0.
# The literal must match the data's casing exactly; the previous
# 'No Internet Service' could never match, so the check always returned 0.
len(df1[df1['OnlineBackup'] == 'No internet service'])

In [33]:
# Encode the Yes/No columns as integers: Yes -> 1, No -> 0

yes_no_columns = ['Partner','Dependents','PhoneService','MultipleLines','OnlineSecurity','OnlineBackup',
                  'DeviceProtection','TechSupport','StreamingTV','StreamingMovies','PaperlessBilling','Churn']
yes_no_codes = {'Yes': 1, 'No': 0}

# `df1[col].replace(..., inplace=True)` mutates a temporary selection: pandas 2.x
# warns about it and with Copy-on-Write enabled df1 is left unchanged. Build the
# encoded columns explicitly and rebind df1 instead.
# astype('int64') makes the numeric dtype explicit rather than relying on
# replace() to downcast, and fails loudly if a value other than Yes/No is left.
df1 = df1.assign(**{
    col: df1[col].replace(yes_no_codes).astype('int64')
    for col in yes_no_columns
})

In [35]:
# Encode gender as an integer: Female -> 1, Male -> 0.
# Assigning the result back (rather than an in-place replace on the selected
# column) keeps working when pandas Copy-on-Write is enabled; astype makes the
# numeric dtype explicit.
df1 = df1.assign(gender=df1['gender'].replace({'Female': 1, 'Male': 0}).astype('int64'))

In [36]:
# One-hot encode the remaining multi-valued categorical columns.
# dtype is pinned to uint8 so the indicator columns are numeric: pandas >= 2
# defaults to bool, and a frame mixing bool with int/float columns is turned
# into an object array when handed to Keras.
df2 = pd.get_dummies(
    data=df1,
    columns=['InternetService', 'Contract', 'PaymentMethod'],
    dtype='uint8',
)
df2.columns

In [38]:
# Rescale the wide-ranging numeric columns with sklearn's MinMaxScaler()
# NOTE(review): the scaler is fitted on every row, before the train/test split
# done in a later cell - consider fitting on the training rows only.
cols_to_scale = ['tenure', 'MonthlyCharges', 'TotalCharges']

scaler = MinMaxScaler()
scaler.fit(df2[cols_to_scale])
df2[cols_to_scale] = scaler.transform(df2[cols_to_scale])

In [39]:
# Show the distinct values left in every column after encoding and scaling
for col in df2.columns:
    distinct_values = df2[col].unique()
    print(f'{col}: {distinct_values}')

In [41]:
# Separate the target from the features, then hold out 20% of the rows for testing
y = df2['Churn']
X = df2.drop(columns=['Churn'])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=5
)

In [44]:
# Building an ANN using TensorFlow

# Seed TensorFlow's global random generator so that weight initialisation and
# batch shuffling do not change from one run of the notebook to the next.
tf.random.set_seed(5)

# Size the input from the data instead of hard-coding 26, so the model keeps
# matching X_train if the encoding step ever adds or removes a column.
n_features = X_train.shape[1]

# Two ReLU hidden layers (26 and 15 units) followed by a single sigmoid unit
# that outputs the churn probability.
model = keras.Sequential([
    keras.layers.Dense(26, input_shape=(n_features,), activation='relu'),
    keras.layers.Dense(15, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy is the matching loss for a single sigmoid output.
model.compile(
    optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']
)

model.fit(X_train, y_train, epochs=100)

In [45]:
# Loss and accuracy of the trained model on the held-out test split
model.evaluate(X_test, y_test)

In [46]:
# Raw model outputs for the test split; the output layer is a sigmoid, so each
# value lies between 0 and 1
yp = model.predict(X_test)
yp[:5]

In [47]:
# Turn each predicted probability into a hard label: 1 above 0.5, otherwise 0
y_pred = [1 if probability > 0.5 else 0 for probability in yp]

In [50]:
# Side-by-side look at the first ten predicted labels and the first ten true labels
y_pred[:10], y_test[:10]

In [54]:
# Classification report comparing the true test labels with the thresholded
# predictions; print() is used so the report's line breaks render as a table

print(classification_report(y_test, y_pred))

In [55]:
# Confusion matrix of true vs. predicted labels, drawn as a seaborn heatmap
cm = tf.math.confusion_matrix(labels=y_test, predictions=y_pred)

fig, ax = plt.subplots(figsize=(10, 7))
sn.heatmap(cm, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Truth')