# Import Libraries

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns

# Read Data

In [None]:
df = pd.read_csv('/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Data Inspection

In [None]:
df.head()

In [None]:
df.shape

In [None]:
df.info()

The TotalCharges column is stored as strings and contains some non-numeric entries. Convert the numeric strings to float and turn the non-numeric entries into missing values (NaN).

In [None]:
def tofloat(x):
  """Convert ``x`` to ``float``, returning ``np.nan`` when it cannot be converted.

  Parameters
  ----------
  x : any value accepted by ``float()`` (number, numeric string, ...).

  Returns
  -------
  float
      ``float(x)`` on success, ``np.nan`` if the conversion fails.
  """
  try:
    return float(x)
  # Only conversion failures map to NaN; a bare ``except`` would also swallow
  # KeyboardInterrupt / SystemExit raised while the column is being processed.
  except (TypeError, ValueError, OverflowError):
    return np.nan

In [None]:
# Element-wise conversion of TotalCharges; unparsable entries become NaN.
df['TotalCharges'] = df['TotalCharges'].map(tofloat)

Looking at the unique values of each categorical (object-dtype) column

In [None]:
category_cols = []
# Print the distinct values of every object-dtype column, one column per paragraph.
for name in df.columns:
  if df[name].dtypes != 'O':
    continue
  print(name,df[name].unique())
  print()

Replacing No internet service and No phone service with No


In [None]:
# Collapse both "no service" variants into a plain 'No' across the whole frame.
df = df.replace(['No internet service', 'No phone service'], 'No')

In [None]:
category_cols = []
# Re-inspect the object-dtype columns after the replacement above.
object_columns = [name for name in df.columns if df[name].dtypes=='O']
for name in object_columns:
  print(name,df[name].unique())
  print()

# Checking Null Values

In [None]:
df.isna().sum()

Handling Missing Values

In [None]:
round(df.TotalCharges.median(),2)

In [None]:
# Fill missing TotalCharges with the column median (rounded to 2 decimals).
# The result is assigned back to the column: calling fillna(inplace=True) on the
# `df.TotalCharges` accessor is chained assignment, which pandas 2.x warns about
# and which leaves `df` unchanged once Copy-on-Write is active.
df['TotalCharges'] = df['TotalCharges'].fillna(round(df['TotalCharges'].median(), 2))

No Null Values now. Data is clean.

# Checking Outliers

In [None]:
df.plot(kind='box')

No Outliers in the dataset.

# Target Variable

In [None]:
df.head()

In [None]:
df.Churn.value_counts()

In [None]:
Y = df.Churn

In [None]:
Y

In [None]:
Y = Y.map({'Yes':1,'No':0})

Yes = 1 = Customer will stop the service \
No  = 0 = Customer will continue the service

A false negative (a churning customer predicted as staying) is the critical error in this case, so the recall for the churn class should be as close to 1 as possible.

Building neural network models with the following re-sampling techniques and comparing their recall values:

1.   Without Re-sampling Techniques (Actual)
2.   Under Sampling
3.   Over Sampling
4.   SMOTE
5.   ADASYN
6.   SMOTE+TOMEK
7.   SMOTE+ENN



In [None]:
# Input Variables
# Every column except the target 'Churn' is kept as a feature.
# NOTE(review): if the frame carries a row-identifier column (e.g. a customer ID),
# it is still part of X here — confirm whether it should be dropped as well.
X = df.drop('Churn',axis=1)

# Label Encoding of X

In [None]:
# Names of the object-dtype feature columns that still need numeric encoding.
cols = [name for name in X.columns if X[name].dtypes == 'O']
cols

In [None]:
from sklearn.preprocessing import LabelEncoder
# A single encoder instance is re-fitted for each column, so after the loop `le`
# only holds the mapping of the last column in `cols`.
# The encoding is fitted on all rows of X (this cell runs before the train/test split).
le = LabelEncoder()
for x in cols:
  # Replace the string categories of column x with integer codes, in place in X.
  X[x] = le.fit_transform(X[x])

# Split the Data

In [None]:
# 70/30 train/test split with a fixed seed for repeatability; no `stratify`
# argument is passed, so the class ratio of Y is not enforced in either split.
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3,random_state=1)

# Scaling of X

In [None]:
ind = X_train.index

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training rows only, then apply the same
# fitted transform to both splits (both come back as NumPy arrays).
ss = StandardScaler().fit(X_train)
X_train = ss.transform(X_train)
X_test = ss.transform(X_test)

In [None]:
X_train = pd.DataFrame(X_train,columns=X.columns,index=ind)

In [None]:
Y_train = pd.DataFrame(Y_train)

In [None]:
X_train

In [None]:
Y_train

# Neural Network

1. Without Re-sampling Techniques


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
X.shape[1]

In [None]:
# Baseline network: three hidden ReLU layers of 8 units and a sigmoid output
# unit for the binary churn label; input width follows the feature count of X.
model = Sequential([
    Dense(8, activation="relu", input_dim=X.shape[1]),
    Dense(8, activation="relu"),
    Dense(8, activation="relu"),
    Dense(1, activation="sigmoid"),
])

In [None]:
model.compile(optimizer="adam", loss="binary_crossentropy")

In [None]:
trained_model = model.fit(X_train, Y_train, epochs=150,batch_size=20)

In [None]:
plt.plot(trained_model.history["loss"])

# Prediction

In [None]:
Y_pred = model.predict(X_test)

In [None]:
Y_pred = np.where(Y_pred >= 0.5,1,0)

# Evaluation

In [None]:
from sklearn.metrics import classification_report,recall_score,f1_score,precision_score,accuracy_score
# Score the model trained without re-sampling on the held-out test set and keep
# each metric in an `actual_*` variable for the final comparison table.
print(classification_report(Y_test,Y_pred))
actual_acc = accuracy_score(Y_test,Y_pred)
actual_rec = recall_score(Y_test,Y_pred)
actual_p = precision_score(Y_test,Y_pred)
actual_f1 = f1_score(Y_test,Y_pred)
print('Recall ->',actual_rec)
# Fixed: this line used to print the recall value under the 'Precision' label.
print('Precision ->',actual_p)
print('F1 Score ->',actual_f1)
print('Accuracy ->',actual_acc)

2. Under Sampling

In [None]:
# Import the resampling package
from sklearn.utils import resample

In [None]:
# Split into training and test sets
#X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.25)

In [None]:
# Returning to one dataframe
training_set = pd.concat([X_train, Y_train], axis=1)

In [None]:
training_set

In [None]:
# Separating classes
churn = training_set[training_set.Churn == 1]
no_churn = training_set[training_set.Churn == 0]

In [None]:
len(churn)

In [None]:
len(no_churn)

Under sampling the majority

In [None]:
# Draw len(churn) rows from the majority class WITHOUT replacement, so the
# undersampled set consists of distinct customers. With replace=True the draw is
# a bootstrap: rows get duplicated and even fewer unique majority rows survive.
# Requires len(churn) <= len(no_churn), which holds when churn is the minority class.
undersample = resample(no_churn,replace=False,n_samples=len(churn),random_state=10)

In [None]:
len(undersample) # len of no_churn is reduced to len of churn
# both are equal now

In [None]:
# Returning to new training set
undersample_train = pd.concat([churn, undersample])

In [None]:
undersample_train.Churn.value_counts()

In [None]:
# Separate undersampled data into X and y sets
undersample_x_train = undersample_train.drop('Churn', axis=1)
undersample_y_train = undersample_train.Churn

In [None]:
undersample_x_train.shape

In [None]:
# Same 8-8-8-1 architecture as the baseline model. The input width is read from
# the training frame instead of being hard-coded to 20, so the cell keeps working
# if the feature set changes.
model = Sequential()
model.add(Dense(8, activation="relu", input_dim=undersample_x_train.shape[1]))
model.add(Dense(8, activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

In [None]:
model.compile(optimizer="adam", loss="binary_crossentropy")

In [None]:
trained_model = model.fit(undersample_x_train, undersample_y_train, epochs=150,batch_size=20)

In [None]:
plt.plot(trained_model.history["loss"])

In [None]:
Y_pred = model.predict(X_test)

In [None]:
Y_pred = np.where(Y_pred >= 0.5,1,0)

In [None]:
# Score the under-sampling model on the held-out test set and keep each metric
# in an `under_*` variable for the final comparison table.
print(classification_report(Y_test,Y_pred))
under_acc = accuracy_score(Y_test,Y_pred)
under_rec = recall_score(Y_test,Y_pred)
under_p = precision_score(Y_test,Y_pred)
under_f1 = f1_score(Y_test,Y_pred)
print('Recall ->',under_rec)
# Fixed: this line used to print the recall value under the 'Precision' label.
print('Precision ->',under_p)
print('F1 Score ->',under_f1)
print('Accuracy ->',under_acc)

3. Over Sampling the minority

In [None]:
oversample = resample(churn,replace=True,n_samples=len(no_churn),random_state=10)

In [None]:
# Returning to new training set
oversample_train = pd.concat([no_churn, oversample])

In [None]:
oversample_train.Churn.value_counts()


In [None]:
# Separate oversampled data into X and y sets
oversample_x_train = oversample_train.drop('Churn', axis=1)
oversample_y_train = oversample_train.Churn

In [None]:
# Same 8-8-8-1 architecture as the baseline model. The input width is read from
# the training frame instead of being hard-coded to 20, so the cell keeps working
# if the feature set changes.
model = Sequential()
model.add(Dense(8, activation="relu", input_dim=oversample_x_train.shape[1]))
model.add(Dense(8, activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

In [None]:
model.compile(optimizer="adam", loss="binary_crossentropy")

In [None]:
trained_model = model.fit(oversample_x_train, oversample_y_train, epochs=150,batch_size=20)

In [None]:
plt.plot(trained_model.history["loss"])

In [None]:
Y_pred = model.predict(X_test)
Y_pred = np.where(Y_pred >= 0.5,1,0)

In [None]:
# Score the over-sampling model on the held-out test set and keep each metric
# in an `over_*` variable for the final comparison table.
print(classification_report(Y_test,Y_pred))
over_acc = accuracy_score(Y_test,Y_pred)
over_rec = recall_score(Y_test,Y_pred)
over_p = precision_score(Y_test,Y_pred)
over_f1 = f1_score(Y_test,Y_pred)
print('Recall ->',over_rec)
# Fixed: this line used to print the recall value under the 'Precision' label.
print('Precision ->',over_p)
print('F1 Score ->',over_f1)
print('Accuracy ->',over_acc)

4. SMOTE (synthetic minority oversampling technique)

In [None]:
# Import the SMOTE package
from imblearn.over_sampling import SMOTE

In [None]:
# Synthesize minority class datapoints using SMOTE
sm = SMOTE(random_state=42)
smote_x_train, smote_y_train = sm.fit_resample(X_train, Y_train)

In [None]:
# Separate into training and test sets
smote_x_train = pd.DataFrame(smote_x_train, columns = X_train.columns)
smote_y_train = pd.DataFrame(smote_y_train, columns = ['Churn'])

In [None]:
# Same 8-8-8-1 architecture as the baseline model. The input width is read from
# the training frame instead of being hard-coded to 20, so the cell keeps working
# if the feature set changes.
model = Sequential()
model.add(Dense(8, activation="relu", input_dim=smote_x_train.shape[1]))
model.add(Dense(8, activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

In [None]:
model.compile(optimizer="adam", loss="binary_crossentropy")

In [None]:
trained_model = model.fit(smote_x_train, smote_y_train, epochs=150,batch_size=20)

In [None]:
plt.plot(trained_model.history["loss"])

In [None]:
Y_pred = model.predict(X_test)
Y_pred = np.where(Y_pred >= 0.5,1,0)

In [None]:
# Score the SMOTE model on the held-out test set and keep each metric in a
# `smote_*` variable for the final comparison table.
print(classification_report(Y_test,Y_pred))
smote_acc = accuracy_score(Y_test,Y_pred)
smote_rec = recall_score(Y_test,Y_pred)
smote_p = precision_score(Y_test,Y_pred)
smote_f1 = f1_score(Y_test,Y_pred)
print('Recall ->',smote_rec)
# Fixed: this line used to print the recall value under the 'Precision' label.
print('Precision ->',smote_p)
print('F1 Score ->',smote_f1)
print('Accuracy ->',smote_acc)

In [None]:
smote_y_train.value_counts()

5. ADASYN: Adaptive Synthetic Sampling Approach

In [None]:
from imblearn.over_sampling import ADASYN

# Synthesize minority class datapoints using ADASYN (fixed seed for repeatability)
ada = ADASYN(random_state=42)
ada_x_train, ada_y_train = ada.fit_resample(X_train, Y_train)

In [None]:
# Separate into training and test sets
ada_x_train = pd.DataFrame(ada_x_train, columns = X_train.columns)
ada_y_train = pd.DataFrame(ada_y_train, columns = ['Churn'])

In [None]:
ada_y_train.value_counts()

In [None]:
# Same 8-8-8-1 architecture as the baseline model. The input width is read from
# the training frame instead of being hard-coded to 20, so the cell keeps working
# if the feature set changes.
model = Sequential()
model.add(Dense(8, activation="relu", input_dim=ada_x_train.shape[1]))
model.add(Dense(8, activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

In [None]:
model.compile(optimizer="adam", loss="binary_crossentropy")

In [None]:
trained_model = model.fit(ada_x_train, ada_y_train, epochs=150,batch_size=20)

In [None]:
plt.plot(trained_model.history["loss"])

In [None]:
Y_pred = model.predict(X_test)
Y_pred = np.where(Y_pred >= 0.5,1,0)

In [None]:
# Score the ADASYN model on the held-out test set and keep each metric in an
# `ada_*` variable for the final comparison table.
print(classification_report(Y_test,Y_pred))
ada_acc = accuracy_score(Y_test,Y_pred)
ada_rec = recall_score(Y_test,Y_pred)
ada_p = precision_score(Y_test,Y_pred)
ada_f1 = f1_score(Y_test,Y_pred)
print('Recall ->',ada_rec)
# Fixed: this line used to print the recall value under the 'Precision' label.
print('Precision ->',ada_p)
print('F1 Score ->',ada_f1)
print('Accuracy ->',ada_acc)

6. Hybridization: SMOTE + Tomek Links


In [None]:
from imblearn.combine import SMOTETomek

In [None]:
# oversampling using SMOTE+TOMEK
stom = SMOTETomek(random_state=42)
stom_x_train, stom_y_train = stom.fit_resample(X_train, Y_train)

In [None]:
# Separate into training and test sets
stom_x_train = pd.DataFrame(stom_x_train, columns = X_train.columns)
stom_y_train = pd.DataFrame(stom_y_train, columns = ['Churn'])

In [None]:
stom_y_train.value_counts()

In [None]:
# Same 8-8-8-1 architecture as the baseline model. The input width is read from
# the training frame instead of being hard-coded to 20, so the cell keeps working
# if the feature set changes.
model = Sequential()
model.add(Dense(8, activation="relu", input_dim=stom_x_train.shape[1]))
model.add(Dense(8, activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

model.compile(optimizer="adam", loss="binary_crossentropy")

# Train on the SMOTE+Tomek resampled data; the returned History is plotted in the next cell.
trained_model = model.fit(stom_x_train, stom_y_train, epochs=150,batch_size=20)

In [None]:
plt.plot(trained_model.history["loss"])

In [None]:
# Predict churn probabilities for the test set and threshold them at 0.5.
Y_pred = model.predict(X_test)
Y_pred = np.where(Y_pred >= 0.5,1,0)


# Score the SMOTE+Tomek model and keep each metric in a `smtom_*` variable for
# the final comparison table.
print(classification_report(Y_test,Y_pred))
smtom_acc = accuracy_score(Y_test,Y_pred)
smtom_rec = recall_score(Y_test,Y_pred)
smtom_p = precision_score(Y_test,Y_pred)
smtom_f1 = f1_score(Y_test,Y_pred)
print('Recall ->',smtom_rec)
# Fixed: this line used to print the recall value under the 'Precision' label.
print('Precision ->',smtom_p)
print('F1 Score ->',smtom_f1)
print('Accuracy ->',smtom_acc)

7. Hybridization: SMOTE + ENN


In [None]:
from imblearn.combine import SMOTEENN

In [None]:
# oversampling using SMOTE+ENN
senn = SMOTEENN(random_state=42)
senn_x_train, senn_y_train = senn.fit_resample(X_train, Y_train)

In [None]:
# Separate into training and test sets
senn_x_train = pd.DataFrame(senn_x_train, columns = X_train.columns)
senn_y_train = pd.DataFrame(senn_y_train, columns = ['Churn'])

In [None]:
senn_y_train.value_counts()

In [None]:
# Same 8-8-8-1 architecture as the baseline model. The input width is read from
# the training frame instead of being hard-coded to 20, so the cell keeps working
# if the feature set changes.
model = Sequential()
model.add(Dense(8, activation="relu", input_dim=senn_x_train.shape[1]))
model.add(Dense(8, activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

model.compile(optimizer="adam", loss="binary_crossentropy")

# Train on the SMOTE+ENN resampled data; the returned History is plotted in the next cell.
trained_model = model.fit(senn_x_train, senn_y_train, epochs=150,batch_size=20)

In [None]:
plt.plot(trained_model.history["loss"])

In [None]:
# Predict churn probabilities for the test set and threshold them at 0.5.
Y_pred = model.predict(X_test)
Y_pred = np.where(Y_pred >= 0.5,1,0)


# Score the SMOTE+ENN model and keep each metric in a `smenn_*` variable for
# the final comparison table.
print(classification_report(Y_test,Y_pred))
smenn_acc = accuracy_score(Y_test,Y_pred)
smenn_rec = recall_score(Y_test,Y_pred)
smenn_p = precision_score(Y_test,Y_pred)
smenn_f1 = f1_score(Y_test,Y_pred)
print('Recall ->',smenn_rec)
# Fixed: this line used to print the recall value under the 'Precision' label.
print('Precision ->',smenn_p)
print('F1 Score ->',smenn_f1)
print('Accuracy ->',smenn_acc)

In [None]:
# Comparison table: one row per re-sampling strategy, one column per metric.
performance = pd.DataFrame(
    {
        'Accuracy': [actual_acc, under_acc, over_acc, smote_acc, ada_acc, smtom_acc, smenn_acc],
        'Recall': [actual_rec, under_rec, over_rec, smote_rec, ada_rec, smtom_rec, smenn_rec],
        'Precision': [actual_p, under_p, over_p, smote_p, ada_p, smtom_p, smenn_p],
        'F1 Score': [actual_f1, under_f1, over_f1, smote_f1, ada_f1, smtom_f1, smenn_f1],
    },
    index=['Actual', 'Under_Sampling', 'Over_Sampling', 'SMOTE', 'ADASYN', 'SMOTE+TOMEK', 'SMOTE+ENN'],
)

In [None]:
performance

In [None]:
# One horizontal bar chart per metric on a 2x2 grid, strategies on the y-axis.
plt.figure(figsize=(20,12))
for position, metric in enumerate(['Accuracy', 'Recall', 'Precision', 'F1 Score'], start=1):
    plt.subplot(2, 2, position)
    scores = performance[metric]
    sns.barplot(x=scores.values, y=scores.index)
    plt.title(metric)

plt.show()