 <div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:LightSlateGray;
           font-size:150%;
           text-align:center;
           letter-spacing:0.5px">
     CUSTOMER CHURN ANALYSIS AND CLASSIFICATION
</div>

<center><img src="https://www.revechat.com/wp-content/uploads/2018/06/telecom-customer-expereience.png"></center>

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:LightSlateGray;
           font-size:150%;
           text-align:center;
           letter-spacing:0.5px">
     Contents
</div>

1. [Import data and Python packages](#t1.)
    * Pandas
    * NumPy
    * scikit-learn
    * TensorFlow
    * Matplotlib
    * Seaborn
    * Missingno
    1.1 [Regressor for Missing Value](#t1.1)

2. [Data visualization](#t2.)
    * Pie Chart
    * Pair Plot

3. [Classification](#t3.)
    * Multi-Layer Perceptron with TensorFlow

    3.1 [Classification Report](#t3.1)
    
    3.2 [Accuracy and Loss Curves](#t3.2)
    
    3.3 [Confusion Matrix](#t3.3)

4. [Comparison](#t4.)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import missingno

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation
from tensorflow.keras.utils import to_categorical

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Location of the Telco churn CSV; despite the name `url`, this is a file path
# under the Kaggle input directory.
url = "/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv"
# Raw dataset; the frames built in the following cells are derived from `df`.
df = pd.read_csv(url)

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Preview the last rows of the raw data.
df.tail()

In [None]:
# Plot the missing-value matrix of the raw data.
# NOTE(review): the blank TotalCharges entries handled in section 1.1 are stored
# as the string " ", so they presumably do not show up as missing here - confirm.
missingno.matrix(df)
plt.show()

In [None]:
# Column dtypes and non-null counts of the raw data.
df.info()

In [None]:
# Columns treated as numeric features. 'TotalCharges' is listed here although
# some of its raw values are the string " " (handled in section 1.1).
numeric = ['SeniorCitizen','tenure','MonthlyCharges','TotalCharges']
# Columns treated as categorical. 'Churn' (the last entry) is the target; the
# feature-encoding cell relies on it being last when it slices this list.
categorical = ['gender','Partner','Dependents','PhoneService','MultipleLines','InternetService',
               'OnlineSecurity','OnlineBackup','DeviceProtection','TechSupport','StreamingTV','StreamingMovies',
               'Contract','PaperlessBilling','PaymentMethod','Churn']

<a id="t1.1"></a>
# 1.1 Regressor for Missing Value

In [None]:
# Rows whose TotalCharges is a single blank character, i.e. effectively missing.
blank_total = df['TotalCharges'] == " "
df_null = df[blank_total]
df_null.head()

In [None]:
# Rows with a usable TotalCharges. The string column is dropped and re-added as
# a numeric column, which places 'TotalCharges' last in the frame.
has_total = df['TotalCharges'] != " "
df_not_null = df[has_total].drop('TotalCharges', axis=1)
df_not_null['TotalCharges'] = pd.to_numeric(df[has_total]['TotalCharges'])
df_not_null.head()

In [None]:
# Summary statistics of the numeric columns, one row per column.
df_not_null.describe().T

In [None]:
# Correlation heatmap of the numeric features.
# The numeric columns are selected explicitly: DataFrame.corr() no longer drops
# string columns silently on pandas >= 2.0 and raises on them instead.
plt.figure(figsize=(12,10))
sns.heatmap(df_not_null[numeric].corr(), annot=True, cmap="Reds", fmt='.0%')
plt.show()

In [None]:
# Check dtypes after the conversion above ('TotalCharges' is now numeric).
df_not_null.info()

In [None]:
# Standardiser for the regression inputs (all numeric columns except the
# 'TotalCharges' target). It is fitted on every row of df_not_null, i.e. before
# the train/test split performed in the next cell.
scaler = StandardScaler().fit(df_not_null[numeric].drop("TotalCharges",axis=1))

In [None]:
# Regression task: predict 'TotalCharges' from the remaining numeric columns.
regression_inputs = df_not_null[numeric].drop('TotalCharges', axis=1)
X1 = scaler.transform(regression_inputs)
y1 = df_not_null['TotalCharges']

# 80/20 hold-out split with a fixed seed.
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X1, y1, test_size=0.20, random_state=0
)

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from yellowbrick.regressor import PredictionError
from yellowbrick.regressor import ResidualsPlot

# Random forest used to impute the blank 'TotalCharges' values.
# The forest is seeded so the reported metrics and the imputed values are the
# same on every Restart & Run All.
regressor = RandomForestRegressor(random_state=0).fit(X_train1, y_train1)
y_pred1 = regressor.predict(X_test1)

# Hold-out quality of the imputation model.
print("R2 Score:",r2_score(y_test1, y_pred1))
print("MAE     :",mean_absolute_error(y_test1,y_pred1))
print("MSE     :",mean_squared_error(y_test1,y_pred1))

# Diagnostic plots: predicted-vs-actual, then residuals. Both follow the same
# fit / score / show sequence, so they share one loop.
for visualizer_cls in (PredictionError, ResidualsPlot):
    visualizer = visualizer_cls(regressor)
    visualizer.fit(X_train1, y_train1)
    visualizer.score(X_test1, y_test1)
    visualizer.show()

In [None]:
# Fill the blank 'TotalCharges' rows with the regressor's predictions.
# Inputs go through the same scaler that was used for training.
df_pred_X = scaler.transform(df_null[numeric].drop(columns='TotalCharges'))
df_null = (
    df_null
    .drop(columns='TotalCharges')
    .assign(TotalCharges=regressor.predict(df_pred_X))
)
df_null.head()

<a id="t2."></a>
# 2. Data visualization

In [None]:
# Stack the imputed rows on top of the complete rows and renumber the index.
train = pd.concat([df_null, df_not_null], axis=0, ignore_index=True)
train.head()

In [None]:
# Print the level frequencies of every categorical column, separated by rules.
separator = "=========================================="
print(separator)
for column in categorical:
    print(train[column].value_counts())
    print(separator)

In [None]:
# One pie chart per categorical column, laid out on an 8 x 2 grid (16 panels).
plt.figure(figsize=(15,30))
colors = ["#ffb3b3",'#c78d8d',"#C2C4E2",'#8082a8']
for panel_no in range(1,17):
    column = categorical[panel_no-1]
    counts = train[column].value_counts()

    plt.subplot(8,2,panel_no)
    plt.pie(counts.values, colors=colors, autopct='%1.1f%%', shadow=True,
            startangle=90, textprops={"fontsize":12, 'fontfamily':'monospace'})
    # The legend carries the level names; the column name becomes its title.
    plt.legend(counts.index,
               loc='center left',
               title="{}".format(column.upper()),
               bbox_to_anchor=(1, 0, 0.5, 1))

In [None]:
# Pairwise relationships between the numeric features, coloured by churn.
sns.set_style("whitegrid")
pairplot_frame = train[numeric + ['Churn']]
sns.pairplot(pairplot_frame, hue='Churn', palette='Reds')
plt.grid(False)
plt.show()

In [None]:
# One-hot encode every categorical feature; 'Churn' is the target and is
# excluded by name rather than by position.
feature_columns = [col for col in categorical if col != 'Churn']

# prefix=col keeps the dummy column names unique: many columns share the levels
# 'Yes' / 'No', which otherwise produce duplicate, ambiguous column names.
# dtype=float gives the same numeric indicator columns on every pandas version.
data = {col: pd.get_dummies(train[col], prefix=col, dtype=float)
        for col in feature_columns}

# Feature matrix: numeric columns first, then the dummies in `categorical`
# order, built with a single concat instead of growing the frame in a loop.
X_df = pd.concat([train[numeric], *data.values()], axis=1)
X_df.head()

<a id="t3."></a>
# 3. Classification

In [None]:
# Encode the target. NOTE: 'Yes' (churned) -> 0 and 'No' -> 1; the ['Yes', 'No']
# label lists used by the confusion-matrix cells rely on this order.
# .map gives an integer Series directly and avoids the deprecated downcasting
# that Series.replace performs with list arguments.
y = train.Churn.map({'Yes': 0, 'No': 1})

# Split before scaling so that the scaler statistics come from the training
# rows only and no information from the test rows leaks into training.
X_train_df, X_test_df, y_train, y_test = train_test_split(X_df, y, test_size =0.30, random_state=42)

scale = StandardScaler()
scale.fit(X_train_df)

X_train = scale.transform(X_train_df)
X_test = scale.transform(X_test_df)
# Full scaled matrix, kept under its original name.
X = scale.transform(X_df)

In [None]:
# One-hot targets for the softmax output layer. num_classes is pinned to 2 so
# the width always matches the 2-unit output layer, even if a split happened to
# contain only class 0.
y_train_cat = to_categorical(y_train, num_classes=2)
y_test_cat = to_categorical(y_test, num_classes=2)

In [None]:
# Multi-layer perceptron: four sigmoid hidden layers (64-128-128-64) followed
# by a 2-way softmax output.
model = Sequential(
    [Dense(64, input_shape=X_train[0].shape, activation="sigmoid")]
    + [Dense(units, activation="sigmoid") for units in (128, 128, 64)]
    + [Dense(2, activation="softmax")]
)

# Categorical cross-entropy matches the one-hot targets; accuracy is tracked
# under the key "acc".
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["acc"])

In [None]:
# Train for 20 epochs, evaluating on the held-out split after every epoch.
# batch_size was 0, which is not a meaningful mini-batch size; the intended
# size is now stated explicitly (32 is the Keras default).
history = model.fit(X_train, y_train_cat, batch_size=32, epochs=20, validation_data=(X_test, y_test_cat))

<a id="t3.1"></a>
# 3.1 Classification Report

In [None]:
def toClass(pred):
    """Convert per-sample class scores into class indices.

    Parameters
    ----------
    pred : array-like
        One row of scores per sample (e.g. softmax probabilities or one-hot
        vectors). NumPy arrays and nested sequences are both accepted.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(pred)`` holding, for each sample, the
        index of its largest score (the first one on ties).
    """
    scores = np.asarray(pred)
    # Nothing to classify: return an empty float array instead of failing in reshape.
    if len(scores) == 0:
        return np.zeros(0)
    # Flatten each sample so the index refers to the flattened row, then take
    # the arg-max of all rows in a single vectorised call.
    return scores.reshape(len(scores), -1).argmax(axis=1).astype(float)

In [None]:
# Class-probability predictions of the network on the held-out split.
pred_class = model.predict(X_test)

# classification_report is already imported in the first cell, so it is not
# re-imported here. The classes are named explicitly because the target
# encoding is 0 = 'Yes' (churned) and 1 = 'No'.
print(classification_report(toClass(y_test_cat), toClass(pred_class),
                            labels=[0, 1], target_names=['Yes', 'No']))

<a id="t3.2"></a>
# 3.2 Accuracy and Loss Curves

In [None]:
# Training vs. validation curves: accuracy on the left, loss on the right.
plt.figure(figsize=(14,5))
panels = [("acc", "val_acc", "Acc"), ("loss", "val_loss", "Loss")]
for panel_no, (train_key, val_key, y_label) in enumerate(panels, start=1):
    plt.subplot(1,2,panel_no)
    plt.plot(history.history[train_key],color='#6b0e0e')
    plt.plot(history.history[val_key],color='#f56c6c')
    plt.xlabel("Epochs")
    plt.ylabel(y_label)
    plt.legend(["Training","Validation"])
    plt.grid(True)
plt.show()

<a id="t3.3"></a>
# 3.3 Confusion Matrix

In [None]:
# Confusion matrix of the network on the held-out split.
true_labels = toClass(y_test_cat)
predicted_labels = toClass(pred_class)
cm = confusion_matrix(true_labels, predicted_labels)

# Rows are true labels, columns are predicted labels; index 0 is 'Yes'.
df1 = pd.DataFrame(cm, index=['Yes', 'No'], columns=['Yes', 'No'])

f, ax = plt.subplots(figsize=(6,6))
sns.heatmap(
    df1,
    annot=True,
    cmap="Greens",
    fmt='.0f',
    ax=ax,
    linewidths=5,
    cbar=False,
    annot_kws={"size": 16},
)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.xticks(size = 12)
plt.yticks(size = 12, rotation = 0)
plt.title("YSA Confusion Matrix", size = 12)
plt.grid(False)
plt.show()

<a id="t4."></a>
# 4. Comparison

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from yellowbrick.classifier import ROCAUC,ConfusionMatrix

In [None]:
# Display names for the encoded target, in label order: the classification
# cell maps 'Yes' -> 0 and 'No' -> 1.
classes = ['Yes','No']

In [None]:
# Gradient boosting baseline to compare against the neural network.
# It gets its own names (`gb_model`, `cm_viz`) so that the Keras `model` and the
# `cm` array from the earlier cells are not overwritten by objects of a
# different kind; re-running those cells afterwards then still works.
# The estimator is seeded so the comparison is reproducible.
gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X_train, y_train)
y_pred = gb_model.predict(X_test)

# ROC curves on the held-out split.
visualizer = ROCAUC(gb_model, classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show();

# Confusion matrix on the held-out split.
plt.figure(figsize=(3,3))
cm_viz = ConfusionMatrix(gb_model, classes=classes)
cm_viz.fit(X_train, y_train)
cm_viz.score(X_test, y_test)
plt.xticks(rotation=0)
cm_viz.show();

print(classification_report(y_test,y_pred))