In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
import numpy as np
import pandas as pd

In [None]:
# Load the raw Telco customer-churn CSV into a DataFrame.
# The path is relative to the notebook's working directory.
df = pd.read_csv('../input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [None]:
# Show two randomly chosen rows (no seed is set, so the rows differ between runs).
df.sample(2)

In [None]:
# Drop the customerID column: it is of little use for predicting customer churn

In [None]:
# Remove the customerID column.
# Rebinding (instead of inplace=True) together with errors='ignore' makes this
# cell safe to re-run: a second execution no longer raises KeyError because the
# column is already gone.
df = df.drop(columns='customerID', errors='ignore')

In [None]:
# Inspect the raw TotalCharges values as an array.
df['TotalCharges'].values

In [None]:
# Inspect the raw MonthlyCharges values as an array, for comparison with TotalCharges.
df['MonthlyCharges'].values

In [None]:
# TotalCharges is stored with object dtype, so preview its numeric conversion.
# errors='coerce' turns values that cannot be parsed into NaN instead of raising.
# The result is only displayed here; df itself is not modified.

pd.to_numeric(df['TotalCharges'], errors='coerce')

In [None]:
# Boolean mask: True where TotalCharges could not be parsed as a number.
pd.to_numeric(df['TotalCharges'], errors='coerce').isna()

In [None]:
# Show the rows whose TotalCharges cannot be parsed as a number.
df.loc[pd.to_numeric(df['TotalCharges'], errors='coerce').isna()]

In [None]:
# Spot-check the TotalCharges value at positional row 488.
# NOTE(review): presumably one of the unparseable rows listed above — confirm.
df['TotalCharges'].iloc[488]

In [None]:
# Keep only the rows whose TotalCharges is not the blank placeholder ' '.
# .copy() makes df1 an independent frame, so the column assignments performed on
# df1 in later cells modify df1 itself rather than a slice of df (this avoids
# pandas' SettingWithCopyWarning).
df1 = df[df['TotalCharges'] != ' '].copy()
df1.shape

In [None]:
# List the dtype of every column in df1.
df1.dtypes

In [None]:
# Filtering out the blank rows does not change the column's dtype, so convert
# TotalCharges to a numeric dtype explicitly.
# assign() returns a new frame instead of writing into df1 (which was created by
# filtering df), so no SettingWithCopyWarning is triggered.
df1 = df1.assign(TotalCharges=pd.to_numeric(df1['TotalCharges']))
df1['TotalCharges'].dtype

In [None]:
# Groundwork for the churn plots: tenure of the customers with Churn == 'No'.
df1.loc[df1['Churn'] == 'No', 'tenure']

In [None]:
# Compare the tenure distribution of customers who churned with those who stayed.
tenure_churn_no = df1[df1.Churn=='No'].tenure
tenure_churn_yes = df1[df1.Churn=='Yes'].tenure

# Label the figure so it can be read on its own, consistent with the
# monthly-charges histogram.
plt.xlabel('Tenure')
plt.ylabel('Number of Customers')
plt.title('Customer Churn Prediction Visualization')

plt.hist([tenure_churn_yes, tenure_churn_no], color=['orange','yellow'], label=['Churn=yes','Churn=No'])
plt.legend()

In [None]:
# Compare the MonthlyCharges distribution of customers who churned with those
# who stayed. (The unused blood_sugar_* sample lists, which had nothing to do
# with this dataset, have been removed.)
mc_churn_no = df1[df1.Churn=='No'].MonthlyCharges
mc_churn_yes = df1[df1.Churn=='Yes'].MonthlyCharges

plt.xlabel('Monthly Charges')
plt.ylabel('Number of Customers')
plt.title('Customer Churn Prediction Visualization')

# rwidth=0.95 leaves a small gap between neighbouring bars.
plt.hist([mc_churn_yes, mc_churn_no], rwidth=0.95, color=['red','blue'], label=['Churn=yes','Churn=No'])
plt.legend()

In [None]:
# Print every column of df together with its distinct values.
for column in df.columns:
    print(f'{column}: {df[column].unique()}')

In [None]:
# Find the columns whose dtype is object


In [None]:
def print_unique_col_values(df):
    """Print the distinct values of each object-dtype column of ``df``.

    One line is printed per matching column, formatted as
    ``<column>:<unique values>``; columns of any other dtype are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.

    Returns
    -------
    None
    """
    for name in df.columns:
        series = df[name]
        # Only object-dtype columns are of interest here.
        if series.dtypes != 'object':
            continue
        print(f'{name}:{series.unique()}')
            
# List the distinct values of the object-dtype columns of df1.
print_unique_col_values(df1)

In [None]:
# Collapse 'No phone service' and 'No internet service' into plain 'No'.
# A single non-inplace replace() returns a new frame and rebinds df1, which
# avoids mutating a frame that was derived from a slice of df.
df1 = df1.replace(['No phone service', 'No internet service'], 'No')

In [None]:
# Re-check the distinct values of the object-dtype columns after the replacement.
print_unique_col_values(df1)

In [None]:
# Encode every Yes/No column as 1/0.
yes_no_columns = ['Partner','Dependents','PhoneService','MultipleLines','OnlineSecurity','OnlineBackup',
                   'DeviceProtection','TechSupport','StreamingTV','StreamingMovies','PaperlessBilling','Churn']

for col in yes_no_columns:
    # Assign the result back instead of calling replace(..., inplace=True) on
    # df1[col]: the in-place call operates on a temporary Series and leaves df1
    # unchanged when pandas Copy-on-Write is active.
    # The explicit cast guarantees an integer column even on pandas versions
    # where replace() no longer downcasts an object column automatically.
    df1[col] = df1[col].replace({"Yes": 1, "No": 0}).astype('int64')

In [None]:
# Sanity check: print every column of df1 with its distinct values.
for col in df1.columns:
    print(f'{col}: {df1[col].unique()}')

In [None]:
# Encode gender as Female -> 1, Male -> 0.
# The result is assigned back rather than using replace(..., inplace=True) on
# df1['gender'], which acts on a temporary Series and does not update df1 when
# pandas Copy-on-Write is active. The cast guarantees an integer column.
df1['gender'] = df1['gender'].replace({'Female': 1, 'Male': 0}).astype('int64')

In [None]:
# One-hot encode the remaining multi-valued categorical columns.
# dtype='uint8' pins the dummy columns to a numeric dtype: pandas >= 2.0 emits
# bool dummies by default, which mixed with the float/int columns yields an
# object array that Keras cannot convert to a tensor. uint8 is also what older
# pandas versions produced by default.
df2 = pd.get_dummies(data=df1, columns=['InternetService','PaymentMethod','Contract'], dtype='uint8')
df2.columns

In [None]:
# Rescale the numeric columns that are not already within the 0-1 range.
# NOTE(review): the scaler is fitted on the full frame, before the train/test
# split performed later in the notebook, so test rows influence the scaling —
# confirm whether that is acceptable.
from sklearn.preprocessing import MinMaxScaler

col_to_scale = ['tenure','MonthlyCharges','TotalCharges']

scaler = MinMaxScaler()
scaler.fit(df2[col_to_scale])
df2[col_to_scale] = scaler.transform(df2[col_to_scale])

# Show two random rows to eyeball the scaled values.
df2.sample(2)

In [None]:
# Final check that the data is ready for training: print each column of df2
# with its distinct values.
for col in df2.columns:
    print(f'{col}:{df2[col].unique()}')

In [None]:
# Yes, the data is ready. Hurray!

In [None]:
# Separate the target column (Churn) from the feature columns.
y = df2['Churn']
X = df2.drop(columns='Churn')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)
(X_train.shape, X_test.shape)

In [None]:
# Number of feature columns fed to the model.
X_train.shape[1]

In [None]:
# Fully connected binary classifier: three ReLU hidden layers followed by a
# single sigmoid unit that outputs a value between 0 and 1.
# The input width is taken from X_train rather than hard-coded, so the model
# still builds if the number of feature columns changes.
model = keras.Sequential([
    keras.layers.Dense(26, input_shape=(X_train.shape[1],), activation='relu'),
    keras.layers.Dense(20,  activation='relu'),
    keras.layers.Dense(15,  activation='relu'),
    keras.layers.Dense(1,  activation='sigmoid'),
])

# Binary cross-entropy matches the 0/1 target and the sigmoid output.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy']
)

# Train on the training split only; no validation data is passed, so the
# accuracy logged per epoch is training accuracy.
model.fit(X_train,y_train, epochs=59)

In [None]:
# After 59 epochs the model reaches about 83% accuracy on the training data, which is quite good considering the random initialisation.

In [None]:
# Evaluate the trained model on the held-out test split; reports the loss and
# the accuracy metric configured in compile().
model.evaluate(X_test, y_test)

In [None]:
# Predict on the test split; the final layer is a sigmoid, so each prediction
# is a value between 0 and 1. Show the first five.
yp = model.predict(X_test)
yp[:5]

In [None]:
# First five true labels, for comparison with the predictions above.
y_test.head(5)

In [None]:
# Turn each predicted score into a hard 0/1 label: scores strictly above 0.5
# become 1, everything else becomes 0.
y_pred = [1 if score > 0.5 else 0 for score in yp]

In [None]:
# First ten thresholded predictions.
y_pred[:10]

In [None]:
# First ten true labels, to compare against the thresholded predictions.
y_test.head(10)

In [None]:
# Classification report for the test split.
from sklearn.metrics import confusion_matrix , classification_report

print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Build the confusion matrix of the test-set predictions and draw it as an
# annotated heatmap.
import seaborn as sns
cm = tf.math.confusion_matrix(labels=y_test,predictions=y_pred)

fig, ax = plt.subplots(figsize=(10,7))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, annot=True, fmt='d', ax=ax)
ax.set_xlabel('predicted')
ax.set_ylabel('truth')