In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
# Location of the Telco customer-churn CSV, relative to this notebook.
churn_csv_path = "../input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(churn_csv_path)
df.head()

In [None]:
# Drop the customer identifier column. Rebinding `df` instead of dropping in
# place, together with errors="ignore", keeps this cell safe to re-run: a
# second execution no longer raises KeyError because the column is already gone.
df = df.drop(columns="customerID", errors="ignore")


In [None]:
df.dtypes

In [None]:
df.TotalCharges.values #dtype is object

In [None]:
pd.to_numeric(df.TotalCharges, errors="coerce").isnull()

In [None]:
df[pd.to_numeric(df.TotalCharges, errors="coerce").isnull()] ## 11 rows have a blank TotalCharges value that cannot be parsed as a number

In [None]:
df.iloc[488]["TotalCharges"]

In [None]:
df.shape #7043 rows

In [None]:
# Keep only the rows whose TotalCharges parses as a number. Filtering on the
# coerced values (rather than comparing against a single-space string) also
# drops empty, multi-space or missing entries, and .copy() makes df1 an
# independent frame so later column assignments do not trigger
# SettingWithCopyWarning or write through to df.
df1 = df[pd.to_numeric(df.TotalCharges, errors="coerce").notna()].copy()

In [None]:
df1.shape #7032 rows so 11 rows are dropped

In [None]:
# Convert TotalCharges from strings to numbers. assign() returns a new frame,
# so this works without SettingWithCopyWarning even when df1 is a filtered
# slice of df, and re-running the cell is harmless.
df1 = df1.assign(TotalCharges=pd.to_numeric(df1["TotalCharges"]))

In [None]:
df1.TotalCharges.dtypes

In [None]:
# Histogram of tenure, split by whether the customer churned.
tenure_churn_yes = df1.loc[df1.Churn == "Yes", "tenure"]
tenure_churn_no = df1.loc[df1.Churn == "No", "tenure"]

tenure_axes = plt.gca()
tenure_axes.hist(
    [tenure_churn_yes, tenure_churn_no],
    color=["green", "red"],
    label=["Churn=Yes", "Churn=No"],
)
tenure_axes.set_xlabel("tenure")
tenure_axes.set_ylabel("Number of Customers")
tenure_axes.set_title("Customer churn prediction visualization")
tenure_axes.legend()

In [None]:
# Histogram of MonthlyCharges, split by whether the customer churned.
mc_churn_yes = df1.loc[df1.Churn == "Yes", "MonthlyCharges"]
mc_churn_no = df1.loc[df1.Churn == "No", "MonthlyCharges"]

charges_axes = plt.gca()
charges_axes.hist(
    [mc_churn_yes, mc_churn_no],
    color=["green", "red"],
    label=["Churn=Yes", "Churn=No"],
)
charges_axes.set_xlabel("MonthlyCharges")
charges_axes.set_ylabel("Number of Customers")
charges_axes.set_title("Customer churn prediction visualization")
charges_axes.legend()

In [None]:
def print_unique_col_values(df):
    """Print the distinct values of every object-dtype column in ``df``.

    One line is written to stdout per matching column, formatted as
    ``<column>: <array of unique values>``. Columns of any other dtype are
    skipped. Returns None.
    """
    for name in df.columns:
        series = df[name]
        # Only object-dtype columns are of interest here.
        if series.dtypes != "object":
            continue
        print(f"{name}: {series.unique()}")

In [None]:
print_unique_col_values(df1)

In [None]:
# Collapse the "No ... service" variants into a plain "No". Rebinding df1 to
# the returned frame (instead of replacing in place) avoids mutating what may
# be a view of df and the SettingWithCopyWarning that comes with it.
df1 = df1.replace({"No internet service": "No", "No phone service": "No"})

In [None]:
print_unique_col_values(df1)

In [None]:
yes_no_columns = ["Partner", "Dependents", "PhoneService", "MultipleLines", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "PaperlessBilling", "Churn"]

# Encode Yes/No as 1/0.
# The original `df1[col].replace(..., inplace=True)` mutates a temporary
# Series; pandas deprecates that pattern and, with copy-on-write enabled, it
# leaves df1 unchanged. Assigning the result back to the column is explicit
# and always takes effect.
# The identity entries (1 -> 1, 0 -> 0) keep the cell idempotent when re-run;
# any other value maps to NaN and makes astype("int64") fail loudly instead of
# silently producing a bad label.
yes_no_codes = {"Yes": 1, "No": 0, 1: 1, 0: 0}

df1 = df1.copy()  # work on an independent frame, never on a view of df
for col in yes_no_columns:
    df1[col] = df1[col].map(yes_no_codes).astype("int64")

In [None]:
for col in df1:
    print(f"{col} : {df1[col].unique()}")

In [None]:
# Encode gender as Female=1 / Male=0.
# Chained `df1["gender"].replace(..., inplace=True)` acts on a temporary
# Series and does not reliably update df1 (it is deprecated and is a no-op
# under copy-on-write), so build a new frame with assign() instead.
# The identity entries keep the cell idempotent on re-run; unexpected values
# become NaN and make astype("int64") raise.
gender_codes = {"Female": 1, "Male": 0, 1: 1, 0: 0}
df1 = df1.assign(gender=df1["gender"].map(gender_codes).astype("int64"))

In [None]:
# One-hot encode the multi-valued categorical columns.
# dtype=int is passed explicitly: since pandas 2.0 get_dummies returns boolean
# columns by default, and a frame mixing bool with int/float columns converts
# to an object-dtype array that Keras cannot turn into a tensor.
df2 = pd.get_dummies(
    data=df1,
    columns=["InternetService", "Contract", "PaymentMethod"],
    dtype=int,
)
df2.columns

In [None]:
df2.sample(2)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Continuous columns that get rescaled with a min-max scaler.
cols_to_scale = ["MonthlyCharges", "TotalCharges", "tenure"]

# NOTE(review): the scaler is fitted on the full frame before the train/test
# split further down, so the test rows influence the min/max used for scaling.
# Consider fitting on the training rows only.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df2[cols_to_scale])
df2[cols_to_scale] = scaled_values

df2.sample(3)

In [None]:
for col in df2:
    print(f"{col}: {df2[col].unique()}")

In [None]:
# Separate the target (Churn) from the feature columns.
target_column = "Churn"
y = df2[target_column]
X = df2.drop(columns=target_column)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
import tensorflow as tf
from tensorflow import keras

# Take the input width from the training data instead of hard-coding 26, so
# the model keeps working when the set of feature columns changes.
n_features = X_train.shape[1]

# Network: n_features inputs -> 20-unit ReLU hidden layer -> 1 sigmoid output.
# An explicit keras.Input is used because passing input_shape to a layer is
# discouraged by recent Keras versions.
model = keras.Sequential([
    keras.Input(shape=(n_features,)),
    keras.layers.Dense(20, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid"),
])

# Binary cross-entropy matches the single sigmoid output and the 0/1 target.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

model.fit(X_train, y_train, epochs=100)

In [None]:
model.evaluate(X_test, y_test)

In [None]:
yp = model.predict(X_test)
yp[:5]

In [None]:
# Turn the model outputs into hard 0/1 labels: anything strictly above the
# 0.5 cut-off becomes 1, everything else 0.
y_pred = [1 if score > 0.5 else 0 for score in yp]

In [None]:
y_pred[:10]


In [None]:
y_test[:10]

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

print(classification_report(y_test,y_pred))

In [None]:
import seaborn as sn

# Confusion matrix of true labels against the thresholded predictions.
cm = tf.math.confusion_matrix(labels=y_test, predictions=y_pred)

plt.figure(figsize=(10, 7))
heatmap_axes = sn.heatmap(cm, annot=True, fmt="d")
heatmap_axes.set_xlabel("Predicted")
heatmap_axes.set_ylabel("Truth")

In [None]:
# Accuracy = correct predictions / all predictions. The counts are computed
# from the current predictions instead of being copied by hand from a previous
# training run, which goes stale whenever the model is retrained.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
round(float((tn + tp) / (tn + fp + fn + tp)), 2) #accuracy

In [None]:
# Precision for class 0 = true negatives / everything predicted as 0.
# Computed from the current predictions rather than hand-copied counts.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
round(float(tn / (tn + fn)), 2) #precision for 0 class

In [None]:
# Precision for class 1 = true positives / everything predicted as 1.
# Computed from the current predictions rather than hand-copied counts.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
round(float(tp / (tp + fp)), 2) #precision for 1 class

In [None]:
# Recall for class 0 = true negatives / all rows whose true label is 0.
# Computed from the current predictions rather than hand-copied counts.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
round(float(tn / (tn + fp)), 2) #recall for 0 class

In [None]:
# Recall for class 1 = true positives / all rows whose true label is 1.
# Computed from the current predictions rather than hand-copied counts.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
round(float(tp / (tp + fn)), 2) #recall for 1 class