In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import shuffle

In [None]:
df = pd.read_csv("/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv")
df.head(10)

In [None]:
df.columns

In [None]:
df.shape

In [None]:
df.info()

In [None]:
df.TotalCharges.values

In [None]:
df.MonthlyCharges.values

In [None]:
df[pd.to_numeric(df.TotalCharges,errors="coerce").isnull()]

In [None]:
df_new = df.copy()
df_new.head()

In [None]:
df_new.shape

In [None]:
df_new[pd.to_numeric(df_new.TotalCharges,errors="coerce").isnull()]

In [None]:
# Keep only the rows whose TotalCharges can be parsed as a number (the blank
# entries inspected in the previous cell are dropped). `.copy()` makes df_new an
# independent frame, so the later column assignments and in-place edits do not
# operate on a view of the original data (SettingWithCopyWarning).
total_charges_is_numeric = pd.to_numeric(df_new.TotalCharges, errors="coerce").notnull()
df_new = df_new[total_charges_is_numeric].copy()
df_new.shape

In [None]:
df_new.TotalCharges = pd.to_numeric(df_new.TotalCharges)

In [None]:
df_new.info()

In [None]:
# customerID is an identifier, not a feature. Rebinding (instead of inplace=True)
# together with errors="ignore" keeps this cell safe to re-run.
df_new = df_new.drop(columns="customerID", errors="ignore")

In [None]:
df_new[df_new.Churn=="No"]

In [None]:
def correlation(dataset, threshold):
    """Return the names of columns that are strongly correlated with an earlier column.

    Only numeric and boolean columns take part in the correlation; other columns
    (e.g. strings) are ignored.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame whose columns are compared pairwise.
    threshold : float
        A column is reported when the absolute value of its correlation with any
        column that precedes it exceeds this value.

    Returns
    -------
    set
        Names of the later column of every pair whose |correlation| > threshold.
    """
    # Select numeric/bool columns explicitly: DataFrame.corr() in recent pandas
    # raises on object columns instead of silently skipping them.
    corr_matrix = dataset.select_dtypes(include=["number", "bool"]).corr()
    columns = corr_matrix.columns

    col_corr = set()  # names of the correlated columns
    for i in range(len(columns)):
        for j in range(i):  # strictly below the diagonal: each pair is visited once
            # absolute value: strong negative correlation counts as well
            if abs(corr_matrix.iloc[i, j]) > threshold:
                col_corr.add(columns[i])
    return col_corr


In [None]:
corr_features=correlation(df_new,0.8)
len(set(corr_features))


In [None]:
corr_features

In [None]:
# Remove the highly correlated columns found above. A list is passed instead of
# the raw set, and rebinding with errors="ignore" keeps the cell safe to re-run.
df_new = df_new.drop(columns=list(corr_features), errors="ignore")

In [None]:
df_new.columns

In [None]:
def print_unique_col_values(df):
    """Print the distinct values of every object-dtype column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. One line ``<column>: <unique values>`` is printed per
        object-dtype column; other columns are skipped.
    """
    # Use the frame that was passed in rather than a notebook-level global.
    for column in df:
        if df[column].dtype == "object":
            print(f'{column}: {df[column].unique()}')

In [None]:
df_new.replace("No internet service","No",inplace=True)
df_new.replace("No phone service","No",inplace=True)

In [None]:
print_unique_col_values(df_new)

In [None]:
# Binary Yes/No columns that are encoded as 1/0.
yes_no_columns = ["Partner", "Dependents", "PhoneService", "MultipleLines", "OnlineSecurity",
                  "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV",
                  "StreamingMovies", "PaperlessBilling", "Churn"]

yes_no_to_int = {"Yes": 1, "No": 0}

for col in yes_no_columns:
    # Assign the result back to the frame: an in-place replace on df_new[col]
    # acts on a temporary Series and is not guaranteed to reach df_new.
    # .get(v, v) leaves already-encoded values untouched so the cell can be
    # re-run; astype fails loudly if any other string is still present.
    df_new[col] = df_new[col].map(lambda v: yes_no_to_int.get(v, v)).astype("int64")
    

In [None]:
for col in df_new:
    print(f'{col}: {df_new[col].unique()}')

In [None]:
# Encode gender as Female -> 1, Male -> 0. The result is assigned back to the
# frame because an in-place replace on df_new["gender"] acts on a temporary
# Series. Already-encoded values pass through unchanged on a re-run.
gender_to_int = {"Female": 1, "Male": 0}
df_new["gender"] = df_new["gender"].map(lambda v: gender_to_int.get(v, v)).astype("int64")

In [None]:
# One-hot encode the multi-valued categorical columns. The dtype is pinned to an
# integer type: newer pandas versions default to bool dummies, which makes the
# mixed feature frame convert to an object array further down the pipeline.
df1 = pd.get_dummies(
    data=df_new,
    columns=["InternetService", "Contract", "PaymentMethod"],
    dtype="uint8",
)

In [None]:
df1.columns

In [None]:
 df1 =df1.drop_duplicates()

In [None]:
# Shuffle the rows with a fixed seed so that a fresh "Restart & Run All"
# reproduces the same row order (and therefore the same results downstream).
df1 = shuffle(df1, random_state=5)
df1.head()

In [None]:
# Columns that are passed through MinMaxScaler below.
cols_to_scale = ["tenure","MonthlyCharges"]

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

# NOTE(review): the scaler is fitted on all of df1, i.e. before the train/test
# split performed later in the notebook, so the test rows contribute to the
# min/max used for scaling (data leakage). Consider fitting on the training
# rows only and calling scaler.transform on the test rows.
df1[cols_to_scale]= scaler.fit_transform(df1[cols_to_scale])

In [None]:
X= df1.drop("Churn",axis="columns")
Y= df1["Churn"]

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=5)

In [None]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
y_train.value_counts()

In [None]:
from imblearn import over_sampling
from imblearn.over_sampling import SMOTE

In [None]:
# Oversample the minority class of the training split only (the test split is
# left untouched). `fit_resample` is the current imbalanced-learn API; the old
# `fit_sample` alias has been removed from the library.
sm = SMOTE(random_state=12)
X_train, y_train = sm.fit_resample(X_train, y_train)

In [None]:
print(X_train.shape)
print(y_train.shape)

In [None]:
y_train.value_counts()

In [None]:
# NOTE(review): bare tuple expression with no assignment; it has no effect other
# than being displayed as the cell output. Presumably layer sizes tried during
# tuning - confirm, or remove this cell.
25,30,40,40,40,30,20,15

**After finding the best number of neurons and layers with the help of Keras Tuner**, let's build our model.

# Model building

In [None]:
# Feed-forward classifier: two 20-unit ELU layers (He-normal initialisation),
# each followed by batch normalisation, and a 2-way softmax output that matches
# the integer class labels used with sparse categorical cross-entropy.
model = keras.Sequential([
    # Explicit Input layer instead of the deprecated `input_dim` argument.
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(20, activation="elu", kernel_initializer="he_normal"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(20, activation="elu", kernel_initializer="he_normal"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(2, activation="softmax"),
])

# Current Keras optimizers reject the old `lr=` / `decay=` keyword arguments.
# InverseTimeDecay with decay_steps=1 gives the same schedule as the old `decay`
# option: lr = 0.01 / (1 + 1e-4 * step).
lr_schedule = keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01,
    decay_steps=1,
    decay_rate=1e-4,
)

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Keras carves the validation_split fraction off the END of the arrays, before
# any shuffling. After oversampling, the tail of the training data is expected
# to consist mostly of synthetic minority-class rows, so the rows are shuffled
# (with a fixed seed) first to make the validation slice representative.
X_fit, y_fit = shuffle(X_train, y_train, random_state=12)
history = model.fit(X_fit, y_fit, epochs=30, validation_split=0.2, batch_size=128)

In [None]:
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [None]:
pred_y = model.predict(X_test)

In [None]:
pred_y

In [None]:
y_test.value_counts()

In [None]:
rounded_prediction= np.argmax(pred_y,axis=1)

In [None]:
rounded_prediction

In [None]:
# Reference matrix: y_test is compared with itself, so every count lies on the
# diagonal (the number of test rows per class). `cm` is reassigned in the next
# cell with the model's predictions.
cm= confusion_matrix(y_test,y_test)
cm

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are the true labels and columns
# the predicted labels, which matches the axis labels drawn by
# plot_confusion_matrix ("True label" on y, "Predicted label" on x).
cm = confusion_matrix(y_test, rounded_prediction)
cm

In [None]:
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are drawn as true labels, columns as predicted labels.
    classes : sequence of str
        Tick labels, in the same order as the rows/columns of ``cm``.
    normalize : bool, default False
        When True each row is divided by its sum, so cells show per-class rates.
        Rows that sum to zero are shown as zeros.
    title : str
        Title of the figure.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so that the colours and the printed numbers
    # describe the same matrix.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is, rates with two decimals.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
cm_plot_labels = ['no_Churn','Churn']


plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')



In [None]:
# classification_report expects (y_true, y_pred); with the arguments reversed the
# reported precision and recall are swapped.
print(classification_report(y_test, rounded_prediction))

Our model reached 79% accuracy on the test set and correctly predicted 251 of the 386 customers who leave the company within a year.

The next step is to improve this model further.

In [None]:
model.save("churn_model.h5")