In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Telco customer-churn CSV from the "../input/" directory into a DataFrame.
data = pd.read_csv('../input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [None]:

# Preview the first rows of the freshly loaded table.
data.head()

In [None]:
# Show the dtype pandas assigned to each column.
data.dtypes

In [None]:
# "customerID" is not needed for prediction, so it is removed.
# Reassigning the result (instead of inplace=True) together with errors='ignore'
# keeps this cell re-runnable: a second execution no longer raises KeyError
# because the column is already gone.
data = data.drop(columns = ['customerID'], errors = 'ignore')

In [None]:
# Re-check the column dtypes after dropping "customerID".
data.dtypes

In [None]:
# Look at the underlying values of the TotalCharges column.
data['TotalCharges'].values

In [None]:
# Convert TotalCharges to a numeric dtype.
# pd.to_numeric with errors='coerce' turns any entry that cannot be parsed as a
# number (e.g. a blank / space-only string) into NaN, whereas astype would not
# produce NaN for such entries.

data['TotalCharges'] = pd.to_numeric(data['TotalCharges'], errors = 'coerce')

In [None]:
# Inspect the TotalCharges values again, now that the column has been coerced to numeric.
data['TotalCharges'].values

In [None]:
# Discard every row that contains at least one missing value, then re-check dtypes.
data.dropna(axis = 0, how = 'any', inplace = True)
data.dtypes

In [None]:
# Preview the table after rows with missing values were removed.
data.head()

In [None]:
# Split the tenure column by the value of Churn ('Yes' / 'No') for comparison.
tenure_churn_yes = data.loc[data['Churn'] == 'Yes', 'tenure']
tenure_churn_no = data.loc[data['Churn'] == 'No', 'tenure']
tenure_churn_no

In [None]:
# Grouped histogram of tenure: one series for Churn == 'Yes', one for Churn == 'No'.
plt.hist(x = [tenure_churn_yes, tenure_churn_no], label = ['churnYes', 'churnNo'])
plt.ylabel('Number of Customers')
plt.xlabel('Tenure')
plt.legend()

In [None]:
# Split the MonthlyCharges column by the value of Churn ('Yes' / 'No').
MC_churn_yes = data.loc[data['Churn'] == 'Yes', 'MonthlyCharges']
MC_churn_no = data.loc[data['Churn'] == 'No', 'MonthlyCharges']

In [None]:
# Grouped histogram of monthly charges: one series per Churn value.
plt.hist(x = [MC_churn_yes, MC_churn_no], label = ['churnYes', 'churnNo'])
plt.ylabel('Number of Customers')
plt.xlabel('Monthly Charges')
plt.legend()

In [None]:
# List the column labels currently present in the frame.
data.keys()

In [None]:
def Find_unique_val(data):
    """Print the distinct values held by every column of ``data``.

    One line is written to stdout per column, in the form
    ``<column> :  <array of unique values>``. Nothing is returned.
    """
    for column_name in data.keys():
        distinct_values = data[column_name].unique()
        print(column_name, ': ', distinct_values)

In [None]:
# Treat "No internet service" as a plain "No" for OnlineSecurity.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on a column selected from the frame is chained assignment, which pandas warns
# about and which does not modify `data` when Copy-on-Write is active.
data['OnlineSecurity'] = data['OnlineSecurity'].replace({'No internet service' : 'No'})

In [None]:
# Collapse "No internet service" into "No" in every object-typed column.
# Each column is written back explicitly; an inplace replace on data[col] is
# chained assignment and is not guaranteed to update `data` itself.
for col in data.keys():
    if data[col].dtypes == 'object':
        data[col] = data[col].replace({'No internet service' : 'No'})

In [None]:
# Print the unique values per column to check the "No internet service" replacement.
Find_unique_val(data)

In [None]:
# Collapse "No phone service" into plain "No" wherever it appears in the frame.
data.replace({'No phone service' : 'No'}, inplace = True)

In [None]:
# Columns that are mapped from 'Yes'/'No' to 1/0 in the following step.
col = [
    'Partner',
    'Dependents',
    'PhoneService',
    'MultipleLines',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'PaperlessBilling',
    'Churn',
]

In [None]:
# Encode the Yes/No columns listed in `col` as 1/0.
# The replaced Series is assigned back to the frame; replace(..., inplace=True)
# on data[c] is chained assignment and does not reliably write through to `data`.
for c in col:
    data[c] = data[c].replace({'Yes': 1, 'No': 0})

In [None]:
# Print the unique values per column to check the Yes/No -> 1/0 encoding.
Find_unique_val(data)

In [None]:
# Map the remaining low-cardinality text columns to integer codes.
# Results are assigned back to their columns rather than using inplace=True on
# a selected column, which is chained assignment and may leave `data` unchanged.
data['gender'] = data['gender'].replace({'Female': 1, 'Male': 0})
data['InternetService'] = data['InternetService'].replace({'DSL': 2, 'Fiber optic': 1, 'No': 0})
data['Contract'] = data['Contract'].replace({'Two year': 2, 'One year': 1, 'Month-to-month': 0})

In [None]:
# Print the unique values per column to check the gender / InternetService / Contract codes.
Find_unique_val(data)

In [None]:
# Take a copy of `data` under a new name; later steps operate on the copy.
data_1 = data.copy()

In [None]:
# Preview the first rows of the copied frame.
data_1.head()

In [None]:
# One-hot encode PaymentMethod.
# dtype is pinned to uint8 (the default before pandas 2.0). Since pandas 2.0 the
# default dummy dtype is bool, and a frame mixing bool with int/float columns
# converts to an object-dtype NumPy array, which is not usable as numeric
# model input.
data_final = pd.get_dummies(data = data_1, columns = ['PaymentMethod'], dtype = 'uint8')
data_final.head()

In [None]:
# Feature matrix: every column of data_final except the 'Churn' column.
x = data_final.drop(columns = ['Churn'])
x.head()

In [None]:
# Target vector: the 'Churn' column of data_final.
y = data_final['Churn']

# Preview the first target values.
y.head()

In [None]:
from sklearn import preprocessing

# Columns to be rescaled with MinMaxScaler.
col_for_scale = ['tenure','MonthlyCharges','TotalCharges']

scaler = preprocessing.MinMaxScaler()

# The scaler is fitted on, and applied to, all rows of `x`.
# NOTE(review): if a hold-out split is taken from `x` afterwards, the hold-out
# rows have influenced the fitted min/max — consider fitting on the training
# portion only and calling transform() on the rest.
x[col_for_scale] = scaler.fit_transform(x[col_for_scale])

In [None]:
# Preview the feature matrix after scaling.
x.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state is fixed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 1)

In [None]:
# Preview the first rows of the training features.
x_train.head()

In [None]:
# (rows, columns) of the training features; the column count is used as the model's input width.
x_train.shape

In [None]:
# A single sigmoid unit over all input features, trained with binary
# cross-entropy — i.e. a logistic-regression-style classifier.
n_features = x_train.shape[1]

model = keras.Sequential()
model.add(keras.layers.Dense(1, input_shape = (n_features,), activation = 'sigmoid'))

model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [None]:
# Train on the training split for 300 epochs.
model.fit(x_train,y_train,epochs = 300)

In [None]:
# Evaluate the trained model on x_test / y_test (loss plus the compiled metrics).
model.evaluate(x_test, y_test)

In [None]:
# Raw model outputs for the test features; show the first five.
y_pred = model.predict(x_test)
y_pred[:5]

In [None]:
# First five true labels, for comparison with the predictions.
y_test[:5]

In [None]:
# Convert each model output into a hard label: 1 when it exceeds 0.5, otherwise 0.
y_predicted = [1 if score > 0.5 else 0 for score in y_pred]

In [None]:
# First five thresholded predictions.
y_predicted[:5]

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Text report of per-class classification metrics for the thresholded predictions.
print(classification_report(y_test,y_predicted))

In [None]:
# Confusion matrix of true labels vs. thresholded predictions, computed with TensorFlow.
cm = tf.math.confusion_matrix(labels = y_test, predictions = y_predicted)
cm

In [None]:
import seaborn as sns
# Draw the confusion matrix as an annotated heatmap.
# tf.math.confusion_matrix puts the true labels on rows and the predictions on
# columns, so the axes are labelled to make the figure readable on its own.
ax = sns.heatmap(cm, annot = True, fmt = 'd')
ax.set(xlabel = 'Predicted', ylabel = 'Truth');