In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_files = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file in input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the Telco customer-churn CSV into `data` and preview the first rows.
csv_path = '/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit a dedicated encoder on `gender`, keep it as `enc_gender`, and overwrite the
# column with the resulting integer codes.
enc_gender = LabelEncoder().fit(data['gender'])
data['gender'] = enc_gender.transform(data['gender'])
data.head()

In [None]:
# Show all column labels of the frame.
data.columns

In [None]:
# Raw values of the first two rows as a NumPy array.
data.iloc[:2].to_numpy()

In [None]:
# Print per-column dtypes and non-null counts (run here after `gender` was encoded).
data.info()

In [None]:
def transform_to_label(col, frame=None):
    """Label-encode one column and return the encoded values.

    Parameters
    ----------
    col : str
        Name of the column to encode.
    frame : pandas.DataFrame, optional
        Frame to read the column from. When omitted, the notebook-level
        ``data`` frame is used, which is what the one-argument form has
        always done.

    Returns
    -------
    array
        The result of ``LabelEncoder.fit_transform`` on the column. The frame
        is not modified here; the caller assigns the result back.

    Notes
    -----
    A new encoder is fitted on every call and is not kept, so the mapping
    between original values and codes is not available afterwards.
    """
    # Resolve the global lazily so the function can also be used on other frames.
    source = data if frame is None else frame
    enc = LabelEncoder()
    return enc.fit_transform(source[col])

# Columns that are label-encoded in this cell.
category_cols = [
    'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
    'InternetService', 'OnlineSecurity', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
    'PaymentMethod', 'Churn',
]

# Encode every listed column first, then write the codes back one column at a time.
encoded_columns = {name: transform_to_label(name) for name in category_cols}
for col, codes in encoded_columns.items():
    data[col] = codes

data.head()

In [None]:
# Print per-column dtypes and non-null counts after the `category_cols` columns were encoded.
data.info()

In [None]:
# `OnlineBackup` is not in `category_cols`, so it is encoded here with its own
# encoder, which stays available as `enc_backup`.
enc_backup = LabelEncoder()
enc_backup.fit(data['OnlineBackup'])
data['OnlineBackup'] = enc_backup.transform(data['OnlineBackup'])

In [None]:
# Print per-column dtypes and non-null counts after `OnlineBackup` was encoded.
data.info()

In [None]:
# Count how often each distinct `TotalCharges` value occurs.
data.value_counts('TotalCharges')

In [None]:
import re
# List the TotalCharges entries that still have characters left once every run of
# digits and dots is stripped out, i.e. entries that are not purely numeric text.
numeric_run = re.compile(r'[0-9.]+')
[value for value in data['TotalCharges'] if numeric_run.sub('', value) != '']

In [None]:
# Single-space TotalCharges entries are mapped to '0' so the whole column can be
# cast to float64.
# The result is assigned back to the column explicitly. Calling
# replace(..., inplace=True) on the `data['TotalCharges']` selection is chained
# assignment: pandas warns about it and, with copy-on-write enabled, it no longer
# updates `data`, which would make the float cast fail on the ' ' entries.
data['TotalCharges'] = data['TotalCharges'].replace(' ', '0').astype('float64')
data.info()

In [None]:
# Remove the `customerID` column before the correlation and modeling cells.
# drop() returns a new frame that is rebound to `data`, and errors='ignore' keeps
# the cell re-runnable: the in-place drop raised KeyError on a second run because
# the column was already gone.
data = data.drop(columns=['customerID'], errors='ignore')
data.info()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the pairwise column correlations on a 12x6 inch figure.
correlations = data.corr()
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(correlations, ax=ax)

In [None]:
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

# Features are every column except the target; labels are the raw `Churn` values.
features = data.drop(columns=['Churn'])
labels = data['Churn'].values

# stratify keeps the churn / no-churn proportions the same in both partitions,
# so the test metrics are not skewed by an unlucky draw.
train_X, test_X, train_y, test_y = train_test_split(
    features, labels, stratify=labels, random_state=42
)

# Standardize the features before they reach the neural network: the columns are
# on very different scales (small integer codes next to charge amounts), which
# slows down and destabilizes gradient-based training. The scaler is fitted on
# the training rows only, so no information from the test rows leaks into it.
# The results are wrapped back into DataFrames so column names and row indices
# are preserved for later cells.
scaler = StandardScaler()
train_X = pd.DataFrame(scaler.fit_transform(train_X), columns=train_X.columns, index=train_X.index)
test_X = pd.DataFrame(scaler.transform(test_X), columns=test_X.columns, index=test_X.index)
train_X

In [None]:
# Display the training labels (an array, because the split was given `data['Churn'].values`).
train_y

In [None]:
import tensorflow as tf

# Binary classifier: two 8-unit ReLU hidden layers, each followed by 10% dropout,
# and a single sigmoid output unit. The input width is the number of feature columns.
n_features = train_X.shape[1]
classifier = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=8, activation='relu', input_shape=(n_features,)),
    tf.keras.layers.Dropout(rate=0.1),
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dropout(rate=0.1),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
classifier.summary()

In [None]:
# Train for 200 epochs with batches of 32 rows; 20% of the training data is set
# aside for validation and verbose=2 logs one line per epoch.
classifier.fit(
    x=train_X,
    y=train_y,
    batch_size=32,
    epochs=200,
    verbose=2,
    validation_split=0.2,
)

In [None]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Root-mean-squared error between the model's test-set outputs and the true labels.
predictions = classifier.predict(test_X)
mse = mean_squared_error(y_true=test_y, y_pred=predictions)
rmse = np.sqrt(mse)
rmse

In [None]:
from sklearn.metrics import roc_auc_score,roc_curve,precision_recall_curve

# Probability scores for the test rows. `Sequential.predict_proba` was removed in
# TensorFlow 2.6 and now raises AttributeError; with a sigmoid output layer,
# predict() already returns the probabilities. ravel() flattens the (n, 1)
# output into the 1-D score vector used by the curve functions.
scores = classifier.predict(test_X).ravel()
precisions, recalls, thresholds = precision_recall_curve(test_y, scores)

In [None]:
# Precision and recall as functions of the decision threshold.
# `precisions` and `recalls` each have one more entry than `thresholds`, so the
# last entry is dropped to line the arrays up.
plt.figure(figsize=(12,6))
plt.plot(thresholds,precisions[:-1],'b--',label="Precision")
plt.plot(thresholds,recalls[:-1],'g-',label="Recall")
plt.xlabel('Decision threshold')
plt.ylabel('Score')
plt.title('Precision and recall vs. decision threshold')
# Without legend() the label= arguments above are never drawn and the two curves
# cannot be told apart.
plt.legend()
plt.show()

In [None]:
# Precision-recall curve: precision on the y-axis against recall on the x-axis.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(recalls, precisions)
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
plt.show()

In [None]:
# ROC curve points for the test set.
# The ROC thresholds get their own name. Unpacking them into `thresholds` overwrote
# the precision-recall thresholds, so re-running the precision/recall-vs-threshold
# plot after this cell would generally fail on mismatched array lengths.
fpr, tpr, roc_thresholds = roc_curve(test_y, scores)

In [None]:
# ROC curve: true positive rate against false positive rate.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(fpr, tpr)
ax.set(xlabel='False Positive Rate', ylabel='True Positive Rate')
plt.show()