In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# load the raw table and discard the customerID column in one chained step
data = pd.read_csv('telco_dataset.csv').drop(columns='customerID')
data.head()

In [None]:
# data exploration
# A strict numeric conversion of TotalCharges fails because some entries are the
# blank string " ". The error is the point of this cell, so catch and show it
# instead of letting it abort a "Restart & Run All".
try:
    pd.to_numeric(data.TotalCharges)
except ValueError as err:
    print(f"TotalCharges cannot be parsed as numbers yet: {err}")


## Data Cleaning

In [None]:
# coerce TotalCharges to numbers; entries that cannot be parsed become NaN
total_charges = pd.to_numeric(data.TotalCharges, errors="coerce")
missing_total = total_charges.isnull()

# get rows which have null values in TotalCharges column
# (display() is required: only the last expression of a cell is shown automatically)
display(data[missing_total])

# drop rows which have null values in TotalCharges column and keep the parsed numbers.
# The same mask is used for showing and dropping, and assign() builds a new frame
# rather than writing into a slice of the old one (which triggers pandas'
# SettingWithCopyWarning).
data = data[~missing_total].assign(TotalCharges=total_charges[~missing_total])
data.dtypes

In [None]:
# function to get unique cols
def print_unique_cols(df):
    """Print the distinct values of every object-dtype column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. Columns whose dtype is not ``object`` are skipped.

    Returns
    -------
    None
        One line per object column is written to stdout as
        ``<column>: <array of unique values>``.
    """
    # iterate over the frame that was passed in, not a module-level variable
    for column in df:
        if df[column].dtype == 'object':
            print(f'{column}: {df[column].unique()}')

# list the distinct values of each object-dtype column in data
print_unique_cols(data)

In [None]:
# data cleaning -- change "No.. service" to "No"
# (a new frame is bound to `data` instead of mutating it in place)
data = data.replace(["No internet service", "No phone service"], "No")

# data cleaning -- change (Yes, No) to (1,0)
yes_no_columns = ['Partner', 'Dependents', 'PhoneService',
                  'MultipleLines', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'PaperlessBilling', 'Churn']

for col in yes_no_columns:
    # Assign the result back to the frame: `data[col].replace(..., inplace=True)`
    # works on a temporary Series and does not update `data` under pandas
    # Copy-on-Write. The explicit astype makes the column integer without relying
    # on replace() downcasting, and fails loudly if an unexpected label remains.
    data[col] = data[col].replace({'Yes': 1, 'No': 0}).astype('int64')

# data cleaning -- gender
data['gender'] = data['gender'].replace({'Female': 1, 'Male': 0}).astype('int64')

In [None]:
# one hot encoding
# dtype=int keeps the indicator columns as 0/1 integers. Recent pandas versions
# default to bool, and a frame mixing bool with numeric columns converts to an
# object array that the model cannot consume.
df2 = pd.get_dummies(data=data, columns=['InternetService', 'Contract', 'PaymentMethod'], dtype=int)
df2.sample(3)


In [None]:
# inspect the dtype of every column after encoding
df2.dtypes

## Scaling

In [None]:
# bring the continuous columns into the [0, 1] range; the remaining columns are already 0/1
from sklearn.preprocessing import MinMaxScaler

cols_to_scale = ['tenure', 'MonthlyCharges', 'TotalCharges']

# NOTE(review): the scaler is fitted on the whole dataset, i.e. before the
# train/test split further down, so test-set min/max leak into the features.
# Consider fitting on the training rows only.
scaler = MinMaxScaler()
scaler.fit(df2[cols_to_scale])
df2[cols_to_scale] = scaler.transform(df2[cols_to_scale])

df2.sample(5)

## Train Test Split

In [None]:
# target: the Churn column; features: every other column
y = df2['Churn']
X = df2.drop(columns='Churn')

In [None]:
from sklearn.model_selection import train_test_split

# hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [None]:
# number of feature columns, used as the width of the network's layers
num_inputs = X_train.shape[1]

## Train Model

In [None]:
import tensorflow as tf
from tensorflow import keras

# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling repeat from run to run.
keras.utils.set_random_seed(5)

model = keras.Sequential([
    # input: one feature vector of length num_inputs per row
    # (an explicit Input object replaces the deprecated `input_shape=` layer argument)
    keras.Input(shape=(num_inputs,)),
    # hidden layers
    keras.layers.Dense(num_inputs, activation="relu"),
    keras.layers.Dense(num_inputs, activation="relu"),
    keras.layers.Dense(num_inputs, activation="relu"),
    # output layer: a single sigmoid unit, i.e. a value between 0 and 1
    keras.layers.Dense(1, activation="sigmoid"),
])

# binary cross-entropy matches the single sigmoid output and the 0/1 target
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

model.fit(X_train, y_train, epochs=100)

In [None]:
# loss and accuracy (the metric set in compile) on the held-out test split
model.evaluate(X_test, y_test)

In [None]:
# sigmoid outputs for the test rows, one value between 0 and 1 per row
y_predict = model.predict(X_test)
# preview only the first few rows instead of dumping the whole array
y_predict[:5]

In [None]:
# convert prediction output to 0 or 1 with threshold p=0.5
# (vectorised comparison instead of a Python loop; values strictly above 0.5
# become 1, everything else 0; the result is flattened to a plain list of ints)
y_predictions = (np.asarray(y_predict).ravel() > 0.5).astype(int).tolist()

y_predictions[:5]

In [None]:
# classification report

from sklearn.metrics import confusion_matrix, classification_report

# per-class precision, recall and F1 of the thresholded predictions on the test split
print(classification_report(y_test, y_predictions))