In [None]:
# Project : Customer Churn

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [None]:
import os
# List every file found under the input directory so the dataset path can be confirmed.
for folder, _, file_names in os.walk('../input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print("Dir path :", full_path)

In [None]:
# Load the churn dataset from the Kaggle input directory.
# Note: no `na_values` argument is passed here; blank TotalCharges entries are
# converted to NaN later in the notebook with an explicit regex replace.
churn_csv_path = "/kaggle/input/test-customer-churn/Test_customer_churn.csv"
customerData = pd.read_csv(churn_csv_path)

In [None]:
# Preview the first rows to check that the file was parsed as expected.
customerData.head()

In [None]:
# Encode the target column numerically: 'Yes' -> 1, 'No' -> 0.
# Any other value is left unchanged by `replace`.
churn_encoding = {'Yes': 1, 'No': 0}
customerData["Churn"] = customerData["Churn"].replace(churn_encoding)
customerData["Churn"]

In [None]:
# Inspect the column dtypes after encoding Churn.
customerData.dtypes

In [None]:
# A) Data Manipulation:

In [None]:
# a. Find the total number of male customers
# Keep only the rows for male customers; the first entry of the displayed
# shape tuple is the number of male customers.
is_male = customerData['gender'].eq('Male')
numMales = customerData.loc[is_male]
numMales.shape

# Equivalent count without building the subset:
# sum(customerData['gender']=='Male')


In [None]:
# b. Find the total number of customers whose Internet Service is ‘DSL’
# The comparison yields a boolean Series; its vectorized .sum() counts the True
# entries without iterating over the rows in Python (as the builtin sum() does).
(customerData['InternetService'] == 'DSL').sum()

In [None]:
# Same question answered by materialising the DSL subset: the first entry of
# the displayed shape tuple is the number of DSL customers.
uses_dsl = customerData['InternetService'].eq('DSL')
numofIS = customerData.loc[uses_dsl]
numofIS.shape

In [None]:
# c. Extract all the Female senior citizens whose Payment Method is Mailed check & store the
# result in ‘new_customer’

# One boolean mask per condition; a row is kept only when all three hold.
is_female = customerData["gender"].eq("Female")
is_senior = customerData["SeniorCitizen"].eq(1)
pays_by_mailed_check = customerData["PaymentMethod"].eq("Mailed check")

new_customer = customerData.loc[is_female & is_senior & pays_by_mailed_check]
new_customer


In [None]:
# Replace empty or whitespace-only TotalCharges entries with NaN so the column
# can be converted to a numeric dtype afterwards.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.

customerData["TotalCharges"] = customerData["TotalCharges"].replace(r'^\s*$', np.nan, regex=True)
# Show the row at position 488 for inspection
# (presumably one of the rows that had a blank value - verify against the data).
customerData[488:489]

In [None]:
# Cast TotalCharges from text to floating point; NaN entries stay NaN.

total_charges = customerData["TotalCharges"]
customerData["TotalCharges"] = total_charges.astype("float64")
customerData["TotalCharges"]

In [None]:
# d. Extract all those customers whose tenure is less than 10 months or their Total charges is less
# than 500$ & store the result in ‘new_customer’

# A row qualifies when either condition holds; NaN TotalCharges compare as False.
short_tenure = customerData["tenure"].lt(10)
low_total_charges = customerData["TotalCharges"].lt(500)

new_customer = customerData.loc[short_tenure | low_total_charges]
new_customer

In [None]:
# B) Data Visualization:

In [None]:
# a. Build a pie-chart to show the distribution of customers would be churning out

# value_counts() orders the classes by frequency, not by value, so the wedge
# labels are derived from the index instead of being hard-coded; a fixed
# ["Churn No", "Churn Yes"] list would be wrong whenever churners are the majority.
chList = customerData['Churn'].value_counts()
churn_names = {0: "Churn No", 1: "Churn Yes", 'No': "Churn No", 'Yes': "Churn Yes"}
labels = [churn_names.get(value, str(value)) for value in chList.index]

plt.pie(chList, labels = labels, autopct='%1.1f%%')
plt.title("Churn Distribution")
plt.show()


In [None]:
# b. Build a bar-plot to show the distribution of ‘Internet Service’

# Number of customers per Internet Service category; the index holds the category names.
intservList = customerData['InternetService'].value_counts()
print(intservList)
names = intservList.index
print("keys :",names)

# One bar per category on a single 10x10 inch axes.
fig, ax = plt.subplots(figsize=(10,10))
ax.bar(names, intservList, color='orange')
ax.set_title("Internet Service Distribution")
plt.show()

In [None]:
# C) Model Building:

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.activations import relu,softmax,sigmoid
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import categorical_crossentropy,binary_crossentropy

In [None]:
# a. Build a sequential model using Keras, to find out if the customer would churn or not, using
# ‘tenure’ as the feature and ‘Churn’ as the dependent/target column:

# Single feature column and the 1/0 encoded target.
x, y = customerData['tenure'], customerData["Churn"]

# Quick check of the shapes and container types before splitting.
x.shape, y.shape, type(x), type(y)

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

In [None]:
# Convert each split to a NumPy array shaped as a column vector (n_samples, 1).
column = (-1, 1)
x_train, x_test = np.array(x_train).reshape(column), np.array(x_test).reshape(column)
y_train, y_test = np.array(y_train).reshape(column), np.array(y_test).reshape(column)

# Display the four shapes followed by the four container types.
splits = (x_train, y_train, x_test, y_test)
tuple(s.shape for s in splits) + tuple(type(s) for s in splits)

In [None]:
# i. The visible/input layer should have 12 nodes with ‘Relu’ as activation function.
# ii. This model would have 1 hidden layer with 8 nodes and ‘Relu’ as activation function

# The final single sigmoid unit produces one value per sample for the binary target.
model = Sequential([
    Dense(12, activation=relu, input_dim=1),
    Dense(8, activation=relu),
    Dense(1, activation=sigmoid),
])

In [None]:
# Print the layers and parameter counts of the first model.
model.summary()

In [None]:
# iii. Use ‘Adam’ as the optimization algorithm

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# under the key 'acc', which the plotting cell reads from the training history.
model.compile(
    loss=binary_crossentropy,
    optimizer='Adam',
    metrics=["acc"],
)

In [None]:
# iv. Fit the model on the train set, with number of epochs to be 150

# The test split is passed as validation data, so per-epoch validation
# metrics are recorded in `history` alongside the training metrics.
history = model.fit(
    x_train,
    y_train,
    epochs=150,
    validation_data=(x_test, y_test),
)


In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt

In [None]:
# v. Predict the values on the test set and build a confusion matrix

# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single
# sigmoid output the equivalent is to threshold the predicted probability at 0.5.
y_predict = (model.predict(x_test) > 0.5).astype("int32")

y_predict

In [None]:
# Compare the predicted classes with the true test labels.
cm = confusion_matrix(y_test, y_predict)
aScore = accuracy_score(y_test, y_predict)

print ("Confusion Matrix :\n",cm)
print ("Accuracy Score :\n",aScore)

In [None]:
# Example output recorded from an earlier run (values may differ between runs):
# Confusion Matrix :
# [[1410  150]
# [ 382  171]]
# Accuracy Score :
# 0.7482252721249408

# Manual accuracy check computed from the current confusion matrix rather than
# from numbers copied out of an old run: correct predictions (the diagonal)
# divided by the total number of predictions.
np.trace(cm) / cm.sum()

In [None]:
# vi. Plot the ‘Accuracy vs Epochs’ graph

# Per-epoch accuracy recorded by model.fit. Note that `epochs1` holds the
# validation accuracy despite its name.
accr1 = history.history['acc']
epochs1 = history.history["val_acc"]

# Label both curves and the axes so the figure can be read on its own.
plt.plot(accr1, c = 'r', label = 'Training accuracy')
plt.plot(epochs1, c = 'y', label = 'Validation accuracy')
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy vs Epochs (model 1)")
plt.legend()
plt.show()


In [None]:
# b. Build the 2nd model using same target and feature variables:
# i. Add a drop-out layer after the input layer with drop-out value of 0.3
# ii. Add a drop-out layer after the hidden layer with drop-out value of 0.2
# iii. Predict the values on the test set and build a confusion matrix
# iv. Plot the ‘Accuracy vs Epochs’ graph

In [None]:
# Second model: same layers as the first, with dropout of 0.3 after the input
# layer and 0.2 after the hidden layer.
model = Sequential([
    Dense(12, activation=relu, input_dim=1),
    Dropout(0.3),
    Dense(8, activation=relu),
    Dropout(0.2),
    Dense(1, activation=sigmoid),
])

In [None]:
# Print the layers and parameter counts of the second (dropout) model.
model.summary()

In [None]:
# The network ends in a single sigmoid unit and the target is 0/1, so the loss
# must be binary cross-entropy. Categorical cross-entropy over a one-element
# output is constant, so the model would not learn anything from it.
model.compile(loss=binary_crossentropy, optimizer='Adam', metrics = ['acc'])

In [None]:
# Train the dropout model for 100 epochs, validating on the test split each epoch.
history2 = model.fit(
    x_train,
    y_train,
    epochs=100,
    validation_data=(x_test, y_test),
)

In [None]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single
# sigmoid output the equivalent is to threshold the predicted probability at 0.5.
y_predict2 = (model.predict(x_test) > 0.5).astype("int32")

cm2 = confusion_matrix(y_test, y_predict2)
accS2 = accuracy_score(y_test, y_predict2)

print("Confusion Matrix :\n", cm2)
print("Accuracy Score :\n", accS2)

In [None]:
# Per-epoch accuracy recorded while fitting the second model. Note that
# `epoch2` holds the validation accuracy despite its name.
acc2 = history2.history['acc']
epoch2 = history2.history['val_acc']

# Label both curves and the axes so the figure can be read on its own.
plt.plot(acc2, c = 'b', label = 'Training accuracy')
plt.plot(epoch2, c = 'r', label = 'Validation accuracy')
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy vs Epochs (model 2)")
plt.legend()
plt.show()

In [None]:
# c. Build the 3rd model using ‘Tenure’, ’Monthly Charges’ & ‘Total Charges’ as the features and
# ‘Churn’ as the dependent/target column:
# i. The visible/input layer should have 12 nodes with ‘Relu’ as activation function.
# ii. This model would have 1 hidden layer with 8 nodes and ‘Relu’ as activation function
# iii. Use ‘Adam’ as the optimization algorithm
# iv. Fit the model on the train set, with number of epochs to be 150
# v. Predict the values on the test set and build a confusion matrix
# vi. Plot the ‘Accuracy vs Epochs’ graph

In [None]:
# Check the container types of the frame and of its TotalCharges column.
type(customerData), type(customerData['TotalCharges'])

In [None]:
# Make sure MonthlyCharges is stored as floating point before it is used as a feature.
monthly_charges = customerData["MonthlyCharges"]
customerData["MonthlyCharges"] = monthly_charges.astype("float64")

In [None]:
# Features and target for the third model.
# TotalCharges contains NaN for the rows whose original value was blank (they
# were replaced with NaN earlier in the notebook). A NaN feature makes the
# training loss NaN, so rows with a missing feature value are dropped first.
feature_cols = ['tenure','MonthlyCharges','TotalCharges']
complete_rows = customerData.dropna(subset=feature_cols)

x = complete_rows[feature_cols]
y = complete_rows['Churn']

# Positional access: the label 10 may no longer exist after dropping rows.
x.shape, y.shape, y.iloc[10], type(y)

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Display the four shapes followed by the container types of the training parts.
splits = (x_train, x_test, y_train, y_test)
tuple(s.shape for s in splits) + (type(x_train), type(y_train))

In [None]:
# Convert both target splits to NumPy column vectors of shape (n_samples, 1).
column = (-1, 1)
y_train, y_test = np.array(y_train).reshape(column), np.array(y_test).reshape(column)

In [None]:
# Re-check the column dtypes now that the charge columns have been cast to float.
customerData.dtypes

In [None]:
# Confirm the reshaped targets are column vectors.
y_train.shape, y_test.shape

In [None]:
# x_train = tf.convert_to_tensor(x_train)
# y_train = tf.convert_to_tensor(y_train)

In [None]:
# Third model: same architecture as the first one, but with three input features.
model = Sequential([
    Dense(12, activation=relu, input_dim=3),
    Dense(8, activation=relu),
    Dense(1, activation=sigmoid),
])

In [None]:
# Print the layers and parameter counts of the third (three-feature) model.
model.summary()

In [None]:
# Binary cross-entropy for the single sigmoid output; accuracy is tracked
# under the key 'acc', which the plotting cell reads from the training history.
model.compile(
    loss=binary_crossentropy,
    optimizer='Adam',
    metrics=['acc'],
)

In [None]:
# Train the three-feature model for 150 epochs, validating on the test split each epoch.
history3 = model.fit(
    x_train,
    y_train,
    epochs=150,
    validation_data=(x_test, y_test),
)

In [None]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single
# sigmoid output the equivalent is to threshold the predicted probability at 0.5.
y_predict3 = (model.predict(x_test) > 0.5).astype("int32")

cm3 = confusion_matrix(y_test, y_predict3)
accS3 = accuracy_score(y_test, y_predict3)

print("Confusion Matrix :\n", cm3)
print("Accuracy Score :\n", accS3)

In [None]:
# Per-epoch accuracy recorded while fitting the third model. Note that
# `epoch3` holds the validation accuracy despite its name.
acc3 = history3.history['acc']
epoch3 = history3.history['val_acc']

# Label both curves and the axes so the figure can be read on its own.
plt.plot(acc3, c = 'y', label = 'Training accuracy')
plt.plot(epoch3, c = 'b', label = 'Validation accuracy')
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy vs Epochs (model 3)")
plt.legend()
plt.show()