# Predicting Churn using Customer Behaviour data

In [19]:

import sys
import types
import pandas as pd


# Location of the customer-retention CSV, held in one named variable so it is
# easy to repoint when the notebook is run elsewhere.
# NOTE(review): this is an absolute, user-specific path; consider a relative or
# configurable data directory.
data_path = '/Users/pramodkumar/Downloads/poc/Finocracy/CustomerRetention.csv'
df_data_1 = pd.read_csv(data_path)
# Preview the first rows to confirm the file loaded.
df_data_1.head()



Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,DigitalBanking,MultipleDigitalServices,OnlineService,ShareTrade,...,Deposits,TechSupport,DebitCard,CreditCard,Contract,PaperlessStatement,PaymentMethod,MonthlyRevenue,TotalRevenue,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No digital service,S1,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,S1,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,S1,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No digital service,S1,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,S2,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [20]:
# Show the column labels of the loaded frame.
df_data_1.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'DigitalBanking', 'MultipleDigitalServices', 'OnlineService',
       'ShareTrade', 'Loan', 'Deposits', 'TechSupport', 'DebitCard',
       'CreditCard', 'Contract', 'PaperlessStatement', 'PaymentMethod',
       'MonthlyRevenue', 'TotalRevenue', 'Churn'],
      dtype='object')

In [21]:
# Count missing values per column, then print a two-line banner followed by
# the per-column counts (one item per output line).
null_counts = df_data_1.isnull().sum()
print(
    "List of features with their corresponding count of null values : ",
    "---------------------------------------------------------------- ",
    null_counts,
    sep="\n",
)

List of features with their corresponding count of null values : 
---------------------------------------------------------------- 
customerID                 0
gender                     0
SeniorCitizen              0
Partner                    0
Dependents                 0
tenure                     0
DigitalBanking             0
MultipleDigitalServices    0
OnlineService              0
ShareTrade                 0
Loan                       0
Deposits                   0
TechSupport                0
DebitCard                  0
CreditCard                 0
Contract                   0
PaperlessStatement         0
PaymentMethod              0
MonthlyRevenue             0
TotalRevenue               0
Churn                      0
dtype: int64


# Drop rows that contain missing (NaN) values

In [22]:
# Keep only the rows in which every column has a value; any row containing
# at least one NaN is discarded.
df_data_1 = df_data_1.dropna(axis='index', how='any')

# Total number of records in the data set

In [23]:
# Number of rows currently in the frame.
df_data_1.shape[0]

7043

In [24]:
# Class balance of the target: how many rows fall under each Churn value.
df_data_1.loc[:, 'Churn'].value_counts()

No     5174
Yes    1869
Name: Churn, dtype: int64

# Encoding and Binning Transformations

In [25]:
# Per-column lookup tables that turn the categorical text values into integer
# codes. The outer key is the column name; the inner dict maps each original
# value to its code. Two encodings recur, so they are defined once and copied
# per column (each column still gets its own independent dict).
_yes_no = {"Yes": 1, "No": 0}
_yes_no_or_no_online = {"No online service": 2, **_yes_no}

cleanup_nums = {
    "gender": {"Female": 1, "Male": 0},
    "Partner": dict(_yes_no),
    "Dependents": dict(_yes_no),
    "DigitalBanking": dict(_yes_no),
    "MultipleDigitalServices": {"No digital service": 2, **_yes_no},
    # Both service tiers are binned into the same code (1); "No" is 0.
    "OnlineService": {"S1": 1, "S2": 1, "No": 0},
    "ShareTrade": dict(_yes_no_or_no_online),
    "Loan": dict(_yes_no_or_no_online),
    "Deposits": dict(_yes_no_or_no_online),
    "TechSupport": dict(_yes_no_or_no_online),
    "DebitCard": dict(_yes_no_or_no_online),
    "CreditCard": dict(_yes_no_or_no_online),
    "Contract": {"Two year": 2, "One year": 1, "Month-to-month": 0},
    "PaperlessStatement": dict(_yes_no),
    "PaymentMethod": {
        "Electronic check": 0,
        "Mailed check": 1,
        "Bank transfer (automatic)": 2,
        "Credit card (automatic)": 3,
    },
    "Churn": dict(_yes_no),
}


In [26]:
# Apply the per-column encodings: for each column named in cleanup_nums, swap
# every listed text value for its integer code. The result is bound back to
# the same name rather than mutating the frame in place.
df_data_1 = df_data_1.replace(cleanup_nums)
# Preview the encoded frame.
df_data_1.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,DigitalBanking,MultipleDigitalServices,OnlineService,ShareTrade,...,Deposits,TechSupport,DebitCard,CreditCard,Contract,PaperlessStatement,PaymentMethod,MonthlyRevenue,TotalRevenue,Churn
0,7590-VHVEG,1,0,1,0,1,0,2,1,0,...,0,0,0,0,0,1,0,29.85,29.85,0
1,5575-GNVDE,0,0,0,0,34,1,0,1,1,...,1,0,0,0,1,0,1,56.95,1889.5,0
2,3668-QPYBK,0,0,0,0,2,1,0,1,1,...,0,0,0,0,0,1,1,53.85,108.15,1
3,7795-CFOCW,0,0,0,0,45,0,2,1,1,...,1,1,0,0,1,0,2,42.3,1840.75,0
4,9237-HQITU,1,0,0,0,2,1,0,1,0,...,0,0,0,0,0,1,0,70.7,151.65,1


In [27]:
# Columns used as model inputs, in the order they are passed to the model.
feature_cols = [
    "Contract", "OnlineService", "tenure", "TotalRevenue",
    "TechSupport", "PaperlessStatement", "CreditCard",
    "MultipleDigitalServices", "DigitalBanking", "PaymentMethod",
    "DebitCard", "MonthlyRevenue", "SeniorCitizen", "gender",
]
# Narrow the frame to just the selected input columns (all rows kept).
features_df = df_data_1.loc[:, feature_cols]

In [28]:
from sklearn import ensemble
from sklearn import model_selection

# Split the dataset into training and testing sets

In [29]:
# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# repeatable across runs.
# NOTE(review): the `fitbit_` prefix looks like a leftover from another
# notebook. These frames hold the churn feature columns; the names are kept
# because later cells refer to them.
churn_labels = df_data_1['Churn']
fitbit_train, fitbit_test, target_train, target_test = model_selection.train_test_split(
    features_df,
    churn_labels,
    test_size=0.33,
    random_state=10,
)


# Train the model and measure its accuracy on the test set

In [30]:
# Fit a gradient-boosting classifier on the training split, then report its
# mean accuracy on the held-out split.
# random_state is pinned (same seed as the train/test split) so that a fresh
# "Restart & Run All" builds the same model and reproduces the same score;
# without it the estimator draws its seed from global random state.
# NOTE: the score may differ slightly from one recorded with an unseeded model.
clf1 = ensemble.GradientBoostingClassifier(random_state=10)
clf1.fit(fitbit_train, target_train)
clf1.score(fitbit_test, target_test)

0.8068817204301075

# Use the model to classify whether a customer will churn or not

In [31]:
# Take the first row of the held-out features as a one-row sample frame.
testPd = fitbit_test.iloc[:1]

In [32]:
# Predict the encoded Churn label (1 = "Yes", 0 = "No") for the one-row sample.
clf1.predict(testPd)

array([0])