In [1]:
# We will try to predict variable Churn using a logistic regression on variables tenure, SeniorCitizen, MonthlyCharges.

# Churn quantifies the number of customers who have left your brand by 
# cancelling their subscription or stopping paying for your services.


# Extract the target variable.
# Extract the independent variables and scale them.
# Build the logistic regression model.
# Evaluate the model.
# Even a simple model will give us more than 70% accuracy. Why?
# Synthetic Minority Oversampling TEchnique (SMOTE) is an over sampling technique based on nearest neighbors that adds new points between existing points. Apply imblearn.over_sampling.SMOTE to the dataset. Build and evaluate the logistic regression model. Is it there any improvement?
# Tomek links are pairs of very close instances, but of opposite classes. Removing the instances of the majority class of each pair increases the space between the two classes, facilitating the classification process. Apply imblearn.under_sampling.TomekLinks to the dataset. Build and evaluate the logistic regression model. Is it there any improvement?

In [2]:
# Load the dataset and explore the variables.

import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline


In [3]:
data = pd.read_csv('customer_churn.csv')

In [6]:
data.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [10]:
data.head(20)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes
5,9305-CDSKC,Female,0,No,No,8,Yes,Yes,Fiber optic,No,No,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,99.65,820.5,Yes
6,1452-KIOVK,Male,0,No,Yes,22,Yes,Yes,Fiber optic,No,Yes,No,No,Yes,No,Month-to-month,Yes,Credit card (automatic),89.1,1949.4,No
7,6713-OKOMC,Female,0,No,No,10,No,No phone service,DSL,Yes,No,No,No,No,No,Month-to-month,No,Mailed check,29.75,301.9,No
8,7892-POOKP,Female,0,Yes,No,28,Yes,Yes,Fiber optic,No,No,Yes,Yes,Yes,Yes,Month-to-month,Yes,Electronic check,104.8,3046.05,Yes
9,6388-TABGU,Male,0,No,Yes,62,Yes,No,DSL,Yes,Yes,No,No,No,No,One year,No,Bank transfer (automatic),56.15,3487.95,No


In [5]:
data.info

<bound method DataFrame.info of       customerID  gender  SeniorCitizen Partner Dependents  tenure  \
0     7590-VHVEG  Female              0     Yes         No       1   
1     5575-GNVDE    Male              0      No         No      34   
2     3668-QPYBK    Male              0      No         No       2   
3     7795-CFOCW    Male              0      No         No      45   
4     9237-HQITU  Female              0      No         No       2   
...          ...     ...            ...     ...        ...     ...   
7038  6840-RESVB    Male              0     Yes        Yes      24   
7039  2234-XADUH  Female              0     Yes        Yes      72   
7040  4801-JZAZL  Female              0     Yes        Yes      11   
7041  8361-LTMKD    Male              1     Yes         No       4   
7042  3186-AJIEK    Male              0      No         No      66   

     PhoneService     MultipleLines InternetService OnlineSecurity  \
0              No  No phone service             DSL      

In [7]:
data.dtypes

customerID           object
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                object
dtype: object

In [19]:
# Checking for null values

nulls = pd.DataFrame(data.isna().sum()/len(data))
nulls= nulls.reset_index()
nulls.columns = ['column_name', 'Percentage Null Values']
nulls.sort_values(by='Percentage Null Values', ascending = False)

Unnamed: 0,column_name,Percentage Null Values
0,customerID,0.0
11,DeviceProtection,0.0
19,TotalCharges,0.0
18,MonthlyCharges,0.0
17,PaymentMethod,0.0
16,PaperlessBilling,0.0
15,Contract,0.0
14,StreamingMovies,0.0
13,StreamingTV,0.0
12,TechSupport,0.0


In [23]:
# Checking the numerical values

data_numeric = data.select_dtypes(include=[np.number])
data_categorical = data.select_dtypes(exclude=[np.number])

In [26]:
data_numeric.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
0,0,1,29.85
1,0,34,56.95
2,0,2,53.85
3,0,45,42.3
4,0,2,70.7


In [27]:
data_categorical.head()

Unnamed: 0,customerID,gender,Partner,Dependents,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn
0,7590-VHVEG,Female,Yes,No,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,No
1,5575-GNVDE,Male,No,No,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,1889.5,No
2,3668-QPYBK,Male,No,No,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,108.15,Yes
3,7795-CFOCW,Male,No,No,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),1840.75,No
4,9237-HQITU,Female,No,No,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,151.65,Yes


In [33]:
# Converting some of the categorical values to numerical with dummies

dummies = pd.get_dummies(data_categorical, columns = ['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod'])


In [34]:
dummies

Unnamed: 0,customerID,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,PhoneService_Yes,MultipleLines_No,MultipleLines_No phone service,MultipleLines_Yes,InternetService_DSL,InternetService_Fiber optic,InternetService_No,OnlineSecurity_No,OnlineSecurity_No internet service,OnlineSecurity_Yes,OnlineBackup_No,OnlineBackup_No internet service,OnlineBackup_Yes,DeviceProtection_No,DeviceProtection_No internet service,DeviceProtection_Yes,TechSupport_No,TechSupport_No internet service,TechSupport_Yes,StreamingTV_No,StreamingTV_No internet service,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,7590-VHVEG,29.85,No,1,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
1,5575-GNVDE,1889.5,No,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1
2,3668-QPYBK,108.15,Yes,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1
3,7795-CFOCW,1840.75,No,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0
4,9237-HQITU,151.65,Yes,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,1990.5,No,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,1
7039,2234-XADUH,7362.9,No,1,0,0,1,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,0,1,0,0
7040,4801-JZAZL,346.45,No,1,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
7041,8361-LTMKD,306.6,Yes,0,1,0,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1


In [37]:
to_concat = [data_numeric, dummies]

In [38]:
df = pd.concat(to_concat, axis = 1)

In [39]:
df.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,customerID,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,PhoneService_Yes,MultipleLines_No,MultipleLines_No phone service,MultipleLines_Yes,InternetService_DSL,InternetService_Fiber optic,InternetService_No,OnlineSecurity_No,OnlineSecurity_No internet service,OnlineSecurity_Yes,OnlineBackup_No,OnlineBackup_No internet service,OnlineBackup_Yes,DeviceProtection_No,DeviceProtection_No internet service,DeviceProtection_Yes,TechSupport_No,TechSupport_No internet service,TechSupport_Yes,StreamingTV_No,StreamingTV_No internet service,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,1,29.85,7590-VHVEG,29.85,No,1,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
1,0,34,56.95,5575-GNVDE,1889.5,No,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1
2,0,2,53.85,3668-QPYBK,108.15,Yes,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1
3,0,45,42.3,7795-CFOCW,1840.75,No,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0
4,0,2,70.7,9237-HQITU,151.65,Yes,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0


In [40]:
df.corr()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,PhoneService_Yes,MultipleLines_No,MultipleLines_No phone service,MultipleLines_Yes,InternetService_DSL,InternetService_Fiber optic,InternetService_No,OnlineSecurity_No,OnlineSecurity_No internet service,OnlineSecurity_Yes,OnlineBackup_No,OnlineBackup_No internet service,OnlineBackup_Yes,DeviceProtection_No,DeviceProtection_No internet service,DeviceProtection_Yes,TechSupport_No,TechSupport_No internet service,TechSupport_Yes,StreamingTV_No,StreamingTV_No internet service,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
SeniorCitizen,1.0,0.016567,0.220173,0.001874,-0.001874,-0.016479,0.016479,0.211185,-0.211185,-0.008576,0.008576,-0.136213,-0.008576,0.142948,-0.108322,0.255338,-0.182742,0.185532,-0.182742,-0.038653,0.087952,-0.182742,0.066572,0.09481,-0.182742,0.059428,0.20562,-0.182742,-0.060625,0.049062,-0.182742,0.105378,0.03421,-0.182742,0.120176,0.13836,-0.046262,-0.117,-0.15653,0.15653,-0.016159,-0.024135,0.171718,-0.153477
tenure,0.016567,1.0,0.2479,-0.005106,0.005106,-0.379697,0.379697,-0.159712,0.159712,-0.008448,0.008448,-0.323088,-0.008448,0.331941,0.013274,0.01972,-0.039062,-0.263746,-0.039062,0.327203,-0.312694,-0.039062,0.360277,-0.31274,-0.039062,0.360653,-0.262143,-0.039062,0.324221,-0.245039,-0.039062,0.279756,-0.25222,-0.039062,0.286111,-0.645561,0.20257,0.558533,-0.006152,0.006152,0.24351,0.233006,-0.208363,-0.233852
MonthlyCharges,0.220173,0.2479,1.0,0.014569,-0.014569,-0.096848,0.096848,0.11389,-0.11389,-0.247398,0.247398,-0.338314,-0.247398,0.490434,-0.160189,0.787066,-0.763557,0.360898,-0.763557,0.296594,0.210753,-0.763557,0.44178,0.171836,-0.763557,0.482692,0.322076,-0.763557,0.338304,0.016951,-0.763557,0.629603,0.018075,-0.763557,0.627429,0.060165,0.004904,-0.074681,-0.35215,0.35215,0.042812,0.03055,0.271625,-0.377437
gender_Female,0.001874,-0.005106,0.014569,1.0,-1.0,-0.001808,0.001808,0.010517,-0.010517,-0.006488,0.006488,-0.004476,-0.006488,0.008414,-0.006568,0.011286,-0.006026,-0.010429,-0.006026,0.017021,-0.008191,-0.006026,0.013773,0.002988,-0.006026,0.002105,-0.003397,-0.006026,0.009212,-0.003267,-0.006026,0.008393,-0.005374,-0.006026,0.010487,0.003386,-0.008026,0.003695,-0.011754,0.011754,0.016024,-0.001215,-0.000752,-0.013744
gender_Male,-0.001874,0.005106,-0.014569,-1.0,1.0,0.001808,-0.001808,-0.010517,0.010517,0.006488,-0.006488,0.004476,0.006488,-0.008414,0.006568,-0.011286,0.006026,0.010429,0.006026,-0.017021,0.008191,0.006026,-0.013773,-0.002988,0.006026,-0.002105,0.003397,0.006026,-0.009212,0.003267,0.006026,-0.008393,0.005374,0.006026,-0.010487,-0.003386,0.008026,-0.003695,0.011754,-0.011754,-0.016024,0.001215,0.000752,0.013744
Partner_No,-0.016479,-0.379697,-0.096848,-0.001808,0.001808,1.0,-1.0,0.452676,-0.452676,0.017706,-0.017706,0.129929,0.017706,-0.142057,0.000851,-0.000304,-0.000615,0.129936,-0.000615,-0.143106,0.136058,-0.000615,-0.141498,0.147692,-0.000615,-0.153786,0.109443,-0.000615,-0.119999,0.124357,-0.000615,-0.124666,0.117529,-0.000615,-0.117412,0.280865,-0.082783,-0.248091,-0.014877,0.014877,-0.110706,-0.082029,0.083852,0.095125
Partner_Yes,0.016479,0.379697,0.096848,0.001808,-0.001808,-1.0,1.0,-0.452676,0.452676,-0.017706,0.017706,-0.129929,-0.017706,0.142057,-0.000851,0.000304,0.000615,-0.129936,0.000615,0.143106,-0.136058,0.000615,0.141498,-0.147692,0.000615,0.153786,-0.109443,0.000615,0.119999,-0.124357,0.000615,0.124666,-0.117529,0.000615,0.117412,-0.280865,0.082783,0.248091,0.014877,-0.014877,0.110706,0.082029,-0.083852,-0.095125
Dependents_No,0.211185,-0.159712,0.11389,0.010517,-0.010517,0.452676,-0.452676,1.0,-1.0,-0.001762,0.001762,-0.023198,-0.001762,0.024526,-0.05201,0.165818,-0.139812,0.188434,-0.139812,-0.080972,0.138756,-0.139812,-0.023671,0.129415,-0.139812,-0.013963,0.172645,-0.139812,-0.063268,0.101176,-0.139812,0.016558,0.078198,-0.139812,0.039741,0.23172,-0.068368,-0.204613,-0.111377,0.111377,-0.052021,-0.060267,0.150642,-0.059071
Dependents_Yes,-0.211185,0.159712,-0.11389,-0.010517,0.010517,-0.452676,0.452676,-1.0,1.0,0.001762,-0.001762,0.023198,0.001762,-0.024526,0.05201,-0.165818,0.139812,-0.188434,0.139812,0.080972,-0.138756,0.139812,0.023671,-0.129415,0.139812,0.013963,-0.172645,0.139812,0.063268,-0.101176,0.139812,-0.016558,-0.078198,0.139812,-0.039741,-0.23172,0.068368,0.204613,0.111377,-0.111377,0.052021,0.060267,-0.150642,0.059071
PhoneService_No,-0.008576,-0.008448,-0.247398,-0.006488,0.006488,0.017706,-0.017706,-0.001762,0.001762,1.0,-1.0,-0.315431,1.0,-0.27969,0.452425,-0.289999,-0.172209,0.05788,-0.172209,0.092893,0.092867,-0.172209,0.052312,0.074776,-0.172209,0.071227,0.054447,-0.172209,0.09634,0.122455,-0.172209,0.022574,0.112254,-0.172209,0.032959,0.000742,0.002791,-0.003519,0.016505,-0.016505,-0.007556,0.007721,-0.003062,0.003319


In [42]:
corrMatrix = df.corr()

In [55]:
# Splitting into training and test dataset 

y = data['Churn']
X = df[['SeniorCitizen', 'tenure', 'MonthlyCharges', 'gender_Female', 'gender_Male', 'Partner_No', 'Partner_Yes', 'Dependents_No',
        'Dependents_Yes','PhoneService_No', 'PhoneService_Yes', 'MultipleLines_No', 
        'MultipleLines_No phone service','MultipleLines_Yes', 'InternetService_DSL', 'InternetService_Fiber optic',
        'InternetService_No','OnlineSecurity_No', 'OnlineSecurity_No internet service', 'OnlineSecurity_Yes',
        'OnlineBackup_No','OnlineBackup_No internet service', 'OnlineBackup_Yes', 'DeviceProtection_No', 
        'DeviceProtection_No internet service','DeviceProtection_Yes', 'TechSupport_No', 'TechSupport_No internet service',
        'TechSupport_Yes','StreamingTV_No', 'StreamingTV_No internet service', 'StreamingTV_Yes',
        'StreamingMovies_No','StreamingMovies_No internet service', 'StreamingMovies_Yes', 'Contract_Month-to-month', 'Contract_One year',
        'Contract_Two year', 'PaperlessBilling_No', 'PaperlessBilling_Yes', 'PaymentMethod_Bank transfer (automatic)', 'PaymentMethod_Credit card (automatic)',
        'PaymentMethod_Electronic check', 'PaymentMethod_Mailed check']]


In [56]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
X_train, X_test,y_train, y_test = train_test_split(X,y, test_size = 0.25, random_state=42)
#scaled_x = StandardScaler().fit_transform(X)

In [57]:
X_train

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,PhoneService_Yes,MultipleLines_No,MultipleLines_No phone service,MultipleLines_Yes,InternetService_DSL,InternetService_Fiber optic,InternetService_No,OnlineSecurity_No,OnlineSecurity_No internet service,OnlineSecurity_Yes,OnlineBackup_No,OnlineBackup_No internet service,OnlineBackup_Yes,DeviceProtection_No,DeviceProtection_No internet service,DeviceProtection_Yes,TechSupport_No,TechSupport_No internet service,TechSupport_Yes,StreamingTV_No,StreamingTV_No internet service,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
6607,0,1,25.30,0,1,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
2598,0,7,75.15,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
2345,0,4,20.05,1,0,1,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0
4093,0,29,76.00,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0
693,0,3,75.10,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3772,0,1,95.00,0,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0
5191,0,23,91.10,1,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0
5226,0,12,21.15,0,1,0,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,1,0
5390,1,12,99.45,0,1,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0


In [58]:
X_test

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,PhoneService_Yes,MultipleLines_No,MultipleLines_No phone service,MultipleLines_Yes,InternetService_DSL,InternetService_Fiber optic,InternetService_No,OnlineSecurity_No,OnlineSecurity_No internet service,OnlineSecurity_Yes,OnlineBackup_No,OnlineBackup_No internet service,OnlineBackup_Yes,DeviceProtection_No,DeviceProtection_No internet service,DeviceProtection_Yes,TechSupport_No,TechSupport_No internet service,TechSupport_Yes,StreamingTV_No,StreamingTV_No internet service,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
185,0,1,24.80,1,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
2715,0,41,25.25,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0
3825,0,52,19.35,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,1
1807,0,1,76.35,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0
132,0,67,50.55,0,1,1,0,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5845,0,3,75.80,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
2301,0,8,90.25,1,0,0,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0
5121,0,29,70.90,0,1,1,0,1,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,1,0,0
677,0,2,34.70,1,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0


In [59]:
y_train

6607    Yes
2598     No
2345     No
4093     No
693     Yes
       ... 
3772    Yes
5191     No
5226     No
5390    Yes
860      No
Name: Churn, Length: 5282, dtype: object

In [60]:
y_test

185     Yes
2715     No
3825     No
1807    Yes
132      No
       ... 
5845    Yes
2301     No
5121     No
677     Yes
6062     No
Name: Churn, Length: 1761, dtype: object

In [61]:
# Transform training and test data 

from sklearn.preprocessing import StandardScaler
sc= StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [62]:
X_train

array([[-0.43609145, -1.28288214, -1.31004561, ..., -0.52344917,
         1.40621907, -0.54453143],
       [-0.43609145, -1.03785653,  0.34483164, ..., -0.52344917,
         1.40621907, -0.54453143],
       [-0.43609145, -1.16036933, -1.48433058, ..., -0.52344917,
        -0.71112675, -0.54453143],
       ...,
       [-0.43609145, -0.83366851, -1.44781373, ..., -0.52344917,
         1.40621907, -0.54453143],
       [ 2.293097  , -0.83366851,  1.15152206, ..., -0.52344917,
         1.40621907, -0.54453143],
       [-0.43609145, -0.26194207, -1.49262986, ...,  1.91040516,
        -0.71112675, -0.54453143]])

In [63]:
X_test

array([[-0.43609145, -1.28288214, -1.32664418, ..., -0.52344917,
         1.40621907, -0.54453143],
       [-0.43609145,  0.35062198, -1.31170547, ..., -0.52344917,
        -0.71112675, -0.54453143],
       [-0.43609145,  0.79983561, -1.50756857, ..., -0.52344917,
        -0.71112675,  1.83644127],
       ...,
       [-0.43609145, -0.13942926,  0.20374381, ...,  1.91040516,
        -0.71112675, -0.54453143],
       [-0.43609145, -1.24204454, -0.99799253, ..., -0.52344917,
         1.40621907, -0.54453143],
       [ 2.293097  ,  0.84067322,  0.30001551, ...,  1.91040516,
        -0.71112675, -0.54453143]])

In [64]:
y_train.value_counts()

No     3892
Yes    1390
Name: Churn, dtype: int64

In [65]:
classifier = LogisticRegression(random_state=0, solver='lbfgs',
                        multi_class='ovr')

#classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

LogisticRegression(multi_class='ovr', random_state=0)

In [66]:
y_pred = classifier.predict(X_test)
y_pred

array(['Yes', 'No', 'No', ..., 'No', 'Yes', 'No'], dtype=object)