# Libraries

In [147]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFECV, f_classif, chi2
from boruta import BorutaPy
from collections import Counter

# Feature Selection
For feature selection, it will be used a majority voting method applying 3 selection proposals:
 - Statistical test
 - RFECV
 - Boruta

Features maintained by at least 2 algorithms will be used in the modeling process.

In [149]:
df_telco = pd.read_csv('df_telco_clean.csv')
df_telco.drop(['customerID'], axis = 1, inplace = True)
df_telco.head(5)

Unnamed: 0,Churn,customer_gender,customer_SeniorCitizen,customer_Partner,customer_Dependents,customer_tenure,phone_PhoneService,phone_MultipleLines,internet_InternetService,internet_OnlineSecurity,...,internet_DeviceProtection,internet_TechSupport,internet_StreamingTV,internet_StreamingMovies,account_Contract,account_PaperlessBilling,account_PaymentMethod,account_Charges_Monthly,account_Charges_Total,account_Charges_Daily
0,0,Female,< 65 years,Yes,Yes,9,Yes,No,DSL,No,...,No,Yes,Yes,No,One year,Yes,Mailed check,65.6,593.3,2.19
1,0,Male,< 65 years,No,No,9,Yes,Yes,DSL,No,...,No,No,No,Yes,Month-to-month,No,Mailed check,59.9,542.4,2.0
2,1,Male,< 65 years,No,No,4,Yes,No,Fiber optic,No,...,Yes,No,No,No,Month-to-month,Yes,Electronic check,73.9,280.85,2.46
3,1,Male,>= 65 years,Yes,No,13,Yes,No,Fiber optic,No,...,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,98.0,1237.85,3.27
4,1,Female,>= 65 years,Yes,No,3,Yes,No,Fiber optic,No,...,No,Yes,Yes,No,Month-to-month,Yes,Mailed check,83.9,267.4,2.8


In [151]:
# Getting dummies
df_telco_dummies = df_telco.copy()
df_telco_dummies = pd.get_dummies(df_telco_dummies)
df_telco_dummies

Unnamed: 0,Churn,customer_tenure,account_Charges_Monthly,account_Charges_Total,account_Charges_Daily,customer_gender_Female,customer_gender_Male,customer_SeniorCitizen_< 65 years,customer_SeniorCitizen_>= 65 years,customer_Partner_No,...,internet_StreamingMovies_Yes,account_Contract_Month-to-month,account_Contract_One year,account_Contract_Two year,account_PaperlessBilling_No,account_PaperlessBilling_Yes,account_PaymentMethod_Bank transfer (automatic),account_PaymentMethod_Credit card (automatic),account_PaymentMethod_Electronic check,account_PaymentMethod_Mailed check
0,0,9,65.60,593.30,2.19,1,0,1,0,0,...,0,0,1,0,0,1,0,0,0,1
1,0,9,59.90,542.40,2.00,0,1,1,0,1,...,1,1,0,0,1,0,0,0,0,1
2,1,4,73.90,280.85,2.46,0,1,1,0,1,...,0,1,0,0,0,1,0,0,1,0
3,1,13,98.00,1237.85,3.27,0,1,0,1,0,...,1,1,0,0,0,1,0,0,1,0
4,1,3,83.90,267.40,2.80,1,0,0,1,0,...,0,1,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7027,0,13,55.15,742.90,1.84,1,0,1,0,1,...,0,0,1,0,1,0,0,0,0,1
7028,1,22,85.10,1873.70,2.84,0,1,1,0,0,...,1,1,0,0,0,1,0,0,1,0
7029,0,2,50.30,92.75,1.68,0,1,1,0,1,...,0,1,0,0,0,1,0,0,0,1
7030,0,67,67.85,4627.65,2.26,0,1,1,0,0,...,1,0,0,1,1,0,0,0,0,1


In [152]:
# Split target and features
X = df_telco_dummies.drop('Churn',axis=1)
Y = df_telco_dummies['Churn']

In [153]:
# Train test split, stratify because it is an unbalanced dataset
X_train, X_test, y_train, y_test = train_test_split(X, Y, train_size=0.7, random_state=123, stratify = Y )

In [154]:
df_telco_dummies.columns

Index(['Churn', 'customer_tenure', 'account_Charges_Monthly',
       'account_Charges_Total', 'account_Charges_Daily',
       'customer_gender_Female', 'customer_gender_Male',
       'customer_SeniorCitizen_< 65 years',
       'customer_SeniorCitizen_>= 65 years', 'customer_Partner_No',
       'customer_Partner_Yes', 'customer_Dependents_No',
       'customer_Dependents_Yes', 'phone_PhoneService_No',
       'phone_PhoneService_Yes', 'phone_MultipleLines_No',
       'phone_MultipleLines_No phone service', 'phone_MultipleLines_Yes',
       'internet_InternetService_DSL', 'internet_InternetService_Fiber optic',
       'internet_InternetService_No', 'internet_OnlineSecurity_No',
       'internet_OnlineSecurity_No internet service',
       'internet_OnlineSecurity_Yes', 'internet_OnlineBackup_No',
       'internet_OnlineBackup_No internet service',
       'internet_OnlineBackup_Yes', 'internet_DeviceProtection_No',
       'internet_DeviceProtection_No internet service',
       'internet_Dev

In [155]:
# Categorical features and numerical features
list_cat = ['customer_gender_Female', 'customer_gender_Male',
       'customer_SeniorCitizen_< 65 years',
       'customer_SeniorCitizen_>= 65 years', 'customer_Partner_No',
       'customer_Partner_Yes', 'customer_Dependents_No',
       'customer_Dependents_Yes', 'phone_PhoneService_No',
       'phone_PhoneService_Yes', 'phone_MultipleLines_No',
       'phone_MultipleLines_No phone service', 'phone_MultipleLines_Yes',
       'internet_InternetService_DSL', 'internet_InternetService_Fiber optic',
       'internet_InternetService_No', 'internet_OnlineSecurity_No',
       'internet_OnlineSecurity_No internet service',
       'internet_OnlineSecurity_Yes', 'internet_OnlineBackup_No',
       'internet_OnlineBackup_No internet service',
       'internet_OnlineBackup_Yes', 'internet_DeviceProtection_No',
       'internet_DeviceProtection_No internet service',
       'internet_DeviceProtection_Yes', 'internet_TechSupport_No',
       'internet_TechSupport_No internet service', 'internet_TechSupport_Yes',
       'internet_StreamingTV_No', 'internet_StreamingTV_No internet service',
       'internet_StreamingTV_Yes', 'internet_StreamingMovies_No',
       'internet_StreamingMovies_No internet service',
       'internet_StreamingMovies_Yes', 'account_Contract_Month-to-month',
       'account_Contract_One year', 'account_Contract_Two year',
       'account_PaperlessBilling_No', 'account_PaperlessBilling_Yes',
       'account_PaymentMethod_Bank transfer (automatic)',
       'account_PaymentMethod_Credit card (automatic)',
       'account_PaymentMethod_Electronic check',
       'account_PaymentMethod_Mailed check']

list_num = ['customer_tenure', 'account_Charges_Monthly', 'account_Charges_Total', 'account_Charges_Daily']

In [156]:
#Spliting categorical and numerical features
X_train_num = X_train[list_num]
X_train_cat = X_train[list_cat]

## Statistical test 

### ANOVA

In [157]:
selected_anova = f_classif(X_train_num, y_train)
selected_anova

(array([669.51091063, 205.66713631, 185.50497653, 205.77847468]),
 array([1.62435924e-138, 1.00072015e-045, 1.70768928e-041, 9.48406411e-046]))

In [158]:
# pavalues
p_values_num = pd.Series(selected_anova[1])

p_values_num.index = X_train_num.columns 
p_values_num.sort_values(ascending=True, inplace=True)

p_values_num

customer_tenure            1.624359e-138
account_Charges_Daily       9.484064e-46
account_Charges_Monthly     1.000720e-45
account_Charges_Total       1.707689e-41
dtype: float64

In [159]:
# features p_values_num<0.05
p_values_num = p_values_num[p_values_num<0.05]

p_values_num.index

Index(['customer_tenure', 'account_Charges_Daily', 'account_Charges_Monthly',
       'account_Charges_Total'],
      dtype='object')

In [160]:
X_train_num_anova = X_train_num.loc[:,p_values_num.index]
X_train_num_anova.head(5)

Unnamed: 0,customer_tenure,account_Charges_Daily,account_Charges_Monthly,account_Charges_Total
5640,8,2.36,70.7,553.4
3454,72,3.66,109.7,7898.45
6961,23,1.86,55.8,1327.85
4590,8,1.71,51.3,411.6
3680,57,3.02,90.65,5199.8


### Chi2

In [161]:
# Chi2 for categorical features
selected_chi2 = chi2(X_train_cat, y_train)
selected_chi2

(array([2.71303422e-03, 2.69764424e-03, 1.84790870e+01, 9.66526426e+01,
        5.45849290e+01, 5.74997687e+01, 3.51024582e+01, 8.32361030e+01,
        9.14206651e-01, 9.67403578e-02, 2.47797402e+00, 9.14206651e-01,
        4.57217603e+00, 5.49100838e+01, 2.74894392e+02, 2.07935096e+02,
        3.06272383e+02, 2.07935096e+02, 1.09873911e+02, 1.92229643e+02,
        2.07935096e+02, 1.69740084e+01, 1.64337928e+02, 2.07935096e+02,
        8.39469101e+00, 2.93586824e+02, 2.07935096e+02, 9.99644314e+01,
        5.74671822e+01, 2.07935096e+02, 1.02260886e+01, 5.03824805e+01,
        2.07935096e+02, 1.37030472e+01, 3.52802607e+02, 1.16386549e+02,
        3.44420675e+02, 1.16001916e+02, 7.98010436e+01, 5.02598784e+01,
        7.24401861e+01, 3.03518258e+02, 3.36626916e+01]),
 array([9.58459535e-01, 9.58577417e-01, 1.71778911e-05, 8.26220306e-23,
        1.48872961e-13, 3.38019025e-14, 3.12805671e-09, 7.28156287e-20,
        3.39000729e-01, 7.55776964e-01, 1.15450882e-01, 3.39000729e-01,
      

In [162]:
#p values
p_values_cat = pd.Series(selected_chi2[1])

p_values_cat.index = X_train_cat.columns 
p_values_cat.sort_values(ascending=True, inplace=True)

In [163]:
p_values_cat = p_values_cat[p_values_cat<0.05]
p_values_cat

account_Contract_Month-to-month                    1.039535e-78
account_Contract_Two year                          6.952553e-77
internet_OnlineSecurity_No                         1.416696e-68
account_PaymentMethod_Electronic check             5.639960e-68
internet_TechSupport_No                            8.223110e-66
internet_InternetService_Fiber optic               9.732686e-62
internet_InternetService_No                        3.875964e-47
internet_DeviceProtection_No internet service      3.875964e-47
internet_OnlineBackup_No internet service          3.875964e-47
internet_OnlineSecurity_No internet service        3.875964e-47
internet_StreamingMovies_No internet service       3.875964e-47
internet_TechSupport_No internet service           3.875964e-47
internet_StreamingTV_No internet service           3.875964e-47
internet_OnlineBackup_No                           1.036723e-43
internet_DeviceProtection_No                       1.276262e-37
account_Contract_One year               

In [164]:
X_train_cat_chi2 = X_train_cat[p_values_cat.index]

In [165]:
#X_train filtered statistic
X_train_st = pd.concat([X_train_num_anova, X_train_cat_chi2], axis=1)

cols_keep_st = list(X_train_st.columns)
cols_keep_st

['customer_tenure',
 'account_Charges_Daily',
 'account_Charges_Monthly',
 'account_Charges_Total',
 'account_Contract_Month-to-month',
 'account_Contract_Two year',
 'internet_OnlineSecurity_No',
 'account_PaymentMethod_Electronic check',
 'internet_TechSupport_No',
 'internet_InternetService_Fiber optic',
 'internet_InternetService_No',
 'internet_DeviceProtection_No internet service',
 'internet_OnlineBackup_No internet service',
 'internet_OnlineSecurity_No internet service',
 'internet_StreamingMovies_No internet service',
 'internet_TechSupport_No internet service',
 'internet_StreamingTV_No internet service',
 'internet_OnlineBackup_No',
 'internet_DeviceProtection_No',
 'account_Contract_One year',
 'account_PaperlessBilling_No',
 'internet_OnlineSecurity_Yes',
 'internet_TechSupport_Yes',
 'customer_SeniorCitizen_>= 65 years',
 'customer_Dependents_Yes',
 'account_PaperlessBilling_Yes',
 'account_PaymentMethod_Credit card (automatic)',
 'customer_Partner_Yes',
 'internet_Strea

## Recursive Feature Elimination

In [166]:
# Target unbalanced
y_train.value_counts()

0    3614
1    1308
Name: Churn, dtype: int64

In [167]:
forest = RandomForestClassifier(n_jobs = -1, max_depth = 5, n_estimators= 10, random_state=123, class_weight= 'balanced')

In [168]:
# Selection (Random Forest)
rfecv_RFC = RFECV(estimator=forest, scoring='precision')
rfecv_RFC.fit(X_train,y_train)

RFECV(estimator=RandomForestClassifier(class_weight='balanced', max_depth=5,
                                       n_estimators=10, n_jobs=-1,
                                       random_state=123),
      scoring='precision')

In [169]:
sklearn.metrics.SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_wei

In [170]:
print(rfecv_RFC.n_features_) # number of most importante features
print(rfecv_RFC.support_) # Booleans vector. True if the variable is to be kept and False otherwise
mask_RFC = rfecv_RFC.support_

rfecv_RFC.ranking_

26
[ True  True  True  True False False False False  True  True False  True
 False False  True False False  True  True  True  True False  True  True
 False  True False  True False  True False  True  True False False False
 False False  True  True  True  True  True  True False  True False]


array([ 1,  1,  1,  1,  6,  4,  5, 21,  1,  1,  9,  1, 13, 16,  1,  7, 10,
        1,  1,  1,  1, 14,  1,  1, 11,  1, 20,  1, 15,  1, 22,  1,  1, 19,
       12, 18, 17,  2,  1,  1,  1,  1,  1,  1,  8,  1,  3])

In [171]:
cols_drop_RFE= [not x for x in mask_RFC] # inverting boolean vector because is mask
cols_drop_RFE= X_train.loc[:,cols_drop_RFE].columns.tolist()

cols_drop_RFE

['customer_gender_Female',
 'customer_gender_Male',
 'customer_SeniorCitizen_< 65 years',
 'customer_SeniorCitizen_>= 65 years',
 'customer_Dependents_No',
 'phone_PhoneService_No',
 'phone_PhoneService_Yes',
 'phone_MultipleLines_No phone service',
 'phone_MultipleLines_Yes',
 'internet_OnlineSecurity_No internet service',
 'internet_OnlineBackup_No internet service',
 'internet_DeviceProtection_No',
 'internet_DeviceProtection_Yes',
 'internet_TechSupport_No internet service',
 'internet_StreamingTV_No internet service',
 'internet_StreamingTV_Yes',
 'internet_StreamingMovies_No',
 'internet_StreamingMovies_No internet service',
 'internet_StreamingMovies_Yes',
 'account_PaymentMethod_Credit card (automatic)',
 'account_PaymentMethod_Mailed check']

In [172]:
cols_keep_RFE= mask_RFC 
cols_keep_RFE= X_train.loc[:,cols_keep_RFE].columns.tolist()
cols_keep_RFE

['customer_tenure',
 'account_Charges_Monthly',
 'account_Charges_Total',
 'account_Charges_Daily',
 'customer_Partner_No',
 'customer_Partner_Yes',
 'customer_Dependents_Yes',
 'phone_MultipleLines_No',
 'internet_InternetService_DSL',
 'internet_InternetService_Fiber optic',
 'internet_InternetService_No',
 'internet_OnlineSecurity_No',
 'internet_OnlineSecurity_Yes',
 'internet_OnlineBackup_No',
 'internet_OnlineBackup_Yes',
 'internet_DeviceProtection_No internet service',
 'internet_TechSupport_No',
 'internet_TechSupport_Yes',
 'internet_StreamingTV_No',
 'account_Contract_Month-to-month',
 'account_Contract_One year',
 'account_Contract_Two year',
 'account_PaperlessBilling_No',
 'account_PaperlessBilling_Yes',
 'account_PaymentMethod_Bank transfer (automatic)',
 'account_PaymentMethod_Electronic check']

In [173]:
X_train_RFECV = X_train.loc[:,cols_keep_RFE]

X_train_RFECV

Unnamed: 0,customer_tenure,account_Charges_Monthly,account_Charges_Total,account_Charges_Daily,customer_Partner_No,customer_Partner_Yes,customer_Dependents_Yes,phone_MultipleLines_No,internet_InternetService_DSL,internet_InternetService_Fiber optic,...,internet_TechSupport_No,internet_TechSupport_Yes,internet_StreamingTV_No,account_Contract_Month-to-month,account_Contract_One year,account_Contract_Two year,account_PaperlessBilling_No,account_PaperlessBilling_Yes,account_PaymentMethod_Bank transfer (automatic),account_PaymentMethod_Electronic check
5640,8,70.70,553.40,2.36,1,0,0,1,0,1,...,1,0,1,1,0,0,1,0,1,0
3454,72,109.70,7898.45,3.66,0,1,0,0,0,1,...,0,1,0,0,0,1,1,0,0,0
6961,23,55.80,1327.85,1.86,1,0,0,1,1,0,...,1,0,1,0,1,0,1,0,1,0
4590,8,51.30,411.60,1.71,0,1,1,1,1,0,...,1,0,1,1,0,0,0,1,0,0
3680,57,90.65,5199.80,3.02,0,1,0,1,0,1,...,1,0,0,0,1,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5714,1,69.65,69.65,2.32,1,0,0,1,0,1,...,1,0,1,1,0,0,0,1,0,1
2515,55,60.00,3316.10,2.00,0,1,0,0,1,0,...,1,0,1,0,1,0,1,0,0,0
3696,27,20.30,595.05,0.68,1,0,0,1,0,0,...,0,0,0,0,1,0,1,0,0,0
3079,2,93.85,196.75,3.13,1,0,0,0,0,1,...,1,0,0,1,0,0,0,1,0,1


### Boruta

In [174]:
# fit boruta
boruta_selector = BorutaPy(forest, n_estimators = 50, max_iter=100, random_state = 0)
boruta_selector.fit(np.array(X_train), np.array(y_train))

BorutaPy(estimator=RandomForestClassifier(class_weight='balanced', max_depth=5,
                                          n_estimators=50, n_jobs=-1,
                                          random_state=RandomState(MT19937) at 0x17F69858378),
         n_estimators=50, random_state=RandomState(MT19937) at 0x17F69858378)

In [175]:
boruta_selector.get_params().keys()

dict_keys(['alpha', 'estimator__bootstrap', 'estimator__ccp_alpha', 'estimator__class_weight', 'estimator__criterion', 'estimator__max_depth', 'estimator__max_features', 'estimator__max_leaf_nodes', 'estimator__max_samples', 'estimator__min_impurity_decrease', 'estimator__min_impurity_split', 'estimator__min_samples_leaf', 'estimator__min_samples_split', 'estimator__min_weight_fraction_leaf', 'estimator__n_estimators', 'estimator__n_jobs', 'estimator__oob_score', 'estimator__random_state', 'estimator__verbose', 'estimator__warm_start', 'estimator', 'max_iter', 'n_estimators', 'perc', 'random_state', 'two_step', 'verbose'])

In [176]:
boruta_selector.support_.tolist()

[True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 False]

In [177]:
# features selected by boruta
cols_drop_boruta= [not x for x in boruta_selector.support_.tolist()] # inverting boolean vector
cols_drop_boruta= X_train.loc[:,cols_drop_boruta].columns.tolist()

cols_drop_boruta

['customer_gender_Female',
 'customer_gender_Male',
 'customer_SeniorCitizen_< 65 years',
 'customer_SeniorCitizen_>= 65 years',
 'customer_Partner_No',
 'customer_Partner_Yes',
 'customer_Dependents_No',
 'customer_Dependents_Yes',
 'phone_PhoneService_No',
 'phone_PhoneService_Yes',
 'phone_MultipleLines_No',
 'phone_MultipleLines_No phone service',
 'phone_MultipleLines_Yes',
 'internet_OnlineBackup_Yes',
 'internet_DeviceProtection_No',
 'internet_DeviceProtection_Yes',
 'internet_StreamingTV_No',
 'internet_StreamingTV_Yes',
 'internet_StreamingMovies_No',
 'internet_StreamingMovies_Yes',
 'account_PaperlessBilling_No',
 'account_PaperlessBilling_Yes',
 'account_PaymentMethod_Bank transfer (automatic)',
 'account_PaymentMethod_Credit card (automatic)',
 'account_PaymentMethod_Mailed check']

In [178]:
cols_keep_boruta=  boruta_selector.support_.tolist()
cols_keep_boruta= X_train.loc[:,cols_keep_boruta].columns.tolist()

cols_keep_boruta

['customer_tenure',
 'account_Charges_Monthly',
 'account_Charges_Total',
 'account_Charges_Daily',
 'internet_InternetService_DSL',
 'internet_InternetService_Fiber optic',
 'internet_InternetService_No',
 'internet_OnlineSecurity_No',
 'internet_OnlineSecurity_No internet service',
 'internet_OnlineSecurity_Yes',
 'internet_OnlineBackup_No',
 'internet_OnlineBackup_No internet service',
 'internet_DeviceProtection_No internet service',
 'internet_TechSupport_No',
 'internet_TechSupport_No internet service',
 'internet_TechSupport_Yes',
 'internet_StreamingTV_No internet service',
 'internet_StreamingMovies_No internet service',
 'account_Contract_Month-to-month',
 'account_Contract_One year',
 'account_Contract_Two year',
 'account_PaymentMethod_Electronic check']

## Vote for features selected in the feature selection

In [179]:
#Counting votes
counts = Counter(sum(([list(set(i)) for i in (cols_keep_st, cols_keep_RFE, cols_keep_boruta)]), []))
print('Vote',counts)

def df_feature_selected(X_to_transform):
  #df with majority features (2 or more votes)
  majority_votes = [i for i, c in counts.items() if c >= 2]
  df_model = pd.DataFrame(X_to_transform.loc[:,majority_votes])
  df_model['Churn'] = Y

  return df_model.head
df_model

Vote Counter({'account_Charges_Total': 3, 'internet_OnlineBackup_No': 3, 'account_Charges_Monthly': 3, 'internet_OnlineSecurity_No': 3, 'internet_TechSupport_Yes': 3, 'internet_InternetService_DSL': 3, 'internet_TechSupport_No': 3, 'internet_OnlineSecurity_Yes': 3, 'account_Contract_Month-to-month': 3, 'account_Contract_Two year': 3, 'internet_InternetService_Fiber optic': 3, 'account_Charges_Daily': 3, 'account_Contract_One year': 3, 'customer_tenure': 3, 'account_PaymentMethod_Electronic check': 3, 'internet_DeviceProtection_No internet service': 3, 'internet_InternetService_No': 3, 'internet_OnlineBackup_No internet service': 2, 'customer_Partner_Yes': 2, 'customer_Dependents_Yes': 2, 'account_PaperlessBilling_Yes': 2, 'internet_OnlineSecurity_No internet service': 2, 'internet_StreamingTV_No': 2, 'account_PaymentMethod_Bank transfer (automatic)': 2, 'internet_TechSupport_No internet service': 2, 'internet_StreamingMovies_No internet service': 2, 'internet_StreamingTV_No internet se

Unnamed: 0,account_Charges_Total,internet_OnlineBackup_No internet service,internet_OnlineBackup_No,account_Charges_Monthly,customer_Partner_Yes,customer_Dependents_Yes,account_PaperlessBilling_Yes,internet_OnlineSecurity_No internet service,internet_OnlineSecurity_No,internet_StreamingTV_No,...,account_Charges_Daily,customer_Partner_No,internet_OnlineBackup_Yes,account_PaperlessBilling_No,account_Contract_One year,customer_tenure,internet_DeviceProtection_No internet service,account_PaymentMethod_Electronic check,internet_InternetService_No,Churn
0,593.30,0,0,65.60,1,1,1,0,1,0,...,2.19,0,1,0,1,9,0,0,0,0
1,542.40,0,1,59.90,0,0,0,0,1,1,...,2.00,1,0,1,0,9,0,0,0,0
2,280.85,0,1,73.90,0,0,1,0,1,1,...,2.46,1,0,0,0,4,0,1,0,1
3,1237.85,0,0,98.00,1,0,1,0,1,0,...,3.27,0,1,0,0,13,0,1,0,1
4,267.40,0,1,83.90,1,0,1,0,1,0,...,2.80,0,0,0,0,3,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7027,742.90,0,1,55.15,0,0,0,0,0,1,...,1.84,1,0,1,1,13,0,0,0,0
7028,1873.70,0,1,85.10,1,0,1,0,1,1,...,2.84,0,0,0,0,22,0,1,0,1
7029,92.75,0,0,50.30,0,0,1,0,1,1,...,1.68,1,1,0,0,2,0,0,0,0
7030,4627.65,0,1,67.85,1,1,0,0,0,1,...,2.26,0,0,1,0,67,0,0,0,0


In [180]:
df_model.to_csv('df_model.csv', index = False)