In [76]:
import pandas as pd 

In [77]:
df = pd.read_csv("telco_churn.csv")

In [78]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [79]:
#convert categorical columns
df['gender'] = df['gender'].astype('category')
df['gender_cat'] = df['gender'].cat.codes
df['SeniorCitizen'] = df['SeniorCitizen'].astype('category')
df['SeniorCitizen_cat'] = df['SeniorCitizen'].cat.codes
df['InternetService'] = df['InternetService'].astype('category')
df['InternetService_cat'] = df['InternetService'].cat.codes
df['DeviceProtection'] = df['DeviceProtection'].astype('category')
df['DeviceProtection_cat'] = df['DeviceProtection'].cat.codes

In [80]:
df[['gender_cat', 'SeniorCitizen_cat', 'InternetService_cat', 'DeviceProtection_cat']].head()

Unnamed: 0,gender_cat,SeniorCitizen_cat,InternetService_cat,DeviceProtection_cat
0,0,0,0,0
1,1,0,0,2
2,1,0,0,0
3,1,0,0,2
4,0,0,1,0


In [9]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,gender_cat
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,0
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,1
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,0


In [81]:
df['Churn'] = df['Churn'].astype('category')
df['Churn_cat'] = df['Churn'].cat.codes
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], 'coerce')
df['TotalCharges'].fillna(df['TotalCharges'].mean(), inplace=True)
print(Counter(df['Churn']))

Counter({'No': 5174, 'Yes': 1869})


In [83]:
#define input and output
X = df[['TotalCharges', 'MonthlyCharges', 'gender_cat', 'SeniorCitizen_cat', 'InternetService_cat', 'DeviceProtection_cat']]
y = df['Churn_cat']

print(Counter(y))

Counter({0: 5174, 1: 1869})


In [84]:
#import train test split method and split data for training and testing 
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [85]:
#import random forest classifier and 
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(X_train, y_train)

RandomForestClassifier()

In [86]:
model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [132]:
from sklearn.metrics import precision_score
y_pred_default = model.predict(X_test)
precision = precision_score(y_test, y_pred_default)
precision

0.5631313131313131

In [87]:
from sklearn.model_selection import GridSearchCV

In [88]:
params = { 
    'n_estimators': [10, 100],
    'max_features': ['sqrt'],
    'max_depth' : [5, 20],
    'criterion' :['gini']
}


In [113]:
grid_search_rf = GridSearchCV(estimator=model, param_grid=params, cv= 20, scoring='precision')
grid_search_rf.fit(X_train, y_train)

GridSearchCV(cv=20, estimator=RandomForestClassifier(),
             param_grid={'criterion': ['gini'], 'max_depth': [5, 20],
                         'max_features': ['sqrt'], 'n_estimators': [10, 100]},
             scoring='precision')

In [114]:
gscv_params = grid_search_rf.best_params_
gscv_params


{'criterion': 'gini',
 'max_depth': 5,
 'max_features': 'sqrt',
 'n_estimators': 100}

In [115]:
gscv_params = grid_search_rf.best_params_
model_rf_gscv = RandomForestClassifier(**gscv_params)
model_rf_gscv.fit(X_train, y_train)

RandomForestClassifier(max_depth=5, max_features='sqrt')

In [133]:
y_pred_gscv = model_rf_gscv.predict(X_test)
precision_gscv = precision_score(y_test, y_pred_gscv)
precision_gscv


0.6996197718631179

In [92]:
%pip install -U rbfopt

You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [117]:
lbounds = [10, 5] 
ubounds = [100, 20] 

In [118]:
import rbfopt
from sklearn.model_selection import cross_val_score

Counter({0: 3892, 1: 1390})


In [135]:
%pip install pyomo

You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [139]:
def precision_objective(X):
   n_estimators, max_depth = X
   n_estimators = int(n_estimators)
   max_depth = int(max_depth)
   params = {'n_estimators':n_estimators, 'max_depth': max_depth}
   model_rbfopt = RandomForestClassifier(criterion='gini', max_features='sqrt', **params)
   model_rbfopt.fit(X_train, y_train)
   precision = cross_val_score(model_rbfopt, X_train, y_train, cv=20, scoring='precision')

   return -np.mean(precision)

# specify number of runs for optimizer
num_runs = 1
# specify number of function calls
max_fun_calls = 8
# specify diminsionality of numerical input
ndim = 2

obj_fun = precision_objective

bb = rbfopt.RbfoptUserBlackBox(
    dimension=ndim,
    var_lower=np.array(lbounds, dtype=np.float),
    var_upper=np.array(ubounds, dtype=np.float),
    var_type=['R'] * ndim,
    obj_funct=obj_fun)
settings = rbfopt.RbfoptSettings(max_evaluations=max_fun_calls)
alg = rbfopt.RbfoptAlgorithm(settings, bb)

fval, sol, iter_count, eval_count, fast_eval_count = alg.optimize()

obj_vals  = fval


  Iter  Cycle  Action             Objective value      Time      Gap
  ----  -----  ------             ---------------      ----      ---
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
     0      0  Initialization           -0.592415     18.72   100.00  
     0      0  Initialization           -0.666765     18.72   100.00 *
     0      0  Initialization           -0.621460     18.72   100.00  
     0      0  GlobalStep               -0.676269     25.98   100.00 *
     1      0  GlobalStep               -0.685104     30.75   100.00 *
     2      0  GlobalStep               -0.688581     36.70   100.00 *
     3      0  GlobalStep               -0.686750     42.17   100.00  
     4      0  GlobalStep               -0.681975     48.14   100.00  
Summary: iters   5 evals   8 noisy_evals   0 cycle

In [128]:
sol_int = [int(x) for x in sol]
params_rbfopt = {'n_estimators': sol_int[0], 'max_depth': sol_int[1]}
params_rbfopt

{'n_estimators': 81, 'max_depth': 5}

In [129]:
model_rbfopt = RandomForestClassifier(criterion='gini', max_features='sqrt', **params_rbfopt)
model_rbfopt.fit(X_train, y_train)


RandomForestClassifier(max_depth=5, max_features='sqrt', n_estimators=81)

In [131]:
y_pred_rbfopt = model_rbfopt.predict(X_test)
precision_rbfopt = precision_score(y_test, y_pred_rbfopt)
precision_rbfopt

0.7015503875968992

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=5ddaf308-57af-4a6e-ac7b-d39835033e12' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>