In [21]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense, Dropout

%matplotlib inline

In [13]:
# Read the churn table from the project-relative data folder, then preview
# the first five rows to sanity-check the columns.
data = pd.read_csv(filepath_or_buffer='./data/cell_phone_churn.csv')
data.head(n=5)

Unnamed: 0,state,account_length,area_code,intl_plan,vmail_plan,vmail_message,day_mins,day_calls,day_charge,eve_mins,eve_calls,eve_charge,night_mins,night_calls,night_charge,intl_mins,intl_calls,intl_charge,custserv_calls,churn
0,KS,128,415,no,yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False
1,OH,107,415,no,yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False
2,NJ,137,415,no,no,0,243.4,114,41.38,121.2,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False
3,OH,84,408,yes,no,0,299.4,71,50.9,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False
4,OK,75,415,yes,no,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False


In [14]:
# Remove the 'state' column before modelling.
data = data.drop(columns='state')

# Encode both yes/no plan flags as integers: 'yes' -> 1, anything else -> 0.
for plan_col in ('intl_plan', 'vmail_plan'):
    data[plan_col] = data[plan_col].eq('yes').astype('int64')

In [15]:
# Preview the frame again after the drop / binarize step.
data.head(n=5)

Unnamed: 0,account_length,area_code,intl_plan,vmail_plan,vmail_message,day_mins,day_calls,day_charge,eve_mins,eve_calls,eve_charge,night_mins,night_calls,night_charge,intl_mins,intl_calls,intl_charge,custserv_calls,churn
0,128,415,0,1,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False
1,107,415,0,1,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False
2,137,415,0,0,0,243.4,114,41.38,121.2,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False
3,84,408,1,0,0,299.4,71,50.9,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False
4,75,415,1,0,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False


In [16]:
# Separate the feature matrix from the target; the boolean 'churn' column is
# cast to integers (False/True -> 0/1) for the binary classifier.
X = data.drop('churn', axis=1)
y = data['churn'].astype('int')

# Hold out a test partition, stratified on the target so both partitions keep
# the same churn proportion. random_state pins the shuffle so the partition is
# identical across kernel restarts; note that the network training further
# down is still unseeded, so scores can vary slightly between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42
)


In [29]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from keras import optimizers

In [34]:
def model_func(layer_one_neurons=18, layer_one_dropout=0.5, opt_learning_rate=0.01, input_dim=18):
    """Build and compile a single-hidden-layer binary classifier.

    The network is: Dense(relu) -> Dropout -> Dense(1, sigmoid), compiled with
    binary cross-entropy loss, the Adam optimizer and an accuracy metric.

    Parameters
    ----------
    layer_one_neurons : int
        Number of units in the hidden Dense layer.
    layer_one_dropout : float
        Dropout rate applied after the hidden layer.
    opt_learning_rate : float
        Learning rate passed to the Adam optimizer.
    input_dim : int
        Number of input features expected by the first layer. Defaults to 18,
        the value previously hard-coded here, so existing callers are
        unaffected.

    Returns
    -------
    The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dense(layer_one_neurons, activation='relu', input_dim=input_dim))
    model.add(Dropout(layer_one_dropout))
    # Single sigmoid unit: the output is read as the positive-class probability.
    model.add(Dense(1, activation='sigmoid'))

    optimizer = optimizers.Adam(
        learning_rate=opt_learning_rate,
        beta_1=0.9,
        beta_2=0.999,
        amsgrad=False,
    )
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [38]:
# Wrap the Keras model builder in a scikit-learn style estimator so it can be
# used inside a Pipeline / GridSearchCV; training output is silenced.
nn = KerasClassifier(
    build_fn=model_func,
    verbose=0,
    epochs=20,
)

In [39]:
# Standardize the features first, then feed them to the wrapped network.
ss = StandardScaler()
pipe = Pipeline(steps=[('ss', ss), ('nn', nn)])

In [40]:
# Grid over the hidden-layer width and the Adam learning rate. Keys use the
# '<pipeline step>__<parameter>' form so they reach the 'nn' step.
# Dropout ('nn__layer_one_dropout') and epoch count ('nn__epochs') are not
# searched here and stay at the values configured above.
params = {
    'nn__layer_one_neurons': [15, 18, 20],
    'nn__opt_learning_rate': [0.01, 0.001],
}

gs = GridSearchCV(estimator=pipe, param_grid=params)
gs.fit(X_train, y_train)

# Report the best cross-validated score followed by the winning parameters.
print(gs.best_score_, gs.best_params_, sep='\n')


0.9223689436912537
{'nn__layer_one_neurons': 18, 'nn__opt_learning_rate': 0.01}


In [41]:
# Score the fitted grid search on the held-out test partition.
gs.score(X=X_test, y=y_test)

0.922062337398529