# Churn Prediction - Model Comparison

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import GridSearchCV


# load data
# Read the cleaned churn table, then separate the "churn" target column (y)
# from the remaining columns, which are used as the feature matrix (X).
print("Loading dataset ... ")
df = pd.read_csv("churn_clean.csv", sep=",", header=0)
X = df.drop("churn", axis=1)
y = df["churn"]
features = X.columns  # column labels, captured before X is replaced by an array
n_rows, n_cols = X.shape
print("This data set contains %d data and %d features" % (n_rows, n_cols))
print()


# standardize features
# After this step X is the array returned by the scaler, no longer a DataFrame.
# NOTE(review): the scaler is fit on every row, so any cross-validation run on
# this X uses scaling statistics that include its own validation folds.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)


# GridSearch - KNN
# Tune the number of neighbours of a k-NN classifier with 10-fold
# cross-validation, ranking candidates by ROC AUC.
print("****** KNeighborsClassifier ******")
parameters = {"n_neighbors": [3, 5, 7, 10, 30]}
grid_search = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=parameters,
    scoring="roc_auc",
    cv=10,
    n_jobs=-1,  # use all available cores
    verbose=1,
)
grid_search.fit(X, y)

# Winning candidate.
print("Best parameters set :")
print(grid_search.best_params_)
print("Best score: %0.3f" % grid_search.best_score_)

# Per-candidate mean test score with a two-standard-deviation spread.
print("Grid scores :")
cv_results = grid_search.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
for params, mean, std in zip(cv_results['params'], means, stds):
    print("%0.3f (+/-%0.03f) for %r" % (mean, 2 * std, params))
print()


# GridSearch - SGDClassifier
# Tune the regularisation strength (alpha) of a linear model trained with
# stochastic gradient descent, using 10-fold cross-validation ranked by ROC AUC.
print("****** SGDClassifier ******")
parameters = {"alpha": [1e-7, 1e-5, 1e-3, 0.1, 1, 10]}
# SGD shuffles the training data using its random_state. Without a fixed seed
# the fold scores, and possibly the selected alpha, change from run to run,
# which makes the comparison with the other models unreliable.
grid_search = GridSearchCV(
    SGDClassifier(random_state=0),
    parameters,
    cv=10,
    scoring="roc_auc",
    n_jobs=-1,  # use all available cores
    verbose=1,
)
grid_search.fit(X, y)

# Winning candidate.
print("Best parameters set :")
print(grid_search.best_params_)
print("Best score: %0.3f" % grid_search.best_score_)

# Per-candidate mean test score with a two-standard-deviation spread.
print("Grid scores :")
means = grid_search.cv_results_['mean_test_score']
stds = grid_search.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, grid_search.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
print()

    
# GridSearch - SVC
# Search 3 values of C across 4 kernels (12 candidates) for a kernel SVM,
# using 10-fold cross-validation ranked by ROC AUC.
print("****** SVC ******")
parameters = {
    "C": [0.1, 1, 10],
    "kernel": ["rbf", "poly", "linear", "sigmoid"],
}
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=parameters,
    scoring="roc_auc",
    cv=10,
    n_jobs=-1,  # use all available cores
    verbose=1,
)
grid_search.fit(X, y)

# Winning candidate.
print("Best parameters set :")
print(grid_search.best_params_)
print("Best score: %0.3f" % grid_search.best_score_)

# Per-candidate mean test score with a two-standard-deviation spread.
print("Grid scores :")
cv_results = grid_search.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
for params, mean, std in zip(cv_results['params'], means, stds):
    print("%0.3f (+/-%0.03f) for %r" % (mean, 2 * std, params))
print()


# GridSearch - LinearSVC
# Tune the penalty parameter C of a linear SVM, using 10-fold
# cross-validation ranked by ROC AUC.
print("****** LinearSVC ******")
parameters = {"C": [0.1, 1, 10]}
# LinearSVC's dual solver draws on a random number generator, so an unseeded
# estimator can give slightly different scores on identical data. A fixed seed
# keeps the reported grid scores reproducible.
grid_search = GridSearchCV(
    LinearSVC(random_state=0),
    parameters,
    cv=10,
    scoring="roc_auc",
    n_jobs=-1,  # use all available cores
    verbose=1,
)
grid_search.fit(X, y)

# Winning candidate.
print("Best parameters set :")
print(grid_search.best_params_)
print("Best score: %0.3f" % grid_search.best_score_)

# Per-candidate mean test score with a two-standard-deviation spread.
print("Grid scores :")
means = grid_search.cv_results_['mean_test_score']
stds = grid_search.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, grid_search.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
print()


Loading dataset ... 
This data set contains 3333 data and 17 features

****** KNeighborsClassifier ******
Fitting 10 folds for each of 5 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    8.2s


Best parameters set :
{'n_neighbors': 30}
Best score: 0.877
Grid scores :
0.790 (+/-0.073) for {'n_neighbors': 3}
0.829 (+/-0.057) for {'n_neighbors': 5}
0.851 (+/-0.059) for {'n_neighbors': 7}
0.866 (+/-0.055) for {'n_neighbors': 10}
0.877 (+/-0.078) for {'n_neighbors': 30}

****** SGDClassifier ******
Fitting 10 folds for each of 6 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:   10.0s finished


Best parameters set :
{'alpha': 1}
Best score: 0.774
Grid scores :
0.656 (+/-0.161) for {'alpha': 1e-07}
0.653 (+/-0.147) for {'alpha': 1e-05}
0.709 (+/-0.119) for {'alpha': 0.001}
0.758 (+/-0.116) for {'alpha': 0.1}
0.774 (+/-0.102) for {'alpha': 1}
0.767 (+/-0.097) for {'alpha': 10}

****** SVC ******
Fitting 10 folds for each of 12 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.3s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:   18.4s finished


Best parameters set :
{'C': 1, 'kernel': 'rbf'}
Best score: 0.892
Grid scores :
0.890 (+/-0.079) for {'C': 0.1, 'kernel': 'rbf'}
0.874 (+/-0.093) for {'C': 0.1, 'kernel': 'poly'}
0.755 (+/-0.097) for {'C': 0.1, 'kernel': 'linear'}
0.563 (+/-0.078) for {'C': 0.1, 'kernel': 'sigmoid'}
0.892 (+/-0.082) for {'C': 1, 'kernel': 'rbf'}
0.875 (+/-0.096) for {'C': 1, 'kernel': 'poly'}
0.736 (+/-0.096) for {'C': 1, 'kernel': 'linear'}
0.503 (+/-0.071) for {'C': 1, 'kernel': 'sigmoid'}
0.884 (+/-0.079) for {'C': 10, 'kernel': 'rbf'}
0.847 (+/-0.109) for {'C': 10, 'kernel': 'poly'}
0.735 (+/-0.116) for {'C': 10, 'kernel': 'linear'}
0.462 (+/-0.086) for {'C': 10, 'kernel': 'sigmoid'}

****** LinearSVC ******
Fitting 10 folds for each of 3 candidates, totalling 30 fits
Best parameters set :
{'C': 0.1}
Best score: 0.807
Grid scores :
0.807 (+/-0.101) for {'C': 0.1}
0.807 (+/-0.101) for {'C': 1}
0.798 (+/-0.103) for {'C': 10}



[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    3.4s finished
