# Parameter Tuning using GridSearchCV

In [18]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sb
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.tree import plot_tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing, linear_model, neighbors
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from matplotlib import pyplot
import time

%matplotlib inline

# import data
churn_data = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn_R2.csv')

input_names = list(churn_data.columns)

all_inputs = churn_data[input_names].values

# set target data
churn_target = churn_data.iloc[:,-1]

# set feature data
churn_data.drop(["customerID","Churn"], axis=1, inplace=True)

# set training and test variables
X = np.array(churn_data)
y = np.array(churn_target)

# set variables for target and features
features = list(churn_data)
targets = list(churn_target)

# divide data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 100)

# set seaborn style and fontsize
sb.set_style('ticks')
sb.set_context('paper', font_scale=1.6)

# data normalization
scaler = StandardScaler()

scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Decision Tree

In [19]:
dtc_param_grid = {'criterion': ['gini', 'entropy'],
                  'splitter': ['best', 'random'],
                  'max_depth': list(range(1,10)),
                  'max_features' : list(range(1,19))}

dtc_grid_search = GridSearchCV(DecisionTreeClassifier(),
                           param_grid=dtc_param_grid,
                           cv=10,
                           verbose=3)

dtc_grid_search.fit(X_train, y_train)
print('Best score: {}'.format(dtc_grid_search.best_score_))
print('Best parameters: {}'.format(dtc_grid_search.best_params_))
print('Best estimator: {}'.format(dtc_grid_search.best_estimator_))

dtc = dtc_grid_search.best_estimator_
dtc_prediction = dtc.predict(X_test)
best_report = classification_report(y_test, dtc_prediction, output_dict=True)

print("---  ---\n")
print(f"Confusion matrix: \n{confusion_matrix(y_test, dtc_prediction)}\n\nClassification report:\n {classification_report(y_test, dtc_prediction)}\n")

Fitting 10 folds for each of 648 candidates, totalling 6480 fits
[CV 1/10] END criterion=gini, max_depth=1, max_features=1, splitter=best;, score=0.735 total time=   0.0s
[CV 2/10] END criterion=gini, max_depth=1, max_features=1, splitter=best;, score=0.735 total time=   0.0s
[CV 3/10] END criterion=gini, max_depth=1, max_features=1, splitter=best;, score=0.735 total time=   0.0s
[CV 4/10] END criterion=gini, max_depth=1, max_features=1, splitter=best;, score=0.737 total time=   0.0s
[CV 5/10] END criterion=gini, max_depth=1, max_features=1, splitter=best;, score=0.737 total time=   0.0s
[CV 6/10] END criterion=gini, max_depth=1, max_features=1, splitter=best;, score=0.737 total time=   0.0s
[CV 7/10] END criterion=gini, max_depth=1, max_features=1, splitter=best;, score=0.737 total time=   0.0s
[CV 8/10] END criterion=gini, max_depth=1, max_features=1, splitter=best;, score=0.737 total time=   0.0s
[CV 9/10] END criterion=gini, max_depth=1, max_features=1, splitter=best;, score=0.737 

## K-NearestNeighbours

In [20]:
knn_param_grid = {'n_neighbors': list(range(1,100)),
                  'weights': ['uniform', 'distance'],
                  'algorithm': ['ball_tree', 'kd_tree', 'brute']}

knn_grid_search = GridSearchCV(KNeighborsClassifier(),
                           param_grid=knn_param_grid,
                           cv=10,
                           verbose=3,
                           n_jobs=-1)

knn_grid_search.fit(X_train, y_train)
print('Best score: {}'.format(knn_grid_search.best_score_))
print('Best parameters: {}'.format(knn_grid_search.best_params_))
print('Best estimator: {}'.format(knn_grid_search.best_estimator_))

knn = knn_grid_search.best_estimator_
knn_prediction = knn.predict(X_test)
best_report = classification_report(y_test, knn_prediction, output_dict=True)

print("---  ---\n")
print(f"Confusion matrix: \n{confusion_matrix(y_test, knn_prediction)}\n\nClassification report:\n {classification_report(y_test, knn_prediction)}\n")

Fitting 10 folds for each of 594 candidates, totalling 5940 fits
Best score: 0.787036452055364
Best parameters: {'algorithm': 'ball_tree', 'n_neighbors': 78, 'weights': 'uniform'}
Best estimator: KNeighborsClassifier(algorithm='ball_tree', n_neighbors=78)
---  ---

Confusion matrix: 
[[1352  181]
 [ 269  302]]

Classification report:
               precision    recall  f1-score   support

           0       0.83      0.88      0.86      1533
           1       0.63      0.53      0.57       571

    accuracy                           0.79      2104
   macro avg       0.73      0.71      0.72      2104
weighted avg       0.78      0.79      0.78      2104




## Neural Networks

In [21]:
nn_param_grid = {'solver': ['adam'],
    'activation' : ['tanh', 'logistic', 'identity', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': [0.0001, 0.001, 0.01],
    'max_iter' : [1000]}

nn_grid_search = GridSearchCV(MLPClassifier(),
                        param_grid=nn_param_grid,
                        cv=10,
                        verbose=3, n_jobs=-1)

nn_grid_search.fit(X_train, y_train)
print('Best score: {}'.format(nn_grid_search.best_score_))
print('Best parameters: {}'.format(nn_grid_search.best_params_))
print('Best estimator: {}'.format(nn_grid_search.best_estimator_))

nn = nn_grid_search.best_estimator_
nn_prediction = nn.predict(X_test)
best_report = classification_report(y_test, nn_prediction, output_dict=True)

print("---  ---\n")
print(f"Confusion matrix: \n{confusion_matrix(y_test, nn_prediction)}\n\nClassification report:\n {classification_report(y_test, nn_prediction)}\n")

Fitting 10 folds for each of 36 candidates, totalling 360 fits


KeyboardInterrupt: 

## Stochastic Gradient Descent


In [25]:
sgd_param_grid = {'loss': ['hinge', 'modified_huber', 'squared_hinge', 'perceptron'],
                  'penalty': ['l2', 'l1', 'elasticnet'],
                  'max_iter' : [100]}

sgd_grid_search = GridSearchCV(SGDClassifier(),
                        param_grid=sgd_param_grid,
                        cv=10,
                        verbose=3, n_jobs=-1)

sgd_grid_search.fit(X_train, y_train)
print('Best score: {}'.format(sgd_grid_search.best_score_))
print('Best parameters: {}'.format(sgd_grid_search.best_params_))
print('Best estimator: {}'.format(sgd_grid_search.best_estimator_))

sgd = sgd_grid_search.best_estimator_
sgd_prediction = sgd.predict(X_test)
best_report = classification_report(y_test, sgd_prediction, output_dict=True)

print("---  ---\n")
print(f"Confusion matrix: \n{confusion_matrix(y_test, sgd_prediction)}\n\nClassification report:\n {classification_report(y_test, sgd_prediction)}\n")

Fitting 10 folds for each of 12 candidates, totalling 120 fits
Best score: 0.7870431023733323
Best parameters: {'loss': 'hinge', 'max_iter': 100, 'penalty': 'elasticnet'}
Best estimator: SGDClassifier(max_iter=100, penalty='elasticnet')
---  ---

Confusion matrix: 
[[1379  154]
 [ 306  265]]

Classification report:
               precision    recall  f1-score   support

           0       0.82      0.90      0.86      1533
           1       0.63      0.46      0.54       571

    accuracy                           0.78      2104
   macro avg       0.73      0.68      0.70      2104
weighted avg       0.77      0.78      0.77      2104




## Support Vector Machines

In [28]:
svc_param_grid = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                  'shrinking': [True, False]}

svc_grid_search = GridSearchCV(svm.SVC(),
                        param_grid=svc_param_grid,
                        cv=10,
                        verbose=3, n_jobs=-1)

svc_grid_search.fit(X_train, y_train)
print('Best score: {}'.format(svc_grid_search.best_score_))
print('Best parameters: {}'.format(svc_grid_search.best_params_))
print('Best estimator: {}'.format(svc_grid_search.best_estimator_))

svc = svc_grid_search.best_estimator_
svc_prediction = svc.predict(X_test)
best_report = classification_report(y_test, svc_prediction, output_dict=True)

print("---  ---\n")
print(f"Confusion matrix: \n{confusion_matrix(y_test, svc_prediction)}\n\nClassification report:\n {classification_report(y_test, svc_prediction)}\n")

Fitting 10 folds for each of 8 candidates, totalling 80 fits
Best score: 0.7996766282887899
Best parameters: {'kernel': 'rbf', 'shrinking': False}
Best estimator: SVC(shrinking=False)
---  ---

Confusion matrix: 
[[1394  139]
 [ 288  283]]

Classification report:
               precision    recall  f1-score   support

           0       0.83      0.91      0.87      1533
           1       0.67      0.50      0.57       571

    accuracy                           0.80      2104
   macro avg       0.75      0.70      0.72      2104
weighted avg       0.79      0.80      0.79      2104


