<a href="https://colab.research.google.com/github/rahulrajpr/AdvancedHyperParameterOptimisation/blob/SearchAlgorithms/GridSearchCV_MutipleParameterSpaces.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.datasets import load_breast_cancer

In [3]:
data = pd.DataFrame(data = load_breast_cancer().data, columns = load_breast_cancer().feature_names)

In [4]:
data['target'] = load_breast_cancer().target

In [5]:
data.isnull().sum()

mean radius                0
mean texture               0
mean perimeter             0
mean area                  0
mean smoothness            0
mean compactness           0
mean concavity             0
mean concave points        0
mean symmetry              0
mean fractal dimension     0
radius error               0
texture error              0
perimeter error            0
area error                 0
smoothness error           0
compactness error          0
concavity error            0
concave points error       0
symmetry error             0
fractal dimension error    0
worst radius               0
worst texture              0
worst perimeter            0
worst area                 0
worst smoothness           0
worst compactness          0
worst concavity            0
worst concave points       0
worst symmetry             0
worst fractal dimension    0
target                     0
dtype: int64

In [6]:
X = data.drop('target', axis = 1)
y = data['target']

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,y, train_size = 0.7, random_state = 100)

In [9]:
from sklearn.svm import SVC

In [10]:
svc = SVC(random_state =100)

In [11]:
from sklearn.model_selection import GridSearchCV

#### Definig the multiple parameter spaces

In [19]:
params = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
 ]

In [20]:
search = GridSearchCV(estimator = svc,
                      param_grid = params,
                      scoring = 'roc_auc',
                      cv = 5,
                      verbose = 1,
                      n_jobs = -1,
                      return_train_score = True)

In [21]:
search.fit(X_train,y_train)
%time

Fitting 5 folds for each of 12 candidates, totalling 60 fits
CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.68 µs


In [22]:
search.best_params_

{'C': 10, 'kernel': 'linear'}

In [23]:
best_estimator = search.best_estimator_

In [26]:
pred_train = best_estimator.predict(X_train)
pred_test = best_estimator.predict(X_test)

In [27]:
from sklearn.metrics import roc_auc_score

In [28]:
print('train roc_auc score : ',roc_auc_score(y_train,pred_train))
print('test roc_auc score : ',roc_auc_score(y_test,pred_test))

train roc_auc score :  0.967256273138626
test roc_auc score :  0.956308610400682


In [30]:
sel_cols = ['rank_test_score','param_C','param_kernel','param_gamma','mean_test_score','std_test_score']

In [31]:
results = pd.DataFrame(search.cv_results_).sort_values('rank_test_score')[sel_cols].set_index('rank_test_score')

In [32]:
results.head(10)

Unnamed: 0_level_0,param_C,param_kernel,param_gamma,mean_test_score,std_test_score
rank_test_score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,10,linear,,0.992596,0.00517
2,1,linear,,0.991461,0.006435
3,100,linear,,0.991157,0.005434
4,1000,linear,,0.991027,0.005177
5,10,rbf,0.0001,0.959901,0.02407
6,1,rbf,0.0001,0.959891,0.023417
7,1,rbf,0.001,0.948015,0.016085
8,100,rbf,0.0001,0.944784,0.028094
9,10,rbf,0.001,0.926823,0.020038
10,100,rbf,0.001,0.926533,0.019775
