# **Hyperparameter Tuning**


---



1.   GridSearchCV
2.   RandomizedSearchCV



In [4]:
# Dependencies
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC

In [5]:
# load breast data set
breast_cancer = load_breast_cancer()



In [6]:
print(breast_cancer)

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
 

In [7]:
# loading to data frame
data_frame = pd.DataFrame(breast_cancer.data, columns = breast_cancer.feature_names)


In [8]:
# show first 5 rows
data_frame.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [9]:
# adding target columns
data_frame['label'] = breast_cancer.target

In [10]:
# show first 5 rows with target
data_frame.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [11]:
# how many rows and columns
data_frame.shape

(569, 31)

In [12]:
# find missing value
data_frame.isnull().sum()

Unnamed: 0,0
mean radius,0
mean texture,0
mean perimeter,0
mean area,0
mean smoothness,0
mean compactness,0
mean concavity,0
mean concave points,0
mean symmetry,0
mean fractal dimension,0


In [13]:
# checking distribution each variable
for i in data_frame.columns:
  print(data_frame[i].value_counts())


mean radius
12.340    4
11.060    3
10.260    3
12.770    3
13.050    3
         ..
19.810    1
13.540    1
13.080    1
9.504     1
15.340    1
Name: count, Length: 456, dtype: int64
mean texture
16.84    3
19.83    3
15.70    3
20.52    3
18.22    3
        ..
27.88    1
22.68    1
23.93    1
29.37    1
30.62    1
Name: count, Length: 479, dtype: int64
mean perimeter
82.61     3
134.70    3
87.76     3
113.40    2
120.20    2
         ..
82.53     1
100.40    1
81.15     1
60.73     1
87.02     1
Name: count, Length: 522, dtype: int64
mean area
512.2     3
321.6     2
582.7     2
1138.0    2
477.3     2
         ..
1148.0    1
642.7     1
461.0     1
951.6     1
477.4     1
Name: count, Length: 539, dtype: int64
mean smoothness
0.10070    5
0.11500    4
0.10540    4
0.10750    4
0.10630    3
          ..
0.08523    1
0.07371    1
0.12000    1
0.07941    1
0.08871    1
Name: count, Length: 474, dtype: int64
mean compactness
0.11470    3
0.12060    3
0.15160    2
0.11170    2
0.11110   

In [14]:
# separeting feature and target
x = data_frame.drop(columns='label', axis=1)
y = data_frame['label']

In [15]:
print(x)

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0          17.99         10.38          122.80     1001.0          0.11840   
1          20.57         17.77          132.90     1326.0          0.08474   
2          19.69         21.25          130.00     1203.0          0.10960   
3          11.42         20.38           77.58      386.1          0.14250   
4          20.29         14.34          135.10     1297.0          0.10030   
..           ...           ...             ...        ...              ...   
564        21.56         22.39          142.00     1479.0          0.11100   
565        20.13         28.25          131.20     1261.0          0.09780   
566        16.60         28.08          108.30      858.1          0.08455   
567        20.60         29.33          140.10     1265.0          0.11780   
568         7.76         24.54           47.92      181.0          0.05263   

     mean compactness  mean concavity  mean concave points  mea

In [16]:
x.shape

(569, 30)

In [17]:
print(y)

0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: label, Length: 569, dtype: int64


In [18]:
y.shape

(569,)

#**GridSearch CV**


---

gridsearch CV determining the best parameter for model

In [19]:
# loading SVC model
model = SVC()

In [20]:
# hyperparameter

Parameters = {
    'kernel':['linear','poly','rbf','sigmoid'],
    'C':[1,5,10,20]
}

In [21]:
# grid search
grid_search = GridSearchCV(model, Parameters, cv=5)

In [22]:
grid_search.fit(x,y)

In [23]:
print(grid_search)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [1, 5, 10, 20],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid']})


In [24]:
grid_search.cv_results_

{'mean_fit_time': array([1.63831239e+00, 5.51862717e-03, 6.82144165e-03, 2.16908455e-02,
        3.11248512e+00, 5.82308769e-03, 5.78069687e-03, 1.76495075e-02,
        4.79684386e+00, 5.74812889e-03, 7.74917603e-03, 1.55447483e-02,
        7.87233295e+00, 5.93953133e-03, 5.52935600e-03, 1.77517414e-02]),
 'std_fit_time': array([1.01141976e+00, 9.77928261e-05, 9.02053717e-04, 5.14389208e-03,
        6.80804146e-01, 2.67324471e-04, 3.44690572e-04, 2.90027284e-03,
        1.07054337e+00, 9.78509584e-05, 1.76626167e-03, 5.85106408e-04,
        2.48642740e+00, 1.83939487e-04, 1.16904232e-04, 3.19642176e-03]),
 'mean_score_time': array([0.00266318, 0.00266953, 0.00351605, 0.00660052, 0.00288086,
        0.00252957, 0.00295858, 0.00556087, 0.00308232, 0.00233688,
        0.00351038, 0.00482264, 0.00291476, 0.00240402, 0.00263166,
        0.00544915]),
 'std_score_time': array([3.39655208e-04, 1.55205507e-04, 7.92340673e-04, 1.29933032e-03,
        4.13618496e-04, 1.14292346e-04, 4.49982209e-

In [25]:
# best parameters
best_parameters = grid_search.best_params_
print(best_parameters)


{'C': 10, 'kernel': 'linear'}


In [26]:

# loading cv result to data frame
result = pd.DataFrame(grid_search.cv_results_)
result.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.638312,1.01142,0.002663,0.00034,1,linear,"{'C': 1, 'kernel': 'linear'}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,4
1,0.005519,9.8e-05,0.00267,0.000155,1,poly,"{'C': 1, 'kernel': 'poly'}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,12
2,0.006821,0.000902,0.003516,0.000792,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,11
3,0.021691,0.005144,0.006601,0.001299,1,sigmoid,"{'C': 1, 'kernel': 'sigmoid'}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,13
4,3.112485,0.680804,0.002881,0.000414,5,linear,"{'C': 5, 'kernel': 'linear'}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2


In [27]:
# Highest accuracy
highest_accuracy = grid_search.best_score_
print(highest_accuracy)

0.9525694767893185


In [28]:
grid_search_result = result[['param_C','param_kernel','mean_test_score','rank_test_score']]
grid_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score,rank_test_score
0,1,linear,0.945536,4
1,1,poly,0.908663,12
2,1,rbf,0.912172,11
3,1,sigmoid,0.460441,13
4,5,linear,0.950815,2
5,5,poly,0.922729,6
6,5,rbf,0.931501,5
7,5,sigmoid,0.411178,14
8,10,linear,0.952569,1
9,10,poly,0.920975,8


In [29]:
grid_search_result.shape

(16, 4)

In [30]:
grid_search_result.sort_values(by='rank_test_score')

Unnamed: 0,param_C,param_kernel,mean_test_score,rank_test_score
8,10,linear,0.952569,1
4,5,linear,0.950815,2
12,20,linear,0.949061,3
0,1,linear,0.945536,4
6,5,rbf,0.931501,5
5,5,poly,0.922729,6
10,10,rbf,0.922714,7
9,10,poly,0.920975,8
14,20,rbf,0.920944,9
13,20,poly,0.919221,10


High Accuracy = 95.2%



Best Parameters = {'C': 10, 'kernel': 'linear'}
********************************************************************************

# **RandomizedSearchCV**

In [31]:
# loading SVC model
model = SVC()


In [32]:
parameter = {
    'kernel':['linear','poly','rbf','sigmoid'],
    'C':[1,5,10,20]
}

In [33]:
grid_search = RandomizedSearchCV(model, parameter, cv=5)

In [34]:
grid_search.fit(x,y)

In [35]:
print(grid_search)

RandomizedSearchCV(cv=5, estimator=SVC(),
                   param_distributions={'C': [1, 5, 10, 20],
                                        'kernel': ['linear', 'poly', 'rbf',
                                                   'sigmoid']})


In [36]:
grid_search.cv_results_

{'mean_fit_time': array([1.61087513e-02, 1.73518181e-02, 5.71222305e-03, 1.53419380e+00,
        5.69558144e-03, 5.24135137e+00, 5.89599609e-03, 5.82027435e-03,
        1.64559841e-02, 7.72932010e+00]),
 'std_fit_time': array([7.53907686e-04, 3.99349469e-03, 2.56211993e-04, 8.04224007e-01,
        2.13575943e-04, 2.21361114e+00, 3.22875753e-04, 2.93671885e-04,
        4.28298220e-04, 2.52813844e+00]),
 'mean_score_time': array([0.00565529, 0.00519385, 0.00252481, 0.00248938, 0.00268445,
        0.00435352, 0.00284481, 0.00301456, 0.00533667, 0.00290618]),
 'std_score_time': array([1.31569687e-03, 6.59588963e-04, 5.38512367e-05, 3.21960082e-05,
        2.28576238e-04, 2.85985019e-03, 9.61358583e-05, 1.86039226e-04,
        6.63860425e-05, 5.79574364e-04]),
 'param_kernel': masked_array(data=['sigmoid', 'sigmoid', 'poly', 'linear', 'poly',
                    'linear', 'rbf', 'rbf', 'sigmoid', 'linear'],
              mask=[False, False, False, False, False, False, False, False,
        

In [37]:
rcv=pd.DataFrame(grid_search.cv_results_)
rcv

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.016109,0.000754,0.005655,0.001316,sigmoid,10,"{'kernel': 'sigmoid', 'C': 10}",0.482456,0.403509,0.421053,0.342105,0.362832,0.402391,0.048906,9
1,0.017352,0.003993,0.005194,0.00066,sigmoid,20,"{'kernel': 'sigmoid', 'C': 20}",0.473684,0.403509,0.421053,0.342105,0.353982,0.398867,0.04764,10
2,0.005712,0.000256,0.002525,5.4e-05,poly,5,"{'kernel': 'poly', 'C': 5}",0.885965,0.912281,0.921053,0.938596,0.955752,0.922729,0.023689,5
3,1.534194,0.804224,0.002489,3.2e-05,linear,1,"{'kernel': 'linear', 'C': 1}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,3
4,0.005696,0.000214,0.002684,0.000229,poly,1,"{'kernel': 'poly', 'C': 1}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,7
5,5.241351,2.213611,0.004354,0.00286,linear,5,"{'kernel': 'linear', 'C': 5}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,1
6,0.005896,0.000323,0.002845,9.6e-05,rbf,20,"{'kernel': 'rbf', 'C': 20}",0.877193,0.921053,0.921053,0.947368,0.938053,0.920944,0.024105,6
7,0.00582,0.000294,0.003015,0.000186,rbf,5,"{'kernel': 'rbf', 'C': 5}",0.885965,0.929825,0.938596,0.947368,0.955752,0.931501,0.024358,4
8,0.016456,0.000428,0.005337,6.6e-05,sigmoid,1,"{'kernel': 'sigmoid', 'C': 1}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,8
9,7.72932,2.528138,0.002906,0.00058,linear,20,"{'kernel': 'linear', 'C': 20}",0.929825,0.95614,0.95614,0.938596,0.964602,0.949061,0.012816,2


In [38]:
rcv.shape

(10, 15)

In [40]:
# best parameter
best_parameter = grid_search.best_params_
print(best_parameter)


{'kernel': 'linear', 'C': 5}


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.016109,0.000754,0.005655,0.001316,sigmoid,10,"{'kernel': 'sigmoid', 'C': 10}",0.482456,0.403509,0.421053,0.342105,0.362832,0.402391,0.048906,9
1,0.017352,0.003993,0.005194,0.00066,sigmoid,20,"{'kernel': 'sigmoid', 'C': 20}",0.473684,0.403509,0.421053,0.342105,0.353982,0.398867,0.04764,10
2,0.005712,0.000256,0.002525,5.4e-05,poly,5,"{'kernel': 'poly', 'C': 5}",0.885965,0.912281,0.921053,0.938596,0.955752,0.922729,0.023689,5
3,1.534194,0.804224,0.002489,3.2e-05,linear,1,"{'kernel': 'linear', 'C': 1}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,3
4,0.005696,0.000214,0.002684,0.000229,poly,1,"{'kernel': 'poly', 'C': 1}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,7
5,5.241351,2.213611,0.004354,0.00286,linear,5,"{'kernel': 'linear', 'C': 5}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,1
6,0.005896,0.000323,0.002845,9.6e-05,rbf,20,"{'kernel': 'rbf', 'C': 20}",0.877193,0.921053,0.921053,0.947368,0.938053,0.920944,0.024105,6
7,0.00582,0.000294,0.003015,0.000186,rbf,5,"{'kernel': 'rbf', 'C': 5}",0.885965,0.929825,0.938596,0.947368,0.955752,0.931501,0.024358,4
8,0.016456,0.000428,0.005337,6.6e-05,sigmoid,1,"{'kernel': 'sigmoid', 'C': 1}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,8
9,7.72932,2.528138,0.002906,0.00058,linear,20,"{'kernel': 'linear', 'C': 20}",0.929825,0.95614,0.95614,0.938596,0.964602,0.949061,0.012816,2


In [41]:
# best accuracy
best_accuracy = grid_search.best_score_
print(best_accuracy)

0.9508150908244062


In [42]:
result = rcv[['param_C','param_kernel','mean_test_score','rank_test_score']]
result

Unnamed: 0,param_C,param_kernel,mean_test_score,rank_test_score
0,10,sigmoid,0.402391,9
1,20,sigmoid,0.398867,10
2,5,poly,0.922729,5
3,1,linear,0.945536,3
4,1,poly,0.908663,7
5,5,linear,0.950815,1
6,20,rbf,0.920944,6
7,5,rbf,0.931501,4
8,1,sigmoid,0.460441,8
9,20,linear,0.949061,2
