## **HyperParameter Tuning :**

In [1]:
import pandas as pd
data = pd.read_csv('/content/sample_data/sonar.all-data.csv', header=None)
data.shape

(208, 61)

In [2]:
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [3]:
x = data.iloc[:, :-1]
y = data.iloc[:,-1]
print(x.shape,y.shape)

(208, 60) (208,)


In [4]:
y[:-10]

0      R
1      R
2      R
3      R
4      R
      ..
193    M
194    M
195    M
196    M
197    M
Name: 60, Length: 198, dtype: object

In [None]:
y.unique()

array(['R', 'M'], dtype=object)

In [None]:
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
y=enc.fit_transform(y)


In [None]:
print(y[:5], y[-5:])

[1 1 1 1 1] [0 0 0 0 0]


In [None]:
from sklearn.svm import SVC

In [None]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain,ytest = train_test_split(x,y, test_size=.25, random_state=40)
print(xtrain.shape, xtest.shape, ytrain.shape, ytest.shape)

(156, 60) (52, 60) (156,) (52,)


Manual tuning: we choose values for hyper para manually. Usually recommended values of hyper para are tested and choose best hyper para value ie that gives best performance

### Manual Tuning ::

In [None]:
model = SVC(kernel='linear')
model.fit(xtrain, ytrain)
ypred = model.predict(xtest)


In [None]:
from sklearn.metrics import accuracy_score

In [None]:
accuracy_score(ytest, ypred)

0.8076923076923077

In [None]:
model = SVC(kernel='rbf')
model.fit(xtrain, ytrain)
ypred = model.predict(xtest)
accuracy_score(ytest, ypred)*100

82.6923076923077

In [None]:
model = SVC(kernel='poly', degree=3)
model.fit(xtrain, ytrain)
ypred = model.predict(xtest)
accuracy_score(ytest, ypred)*100

80.76923076923077

In [None]:
model = SVC(kernel='poly', degree=2)
model.fit(xtrain, ytrain)
ypred = model.predict(xtest)
accuracy_score(ytest, ypred)*100

78.84615384615384

## Grid search:

In [None]:
import numpy as np

In [None]:
space =dict()
space['C'] =[1,10,100]
space['kernel']=['rbf','linear','poly']
space['degree']=[2,3,4,5]
print(space)


{'C': [1, 10, 100], 'kernel': ['rbf', 'linear', 'poly'], 'degree': [2, 3, 4, 5]}


In [None]:
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold

In [None]:
cv = RepeatedStratifiedKFold(n_splits = 5, n_repeats=2, random_state=10)

In [None]:
model= SVC()

In [None]:
search = GridSearchCV(model, space, cv=cv)

In [None]:
search.fit(x,y)

In [None]:
search.best_score_

0.8628919860627178

In [None]:
search.best_params_

{'C': 100, 'degree': 2, 'kernel': 'rbf'}

In [None]:
[x[1] for x in search.cv_scores_]

AttributeError: ignored

# Randomized Grid Search:

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
search2 = RandomizedSearchCV(model, space, cv=cv, n_iter=10)

In [None]:
result = search2.fit(x,y)

In [None]:
type(result)

sklearn.model_selection._search.RandomizedSearchCV

In [None]:
result.best_score_

0.8628919860627178

In [None]:
result.best_params_

{'kernel': 'rbf', 'degree': 4, 'C': 100}

In [None]:
result.cv_results_

{'mean_fit_time': array([0.00762687, 0.00745528, 0.01093645, 0.00984666, 0.00834224,
        0.00974364, 0.00602086, 0.00492954, 0.00457063, 0.00637674]),
 'std_fit_time': array([0.00035492, 0.00033246, 0.0048658 , 0.00340443, 0.00240322,
        0.00613228, 0.00465991, 0.00041281, 0.00052924, 0.00179228]),
 'mean_score_time': array([0.00488117, 0.00418475, 0.00535614, 0.0039974 , 0.0041971 ,
        0.00435026, 0.00226507, 0.002056  , 0.00237026, 0.00335834]),
 'std_score_time': array([0.00050345, 0.00030452, 0.00236428, 0.00025571, 0.00033719,
        0.00037292, 0.00059127, 0.00010293, 0.00040244, 0.0012063 ]),
 'param_kernel': masked_array(data=['rbf', 'poly', 'rbf', 'linear', 'poly', 'rbf', 'poly',
                    'poly', 'rbf', 'poly'],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_degree': masked_array(data=[4, 4, 4, 2, 2, 5, 5, 3, 4, 3],
             