**Hyperparameter Tuning** ~ It is a process of tweaking your ML Algorithm to give the best results.

**Grid Search CV** ~ Is exhaustive search method of the parameters

In [1]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns = data.feature_names)
y = pd.Series(data = data.target, name = 'Target')

In [2]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

In [5]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()

In [6]:
knn.fit(X_train, y_train)
knn.score(X_train, y_train)

0.9447236180904522

In [19]:
params = {
    "knn":{
        "n_neighbors":[3,5,7,9,11],
        "metric":['euclidean', 'manhattan', 'minkowski']
    },
    "logistic_regression":{
        "C":[1.0, 2.0, 3.0, 4.0],
        "max_iter":[100,200,300,400]
    }
}

In [20]:
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(knn, param_grid = params['knn'], cv = 5, verbose = 3, scoring = 'accuracy')

In [21]:
grid.fit(X_train, y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV] metric=euclidean, n_neighbors=3 .................................
[CV] ..... metric=euclidean, n_neighbors=3, score=0.912, total=   0.0s
[CV] metric=euclidean, n_neighbors=3 .................................
[CV] ..... metric=euclidean, n_neighbors=3, score=0.938, total=   0.0s
[CV] metric=euclidean, n_neighbors=3 .................................
[CV] ..... metric=euclidean, n_neighbors=3, score=0.950, total=   0.0s
[CV] metric=euclidean, n_neighbors=3 .................................
[CV] ..... metric=euclidean, n_neighbors=3, score=0.899, total=   0.0s
[CV] metric=euclidean, n_neighbors=3 .................................
[CV] ..... metric=euclidean, n_neighbors=3, score=0.911, total=   0.0s
[CV] metric=euclidean, n_neighbors=5 .................................
[CV] ..... metric=euclidean, n_neighbors=5, score=0.925, total=   0.0s
[CV] metric=euclidean, n_neighbors=5 .................................
[CV] ..... metri

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s



[CV] metric=manhattan, n_neighbors=7 .................................
[CV] ..... metric=manhattan, n_neighbors=7, score=0.924, total=   0.0s
[CV] metric=manhattan, n_neighbors=7 .................................
[CV] ..... metric=manhattan, n_neighbors=7, score=0.949, total=   0.0s
[CV] metric=manhattan, n_neighbors=9 .................................
[CV] ..... metric=manhattan, n_neighbors=9, score=0.900, total=   0.0s
[CV] metric=manhattan, n_neighbors=9 .................................
[CV] ..... metric=manhattan, n_neighbors=9, score=0.950, total=   0.0s
[CV] metric=manhattan, n_neighbors=9 .................................
[CV] ..... metric=manhattan, n_neighbors=9, score=0.975, total=   0.0s
[CV] metric=manhattan, n_neighbors=9 .................................
[CV] ..... metric=manhattan, n_neighbors=9, score=0.924, total=   0.0s
[CV] metric=manhattan, n_neighbors=9 .................................
[CV] ..... metric=manhattan, n_neighbors=9, score=0.949, total=   0.0s
[CV] 

[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed:    0.3s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'metric': ['euclidean', 'manhattan', 'minkowski'],
                         'n_neighbors': [3, 5, 7, 9, 11]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=3)

In [22]:
grid.best_params_

{'metric': 'manhattan', 'n_neighbors': 7}

In [23]:
grid.best_score_

0.9396835443037975

In [24]:
y_pred = grid.predict(X_test)
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.935672514619883

In [25]:
y_knn = knn.predict(X_test)
accuracy_score(y_test, y_knn)

0.9181286549707602

In [26]:
grid.score(X_train, y_train)

0.9547738693467337

In [27]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(solver = 'liblinear')
logreg.fit(X_train, y_train)
logreg.score(X_train, y_train)

0.957286432160804

In [28]:
lr_grid = GridSearchCV(logreg, param_grid=params['logistic_regression'], cv = 5, verbose = 3, scoring = 'accuracy')

In [29]:
lr_grid.fit(X_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] C=1.0, max_iter=100 .............................................
[CV] ................. C=1.0, max_iter=100, score=0.950, total=   0.0s
[CV] C=1.0, max_iter=100 .............................................
[CV] ................. C=1.0, max_iter=100, score=0.950, total=   0.0s
[CV] C=1.0, max_iter=100 .............................................
[CV] ................. C=1.0, max_iter=100, score=0.975, total=   0.0s
[CV] C=1.0, max_iter=100 .............................................
[CV] ................. C=1.0, max_iter=100, score=0.911, total=   0.0s
[CV] C=1.0, max_iter=100 .............................................
[CV] ................. C=1.0, max_iter=100, score=0.949, total=   0.0s
[CV] C=1.0, max_iter=200 .............................................
[CV] ................. C=1.0, max_iter=200, score=0.950, total=   0.0s
[CV] C=1.0, max_iter=200 .............................................
[CV] ...........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s



[CV] C=3.0, max_iter=100 .............................................
[CV] ................. C=3.0, max_iter=100, score=0.963, total=   0.0s
[CV] C=3.0, max_iter=100 .............................................
[CV] ................. C=3.0, max_iter=100, score=0.963, total=   0.0s
[CV] C=3.0, max_iter=100 .............................................
[CV] ................. C=3.0, max_iter=100, score=0.924, total=   0.0s
[CV] C=3.0, max_iter=100 .............................................
[CV] ................. C=3.0, max_iter=100, score=0.962, total=   0.0s
[CV] C=3.0, max_iter=200 .............................................
[CV] ................. C=3.0, max_iter=200, score=0.950, total=   0.0s
[CV] C=3.0, max_iter=200 .............................................
[CV] ................. C=3.0, max_iter=200, score=0.963, total=   0.0s
[CV] C=3.0, max_iter=200 .............................................
[CV] ................. C=3.0, max_iter=200, score=0.963, total=   0.0s
[CV] 

[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    0.2s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='liblinear',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [1.0, 2.0, 3.0, 4.0],
                         'max_iter': [100, 200, 300, 400]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=3)

In [30]:
lr_grid.best_params_

{'C': 3.0, 'max_iter': 100}

In [31]:
lr_grid.best_score_

0.9522151898734178

In [32]:
lr_grid.score(X_train, y_train)

0.9597989949748744

In [33]:
lr_pred = logreg.predict(X_test)
lr_grid_pred = lr_grid.predict(X_test)

In [34]:
accuracy_score(y_test, lr_pred)

0.9590643274853801

In [35]:
accuracy_score(y_test, lr_grid_pred)

0.9649122807017544

In [36]:
lr_grid.get_params()

{'cv': 5,
 'error_score': nan,
 'estimator__C': 1.0,
 'estimator__class_weight': None,
 'estimator__dual': False,
 'estimator__fit_intercept': True,
 'estimator__intercept_scaling': 1,
 'estimator__l1_ratio': None,
 'estimator__max_iter': 100,
 'estimator__multi_class': 'auto',
 'estimator__n_jobs': None,
 'estimator__penalty': 'l2',
 'estimator__random_state': None,
 'estimator__solver': 'liblinear',
 'estimator__tol': 0.0001,
 'estimator__verbose': 0,
 'estimator__warm_start': False,
 'estimator': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                    intercept_scaling=1, l1_ratio=None, max_iter=100,
                    multi_class='auto', n_jobs=None, penalty='l2',
                    random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                    warm_start=False),
 'iid': 'deprecated',
 'n_jobs': None,
 'param_grid': {'C': [1.0, 2.0, 3.0, 4.0], 'max_iter': [100, 200, 300, 400]},
 'pre_dispatch': '2*n_jobs',
 'refit': True

In [38]:
lr_grid.scoring

'accuracy'

In [40]:
lr_grid.cv_results_

{'mean_fit_time': array([0.00360322, 0.0038034 , 0.00320244, 0.00340257, 0.00340266,
        0.00380306, 0.00340261, 0.00360303, 0.00320263, 0.00320311,
        0.00380278, 0.00320277, 0.0028019 , 0.00360308, 0.00340247,
        0.0026022 ]),
 'std_fit_time': array([0.00049076, 0.00074945, 0.0004003 , 0.00049019, 0.00102102,
        0.00074911, 0.00080084, 0.0004906 , 0.00040009, 0.00098011,
        0.00040054, 0.00040014, 0.0007489 , 0.00049035, 0.00049037,
        0.00049035]),
 'mean_score_time': array([0.00060077, 0.00040045, 0.00080123, 0.00020037, 0.00040069,
        0.00060091, 0.00020013, 0.00060096, 0.00060067, 0.00060062,
        0.        , 0.00060053, 0.00040059, 0.00020032, 0.00060101,
        0.00060067]),
 'std_score_time': array([0.00049052, 0.00049045, 0.00040061, 0.00040073, 0.00049074,
        0.00049064, 0.00040026, 0.00049068, 0.00049045, 0.00049041,
        0.        , 0.00049033, 0.00049062, 0.00040064, 0.00049072,
        0.00049045]),
 'param_C': masked_array(d