In [1]:
from sklearn import datasets
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

import pandas as pd
import numpy as np
from scipy import stats

In [3]:
bc = datasets.load_breast_cancer(as_frame=True)
bc.data

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [12]:
param_grid = [
    {
        "kernel": ["rbf"],
        "gamma": [1e-2, 1e-3, 1e-4],
        "C": [1, 10, 100, 1000],
        "decision_function_shape": ["ovo", "ovr"]
    },
    {
        "kernel": ["linear"],
        "gamma": [1e-2, 1e-3, 1e-4],
        "C": [1, 10, 100, 1000],
        "decision_function_shape": ["ovo", "ovr"]
    },
]

In [13]:
clf = SVC()
grid_search = GridSearchCV(clf, param_grid=param_grid, n_jobs=-1)
grid_search.fit(bc.data, bc.target)

GridSearchCV(estimator=SVC(), n_jobs=-1,
             param_grid=[{'C': [1, 10, 100, 1000],
                          'decision_function_shape': ['ovo', 'ovr'],
                          'gamma': [0.01, 0.001, 0.0001], 'kernel': ['rbf']},
                         {'C': [1, 10, 100, 1000],
                          'decision_function_shape': ['ovo', 'ovr'],
                          'gamma': [0.01, 0.001, 0.0001],
                          'kernel': ['linear']}])

In [14]:
grid_search.best_params_

{'C': 100, 'decision_function_shape': 'ovo', 'gamma': 0.01, 'kernel': 'linear'}

In [15]:
grid_search.best_estimator_

SVC(C=100, decision_function_shape='ovo', gamma=0.01, kernel='linear')

In [16]:
grid_search_results = pd.DataFrame(grid_search.cv_results_)
print(grid_search_results.shape)
grid_search_results

(48, 17)


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_decision_function_shape,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.029792,0.001141,0.010371,0.001237,1,ovo,0.01,rbf,"{'C': 1, 'decision_function_shape': 'ovo', 'ga...",0.622807,0.622807,0.631579,0.631579,0.619469,0.625648,0.004993,47
1,0.02552,0.003845,0.008398,0.002199,1,ovo,0.001,rbf,"{'C': 1, 'decision_function_shape': 'ovo', 'ga...",0.929825,0.921053,0.921053,0.947368,0.893805,0.922621,0.017318,33
2,0.012229,0.001419,0.005633,0.000923,1,ovo,0.0001,rbf,"{'C': 1, 'decision_function_shape': 'ovo', 'ga...",0.903509,0.938596,0.938596,0.95614,0.938053,0.934979,0.017169,27
3,0.026714,0.003572,0.01274,0.002343,1,ovr,0.01,rbf,"{'C': 1, 'decision_function_shape': 'ovr', 'ga...",0.622807,0.622807,0.631579,0.631579,0.619469,0.625648,0.004993,47
4,0.032088,0.005761,0.008805,0.000705,1,ovr,0.001,rbf,"{'C': 1, 'decision_function_shape': 'ovr', 'ga...",0.929825,0.921053,0.921053,0.947368,0.893805,0.922621,0.017318,33
5,0.011387,0.000859,0.004817,0.000742,1,ovr,0.0001,rbf,"{'C': 1, 'decision_function_shape': 'ovr', 'ga...",0.903509,0.938596,0.938596,0.95614,0.938053,0.934979,0.017169,27
6,0.029276,0.003816,0.010298,0.001557,10,ovo,0.01,rbf,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.622807,0.622807,0.631579,0.640351,0.619469,0.627403,0.007619,41
7,0.020164,0.001541,0.008027,0.001434,10,ovo,0.001,rbf,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.894737,0.903509,0.921053,0.938596,0.884956,0.90857,0.019142,35
8,0.016815,0.002197,0.005561,0.001093,10,ovo,0.0001,rbf,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.894737,0.938596,0.947368,0.95614,0.920354,0.931439,0.021841,29
9,0.028503,0.00345,0.012024,0.005166,10,ovr,0.01,rbf,"{'C': 10, 'decision_function_shape': 'ovr', 'g...",0.622807,0.622807,0.631579,0.640351,0.619469,0.627403,0.007619,41


In [17]:
grid_search_results.sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_decision_function_shape,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
39,15.1375,4.783526,0.002563,0.00036,100,ovr,0.01,linear,"{'C': 100, 'decision_function_shape': 'ovr', '...",0.938596,0.947368,0.973684,0.973684,0.982301,0.963127,0.016974,1
36,15.062762,4.434022,0.002413,7.3e-05,100,ovo,0.01,linear,"{'C': 100, 'decision_function_shape': 'ovo', '...",0.938596,0.947368,0.973684,0.973684,0.982301,0.963127,0.016974,1
37,14.856604,3.983625,0.002782,0.000519,100,ovo,0.001,linear,"{'C': 100, 'decision_function_shape': 'ovo', '...",0.938596,0.947368,0.973684,0.973684,0.982301,0.963127,0.016974,1
38,15.341355,4.65272,0.002816,0.000576,100,ovo,0.0001,linear,"{'C': 100, 'decision_function_shape': 'ovo', '...",0.938596,0.947368,0.973684,0.973684,0.982301,0.963127,0.016974,1
40,15.505027,4.760012,0.002576,0.000342,100,ovr,0.001,linear,"{'C': 100, 'decision_function_shape': 'ovr', '...",0.938596,0.947368,0.973684,0.973684,0.982301,0.963127,0.016974,1
41,14.943293,4.042782,0.002685,0.000356,100,ovr,0.0001,linear,"{'C': 100, 'decision_function_shape': 'ovr', '...",0.938596,0.947368,0.973684,0.973684,0.982301,0.963127,0.016974,1
46,12.671867,7.249082,0.002532,0.000345,1000,ovr,0.001,linear,"{'C': 1000, 'decision_function_shape': 'ovr', ...",0.947368,0.947368,0.973684,0.929825,0.973451,0.954339,0.016957,7
47,11.798654,6.281912,0.002929,0.000774,1000,ovr,0.0001,linear,"{'C': 1000, 'decision_function_shape': 'ovr', ...",0.947368,0.947368,0.973684,0.929825,0.973451,0.954339,0.016957,7
43,14.60078,10.347587,0.002633,0.000632,1000,ovo,0.001,linear,"{'C': 1000, 'decision_function_shape': 'ovo', ...",0.947368,0.947368,0.973684,0.929825,0.973451,0.954339,0.016957,7
44,13.737313,8.868071,0.003637,0.002026,1000,ovo,0.0001,linear,"{'C': 1000, 'decision_function_shape': 'ovo', ...",0.947368,0.947368,0.973684,0.929825,0.973451,0.954339,0.016957,7


In [19]:
grid_search.best_score_

0.9631268436578171

In [20]:
param_dist = {
    "average": [True, False],
    "l1_ratio": stats.uniform(0, 1),
    "alpha": stats.loguniform(1e-2, 1e0),
}

In [21]:
clf = SGDClassifier()
random_search = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=15, n_jobs=-1)
random_search.fit(bc.data, bc.target)

RandomizedSearchCV(estimator=SGDClassifier(), n_iter=15, n_jobs=-1,
                   param_distributions={'alpha': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f091c120640>,
                                        'average': [True, False],
                                        'l1_ratio': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x7f091cae31c0>})

In [22]:
random_search.best_params_

{'alpha': 0.5150037829411555, 'average': False, 'l1_ratio': 0.1562299631662032}

In [23]:
random_search.best_estimator_

SGDClassifier(alpha=0.5150037829411555, l1_ratio=0.1562299631662032)

In [24]:
random_search.best_score_

0.9137711535475859

In [25]:
param_grid = {
    "average": [True, False],
    "l1_ratio": np.linspace(0, 1, num=10),
    "alpha": np.power(10, np.arange(-2, 1, dtype=float)),
}

In [26]:
clf = SGDClassifier()
grid_search = GridSearchCV(clf, param_grid=param_grid, n_jobs=-1)
grid_search.fit(bc.data, bc.target)

GridSearchCV(estimator=SGDClassifier(), n_jobs=-1,
             param_grid={'alpha': array([0.01, 0.1 , 1.  ]),
                         'average': [True, False],
                         'l1_ratio': array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
       0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])})

In [27]:
grid_search.best_params_

{'alpha': 0.1, 'average': False, 'l1_ratio': 0.8888888888888888}

In [28]:
grid_search.best_estimator_

SGDClassifier(alpha=0.1, l1_ratio=0.8888888888888888)

In [29]:
grid_search.best_score_

0.9226362366092221

In [30]:
clf = make_pipeline(StandardScaler(), SGDClassifier())

In [31]:
param_grid = {
    "sgdclassifier__average": [True, False],
    "sgdclassifier__l1_ratio": np.linspace(0, 1, num=10),
    "sgdclassifier__alpha": np.power(10, np.arange(-2, 1, dtype=float)),
}

In [32]:
grid_search = GridSearchCV(clf, param_grid=param_grid, n_jobs=-1)
grid_search.fit(bc.data, bc.target)

GridSearchCV(estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('sgdclassifier', SGDClassifier())]),
             n_jobs=-1,
             param_grid={'sgdclassifier__alpha': array([0.01, 0.1 , 1.  ]),
                         'sgdclassifier__average': [True, False],
                         'sgdclassifier__l1_ratio': array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
       0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])})

In [33]:
grid_search.best_params_

{'sgdclassifier__alpha': 0.01,
 'sgdclassifier__average': True,
 'sgdclassifier__l1_ratio': 0.7777777777777777}

In [34]:
grid_search.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('sgdclassifier',
                 SGDClassifier(alpha=0.01, average=True,
                               l1_ratio=0.7777777777777777))])

In [35]:
grid_search.best_score_

0.9824406148113647