In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Read the raw WDBC file. It ships without a header row, so pandas assigns
# integer column labels (0, 1, 2, ...).
df = pd.read_csv("./data/breast_cancer_wisconsin_diagnostic/wdbc.data", header=None)

# Column 1 is used as the target; every column from label 2 onward is a feature.
labels_raw = df[1].values
X = df.loc[:, 2:].values

# Map the target values to integer codes; `le` keeps the mapping in
# `le.classes_` for later inspection.
le = LabelEncoder()
y = le.fit_transform(labels_raw)

# Hold out 20% of the rows as a test set, stratified on the encoded target
# and seeded so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=1,
)

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV 
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline

# Standardize the features, then fit an SVM; the scaler is part of the
# pipeline so it is re-fitted inside every cross-validation fold.
pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1))

# Candidate values 1e-4 ... 1e3, shared by C and gamma.
param_range = [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3]

# Two sub-grids: the linear kernel is tuned over C only, the RBF kernel over
# C and gamma.
linear_grid = {'svc__C': param_range, 'svc__kernel': ['linear']}
rbf_grid = {'svc__C': param_range, 'svc__gamma': param_range, 'svc__kernel': ['rbf']}
param_grid = [linear_grid, rbf_grid]

# Exhaustive search scored by 10-fold cross-validated accuracy; refit=True
# retrains the winning configuration on all of X_train afterwards.
gs = GridSearchCV(
    estimator=pipe_svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
    refit=True,
    n_jobs=-1,
)
gs.fit(X_train, y_train)

print(gs.best_score_)
print(gs.best_params_)

0.9846859903381642
{'svc__C': 100.0, 'svc__gamma': 0.001, 'svc__kernel': 'rbf'}


In [5]:
# Evaluate the tuned pipeline on the held-out test set.
# The search object `gs` was created with refit=True, so `best_estimator_`
# has already been retrained on the whole of X_train with the best
# hyper-parameters; fitting it again here would only repeat that work.
clf = gs.best_estimator_
print(f'测试集准确率: {clf.score(X_test, y_test):.3f}')

测试集准确率: 0.974


In [8]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` is loaded, so `scipy.stats.loguniform` would only work
# if some other import had already pulled it in.
import scipy.stats
from sklearn.model_selection import RandomizedSearchCV 

# Same pipeline as the grid search: standardize, then fit an SVM.
pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1)) 

# Instead of a fixed list of values, draw C and gamma from a log-uniform
# distribution spanning 1e-4 to 1e3.
param_range = scipy.stats.loguniform(0.0001, 1000.0)

# Two sub-spaces: linear kernel (C only) and RBF kernel (C and gamma).
param_grid = [
    {'svc__C': param_range, 'svc__kernel': ['linear']}, 
    {'svc__C': param_range, 'svc__gamma': param_range, 'svc__kernel': ['rbf']}] 

# Evaluate 20 sampled configurations with 10-fold cross-validated accuracy;
# random_state fixes the sampling so the run is reproducible.
rs = RandomizedSearchCV(estimator=pipe_svc, param_distributions=param_grid, scoring='accuracy', 
                        refit=True, n_iter=20, cv=10, random_state=1, n_jobs=-1) 

rs = rs.fit(X_train, y_train)
print(rs.best_score_) 
print(rs.best_params_)

0.9780676328502416
{'svc__C': 0.05971247755848463, 'svc__kernel': 'linear'}


In [12]:
from sklearn.experimental import enable_halving_search_cv

from sklearn.model_selection import HalvingRandomSearchCV 

# Successive-halving random search over the same SVC pipeline. `pipe_svc` and
# the log-uniform `param_range` are reused from the randomized-search cell.
# NOTE(review): this is one flat search space, so `svc__gamma` is also sampled
# for linear-kernel candidates even though a linear SVC does not use it; a
# gamma value reported alongside kernel='linear' in best_params_ is therefore
# not meaningful.
param_grid = dict(
    svc__C=param_range,
    svc__kernel=['linear', 'rbf'],
    svc__gamma=param_range,
)

# n_candidates='exhaust' with resource='n_samples': candidates start on small
# subsets of the training rows, and the sample budget grows by `factor`
# between rounds.
hs = HalvingRandomSearchCV(
    estimator=pipe_svc,
    param_distributions=param_grid,
    n_candidates='exhaust',
    resource='n_samples',
    factor=1.5,
    random_state=1,
    n_jobs=-1,
)
hs.fit(X_train, y_train)

print(hs.best_score_)
print(hs.best_params_)

0.9617647058823529
{'svc__C': 4.877780549954556, 'svc__gamma': 353.8127634797474, 'svc__kernel': 'linear'}
