In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC

In [2]:
X_train, y_train = load_breast_cancer(return_X_y=True)

### Original sets with default parameters (default in libsvm and old default in scikit-learn)

In [3]:
clf = SVC(C=1, gamma=1 / X_train.shape[1])
scores = cross_val_score(clf, X_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.6274259330511736 +/- 0.0010932697582256234


### Original sets with new default in scikit-learn

In [4]:
clf = SVC(C=1, gamma="scale")
scores = cross_val_score(clf, X_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.9123970757983839 +/- 0.034752107081010265


### Scaled sets with default parameters (MinMaxScaler)

In [5]:
sc = MinMaxScaler(feature_range=(-1, 1))
Xt_train = sc.fit_transform(X_train)
clf = SVC(C=1, gamma=1 / Xt_train.shape[1])
scores = cross_val_score(clf, Xt_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.9613389765294343 +/- 0.004260315767389489


### Scaled sets with parameter selection (MinMaxScaler)

In [6]:
sc = MinMaxScaler(feature_range=(-1, 1))
Xt_train = sc.fit_transform(X_train)
params = {"C": np.logspace(-5, 15, num=11, base=2),
          "gamma": np.logspace(3, -15, num=10, base=2)}
clf = GridSearchCV(SVC(), params, n_jobs=-1)
scores = cross_val_score(clf, Xt_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.975405925355906 +/- 0.008573177202115626


### Scaled sets with parameter selection (StandardScaler)

In [7]:
sc = StandardScaler()
Xt_train = sc.fit_transform(X_train)
clf = SVC(C=1, gamma=1 / Xt_train.shape[1])
scores = cross_val_score(clf, Xt_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.9754367064255483 +/- 0.01289533962757828


### Scaled sets with parameter selection (StandardScaler)

In [8]:
sc = StandardScaler()
Xt_train = sc.fit_transform(X_train)
params = {"C": np.logspace(-5, 15, num=11, base=2),
          "gamma": np.logspace(3, -15, num=10, base=2)}
clf = GridSearchCV(SVC(), params, n_jobs=-1)
scores = cross_val_score(clf, Xt_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.9666487110427088 +/- 0.011492886386086948
