In [1]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC

In [2]:
X_train, y_train = load_wine(return_X_y=True)

### Original sets with default parameters (default in libsvm and old default in scikit-learn)

In [3]:
clf = SVC(C=1, gamma=1 / X_train.shape[1])
scores = cross_val_score(clf, X_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.42772983067100717 +/- 0.03057740632767265


### Original sets with new default in scikit-learn

In [4]:
clf = SVC(C=1, gamma="scale")
scores = cross_val_score(clf, X_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.6639228303934186 +/- 0.038893309989821455


### Scaled sets with default parameters (MinMaxScaler)

In [5]:
sc = MinMaxScaler(feature_range=(-1, 1))
Xt_train = sc.fit_transform(X_train)
clf = SVC(C=1, gamma=1 / Xt_train.shape[1])
scores = cross_val_score(clf, Xt_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.9667903197314962 +/- 0.026336245636965735


### Scaled sets with parameter selection (MinMaxScaler)

In [6]:
sc = MinMaxScaler(feature_range=(-1, 1))
Xt_train = sc.fit_transform(X_train)
params = {"C": np.logspace(-5, 15, num=11, base=2),
          "gamma": np.logspace(3, -15, num=10, base=2)}
clf = GridSearchCV(SVC(), params, n_jobs=-1)
scores = cross_val_score(clf, Xt_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.9668082368082368 +/- 0.020433924061513193


### Scaled sets with parameter selection (StandardScaler)

In [7]:
sc = StandardScaler()
Xt_train = sc.fit_transform(X_train)
clf = SVC(C=1, gamma=1 / Xt_train.shape[1])
scores = cross_val_score(clf, Xt_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.9833333333333334 +/- 0.022222222222222233


### Scaled sets with parameter selection (StandardScaler)

In [8]:
sc = StandardScaler()
Xt_train = sc.fit_transform(X_train)
params = {"C": np.logspace(-5, 15, num=11, base=2),
          "gamma": np.logspace(3, -15, num=10, base=2)}
clf = GridSearchCV(SVC(), params, n_jobs=-1)
scores = cross_val_score(clf, Xt_train, y_train)
print(np.mean(scores), "+/-", np.std(scores))

0.977601130542307 +/- 0.011226532899013724
