In [1]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC

In [2]:
# Load the digits dataset as a (features, labels) pair; every experiment
# below cross-validates on this single set.
X_train, y_train = load_digits(return_X_y=True)

### Original sets with default parameters (default in libsvm and old default in scikit-learn)

In [3]:
# Baseline: SVC on the raw (unscaled) features with the classic libsvm
# defaults. gamma="auto" is scikit-learn's built-in spelling of
# 1 / n_features, so the value no longer has to be derived by hand from the
# data's shape (and cannot drift if the feature matrix changes).
clf = SVC(C=1, gamma="auto")
scores = cross_val_score(clf, X_train, y_train)
# Report the mean cross-validation score and its spread across folds.
print(np.mean(scores), "+/-", np.std(scores))

0.44878680061604637 +/- 0.03720832564364516


### Original sets with new default in scikit-learn

In [4]:
# Same raw features as before, but with gamma="scale" instead of a fixed
# 1 / n_features value.
clf = SVC(gamma="scale", C=1)
scores = cross_val_score(estimator=clf, X=X_train, y=y_train)
# Mean cross-validation score and its standard deviation across folds.
print(scores.mean(), "+/-", scores.std())

0.9638434678923486 +/- 0.019490506896026296


### Scaled sets with default parameters (MinMaxScaler)

In [5]:
# Rescale every feature to the [-1, 1] range before fitting.
# NOTE(review): the scaler is fit on the full dataset before the CV split,
# so each validation fold contributes to the scaling statistics. Wrap the
# scaler and SVC in a Pipeline if leak-free estimates are required.
sc = MinMaxScaler(feature_range=(-1, 1))
Xt_train = sc.fit_transform(X_train)
# gamma="auto" is scikit-learn's built-in spelling of 1 / n_features; scaling
# does not change the number of features, so this matches the hand-computed
# 1 / Xt_train.shape[1] exactly.
clf = SVC(C=1, gamma="auto")
scores = cross_val_score(clf, Xt_train, y_train)
# Report the mean cross-validation score and its spread across folds.
print(np.mean(scores), "+/-", np.std(scores))

0.9571885975876876 +/- 0.022629361007102642


### Scaled sets with parameter selection (MinMaxScaler)

In [6]:
# Rescale every feature to the [-1, 1] range before fitting.
# NOTE(review): the scaler is fit on the full dataset before any CV split.
sc = MinMaxScaler(feature_range=(-1, 1))
Xt_train = sc.fit_transform(X_train)
# Log2-spaced search grid: C spans 2^-5 .. 2^15 (11 values) and gamma spans
# 2^3 .. 2^-15 (10 values).
params = dict(
    C=np.logspace(start=-5, stop=15, num=11, base=2),
    gamma=np.logspace(start=3, stop=-15, num=10, base=2),
)
# Nested cross-validation: each outer fold of cross_val_score runs its own
# grid search on that fold's training portion.
clf = GridSearchCV(estimator=SVC(), param_grid=params, n_jobs=-1)
scores = cross_val_score(estimator=clf, X=Xt_train, y=y_train)
# Mean cross-validation score and its standard deviation across outer folds.
print(scores.mean(), "+/-", scores.std())

0.9732845504434204 +/- 0.016191887243654977


### Scaled sets with default parameters (StandardScaler)

In [7]:
# Standardize the features with StandardScaler before fitting.
# NOTE(review): the scaler is fit on the full dataset before the CV split,
# so each validation fold contributes to the scaling statistics. Wrap the
# scaler and SVC in a Pipeline if leak-free estimates are required.
sc = StandardScaler()
Xt_train = sc.fit_transform(X_train)
# gamma="auto" is scikit-learn's built-in spelling of 1 / n_features; scaling
# does not change the number of features, so this matches the hand-computed
# 1 / Xt_train.shape[1] exactly.
clf = SVC(C=1, gamma="auto")
scores = cross_val_score(clf, Xt_train, y_train)
# Report the mean cross-validation score and its spread across folds.
print(np.mean(scores), "+/-", np.std(scores))

0.9487921839909685 +/- 0.012240753601270521


### Scaled sets with parameter selection (StandardScaler)

In [8]:
# Standardize the features with StandardScaler before fitting.
# NOTE(review): the scaler is fit on the full dataset before any CV split.
sc = StandardScaler()
Xt_train = sc.fit_transform(X_train)
# Log2-spaced search grid: C spans 2^-5 .. 2^15 (11 values) and gamma spans
# 2^3 .. 2^-15 (10 values).
params = dict(
    C=np.logspace(start=-5, stop=15, num=11, base=2),
    gamma=np.logspace(start=3, stop=-15, num=10, base=2),
)
# Nested cross-validation: each outer fold of cross_val_score runs its own
# grid search on that fold's training portion.
clf = GridSearchCV(estimator=SVC(), param_grid=params, n_jobs=-1)
scores = cross_val_score(estimator=clf, X=Xt_train, y=y_train)
# Mean cross-validation score and its standard deviation across outer folds.
print(scores.mean(), "+/-", scores.std())

0.9476620675782994 +/- 0.017168617645177003
