In [1]:
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC

In [2]:
# Load the svmguide1 train/test splits from svmlight/libsvm-format text files.
X_train, y_train = load_svmlight_file("datasets/svmguide1")
# Pin the test matrix to the training feature count: the width is otherwise
# inferred per file, so a test file that never mentions the highest-indexed
# feature would load with fewer columns than the model was fit on.
X_test, y_test = load_svmlight_file(
    "datasets/svmguide1.t", n_features=X_train.shape[1]
)
# The loader returns sparse matrices; the rest of the notebook works on dense arrays.
X_train = X_train.toarray()
X_test = X_test.toarray()

### Original sets with default parameters (default in libsvm and old default in scikit-learn)

In [3]:
# Baseline on the raw (unscaled) features with the libsvm-style defaults.
# gamma="auto" is scikit-learn's named setting for 1 / n_features, spelled the
# same way as the gamma="scale" default used elsewhere in this notebook.
clf = SVC(C=1, gamma="auto")
clf.fit(X_train, y_train)
# Last expression of the cell: score on the held-out test split.
clf.score(X_test, y_test)

0.66925

### Original sets with new default in scikit-learn

In [4]:
# Same raw (unscaled) features, but with gamma="scale" (the newer scikit-learn
# default, per the section heading). fit() returns the estimator itself, so
# construction and fitting are chained.
clf = SVC(C=1, gamma="scale").fit(X_train, y_train)
# Last expression of the cell: score on the held-out test split.
clf.score(X_test, y_test)

0.9625

### Scaled sets with default parameters (MinMaxScaler)

In [5]:
# Rescale every feature to [-1, 1]. The scaler is fit on the training split
# only and then applied unchanged to the test split.
sc = MinMaxScaler(feature_range=(-1, 1))
Xt_train = sc.fit_transform(X_train)
Xt_test = sc.transform(X_test)
# gamma="auto" is scikit-learn's named setting for 1 / n_features, spelled the
# same way as the gamma="scale" default used elsewhere in this notebook.
clf = SVC(C=1, gamma="auto")
clf.fit(Xt_train, y_train)
# Last expression of the cell: score on the scaled test split.
clf.score(Xt_test, y_test)

0.9615

### Scaled sets with parameter selection (MinMaxScaler)

In [6]:
# Map every feature into [-1, 1] using ranges learned from the training split only.
sc = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)
Xt_train = sc.transform(X_train)
Xt_test = sc.transform(X_test)

# Exponential grid in powers of two:
#   C     = 2^-5, 2^-3, ..., 2^15   (11 values)
#   gamma = 2^3,  2^1,  ..., 2^-15  (10 values)
params = {
    "C": np.logspace(start=-5, stop=15, num=11, base=2),
    "gamma": np.logspace(start=3, stop=-15, num=10, base=2),
}

# Cross-validated search over the full grid, using all available cores.
# fit() returns the search object itself, so it can be chained.
clf = GridSearchCV(estimator=SVC(), param_grid=params, n_jobs=-1).fit(Xt_train, y_train)
# Last expression of the cell: score of the fitted search on the scaled test split.
clf.score(Xt_test, y_test)

0.96925

In [7]:
# Hyper-parameter combination selected by the grid search fitted in the previous cell.
# NOTE(review): the recorded result below has gamma = 8.0 = 2**3, the largest gamma
# in the searched grid, so the optimum may lie beyond it; consider widening the range.
clf.best_params_

{'C': 2.0, 'gamma': 8.0}

### Scaled sets with default parameters (StandardScaler)

In [8]:
# Standardize the features. The scaler is fit on the training split only and
# then applied unchanged to the test split.
sc = StandardScaler()
Xt_train = sc.fit_transform(X_train)
Xt_test = sc.transform(X_test)
# gamma="auto" is scikit-learn's named setting for 1 / n_features, spelled the
# same way as the gamma="scale" default used elsewhere in this notebook.
clf = SVC(C=1, gamma="auto")
clf.fit(Xt_train, y_train)
# Last expression of the cell: score on the standardized test split.
clf.score(Xt_test, y_test)

0.968

### Scaled sets with parameter selection (StandardScaler)

In [9]:
# Standardize the features using statistics learned from the training split only.
sc = StandardScaler().fit(X_train)
Xt_train = sc.transform(X_train)
Xt_test = sc.transform(X_test)

# Exponential grid in powers of two:
#   C     = 2^-5, 2^-3, ..., 2^15   (11 values)
#   gamma = 2^3,  2^1,  ..., 2^-15  (10 values)
params = {
    "C": np.logspace(start=-5, stop=15, num=11, base=2),
    "gamma": np.logspace(start=3, stop=-15, num=10, base=2),
}

# Cross-validated search over the full grid, using all available cores.
# fit() returns the search object itself, so it can be chained.
clf = GridSearchCV(estimator=SVC(), param_grid=params, n_jobs=-1).fit(Xt_train, y_train)
# Last expression of the cell: score of the fitted search on the standardized test split.
clf.score(Xt_test, y_test)

0.96675

In [10]:
# Hyper-parameter combination selected by the grid search fitted in the previous cell.
clf.best_params_

{'C': 2.0, 'gamma': 0.125}