<a href="https://colab.research.google.com/github/lmassaron/ml4dummies_3ed/blob/main/ML4D3E_13_going_beyond_the_basics_with_support_vector_machines.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.datasets import load_digits

# Fetch scikit-learn's bundled digits dataset and pull out the feature
# matrix (X) and the label vector (y) used by every later cell.
digits = load_digits()
X = digits.data
y = digits.target

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# Hold out 20% of the samples as a test set; the fixed random_state keeps
# the split identical across re-runs of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

# Baseline model: rescale every feature into [-1, 1], then fit an SVC with
# its default kernel and gamma='auto'. make_pipeline names the steps
# automatically, so the classifier is addressable later as "svc".
svm = make_pipeline(
    MinMaxScaler(feature_range=(-1, 1)),
    SVC(gamma='auto'),
)

In [4]:
# 10-fold cross-validation on the training split gives one accuracy per fold.
cv_acc = cross_val_score(svm, X_train, y_train, cv=10)

# Fit on the full training split, then score once on the held-out test set.
svm.fit(X_train, y_train)
test_acc = svm.score(X_test, y_test)

# Report the mean fold accuracy alongside the test accuracy.
print(f"CV accuracy: {cv_acc.mean():0.3f}")
print(f"Test accuracy: {test_acc:0.3f}")

CV accuracy: 0.985
Test accuracy: 0.981


In [5]:
from sklearn.model_selection import GridSearchCV

# Two sub-grids, one per kernel: the linear kernel only tunes C, while the
# RBF kernel tunes C and gamma together. Keys use the "svc__" prefix to
# reach the SVC step inside the pipeline.
search_space = [
    {
        "svc__kernel": ["linear"],
        "svc__C": np.logspace(-3, 3, 7),
    },
    {
        "svc__kernel": ["rbf"],
        "svc__C": np.logspace(-3, 3, 7),
        "svc__gamma": np.logspace(-3, 2, 6),
    },
]

# Exhaustive 10-fold search over both sub-grids. refit=True retrains the
# best configuration on all of X_train so the search object can be scored
# directly afterwards.
gridsearch = GridSearchCV(
    estimator=svm,
    param_grid=search_space,
    cv=10,
    refit=True,
    n_jobs=-2,
)
gridsearch.fit(X_train, y_train)
print(f"Best parameter: {gridsearch.best_params_}")

# best_score_ is already the mean cross-validated score of the winning
# configuration, so it is printed as-is.
cv_acc = gridsearch.best_score_
test_acc = gridsearch.score(X_test, y_test)
print(f"CV accuracy: {cv_acc:0.3f}")
print(f"Test accuracy: {test_acc:0.3f}")

Best parameter: {'svc__C': np.float64(10.0), 'svc__gamma': np.float64(0.01), 'svc__kernel': 'rbf'}
CV accuracy: 0.989
Test accuracy: 0.986
