In [1]:
import matplotlib.pyplot as plt
# ^^^ pyforest auto-imports - don't write above this line
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# importing linear support vector classifier
from sklearn.svm import LinearSVC

In [2]:
# Load the iris dataset and keep only the two petal measurements as features.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # columns 2 and 3: petal length, petal width
# Binary target: 1.0 where the class index is 2 (Iris virginica), else 0.0.
y = (iris.target == 2).astype(np.float64)

In [19]:
# Two-step pipeline: standardize the features, then fit a linear SVM
# classifier that uses the hinge loss with C=1.
svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge")),
])

In [20]:
# Fit the scaler and the linear SVM on the petal features and binary target.
svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge'))])

In [23]:
# Classify a single sample given as [petal length, petal width], the same
# feature order used to build X. A result of 1.0 means "Iris virginica".
svm_clf.predict([[5.5, 1.7]])

array([1.])

In [24]:
# Unlike Logistic Regression classifiers, SVM classifiers do not output
# probabilities for each class.

In [25]:
# Instead of using the LinearSVC class, we could use the SVC class with a linear kernel.
# When creating the SVC model, we would write SVC(kernel="linear", C=1). Or we
# could use the SGDClassifier class, with SGDClassifier(loss="hinge", alpha=1/(m*C)),
# where m is the number of training instances. This applies regular Stochastic
# Gradient Descent to train a linear SVM classifier. It does not converge as fast
# as the LinearSVC class, but it can be useful to handle online classification
# tasks or huge datasets that do not fit in memory (out-of-core training).

In [26]:
# The LinearSVC class regularizes the bias term, so you should center
# the training set first by subtracting its mean. This is automatic if
# you scale the data using the StandardScaler. Also make sure you
# set the loss hyperparameter to "hinge", as it is not the default
# value. Finally, for better performance, you should set the dual
# hyperparameter to False, unless there are more features than
# training instances. Caveat: LinearSVC does not support loss="hinge"
# together with dual=False (it raises a ValueError), so the dual=False
# advice only applies with the default loss="squared_hinge".

### NONLINEAR SVM CLASSIFICATION

In [3]:
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

In [4]:
# Generate the "moons" toy dataset (100 samples, noise 0.15). A fixed
# random_state makes the sampled points -- and therefore every model fitted
# on X, y in the cells below -- reproducible across kernel restarts.
X, y = make_moons(n_samples=100, noise=0.15, random_state=42)

# Add polynomial features up to degree 3, standardize them, then fit a
# linear SVM (hinge loss, C=10) on the expanded feature set.
polynomial_svm_clf = Pipeline([
    ("poly_features", PolynomialFeatures(degree=3)),
    ("scaler", StandardScaler()),
    ("svm_clf", LinearSVC(C=10, loss="hinge")),
])

polynomial_svm_clf.fit(X, y)

Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('scaler', StandardScaler()),
                ('svm_clf', LinearSVC(C=10, loss='hinge'))])

### POLYNOMIAL KERNEL

In [8]:
from sklearn.svm import SVC
# Kernelized alternative to explicit polynomial features: standardize the
# inputs, then fit an SVC with a degree-3 polynomial kernel (C=5).
# The coef0 hyperparameter controls how much the model is influenced by
# high-degree polynomials versus low-degree polynomials.
poly_kernel_svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="poly", degree=3, coef0=1, C=5)),
])
poly_kernel_svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('svm_clf', SVC(C=5, coef0=1, kernel='poly'))])

### GAUSSIAN RBF KERNEL

In [9]:
# SVC with a Gaussian RBF kernel on standardized inputs.
# Like C, gamma acts as a regularization hyperparameter for this model.
rbf_kernel_svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="rbf", gamma=5, C=0.001)),
])
rbf_kernel_svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('svm_clf', SVC(C=0.001, gamma=5))])