In [2]:
## Ch5 SVM
## 5.1 Linear SVM
# 5.1.1 Soft margin svm

# svms margins are sensitive to feature scales, you should scale the features before
# applying svm

# Soft margin svm can handle datasets that is not linearly separable
# And is very sensitive to outliers (consider if one point lies in the other side with different label)
# in Scikit-Learn's SVM you can control the C parameter
# The higher C values is, there are fewer violations
# If your SVM is overfitting, try reducing C
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]
y = (iris["target"] == 2).astype(np.float64)

svm_clf = Pipeline([
    ("Scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge", random_state=42)),
])

svm_clf.fit(X, y)

svm_clf.predict([[5.5, 1.7]])

array([1.])

In [4]:
# 4.2 Nonlinear SVM
# Involve polynomial features to make your data linearly separable
# You can use sklearn's PolynomialFeatures
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

polynomial_svm_clf = Pipeline(
[
    ("poly_features", PolynomialFeatures()),
    ("scaler", StandardScaler()),
    ("svm_clf", LinearSVC(C=10, loss="hinge")),
])

polynomial_svm_clf.fit(X, y)


Pipeline(memory=None,
     steps=[('poly_features', PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', LinearSVC(C=10, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0))])

In [5]:
polynomial_svm_clf.predict([[5.5, 1.7]])

array([1.])

In [6]:
# 5.2.1 Polynomial Kernel
# Adding polynomial features is widely used, not only in SVMs
# Kernel trick, apply it as if you involved many polynomial features
# This trick is implemented with SVC class

# coef0 controls how much the model is controlled by high degree features versus
# low degree features
from sklearn.svm import SVC
poly_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="poly", degree=3, coef0=1, C=5)),
])

poly_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', SVC(C=5, cache_size=200, class_weight=None, coef0=1,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [7]:
poly_kernel_svm_clf.predict([[5.5, 1.7]])

array([1.])

In [8]:
# 5.2.2 Adding Similarity features
# 5.2.3 RBF kernel
# there are other kernels like the Gaussian RBF kernels


In [None]:
# 5.3 SVM regression
# LinearSVR and SVR