[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ncerdan/HandsOnML/blob/master/Ch_05_SVM.ipynb)

# Linear SVM Classification

## Soft Margin Classification

In [None]:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Build a binary classification problem from the iris data set:
# two petal measurements as features, "is Iris virginica?" as the target.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]                     # columns 2 and 3: petal length, petal width
y = (iris.target == 2).astype(np.float64)    # 1.0 for Iris virginica (class 2), 0.0 otherwise

In [None]:
# Linear soft-margin SVM classifier trained on standardized inputs.
#   scaler: LinearSVC regularizes the bias term, so the features should be
#           centred about their mean first; StandardScaler() takes care of that.
#   C:      sets the penalty for margin violations (a smaller C tolerates
#           more violations, i.e. regularizes more strongly).
#   loss:   selects the loss function that LinearSVC optimizes.
svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(loss="hinge", C=1)),
])

svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('linear_svc',
                 LinearSVC(C=1, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [None]:
# Classify one new sample with the fitted pipeline.
# Feature order matches the training data: petal length, petal width.
sample = [[5.5, 1.7]]
svm_clf.predict(sample)

array([1.])

In [None]:
# Alternative formulation: the generic SVC class with a linear kernel.
#   NOTE: LinearSVC is much faster than SVC(kernel='linear'), so this is
#   shown for comparison only.
from sklearn.svm import SVC

other_svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("other_linear_svc", SVC(C=1, kernel="linear")),
])
other_svm_clf.fit(X, y)

# Same query point as used with the LinearSVC pipeline.
other_svm_clf.predict([[5.5, 1.7]])

array([1])

In [None]:
# Illustrative snippet only: it is kept inside a bare string literal, so none
# of it is executed; the notebook simply echoes the string as this cell's output.
# NOTE(review): `m` and `C` are not defined anywhere in this notebook --
# presumably m is the number of training instances and C the SVM
# regularization hyperparameter. Define both before turning this into live code.
""" could also use SGD as follows:
from sklearn.linear_model import SGDClassifier

sgd_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('linear_sgd', SGDClassifier(loss='hinge', alpha=1/(m*C)))
])

sgd_clf.fit(X, y)
sgd_clf.predict([[5.5, 1.7]])
"""

" could also use SGD as follows:\nfrom sklearn.linear_model import SGDClassifier\n\nsgd_clf = Pipeline([\n    ('scaler', StandardScaler()),\n    ('linear_sgd', SGDClassifier(loss='hinge', alpha=1/(m*C)))\n])\n\nsgd_clf.fit(X, y)\nsgd_clf.predict([[5.5, 1.7]])\n"

# Nonlinear SVM Classification

In [None]:
# Handle nonlinear data with a linear model: expand the inputs with
# polynomial features, standardize them, then fit a LinearSVC.
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# random_state pins the generated points so that Restart & Run All reproduces
# the same data set (and therefore the same fitted models) on every run.
# NOTE: this rebinds X and y to the two-moons data.
X, y = make_moons(n_samples=100, noise=0.15, random_state=42)

polynomial_svm_clf = Pipeline([
    ('poly_features', PolynomialFeatures(degree=3)),   # adds terms up to degree 3
    ('scaler', StandardScaler()),
    ('svm_clf', LinearSVC(C=10, loss='hinge'))
])

polynomial_svm_clf.fit(X, y)



Pipeline(memory=None,
         steps=[('poly_features',
                 PolynomialFeatures(degree=3, include_bias=True,
                                    interaction_only=False, order='C')),
                ('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 LinearSVC(C=10, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

## Polynomial Kernel

In [None]:
# Explicit polynomial features work, but become expensive at larger degrees.
# The kernel trick gives the same effect without actually expanding all of
# the features; it is available through the SVC class.
#   coef0: controls how much the model is influenced by high-degree versus
#          low-degree terms.
from sklearn.svm import SVC

poly_kernel_svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(C=5, kernel="poly", degree=3, coef0=1)),
])

poly_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 SVC(C=5, break_ties=False, cache_size=200, class_weight=None,
                     coef0=1, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='poly', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

## Gaussian RBF Kernel

In [None]:
# SVC with the Gaussian RBF similarity function, computed via the kernel trick.
#   gamma: controls the width of the bell curve (large gamma = narrower curve),
#          so it can be used to regularize:
#              large gamma --> more fitting
#              small gamma --> less fitting
rbf_kernel_svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('svm_clf', SVC(kernel='rbf', gamma=5, C=0.001))
])

# Backward-compatible alias: the pipeline was originally bound to this
# misspelled name, so keep it available for any code that still uses it.
rbf_kernerl_svm_clf = rbf_kernel_svm_clf

rbf_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 SVC(C=0.001, break_ties=False, cache_size=200,
                     class_weight=None, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma=5,
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

# SVM Regression

In [None]:
# Linear SVM regression.
#   epsilon: half-width of the tube around the prediction inside which
#            errors are not penalized.
from sklearn.svm import LinearSVR

# Regression needs a continuous target. The X, y in scope hold the two-moons
# classification data (labels 0/1), for which epsilon=1.5 already covers every
# target, so a dedicated noisy linear data set is generated here instead.
# Separate names are used so X and y stay untouched for other cells.
lin_rng = np.random.RandomState(42)          # fixed seed for reproducible data
X_lin = 2 * lin_rng.rand(50, 1)
y_lin = (4 + 3 * X_lin + lin_rng.randn(50, 1)).ravel()

svm_reg = LinearSVR(epsilon=1.5)
svm_reg.fit(X_lin, y_lin)

LinearSVR(C=1.0, dual=True, epsilon=1.5, fit_intercept=True,
          intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
          random_state=None, tol=0.0001, verbose=0)

In [None]:
# Kernelized SVM regression for nonlinear data (2nd-degree polynomial kernel).
from sklearn.svm import SVR

# Regression needs a continuous target. The X, y in scope hold the two-moons
# classification data (labels 0/1), so a noisy quadratic data set that matches
# the degree-2 kernel is generated here instead. Separate names are used so
# X and y stay untouched for other cells.
quad_rng = np.random.RandomState(42)         # fixed seed for reproducible data
X_quad = 2 * quad_rng.rand(100, 1) - 1
y_quad = (0.2 + 0.1 * X_quad + 0.5 * X_quad ** 2 + quad_rng.randn(100, 1) / 10).ravel()

svm_poly_reg = SVR(kernel='poly', degree=2, C=100, epsilon=0.1)
svm_poly_reg.fit(X_quad, y_quad)

SVR(C=100, cache_size=200, coef0=0.0, degree=2, epsilon=0.1, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

# Exercises

TODO