### Nonlinear SVMs using the LinearSVC class
(doesn't know the kernel trick)

In [59]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.svm import LinearSVC

# Split data up. The moons dataset consists of an instance [feature1, feature2] matched with the correct class [0/1/2]

validation_ratio = 0.25
validation_length = int(validation_ratio * len(moons[0]))
train_length = int((1 - validation_ratio) * len(moons[0]))
X_train = moons[0][:validation_length]
y_train = moons[1][:validation_length]
X_validate = moons[0][validation_length:]
y_validate = moons[1][validation_length:]

In [60]:
polynomial_svm_clf = Pipeline([
    ("poly_features", PolynomialFeatures(degree=3)), # convert to polynomial. Increase the degree to fit better (possibly)
    ("scaler", StandardScaler()), # standardize the data
    ("svm_clf", LinearSVC(C=10, loss="hinge"))
    # C defines the tradeoff between hard and soft margins
])

polynomial_svm_clf.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('poly_features', PolynomialFeatures(degree=3, include_bias=True, interaction_only=False)), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', LinearSVC(C=10, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0))])

In [64]:
y_pred = polynomial_svm_clf.predict(X_validate)
accuracy_score = np.mean(y_pred == y_validate)
accuracy_score

0.8666666666666667

### Nonlinear SVMs using the SVC class with Polynomial kernel
(preferred b/c SVC knows to use the kernel trick to save space)  
  
For an instance x_i, it will compute (x_i)^2, ..., (x_i)^n for a polynomial of degree n  
These new values will be used as **new features = adding more dimensions in hopes that the data will be more linearly separable.**  
  
When using the kernel trick, it doesn't have to store each newly computed feature. It can just compute as it goes along based on some mathematical transformation properties not outlined here.

In [71]:
from sklearn.svm import SVC

poly_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="poly", degree=3, coef0=1, C=5)) 
    # coef=1 scales the data, C defines the tradeoff between hard and soft margins
])

poly_kernel_svm_clf.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', SVC(C=5, cache_size=200, class_weight=None, coef0=1,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [72]:
y_pred = polynomial_svm_clf.predict(X_validate)
accuracy_score = np.mean(y_pred == y_validate)
accuracy_score

0.8666666666666667

### Nonlinear SVMs using the SVC class with Gaussian RBF kernel

Recall:  
The Gaussian RBF kernel randomly picks N points (corresponds to degree=N)  
**For each point, it calculates the distance of all data points from that point (aka how similiar each piece of data is to that point) through a normal-esque distribution**  
Then, you have N new features, and you can use this N-dimensional data to try and linearly separate the data!

In [73]:
poly_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="rbf", degree=3, gamma=5, C=0.001))
    # gamma defines how narrow the normal distribution will be, C defines the tradeoff between hard and soft margins
])

poly_kernel_svm_clf.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', SVC(C=0.001, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=5, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [74]:
y_pred = polynomial_svm_clf.predict(X_validate)
accuracy_score = np.mean(y_pred == y_validate)
accuracy_score

0.8666666666666667