# Support Vector Machines

Notebook accompanying the article on Support Vector Machines on the [Floryn tech blog](http://floryn.tech/).

In [1]:
import pandas as pd
import numpy as np

# Imports from Pythom repository
# Note: requires working directory or Pythonpath set to /pythom/10_support_vector_machines
from template import go

import datasets
from svm_2d import *
from svm_3d import *
from svm_plots import *

## Hard-margin SVM

Perfectly separable dataset

In [2]:
# Data for the hard-margin section: the frame returned by the project's
# datasets.linear_separable_df(); later cells read its "length", "width" and "name" columns.
df_hard_margin = datasets.linear_separable_df()

In [3]:
# Figure 1: the hard-margin observations drawn on a freshly titled figure
fig = add_observations(
    get_figure(title="A bunch of 'random' signups which Risk approved or rejected..."),
    df_hard_margin,
)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig1.png")
fig.show()

### Drawing a random line for prediction


In [4]:
# Figure 2: observations plus one hand-picked line through (4, 2.25) and (8, 4.75)
fig = add_observations(
    get_figure(title="A line is a very simple way to predict stuff"),
    df_hard_margin,
)
fig = add_line(fig, x=[4, 8], y=[2.25, 4.75])

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig2.png")
fig.show()


In [5]:
# Figure 3: two hand-picked candidate lines and one extra white point at (7, 4.3)
fig = add_observations(
    get_figure(title="Drawing the proper line matters when classifying new observations..."),
    df_hard_margin,
)

# First line is drawn by the inner call, second line by the outer call
fig = add_line(
    add_line(fig, x=[4, 8], y=[2.25, 4.75]),
    x=[4, 8],
    y=[2, 5.25],
)
fig = add_point(fig, x=[7], y=[4.3], color="white")

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig3.png")
fig.show()


### Model

In [6]:
# Fit the hard-margin classifier on the two features and print its formula
clf_hard_margin = fit_classifier(
    df_hard_margin,
    features=["length", "width"],
    kernel="linear",
    C=1000,
    shrinking=False,
)
print_formula(clf_hard_margin)

# Figure 4: observations first, then hyperplane and parallels, then contours
fig = add_observations(
    get_figure(title="One hyperplane to classify them all"),
    df_hard_margin,
)
fig = add_parallels(
    add_hyperplane(fig, clf_hard_margin),
    clf_hard_margin,
    space=False,
)
fig = add_contours(fig, clf_hard_margin)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig4.png")
fig.show()


y = 23.13 + -8.57*x1 + 7.14*x2
Margin: 0.08967360461040068


### Predicting

In [7]:
# Fit the hard-margin classifier and score the new observation at (7, 4.3)
clf_hard_margin = fit_classifier(
    df_hard_margin,
    features=["length", "width"],
    kernel="linear",
    C=1000,
    shrinking=False,
)
score = predict_score(clf_hard_margin, 7, 4.3)

# Figure: observations, hyperplane and contours, plus the new point in white
fig = add_observations(
    get_figure(title="Predicting the label of a new point is easy now<br><sup></sup>"),
    df_hard_margin,
)
fig = add_contours(
    add_hyperplane(fig, clf_hard_margin),
    clf_hard_margin,
)
fig = add_point(fig, x=[7], y=[4.3], color="white")

# Label the new point with the fitted formula (intercept and both
# coefficients, two decimals each) and the resulting score
fig.add_annotation(
    xref="x",
    yref="y",
    x=7,
    y=4.3,
    text=(
        f"{clf_hard_margin.intercept_[0]:.2f}"
        f" + {clf_hard_margin.coef_[0][0]:.2f}*problems"
        f" + {clf_hard_margin.coef_[0][1]:.2f}*money"
        f" = {score:.2f}"
    ),
    arrowhead=0,
    font={"size": 14, "color": "white"},
    textangle=-35,
)

fig.show()


### Support Vectors

In [8]:
# Fit the hard-margin classifier and print its formula
clf_hard_margin = fit_classifier(
    df_hard_margin,
    features=["length", "width"],
    kernel="linear",
    C=1000,
    shrinking=False,
)
print_formula(clf_hard_margin)

# Figure 5: same layers as the hyperplane figure, with the support vectors annotated on top
fig = add_observations(
    get_figure(title="Only a few observations determine the best line (in a Hard-Margin SVM)"),
    df_hard_margin,
)
fig = add_parallels(
    add_hyperplane(fig, clf_hard_margin),
    clf_hard_margin,
    space=False,
)
fig = annotate_support_vectors(
    add_contours(fig, clf_hard_margin),
    clf_hard_margin,
)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig5.png")
fig.show()


y = 23.13 + -8.57*x1 + 7.14*x2
Margin: 0.08967360461040068


### Updating one support vector changes the hyperplane

In [9]:
# Move one observation to see how it affects the hyperplane: the point at
# (length=4.9, width=2.5) is relocated to width=2 -- it is moved, not dropped.
# np.isclose keeps the lookup robust to float round-off in the data, and the
# mask is built once so the selected and the remaining rows cannot disagree.
is_moved_point = np.isclose(df_hard_margin["length"], 4.9) & np.isclose(
    df_hard_margin["width"], 2.5
)
# Fail loudly rather than silently refitting on unchanged data.
assert is_moved_point.any(), "No observation found at length=4.9, width=2.5"

# .assign returns a new frame, so df_hard_margin itself is left untouched.
point = df_hard_margin.loc[is_moved_point].assign(length=4.9, width=2)

# Untouched observations first, the moved point appended last.
df_hard_margin_update = pd.concat([df_hard_margin.loc[~is_moved_point], point])

# Fit model
# NOTE(review): this fit uses svm.SVC directly with C=100, while the reference
# clf_hard_margin is fitted through fit_classifier(..., C=1000, shrinking=False);
# confirm the different settings are intended for this comparison.
clf_hard_margin_update = svm.SVC(random_state=42, kernel="linear", C=100)
clf_hard_margin_update.fit(
    df_hard_margin_update[["length", "width"]], df_hard_margin_update["name"]
)
print_formula(clf_hard_margin_update)

# Figure 6: previous hyperplane dashed (no legend entry), refitted hyperplane
# solid, with the support vectors of the refitted model annotated
fig = get_figure(title="Updating one support vector point changes the hyperplane")
fig = add_observations(fig, df_hard_margin_update)
fig = add_hyperplane(fig, clf_hard_margin, line_dash="dash", showlegend=False)
fig = add_hyperplane(fig, clf_hard_margin_update)
fig = annotate_support_vectors(fig, clf_hard_margin_update)

fig.write_image("/work/images/svm/fig6.png")
fig.show()


y = -17.32 + 6.32*x1 + -5.26*x2


## Soft Margin

Dataset that is not perfectly separable

In [10]:
# Data for the soft-margin section: the frame returned by the project's
# datasets.non_linear_separable_df(); later cells read its "length", "width" and "name" columns.
df_soft_margin = datasets.non_linear_separable_df()

In [11]:
# Figure 7: the soft-margin observations drawn on a freshly titled figure
fig = add_observations(
    get_figure(title="A bunch of more complex signups..."),
    df_soft_margin,
)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig7.png")
fig.show()


In [12]:
# Fit a linear SVC with penalty C=10 (fit returns the estimator itself) and print its formula
clf_soft_C10 = svm.SVC(random_state=42, kernel="linear", C=10).fit(
    X=df_soft_margin[["length", "width"]],
    y=df_soft_margin["name"],
)
print_formula(clf_soft_C10)

# Figure 8: observations, hyperplane with parallels, and contours on the inverted colorscale
fig = add_observations(
    get_figure(title="Soft-Margin SVM uses a penalty to find a hyperplane that 'divides' them all"),
    df_soft_margin,
)
fig = add_parallels(
    add_hyperplane(fig, clf_soft_C10),
    clf_soft_C10,
)
fig = add_contours(fig, clf_soft_C10, inverse_colorscale=True)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig8.png")
fig.show()


y = -12.36 + 1.60*x1 + 0.80*x2
Margin: 0.5590042067627841


### Lower penalty means a different hyperplane

### The margin means something different now

In [13]:
# Fit a linear SVC with the lower penalty C=0.1 (fit returns the estimator itself) and print its formula
clf_soft_C0_1 = svm.SVC(random_state=42, kernel="linear", C=0.1).fit(
    df_soft_margin[["length", "width"]],
    df_soft_margin["name"],
)
print_formula(clf_soft_C0_1)

# Figure 9: the C=10 hyperplane dashed (no legend entry) under the C=0.1 hyperplane
fig = add_observations(
    get_figure(title="A lower penalty therefore also leads to a different hyperplane"),
    df_soft_margin,
)
fig = add_hyperplane(
    add_hyperplane(fig, clf_soft_C10, line_dash="dash", showlegend=False),
    clf_soft_C0_1,
)
# Contours of the C=0.1 model are added first, then those of the C=10 model
fig = add_contours(
    add_contours(fig, clf_soft_C0_1, inverse_colorscale=True),
    clf_soft_C10,
    inverse_colorscale=True,
)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig9.png")
fig.show()


y = -7.67 + 1.07*x1 + 0.31*x2


In [14]:
# Figure 10: the C=10 soft-margin model with its parallels, contours and annotated support vectors
fig = add_observations(
    get_figure(title="The margin and support vectors mean something different now"),
    df_soft_margin,
)
fig = add_parallels(
    add_hyperplane(fig, clf_soft_C10),
    clf_soft_C10,
)
fig = annotate_support_vectors(
    add_contours(fig, clf_soft_C10, inverse_colorscale=True),
    clf_soft_C10,
)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig10.png")
fig.show()

Margin: 0.5590042067627841


## Kernel trick

### Difficult to separate

In [15]:
# Data for the kernel-trick section: the frame returned by the project's
# datasets.linear_separable_in_3d_df(); later cells read its "length", "width" and "name" columns.
df_kernel_trick = datasets.linear_separable_in_3d_df()

In [16]:
# Fit a linear SVC (C=10) on the 2D features (fit returns the estimator itself) and print its formula
clf_kernel_soft_margin = svm.SVC(random_state=42, kernel="linear", C=10).fit(
    df_kernel_trick[["length", "width"]],
    df_kernel_trick["name"],
)
print_formula(clf_kernel_soft_margin)

# Figure 11: both axes restricted to [-2, 2], hyperplane drawn across the same x span
fig = add_observations(
    get_figure(
        title="This is difficult to separate, even with a Soft-Margin SVM",
        x_range=[-2, 2],
        y_range=[-2, 2],
    ),
    df_kernel_trick,
)
fig = add_hyperplane(fig, clf_kernel_soft_margin, x_start=-2, x_end=2)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig11.png")
fig.show()


y = 0.13 + 1.17*x1 + -0.00*x2


### Mapping to 3D

In [17]:
# Lift a reproducible sample of 40 rows into 3D by adding z = length^2 + width^2
df_kernel3d = df_kernel_trick.sample(40, random_state=43).assign(
    z=lambda d: d["length"].pow(2) + d["width"].pow(2)
)


# Fit a linear SVC (C=1) on all three coordinates; fit returns the estimator itself
clf_with_kernel_soft = svm.SVC(random_state=42, kernel="linear", C=1).fit(
    df_kernel3d[["length", "width", "z"]],
    df_kernel3d["name"],
)

print_formula_3d(clf_with_kernel_soft)


def add_observations_3d(fig, df):
    """Add one 3D marker trace per category of ``df`` to ``fig``.

    Rows are grouped on the "name" column; each group becomes a
    ``go.Scatter3d`` trace using the "length", "width" and "z" columns as
    x, y and z, labelled with the group name and coloured through
    ``MARKER_COLORS[name]``.

    Returns the same ``fig`` object so calls can be chained.
    """
    for label, group in df.groupby("name"):
        trace = go.Scatter3d(
            x=group["length"],
            y=group["width"],
            z=group["z"],
            mode="markers",
            name=label,
            marker_color=MARKER_COLORS[label],
        )
        fig.add_trace(trace)
    return fig


# Figure 12: the lifted observations drawn on a freshly titled 3D figure
fig = add_observations_3d(
    get_figure_3d(title="But with an extra dimension, it becomes separable!"),
    df_kernel3d,
)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig12.png")
fig.show()


y = 4.55 + -0.00*x1 + -0.00*x2 + -5.55*x3


### Hyperplane from 3D to 2D

In [18]:
# Figure 13: all 2D observations on [-2, 2] axes, overlaid with the model fitted on three features
fig = add_observations(
    get_figure(
        title="Reducing the hyperplane from 3D to 2D shows the dataset is separable in 2D",
        x_range=[-2, 2],
        y_range=[-2, 2],
    ),
    df_kernel_trick,
)
fig = add_hyperplane_3d(fig, clf_with_kernel_soft)

# Save the image to disk, then display the figure
fig.write_image("/work/images/svm/fig13.png")
fig.show()


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=7271adde-5589-4741-8f71-4388efe8ac8f' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>