## 1. Vorbereitung


In [1]:
# Optional
%matplotlib widget

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

import seaborn as sns; sns.set()

from sklearn.datasets import make_blobs
from sklearn.svm import SVC  # (S)upport (V)ector (C)lassifier

import utils_svm

In [18]:
# Create a linearly separable dataset: 50 two-dimensional points in 2 clusters
X, y = make_blobs(
    n_samples=50,
    centers=2,
    cluster_std=0.70,
    random_state=4,
)

In [19]:
# Inspect the dimensions of the feature matrix (rows = samples, columns = features)
X.shape

(50, 2)

In [20]:
# Inspect the label vector: one class label per sample
y

array([0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 0])

In [24]:
# Visualize the dataset, colored by class label
plt.figure(figsize=(8, 6))
# Trailing semicolon keeps the PathCollection repr out of the cell output
plt.scatter(X[:, 0], X[:, 1], c=y, cmap="rainbow");

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7fc427509b80>

## 2. Motivation

### 2.1 Verschiedene Entscheidungsgrenzen

In [46]:
# Toggle: when True, an extra red 'x' marker is drawn at (10.3, 3.1)
new_data = True

# x-values at which the candidate lines are evaluated
xx = np.linspace(7.5, 11.5)

plt.close("all")
plt.figure(figsize=(10, 8))
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')

def line(x, m, b):
    """Evaluate the straight line with slope ``m`` and intercept ``b`` at ``x``."""
    return m * x + b

# Three candidate decision boundaries, given as (slope, intercept) pairs
for m, b in [(0.05, 2.1), (0.55, -2.2), (-0.2, 4.5)]:
    plt.plot(xx, line(xx, m, b), '-k')

plt.xlim(7.5, 11.5)
if new_data:
    plt.plot([10.3], [3.1], 'x', color='red', markeredgewidth=2, markersize=10);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### 2.2. Margin

In [52]:
# x-values at which the candidate lines are evaluated
xx = np.linspace(7.5, 11.5)

plt.figure(figsize=(10, 8))
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')

# Each tuple is (slope m, intercept b, vertical half-width d of the shaded band)
for m, b, d in [(0.05, 2.1, 0.33), (0.55, -2.2, 0.27), (-0.2, 4.5, 0.33)]:
    yy = m * xx + b
    plt.plot(xx, yy, '-k')
    # Shade the band yy - d .. yy + d around the line to illustrate its margin
    plt.fill_between(xx, yy - d, yy + d, edgecolor='none',
                     color='grey', alpha=0.2)

    
plt.xlim(7.5, 11.5);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### 2.3. Modell trainieren

In [53]:
# Instantiate the SVC; pay attention to the argument C.
# NOTE(review): the very large C presumably approximates a hard margin
# (section 4 varies C to obtain a soft margin) - confirm against the SVC docs.
svc = SVC(
    C=1000000.0,
    kernel="linear"
)


# Train the model
svc.fit(X, y)

# Visualize the fit (decision boundary, margin and support vectors)
plt.close("all")

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap="rainbow")
# Helper from the local utils_svm module; called without an axes argument,
# so it presumably draws onto the current axes - TODO confirm.
utils_svm.plot_svc_decision_function(svc)



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [25]:
# Optional: visualize the fit interactively
# (the saved output shows slider widgets, e.g. one labeled 'w1')

plt.close("all")
utils_svm.get_interactive_svc(X, y)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

interactive(children=(FloatSlider(value=0.1, description='w1', max=2.0, min=-2.0, step=0.01), FloatSlider(valu…

### 2.4 Anzahl der Datenpunkte variieren

In [54]:
def plot_svm(N=10, ax=None):
    """Fit a linear SVC on the first ``N`` points of a fixed blob dataset and plot it.

    The dataset is always generated with 200 samples and a fixed seed and then
    truncated to its first ``N`` rows, so calls with a larger ``N`` extend the
    data used by calls with a smaller ``N`` instead of replacing it.

    Parameters
    ----------
    N : int, default 10
        Number of leading samples to keep. Values above 200 use all 200
        generated samples.
    ax : matplotlib axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``) is used.
    """
    X, y = make_blobs(n_samples=200, centers=2,
                      random_state=0, cluster_std=0.60)
    X = X[:N]
    y = y[:N]
    model = SVC(kernel='linear', C=1E10)
    model.fit(X, y)

    # Explicit None check: only fall back to the current axes when no axes was
    # passed, instead of relying on the truthiness of the passed object.
    if ax is None:
        ax = plt.gca()
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')
    ax.set_xlim(-1, 4)
    ax.set_ylim(-1, 6)
    utils_svm.plot_svc_decision_function(model, ax)

In [55]:
# Fit and plot using the first 100 of the 200 generated points
plt.figure(figsize=(6, 4))
plot_svm(N=100)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [56]:
# Fit and plot using all 200 generated points
plt.figure(figsize=(6, 4))
plot_svm(N=200)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## 3. Nichtlinearität und Kernels

### 3.1. Motivation

In [57]:
# Create a dataset that is not linearly separable
from sklearn.datasets import make_circles
# Fixed random_state so the noisy samples are identical after every kernel
# restart, like the make_blobs datasets in this notebook.
X_circles, y_circles = make_circles(100, factor=.1, noise=.1, random_state=0)

# Visualize
plt.close("all")
plt.figure(figsize=(8, 6))
# Trailing semicolon keeps the PathCollection repr out of the cell output
plt.scatter(X_circles[:, 0], X_circles[:, 1], c=y_circles, cmap="rainbow");

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7fc425003a30>

In [58]:
# Instantiate and train the model: same linear-kernel configuration as for the
# blob data, now applied to the circles dataset.
# NOTE: this rebinds the name `svc` that section 2.3 used for the blob model.
svc = SVC(
    C=1000000.0,
    kernel="linear"
)

svc.fit(X_circles, y_circles)

# Visualize the fit
plt.close("all")
plt.figure(figsize=(8, 6))
plt.scatter(X_circles[:, 0], X_circles[:, 1], c=y_circles, cmap="rainbow")
utils_svm.plot_svc_decision_function(svc)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### 3.2. Zur Illustration: manuelle Feature Expansion

In [60]:
# Steps in this cell:
#   1. define a feature-expansion function (modeled on the RBF kernel)
#   2. apply the feature expansion
#   3. visualize the result

def func_z(X, gamma=0.1):
    """Map each row of ``X`` to ``exp(-gamma * ||row||^2)``.

    Feature expansion modeled on the RBF kernel: the squared entries of every
    row are summed and passed through a decaying exponential.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Input points, one per row.
    gamma : float, default 0.1
        Decay rate applied to the squared row norm.

    Returns
    -------
    Array of shape (n_samples,) holding one expanded feature per row.
    """
    squared_norm = (X * X).sum(axis=1)
    return np.exp(-gamma * squared_norm)

# Feature expansion: one additional feature z per sample
z = func_z(X_circles)

# Expanded feature matrix with columns (x1, x2, z); not used by the plot below
X_new = np.column_stack((X_circles, z))

# NOTE(review): this import is presumably only needed so that older Matplotlib
# releases register the "3d" projection - confirm before removing.
from mpl_toolkits import mplot3d

plt.figure(figsize=(10, 8))
ax3d = plt.axes(projection="3d")

# Original two features on the horizontal axes, expanded feature on the z axis
ax3d.scatter(
    X_circles[:, 0],
    X_circles[:, 1],
    z,
    c=y_circles,
    cmap="rainbow"
)

ax3d.set_xlabel("x1")
ax3d.set_ylabel("x2")
# Trailing semicolon keeps the Text(...) repr out of the cell output
ax3d.set_zlabel("z (Feature Expansion)");

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'z (Feature Expansion)')

### 3.3. SVM mit Kernels

In [61]:
# Instantiate and train the model: RBF kernel instead of the linear kernel
svc_rbf = SVC(
    C=1000000.0,
    kernel="rbf",  # (R)adial (B)asis (F)unctions
    gamma="scale"
)

svc_rbf.fit(X_circles, y_circles)

# Visualize the fit
plt.close("all")
plt.figure(figsize=(8, 6))
plt.scatter(X_circles[:, 0], X_circles[:, 1], c=y_circles, cmap="rainbow")
utils_svm.plot_svc_decision_function(svc_rbf)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [62]:
# Instantiate and train the model: same RBF setup as the previous cell, but
# with an explicit gamma value (this rebinds the name `svc_rbf`)
svc_rbf = SVC(
    C=1000000.0,
    kernel="rbf",  # (R)adial (B)asis (F)unctions
    gamma=10.0,  # gamma controls the flexibility of the expansion
)

svc_rbf.fit(X_circles, y_circles)

# Visualize the fit
plt.close("all")
plt.figure(figsize=(8, 6))
plt.scatter(X_circles[:, 0], X_circles[:, 1], c=y_circles, cmap="rainbow")
utils_svm.plot_svc_decision_function(svc_rbf)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## 4. Hard Margin vs Soft Margin

Der Parameter `C` steuert, wie stark Verletzungen der Margin bestraft werden. Mit einem kleineren `C` erhält man anstelle eines Hard-Margin-Klassifikators einen Soft-Margin-Klassifikator, das heißt einen Klassifikator mit durchlässiger Margin, der einzelne Datenpunkte innerhalb der Margin toleriert und dadurch zum Teil besser generalisiert.

In [64]:
# Create the dataset: 100 points in 2 clusters with a larger spread
# (cluster_std=1.2) than the earlier blob datasets
X2, y2 = make_blobs(
    n_samples=100,
    centers=2,
    cluster_std=1.2,
    random_state=0,
)

In [65]:
# Instantiate and train the model, then visualize the fit.
# Unlike the earlier cells (C=1000000.0), a small C is used here.
svc_soft = SVC(
    kernel="rbf",
    C=0.1  # controls overfitting/underfitting
)
svc_soft.fit(X2, y2)


plt.figure(figsize=(8, 6))
plt.scatter(X2[:, 0], X2[:, 1], c=y2, cmap="rainbow")
utils_svm.plot_svc_decision_function(svc_soft)



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …