# Imports

In [None]:
from matplotlib import pylab as plt
import numpy as np
import pandas as pd
from pandas import DataFrame

# Load some example data

In [None]:
from sklearn.datasets import load_iris

In [None]:
# load_iris() hands back a bundle with the measurements, the integer labels
# and the label names; the name `iris` is re-bound to the table further down.
iris = load_iris()

# One column per measured feature ...
df = DataFrame(iris.data, columns=iris.feature_names)
# ... plus the label, once as text ('class') and once as integer code ('target').
df['class'] = iris.target_names[iris.target]
df['target'] = iris.target

# From here on `iris` refers to the DataFrame, not to the raw bundle.
iris = df
# Preview every tenth row of the table.
iris.iloc[::10]

In [None]:
from pandas.plotting import scatter_matrix

# Pairwise scatter plots of the numeric columns, coloured by integer label.
# The trailing semicolon suppresses the notebook's echo of the returned axes.
scatter_matrix(
    iris,
    c=iris['target'],
    figsize=(12, 12),
);

# Binary classification

#### Select two dimensions of the data for better visualization

In [None]:
# Keep only the two features used for all 2-D visualisations below.
X = iris[['sepal width (cm)', 'petal length (cm)']]

# Regular 61 x 61 grid over [1, 7] x [1, 7]; classifiers are evaluated on it
# later to draw their decision regions.
x1 = np.linspace(1, 7, 61)
x2 = np.linspace(1, 7, 61)
X1, X2 = np.meshgrid(x1, x2)
# Flatten the mesh into one (x1, x2) pair per row.
X_grid = np.column_stack((X1.ravel(), X2.ravel()))

#### Make binary target variable

In [None]:
# Boolean target: True for samples whose integer label is 0, False otherwise.
Y_bin = df['target'].eq(0)

In [None]:
# Scatter matrix of the two selected features, coloured by the binary target.
scatter_matrix(
    X,
    c=Y_bin,
    marker='.',
    s=70,
    figsize=(10, 10),
);

#### Use a learning algorithm to find a separating hyperplane

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with default settings on the two selected features.
# fit() returns the estimator itself, so construction and training are chained.
classifier = LogisticRegression().fit(X, Y_bin)

#### Evaluate accuracy (= fraction correctly classified)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy on the training data itself (no held-out set is used here).
accuracy_score(y_true=Y_bin, y_pred=classifier.predict(X))

#### Plot decision function and predictions of actual data

In [None]:
plt.figure(figsize=(6, 6))

# Background: predicted class at every grid point (small dots).
# The classifier was fitted on a DataFrame, so the grid is wrapped in a frame
# with the same column names; passing the bare array makes recent
# scikit-learn versions warn about missing feature names.
plt.scatter(
    X_grid[:, 0], X_grid[:, 1],
    c=classifier.predict(DataFrame(X_grid, columns=X.columns)),
    marker='.', s=1,
)

# Foreground: the actual samples, coloured by their true binary label.
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], marker='d', c=Y_bin)

# Name the axes after the plotted features so the figure is self-explanatory.
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
plt.grid(True)

### Binary linear decision boundary defined by parameter vector $\beta$

Distance to hyperplane:

$$
   d(x) = \frac{\beta \cdot (x-x_0)}{\|\beta\|}
$$

$$
   d(x) = \frac{\beta \cdot x}{\|\beta\|} - \frac{\beta \cdot x_0}{\|\beta\|} = \frac{\beta \cdot x}{\|\beta\|} + \beta_0
$$

Binary decision boundary:

$$
   d(x) \ge 0
$$

# Multi-class classification

In [None]:
# Multi-class target: the integer label column of the table.
Y = df.loc[:, 'target']

In [None]:
# Scatter matrix of the two selected features, coloured by integer label.
scatter_matrix(
    X,
    c=Y,
    marker='.',
    s=90,
    figsize=(7, 7),
);

In [None]:
# Same estimator as in the binary case, now trained on the integer labels.
# fit() returns the estimator itself, so construction and training are chained.
multi = LogisticRegression().fit(X, Y)

# Mean accuracy on the training data.
multi.score(X, Y)

In [None]:
plt.figure(figsize=(10, 10))

# Background: predicted class at every grid point (small dots).
# The model was fitted on a DataFrame, so the grid is wrapped in a frame with
# the same column names; passing the bare array makes recent scikit-learn
# versions warn about missing feature names.
plt.scatter(
    X_grid[:, 0], X_grid[:, 1],
    c=multi.predict(DataFrame(X_grid, columns=X.columns)),
    s=3, marker='.',
)

# Foreground: the actual samples, coloured by their true label.
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=Y, marker='d')

# Name the axes after the plotted features so the figure is self-explanatory.
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
plt.grid(True)

### Typically the problem is split into $m$ binary classification problems

* $m$ ... number of classes
* $d_c$, $c = 1, \ldots, m$, distance functions for each class 
* One versus the rest (OVR, OVA - one versus all)
* for $x$ the class $c^*$ is predicted for which $d_{c^*}(x) \ge d_c(x)$ for all $c \in \{1, \ldots, m\}$

# Maximum margin classifier

#### If the problem is linearly separable

$$
\min_{\beta, \beta_0} \|\beta\|^2
$$

subject to

* $\beta \cdot x_i + \beta_0 \ge 0$ if $y_i =$ positive class
* $\beta \cdot x_i + \beta_0 < 0$ if $y_i =$ negative class

for all $i = 1, \ldots, n$

#### Standard formulation:

$$
\min_{\beta, \beta_0} \frac{1}{2} \|\beta\|^2
$$

subject to 

$(\beta \cdot x_i + \beta_0) y_i \ge 1$

for all $i = 1, \ldots, n$ and where $y_i \in \{-1,+1\}$

#### In general the problem is not linearly separable: relaxation of constraints required

$$
\min_{\beta, \beta_0} \frac{1}{2} \|\beta\|^2 + C \sum_i \xi_i
$$

subject to 

* $(\beta \cdot x_i + \beta_0) y_i \ge 1 - \xi_i$
* $\xi_i \ge 0$
* for all $i = 1, \ldots, n$
* where $y_i \in \{-1,+1\}$

##### The larger $C$ the more the slack variables $\xi_i$ are pushed to zero.

# Train a support vector classifier (SVC) for the iris data

In [None]:
# Re-select the two features and the integer labels so that this section can
# be run on its own once the table has been loaded.
X = iris.loc[:, ['sepal width (cm)', 'petal length (cm)']]
Y = iris['target']

### Linear SVC

In [None]:
from sklearn.svm import SVC

# Support vector classifier with a linear kernel; fit() returns the estimator.
svc = SVC(kernel='linear').fit(X, Y)
# Echo the fitted estimator as the cell output.
svc

In [None]:
# Mean accuracy of the linear SVC on the training data.
svc.score(X=X, y=Y)

In [None]:
plt.figure(figsize=(10, 10))

# Background: predicted class at every grid point (small dots).
# The SVC was fitted on a DataFrame, so the grid is wrapped in a frame with
# the same column names; passing the bare array makes recent scikit-learn
# versions warn about missing feature names.
plt.scatter(
    X_grid[:, 0], X_grid[:, 1],
    c=svc.predict(DataFrame(X_grid, columns=X.columns)),
    s=3, marker='.',
)

# Foreground: the actual samples, coloured by their true label.
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=Y, marker='d')

# Name the axes after the plotted features so the figure is self-explanatory.
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
plt.grid(True)

### Non-Linear SVC (RBF)

In [None]:
# Same classifier family with an RBF kernel, which allows non-linear
# decision boundaries; fit() returns the estimator itself.
svc = SVC(kernel='rbf').fit(X, Y)

# Mean accuracy on the training data.
svc.score(X, Y)

In [None]:
plt.figure(figsize=(10, 10))

# Background: predicted class at every grid point (small dots).
# The SVC was fitted on a DataFrame, so the grid is wrapped in a frame with
# the same column names; passing the bare array makes recent scikit-learn
# versions warn about missing feature names.
plt.scatter(
    X_grid[:, 0], X_grid[:, 1],
    c=svc.predict(DataFrame(X_grid, columns=X.columns)),
    s=3, marker='.',
)

# Foreground: the actual samples, coloured by their true label.
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=Y, marker='d')

# Name the axes after the plotted features so the figure is self-explanatory.
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
plt.grid(True)