# Chapter 7 支持向量机
## SVM模型
### 另外参考[论文](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-98-14.pdf)

In [1]:
import numpy as np
import random
import matplotlib.pyplot as plt
class SVM():
    """Binary soft-margin support vector machine trained with SMO.

    The optimizer follows Platt's Sequential Minimal Optimization and uses
    the decision function ``f(x) = sum_i alpha_i * y_i * K(x_i, x) + b``
    (note the ``+ b`` sign convention). Labels are expected to be -1 / +1.

    Parameters
    ----------
    C : float
        Upper bound of every Lagrange multiplier (box constraint).
    tol : float
        Tolerance used by the KKT check and by the minimum accepted step.
    **kernel
        Kernel options. ``type`` selects ``'linear'`` or ``'rbf'``; the RBF
        kernel also reads ``gamma`` (default 0.1) and evaluates
        ``exp(-||a - b||^2 / gamma^2)``.
    """

    def __init__(self, C = 1, tol = 0.001, **kernel):
        self.C = C
        self.tol = tol
        self.kernel = kernel

    def cal_kernel(self, x1, x2):
        """Return the kernel matrix between the rows of x1 and the rows of x2.

        The result has shape ``(x1.shape[0], x2.shape[0])``. For an unknown
        (or missing) kernel type a message is printed and ``[]`` is returned.
        """
        kernel_type = self.kernel.get('type')
        if kernel_type == 'linear':
            return np.dot(x1, x2.T)
        if kernel_type == 'rbf':
            # gamma does not depend on the sample pair, so read it once.
            gamma = self.kernel.get('gamma', 0.1)
            x1 = np.asarray(x1)
            x2 = np.asarray(x2)
            # Sum over every axis except the sample axis.
            feature_axes = tuple(range(1, x2.ndim))
            kernel_val = np.zeros((x1.shape[0], x2.shape[0]))
            for i in range(x1.shape[0]):
                # One row at a time keeps memory bounded for large x2.
                sq_dist = np.sum(np.power(x1[i] - x2, 2), axis=feature_axes)
                kernel_val[i] = np.exp(-1 * sq_dist / (gamma ** 2))
            return kernel_val
        print("kernel type can't be recognized")
        return []

    def cal_err(self, idx):
        """Return the prediction error ``f(x_idx) - y_idx`` for a training sample."""
        return np.dot((self.alpha * self.train_y).T, self.kernel_val[:, idx]) + self.b - self.train_y[idx]

    def find_idx1(self, idx2):
        """Pick the non-bound sample that maximizes ``|e1 - e2|``.

        Returns 0 when there is no non-bound sample other than ``idx2``.
        """
        # nonzero() on the boolean mask yields indices into the full alpha
        # vector (not into a filtered copy of it).
        check_idx = np.nonzero((self.alpha > 0) & (self.alpha < self.C))[0]
        max_diff = -1
        idx1 = 0
        for idx in check_idx:
            if idx == idx2:
                continue
            e_diff = np.abs(self.e_cache[idx] - self.e_cache[idx2])
            if e_diff > max_diff:
                idx1 = idx
                max_diff = e_diff
        return idx1

    def take_step(self, idx1, idx2):
        """Jointly optimize ``alpha[idx1]`` and ``alpha[idx2]``.

        Returns 1 when the multipliers were changed, 0 otherwise. Nothing is
        written to the model state unless the step is accepted.
        """
        if idx1 == idx2:
            return 0
        a1 = self.alpha[idx1]
        y1 = self.train_y[idx1]
        e1 = self.e_cache[idx1]
        a2 = self.alpha[idx2]
        y2 = self.train_y[idx2]
        e2 = self.e_cache[idx2]
        s = y1 * y2
        # Ends of the feasible segment for alpha[idx2].
        if s < 0:
            L = max(0, a2 - a1)
            H = min(self.C, self.C + a2 - a1)
        else:
            L = max(0, a1 + a2 - self.C)
            H = min(self.C, a1 + a2)
        if L == H:
            return 0
        k11 = self.kernel_val[idx1][idx1]
        k12 = self.kernel_val[idx1][idx2]
        k22 = self.kernel_val[idx2][idx2]
        eta = k11 + k22 - 2.0 * k12
        if eta > 0:
            # Unconstrained optimum along the constraint line, then clipped.
            a2_new = a2 + y2 * (e1 - e2) / eta
            if a2_new < L:
                a2_new = L
            elif a2_new > H:
                a2_new = H
        else:
            # Degenerate curvature: compare the objective at both segment
            # ends. The output here is sum + b, so the kernel sum minus the
            # label is e - b.
            f1 = y1 * (e1 - self.b) - a1 * k11 - s * a2 * k12
            f2 = y2 * (e2 - self.b) - s * a1 * k12 - a2 * k22
            L1 = a1 + s * (a2 - L)
            H1 = a1 + s * (a2 - H)
            Lobj = L1 * f1 + L * f2 + 0.5 * L1 ** 2 * k11 + 0.5 * L ** 2 * k22 + s * L * L1 * k12
            Hobj = H1 * f1 + H * f2 + 0.5 * H1 ** 2 * k11 + 0.5 * H ** 2 * k22 + s * H * H1 * k12
            if Lobj < Hobj - self.tol:
                a2_new = L
            elif Lobj > Hobj + self.tol:
                a2_new = H
            else:
                a2_new = a2
        # Reject negligible progress before touching self.alpha, so the
        # equality constraint sum(alpha * y) = 0 is never broken.
        if np.abs(a2_new - a2) < self.tol * (a2_new + a2 + self.tol):
            return 0
        a1_new = a1 + s * (a2 - a2_new)
        b_old = self.b
        b1 = b_old - e1 - y1 * (a1_new - a1) * k11 - y2 * (a2_new - a2) * k12
        b2 = b_old - e2 - y1 * (a1_new - a1) * k12 - y2 * (a2_new - a2) * k22
        if (a1_new != 0 and a1_new != self.C) and (a2_new == 0 or a2_new == self.C):
            b_new = b1
        elif (a2_new != 0 and a2_new != self.C) and (a1_new == 0 or a1_new == self.C):
            b_new = b2
        else:
            b_new = (b1 + b2) / 2
        self.alpha[idx1] = a1_new
        self.alpha[idx2] = a2_new
        self.b = b_new
        # Every sample's error depends on the two changed multipliers and on
        # b, so refresh the whole cache instead of only two entries.
        self.e_cache = self.e_cache \
            + y1 * (a1_new - a1) * self.kernel_val[:, idx1] \
            + y2 * (a2_new - a2) * self.kernel_val[:, idx2] \
            + (b_new - b_old)
        return 1

    def examine(self, idx2):
        """Try to improve the sample ``idx2`` if it violates the KKT conditions.

        Returns 1 when a joint step with some partner succeeded, else 0.
        """
        e2 = self.cal_err(idx2)
        self.e_cache[idx2] = e2
        r2 = self.train_y[idx2] * e2
        # Check whether the sample violates the KKT conditions.
        violates = ((r2 < -1 * self.tol) and (self.alpha[idx2] < self.C)) \
            or ((r2 > self.tol) and (self.alpha[idx2] > 0))
        if not violates:
            return 0
        support_idx = np.nonzero((self.alpha != 0) & (self.alpha != self.C))[0]
        support_num = len(support_idx)
        if support_num > 1:
            # Heuristic 1: partner with max(|e2 - e1|).
            idx1 = self.find_idx1(idx2)
            if self.take_step(idx1, idx2):
                return 1
        if support_num > 0:
            # Heuristic 2: all non-bound samples, starting at a random one.
            start = random.randint(0, support_num - 1)
            for i in range(support_num):
                if self.take_step(support_idx[(start + i) % support_num], idx2):
                    return 1
        # Heuristic 3: every sample, starting at a random one.
        start = random.randint(0, self.n - 1)
        for i in range(self.n):
            if self.take_step((start + i) % self.n, idx2):
                return 1
        return 0

    def smo(self):
        """Run the SMO outer loop until no multiplier changes.

        Alternates between one pass over all samples and repeated passes over
        the non-bound samples; stops early after ``self.epoch`` passes when
        an epoch limit is set. Progress is printed after every pass.
        """
        num_changed = 0
        examine_all = 1
        loop_num = 0
        while (num_changed > 0 or examine_all):
            loop_num  = loop_num + 1
            if self.epoch and loop_num > self.epoch:
                break
            num_changed = 0
            if examine_all:
                for i in range(self.n):
                    num_changed = num_changed + self.examine(i)
            else:
                for i in range(self.n):
                    # Repeated passes over the non-bound examples until all
                    # of them obey the KKT conditions within the tolerance.
                    if self.alpha[i] != 0 and self.alpha[i] != self.C:
                        num_changed = num_changed + self.examine(i)
            if examine_all == 1:
                examine_all = 0
            elif num_changed == 0:
                examine_all = 1
            print('loop num %d: num changed %d, examine_all %d' % (loop_num, num_changed, examine_all))

    def fit(self, train_x, train_y, epoch = None):
        """Train on ``train_x`` (one sample per row) with labels ``train_y``.

        ``epoch`` optionally caps the number of SMO outer-loop passes.
        Returns False when the kernel matrix is empty (for example because
        the kernel type is not recognized), True otherwise.
        """
        self.train_x = train_x
        self.train_y = train_y
        self.n = train_x.shape[0]
        self.kernel_val = self.cal_kernel(train_x, train_x)
        if len(self.kernel_val) == 0:
            return False
        self.alpha = np.zeros(self.n)
        self.b = 0
        self.epoch = epoch
        # With alpha = 0 and b = 0 every output is 0, so each error is -y.
        self.e_cache = -np.asarray(train_y, dtype=float)
        self.smo()
        if self.kernel.get('type') == 'linear':
            # The linear kernel allows an explicit weight vector.
            self.pre_w = np.dot((self.alpha * train_y).T, train_x)
        return True
        
    def predict(self, test_x):
        """Return the predicted label (1 or -1) for every row of ``test_x``."""
        if self.kernel.get('type') == 'linear':
            pre_val = np.dot(test_x, self.pre_w.T) + self.b
        else:
            # Use the training set stored by fit(), not a module-level global.
            k_val = self.cal_kernel(self.train_x, test_x)
            pre_val = np.dot((self.alpha * self.train_y).T, k_val) + self.b
        return np.where(pre_val>=0, 1, -1)

    def get_accuracy(self, gt_y, pre_y):
        """Return the fraction of entries where ``pre_y`` equals ``gt_y``."""
        diff = np.subtract(gt_y, pre_y)
        err = diff[diff!=0].shape[0]
        return 1 - err/gt_y.shape[0]

    def plot_decision_boundary(self, fea, label, ax, title=''):
        """Draw the predicted regions and the samples for 2-D features.

        Predictions are evaluated on a grid (step 0.05) that extends 0.5
        beyond the data range in both features and drawn on ``ax`` with
        ``contourf``; the samples are scattered on top, colored by ``label``.
        """
        fea1_min, fea1_max = fea[:, 0].min() - 0.5, fea[:, 0].max() + 0.5
        fea2_min, fea2_max = fea[:, 1].min() - 0.5, fea[:, 1].max() + 0.5
        s = 0.05
        g_fea1, g_fea2 = np.meshgrid(np.arange(fea1_min, fea1_max, s), np.arange(fea2_min, fea2_max, s))
        pre_y = self.predict(np.c_[g_fea1.ravel(), g_fea2.ravel()])
        pre_y = pre_y.reshape(g_fea1.shape)
        ax.contourf(g_fea1, g_fea2, pre_y, cmap=plt.cm.Spectral)
        ax.scatter(fea[:, 0], fea[:, 1], c = label, s = 20)
        if len(title) > 0:
            ax.set_title(title)

## 模型训练与测试
### 线性核

In [2]:
# Notebook cell: train and evaluate the linear-kernel SVM on a synthetic
# two-feature, two-class dataset.
%matplotlib widget
from sklearn import datasets
sample_num = 200
inputs, labels = datasets.make_classification(n_samples=sample_num, n_features=2, n_classes=2, n_redundant=0)
# Relabel class 0 as -1 so the labels are -1 / 1.
labels = np.where(labels == 0, -1, labels)
# The first half of the samples is used for training, the second half for testing.
train_num = sample_num//2
train_x = inputs[:train_num]
train_y = labels[:train_num]
test_x = inputs[train_num:]
test_y = labels[train_num:]
model = SVM(type = 'linear')
# The third argument is `epoch`, the cap on SMO outer-loop passes.
model.fit(train_x, train_y, 20)
pre_y = model.predict(test_x)
print('accuracy: %.2f' % model.get_accuracy(test_y, pre_y))
# Mark the support-vector samples (alpha > 0) with different colors:
# 0 for the -1 class and 2 for the +1 class.
yp = train_y.copy()
yp[(model.alpha>0)&(yp==-1)] = 0
yp[(model.alpha>0)&(yp==1)] = 2
# NOTE(review): the pyplot module itself is passed as `ax`; only
# contourf/scatter are called on it because the title is left empty.
model.plot_decision_boundary(train_x, train_y, plt)
plt.scatter(x = train_x[:, 0], y = train_x[:,1], c = yp)

loop num 1: num changed 37, examine_all 0
loop num 2: num changed 16, examine_all 0
loop num 3: num changed 6, examine_all 0
loop num 4: num changed 4, examine_all 0
loop num 5: num changed 3, examine_all 0
loop num 6: num changed 2, examine_all 0
loop num 7: num changed 4, examine_all 0
loop num 8: num changed 1, examine_all 0
loop num 9: num changed 0, examine_all 1
loop num 10: num changed 24, examine_all 0
loop num 11: num changed 12, examine_all 0
loop num 12: num changed 7, examine_all 0
loop num 13: num changed 5, examine_all 0
loop num 14: num changed 3, examine_all 0
loop num 15: num changed 2, examine_all 0
loop num 16: num changed 2, examine_all 0
loop num 17: num changed 2, examine_all 0
loop num 18: num changed 2, examine_all 0
loop num 19: num changed 3, examine_all 0
loop num 20: num changed 3, examine_all 0
accuracy: 0.94


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x121832bd0>

### 径向基核

In [3]:
# Notebook cell: train and evaluate the RBF-kernel SVM on a noisy
# make_circles dataset. Relies on `datasets` imported by an earlier cell.
sample_num=200
inputs, labels = datasets.make_circles(n_samples=sample_num, noise=0.05)
# Relabel class 0 as -1 so the labels are -1 / 1.
labels = np.where(labels == 0, -1, labels)
# The first half of the samples is used for training, the second half for testing.
train_num = sample_num//2
train_x = inputs[:train_num]
train_y = labels[:train_num]
test_x = inputs[train_num:]
test_y = labels[train_num:]
fig, axs = plt.subplots(1, 2)
# Left panel: all samples colored by label.
axs[0].scatter(x = inputs[:, 0], y = inputs[:, 1],c=labels)
# No gamma is passed, so the kernel falls back to its default gamma.
model = SVM(type = 'rbf')
# The third argument is `epoch`, the cap on SMO outer-loop passes.
model.fit(train_x, train_y, 20)
pre_y = model.predict(test_x)
print('accuracy: %.2f' % model.get_accuracy(test_y, pre_y))
# Mark the support-vector samples (alpha > 0) with different colors:
# 0 for the -1 class and 2 for the +1 class.
yp = train_y.copy()
yp[(model.alpha>0)&(yp==-1)] = 0
yp[(model.alpha>0)&(yp==1)] = 2
# Right panel: decision regions with the training samples on top.
model.plot_decision_boundary(train_x, train_y, axs[1])
# NOTE(review): plt.scatter draws on pyplot's current axes — presumably
# axs[1]; confirm, or call axs[1].scatter explicitly.
plt.scatter(x = train_x[:, 0], y = train_x[:,1], c = yp)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

loop num 1: num changed 86, examine_all 0
loop num 2: num changed 35, examine_all 0
loop num 3: num changed 26, examine_all 0
loop num 4: num changed 19, examine_all 0
loop num 5: num changed 17, examine_all 0
loop num 6: num changed 17, examine_all 0
loop num 7: num changed 17, examine_all 0
loop num 8: num changed 13, examine_all 0
loop num 9: num changed 12, examine_all 0
loop num 10: num changed 12, examine_all 0
loop num 11: num changed 12, examine_all 0
loop num 12: num changed 12, examine_all 0
loop num 13: num changed 12, examine_all 0
loop num 14: num changed 12, examine_all 0
loop num 15: num changed 12, examine_all 0
loop num 16: num changed 12, examine_all 0
loop num 17: num changed 12, examine_all 0
loop num 18: num changed 11, examine_all 0
loop num 19: num changed 10, examine_all 0
loop num 20: num changed 11, examine_all 0
accuracy: 0.80


<matplotlib.collections.PathCollection at 0x122ff3d10>