### 前言

支持向量机的优缺点如下:
* 优点：泛化错误率低，计算开销不大，结果易解释
* 缺点：对参数调节和核函数的选择敏感；原始分类器若不加修改，仅适用于处理二分类问题

### 支持向量机从零实现

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the data set
dataset=pd.read_csv('datasets/Social_Network_Ads.csv')
# Positional columns 2 and 3 are used as the two input features
X=dataset.iloc[:,[2,3]].values
# Positional column 4 is the class label; the list indexer keeps it 2-D
Y=dataset.iloc[:,[4]].values
# The SVM code below works with the labels -1 and 1, so label 0 becomes -1.
# np.where builds a fresh array rather than writing into the buffer returned
# by .values, which can be read-only or shared with the DataFrame.
Y=np.where(Y==0,-1,Y)

In [3]:
# Split into a training set and a test set (20% of the samples held out)
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

In [4]:
# Standardise the features: the scaler is fitted on the training set only
# and that same fitted scaler is then applied to both sets
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
sc.fit(X_train)
X_train=sc.transform(X_train)
X_test=sc.transform(X_test)

#### 简化版SMO算法

In [5]:
#SMO算法中的辅助函数
#简化版SMO算法
def selectJrand(i,m):
    '''
    Pick a random alpha index that differs from i.

    Candidates are drawn uniformly from [0, m) and redrawn until the
    truncated index is different from i.

    :param i: index of the first alpha
    :param m: total number of alphas
    :return j: randomly chosen integer index with 0 <= j < m and j != i
    :raises ValueError: if m < 2, because no second index can be drawn
    '''
    if m<2:
        raise ValueError("selectJrand needs m >= 2 to pick an index different from i")
    j=i
    while(j==i):
        # Truncate before comparing: a raw float draw almost never equals i,
        # so testing the float would let int(j) == i slip through.
        j=int(np.random.uniform(0,m))
    return j

def clipAlpha(aj,H,L):
    '''
    Clip alpha to the interval bounded by L and H.

    The upper bound is applied first and the lower bound second, so the
    lower bound wins if the two bounds are given in the wrong order.

    :param aj: the alpha value to clip
    :param H: upper bound
    :param L: lower bound
    :return aj: the clipped alpha
    '''
    capped=H if aj>H else aj
    return L if capped<L else capped

In [6]:
def smoSimple(dataMatIn,classLabels,C,toler,maxIter,verbose=True):
    '''
    Train a linear soft-margin SVM with the simplified SMO algorithm.

    Every sample is visited in order. When its multiplier violates the KKT
    conditions by more than toler, a partner index is drawn with selectJrand
    and the pair is optimised analytically. Training stops after maxIter
    consecutive sweeps over the data in which no pair was updated.

    :param dataMatIn: feature matrix, shape (m, n)
    :param classLabels: class labels, expected to be -1/+1; any array holding
        exactly m values (it is reshaped to (m, 1) internally)
    :param C: penalty parameter, the upper bound of every alpha
    :param toler: tolerance on the KKT violation
    :param maxIter: number of consecutive sweeps without any update that ends training
    :param verbose: when True (the default) print the progress messages
    :return b,alpha: b is the learned bias as an ndarray of shape (1,),
        alpha holds the learned multipliers with shape (m, 1)
    :raises ValueError: if dataMatIn is not 2-D or the number of labels is not m
    '''
    dataMatIn=np.asarray(dataMatIn)
    if dataMatIn.ndim!=2:
        raise ValueError("dataMatIn must be a 2-D array of shape (m, n)")
    m=dataMatIn.shape[0]
    classLabels=np.asarray(classLabels).reshape((-1,1))
    if classLabels.shape[0]!=m:
        raise ValueError("classLabels must hold exactly one label per row of dataMatIn")
    b=0.0  # bias, kept as a plain float while training
    alpha=np.zeros((m,1))  # all multipliers start at 0
    passesWithoutChange=0  # consecutive sweeps in which nothing was updated

    def decisionValue(k):
        # f(x_k) = sum_t alpha_t * y_t * <x_k, x_t> + b, using the current alpha and b
        return float(np.dot(np.dot(dataMatIn[k,:],dataMatIn.T),alpha*classLabels)[0])+b

    while(passesWithoutChange<maxIter):
        alphaPairsChanged=0  # number of pairs updated during this sweep
        for i in range(m):  # outer loop over the first multiplier
            yi=float(classLabels[i,0])
            # Ei is the prediction error on sample i
            Ei=decisionValue(i)-yi
            # Since y_i*Ei = y_i*f(x_i) - 1, the KKT conditions are violated when
            #   y_i*f(x_i) < 1 - toler while alpha_i < C (alpha_i should be C), or
            #   y_i*f(x_i) > 1 + toler while alpha_i > 0 (alpha_i should be 0).
            violatesKKT=(yi*Ei<-toler and alpha[i,0]<C) or \
                (yi*Ei>toler and alpha[i,0]>0)
            if not violatesKKT:
                continue
            j=selectJrand(i,m)  # random choice of the second multiplier
            yj=float(classLabels[j,0])
            Ej=decisionValue(j)-yj
            alphaIold=float(alpha[i,0])
            alphaJold=float(alpha[j,0])
            # Bounds that keep both multipliers inside [0, C] while
            # y_i*alpha_i + y_j*alpha_j stays constant
            if yi!=yj:
                L=max(0,alphaJold-alphaIold)
                H=min(C,C+alphaJold-alphaIold)
            else:
                L=max(0,alphaIold+alphaJold-C)
                H=min(C,alphaIold+alphaJold)
            if L==H:
                # The feasible segment is a single point, nothing to optimise
                if verbose:
                    print("L==H")
                continue
            # Linear kernel K_ab = <x_a, x_b>; each product is computed once
            Kii=float(np.dot(dataMatIn[i,:],dataMatIn[i,:]))
            Kij=float(np.dot(dataMatIn[i,:],dataMatIn[j,:]))
            Kjj=float(np.dot(dataMatIn[j,:],dataMatIn[j,:]))
            # eta here is 2*K_ij - K_ii - K_jj, the negative of the usual
            # K_ii + K_jj - 2*K_ij, hence the subtraction in the update below
            eta=2*Kij-Kii-Kjj
            if eta>=0:
                # No strictly positive curvature along the constraint line
                # (this also covers i == j); the simplified algorithm skips the pair
                if verbose:
                    print("eta>=0")
                continue
            # Unclipped step for alpha_j followed by clipping to [L, H]
            alphaJnew=clipAlpha(alphaJold-yj*(Ei-Ej)/eta,H,L)
            if abs(alphaJnew-alphaJold)<0.00001:
                # Reject tiny steps without touching alpha, so that
                # sum(alpha*y) is not disturbed by a one-sided update
                if verbose:
                    print("j not moving enough")
                continue
            # Move alpha_i by the same amount in the compensating direction
            alphaInew=alphaIold+yi*yj*(alphaJold-alphaJnew)
            alpha[j,0]=alphaJnew
            alpha[i,0]=alphaInew
            # Bias candidates that make the KKT conditions hold for sample i / sample j
            b1=b-Ei-yi*(alphaInew-alphaIold)*Kii-yj*(alphaJnew-alphaJold)*Kij
            b2=b-Ej-yi*(alphaInew-alphaIold)*Kij-yj*(alphaJnew-alphaJold)*Kjj
            if 0<alphaInew<C:
                b=b1
            elif 0<alphaJnew<C:
                b=b2
            else:
                b=(b1+b2)/2.0
            alphaPairsChanged+=1
            if verbose:
                print("iter:%d i%d,pairs change %d"%(passesWithoutChange,i,alphaPairsChanged))
        # Only sweeps without any update count towards maxIter
        if alphaPairsChanged==0:
            passesWithoutChange+=1
        else:
            passesWithoutChange=0
        if verbose:
            print("iteration number:%d"%passesWithoutChange)
    # b is wrapped in a 1-element array so it can be indexed as well as broadcast
    return np.array([b]),alpha
        

In [7]:
# Try out the simplified SMO algorithm on the training set
b,alpha=smoSimple(X_train,Y_train,C=0.6,toler=0.001,maxIter=20)

L==H
L==H
L==H
iter:0 i3,pairs change 1
iter:0 i4,pairs change 2
iter:0 i5,pairs change 3
L==H
iter:0 i8,pairs change 4
L==H
L==H
L==H
iter:0 i15,pairs change 5
iter:0 i17,pairs change 6
L==H
j not moving enough
L==H
L==H
L==H
L==H
iter:0 i28,pairs change 7
L==H
j not moving enough
L==H
L==H
L==H
L==H
iter:0 i38,pairs change 8
L==H
j not moving enough
L==H
j not moving enough
iter:0 i46,pairs change 9
L==H
L==H
L==H
L==H
iter:0 i59,pairs change 10
j not moving enough
L==H
L==H
L==H
j not moving enough
L==H
L==H
L==H
L==H
L==H
L==H
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
iter:0 i104,pairs change 11
iter:0 i105,pairs change 12
j not moving enough
iter:0 i112,pairs change 13
iter:0 i113,pairs change 14
L==H
L==H
iter:0 i118,pairs change 15
iter:0 i119,pairs change 16
L==H
j not moving enough
L==H
L==H
L==H
iter:0 i137,pairs change 17
iter:0 i139,pairs change 18
iter:0 i143,pairs change 19
L==H
iter:0 i146

iter:0 i32,pairs change 1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
L==H
j not moving enough
L==H
j not moving enough
L==H
j not moving enough
L==H
L==H
j not moving enough
iter:0 i79,pairs change 2
j not moving enough
j not moving enough
iter:0 i90,pairs change 3
j not moving enough
iter:0 i100,pairs change 4
j not moving enough
j not moving enough
iter:0 i111,pairs change 5
j not moving enough
j not moving enough
j not moving enough
iter:0 i123,pairs change 6
iter:0 i130,pairs change 7
L==H
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
L==H
L==H
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
iter:0 i183,pairs change 8
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not m

j not moving enough
j not moving enough
j not moving enough
iter:0 i147,pairs change 4
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:0 i229,pairs change 5
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:0 i263,pairs change 6
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:0
j not moving enough
L==H
j not moving enough
iter:0 i24,pairs change 1
iter:0 i26,pairs change 2
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:0 i65,pairs change 3
L==H
j not moving enough
L==H
j 

j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
iter:0 i298,pairs change 2
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:0
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:0 i65,pairs change 1
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
iter:0 i147,pairs change 2
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
iter:0 i212,pairs change 3
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
L==H
L==H
j not moving enough
j not moving enough
j not moving e

L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:0 i64,pairs change 1
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:0 i298,pairs change 2
iteration number:0
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
iter:0 i134,pairs change 1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving eno

j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:0
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:1
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:1 i214,pairs change 1
j not moving enough
j not

j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:4
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:4 i222,pairs change 1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:4 i298,pairs change 2
j not moving enough
L==H
j not moving enough
iteration number:0
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j n

j not moving enough
j not moving enough
iteration number:0
eta>=0
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:1
j not moving enough
j not moving enough
j not moving enough
L==H
L==H
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not movi

L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
iteration number:1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
iteration number:2
L==H
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not m

j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
iteration number:0
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
iteration number:1
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:1 i123,pairs change 1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:0
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j

j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:12 i272,pairs change 1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:0
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
L==H
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:1 i73,pairs change 1
j not moving enough
j not moving enough
j not moving 

j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:6
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:6 i268,pairs change 1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:0
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:1
j not moving enough
iter:1 i27,pairs change 1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not movi

j not moving enough
j not moving enough
j not moving enough
iteration number:7
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iter:7 i297,pairs change 1
j not moving enough
j not moving enough
iteration number:0
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
iteration number:1
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
eta>=0
j not moving enough
L==H
j not moving enough
j not moving enough
iteration number:2
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not moving enough
j not

In [10]:
# Print the support vectors: every training sample whose alpha is positive,
# followed by its label
for multiplier,sample,label in zip(alpha,X_train,Y_train):
    if multiplier>0.0:
        print(sample,label)

[2.02016082 0.3787193 ] [-1]
[0.36757813 0.29180308] [1]
[0.17315664 0.14694273] [-1]
[ 0.75642112 -0.83810771] [1]
[ 0.27036739 -0.28763835] [-1]
[ 0.36757813 -0.17175006] [-1]
[-1.09058306  0.75535623] [1]
[ 0.95084261 -1.06988428] [1]
[0.36757813 0.9871328 ] [1]
[ 0.56199963 -0.89605185] [-1]
[-0.60452933  1.45068594] [1]
[-0.60452933  1.88526701] [1]
[ 1.3396856  -1.41754914] [1]
[-0.02126485 -0.25866628] [-1]
[0.36757813 0.08899858] [1]
[ 0.85363187 -0.60633113] [1]
[ 1.04805336 -0.14277799] [-1]
[0.75642112 0.34974723] [-1]
[ 0.85363187 -0.54838699] [1]
[-0.02126485  1.21890937] [-1]
[0.17315664 1.07404901] [1]
[ 0.36757813 -0.49044285] [-1]
[ 0.95084261 -0.83810771] [1]
[-0.02126485  1.24788144] [1]
[-0.89616157  2.26190394] [1]
[0.75642112 0.75535623] [-1]
[0.0759459  0.75535623] [1]
[0.27036739 0.06002651] [-1]
[ 0.65921037 -1.27268878] [1]
[0.36757813 0.29180308] [-1]
[-0.50731858  2.29087602] [1]
[0.17315664 0.03105444] [1]
[0.75642112 0.26283101] [1]
[-0.31289709  0.1469427