# 第7章 SVM
## By LiuGang - 2018/11/20
## Reference Book - statistical learning method (Chinese)

### 1:  Create some data

In [116]:
import numpy as np
# X = np.array([[1,2],[2,2],[3,1],[5,-0.5],[1,1],[2,0],[0.5,1],[-1,2]])
# y = np.array([1,1,1,1,-1,-1,-1,-1])

# np.random.seed(5)
# X = np.random.randint(1,1000,size=(1000, 20))
# y = np.array([1 if i%2==0 else -1 for i in range(1000)])

#np.random.seed(5)
X_ = 10*np.random.rand(1000)
tmp = []
y = []
for i in range(len(X_)):
    if np.random.rand(1)[0]>=0.5:
        tmp.append(2*X_[i]+5+5*np.random.rand(1)[0]+2)
        y.append(1)
    else:
        tmp.append(2*X_[i]+5-5*np.random.rand(1)[0]-2)
        y.append(-1)
X = np.concatenate((X_[np.newaxis, :].T, np.array(tmp)[np.newaxis, :].T), axis=1)
y = np.array(y)

In [117]:
print(X[0:10])
print(y[0:10])

[[ 2.74844606  5.05409395]
 [ 7.73485126 17.08828543]
 [ 6.59805041 21.93574795]
 [ 3.02256578  6.35697337]
 [ 8.60519395 15.36987918]
 [ 9.13554487 28.22638588]
 [ 8.48230092 17.68149289]
 [ 1.25671561  4.79444433]
 [ 9.56274512 18.6065852 ]
 [ 8.75199645 29.32011954]]
[-1 -1  1 -1 -1  1 -1 -1 -1  1]


### 2: Class

In [118]:
class SVM():
    def __init__(self, X, y, C=100, kernel='linear', epsilon=0.001):
        self.kernel = kernel
        self.epsilon = epsilon
        self.X = X
        self.y = y
        self.b = 0.0
        self.N = len(self.X)
        self.n = len(self.X[0])
        self.C = C  # 1000
        self.alpha = [0.0 for i in range(self.N)]
        self.E = [self._E(i) for i in range(self.N)]

    
    def _K(self, x1, x2):
        #'核函数'
        if self.kernel == 'linear': 
            return sum([x1[k] * x2[k] for k in range(self.n)]) 
        elif self.kernel == 'poly': 
            return (sum([x1[k] * x2[k] for k in range(self.n)])+1)**3
        else:
            print('error!')
        
    def _g(self, i):
        res = self.b
        for j in range(self.N):
            res += self.alpha[j]*self.y[j]*self._K(self.X[i], self.X[j])
        return res
    
    def _E(self, i):
        return self._g(i) - self.y[i]
    
    def update_E(self, i):
        result = self.b-self.y[i] 
        for j in range(self.N): 
            if self.alpha[j]<0 or self.alpha[j]>self.C: 
                continue 
            result += self.y[j]*self.alpha[j]*self._K(self.X[i],self.X[j]) 
        return result

    def _is_KKT(self, i):
        ygx = self.y[i] * self._g(i) 
        if abs(self.alpha[i])<=self.epsilon: 
            return ygx > 1 or ygx == 1 
        elif self.alpha[i]< self.C+self.epsilon and self.alpha[i]>-self.epsilon: 
            return ygx == 1 
        elif abs(self.alpha[i]-self.C)<=self.epsilon:
            return ygx < 1 or ygx == 1
        else: 
            return False

    def get_ij_2param(self):
        l1,l2 = [],[]
        for i in range(self.N):
            if self.alpha[i] > 0 and self.alpha[i] < self.C:
                l1.append(i)
            else:
                l2.append(i)
        resort_list = l1 + l2
        max_EE = -1
        max_i = 0
        max_j = 0
        for i in resort_list:
            if self._is_KKT(i):
                continue
            E1 = self.E[i]
            for j in range(self.N):
                if j != i:
                    E2 = self.E[j]
                    if abs(E1 - E2) > max_EE :
                        max_EE = abs(E1 - E2)
                        max_i = i
                        max_j = j
            return max_i,max_j
    
    def predict(self, xdata):
        res = []
        for data in xdata:
            result = self.b 
            for i in range(self.N):
                result += self.alpha[i]*self.y[i]*self._K(data, self.X[i]) 
            if result > 0: 
                res.append(1)
            else:
                res.append(-1)
        return res
    
    def train(self, epoch=10):
        i1, i2 = self.get_ij_2param()
        
        for epo in range(epoch):
            i1, i2 = self.get_ij_2param()
            L = max(0, self.alpha[i2] - self.alpha[i1])
            H = min(self.C, self.C + self.alpha[i2] - self.alpha[i1])
            if self.y[i1] == self.y[i2]:
                L = max(0, self.alpha[i2] + self.alpha[i1] - self.C)
                H = min(self.C, self.alpha[i2] + self.alpha[i1])

            E1 = self.E[i1]
            E2 = self.E[i2]
            eta = self._K(self.X[i1], self.X[i1]) + self._K(self.X[i2], self.X[i2]) - 2 * self._K(self.X[i1], self.X[i2])     # 公式(7.107)

            alpha2_new_unc = self.alpha[i2] + self.y[i2] * (E1 - E2) / eta        # 公式(7.106)

            # 公式(7.108)
            alph2_new = 0
            if alpha2_new_unc > H:
                alph2_new = H
            elif alpha2_new_unc < L:
                alph2_new = L
            else:
                alph2_new = alpha2_new_unc

            # 公式(7.109)
            alph1_new = self.alpha[i1] + self.y[i1] * \
                self.y[i2] * (self.alpha[i2] - alph2_new)

            # 公式(7.115) 及 公式(7.116)
            b_new = 0
            b1_new = -E1 - self.y[i1] * self._K(self.X[i1], self.X[i1]) * (alph1_new - self.alpha[i1]) - self.y[i2] * self._K(self.X[i2], self.X[i1]) * (alph2_new - self.alpha[i2]) + self.b
            b2_new = -E2 - self.y[i1] * self._K(self.X[i1], self.X[i2]) * (alph1_new - self.alpha[i1]) - self.y[i2] * self._K(self.X[i2], self.X[i2]) * (alph2_new - self.alpha[i2]) + self.b

            if alph1_new > 0 and alph1_new < self.C:
                b_new = b1_new
            elif alph2_new > 0 and alph2_new < self.C:
                b_new = b2_new
            else:
                b_new = (b1_new + b2_new) / 2

            self.alpha[i1] = alph1_new
            self.alpha[i2] = alph2_new
            self.b = b_new

            self.E[i1] = self.update_E(i1)
            self.E[i2] = self.update_E(i2) #modify
            acc_e = np.array(self.predict(self.X))-self.y
            acc = len(acc_e[acc_e==0])/len(self.y)
            print(acc)
                



## 3. Train

In [115]:
mysvm = SVM(X, y, C=1000, kernel='linear', epsilon=0.01)
mysvm.train(epoch = 20)

0.674
0.706
0.714
0.698
0.66
0.705
0.705
0.71
0.717
0.717
0.78
0.791
0.791
0.78
0.783
0.783
0.78
0.791
0.791
0.78


### 上面代码效果不行，下面是在网上找的一段代码，但是效果好像也不行
### https://blog.csdn.net/shenxiaoming77/article/details/53508919

In [87]:
# 
def selectJrand(i,m): #在0-m中随机选择一个不是i的整数
    j=i
    while (j==i):
        j=int(random.uniform(0,m))
    return j

def clipAlpha(aj,H,L):  #保证a在L和H范围内（L <= a <= H）
    if aj>H:
        aj=H
    if L>aj:
        aj=L
    return aj

def kernelTrans(X, A, kTup): #核函数，输入参数,X:支持向量的特征树；A：某一行特征数据；kTup：('lin',k1)核函数的类型和参数 
    m,n = shape(X) 
    K = mat(zeros((m,1))) 
    if kTup[0]=='lin': #线性函数 
        K = X * A.T 
    elif kTup[0]=='rbf': # 径向基函数(radial bias function) 
        for j in range(m): 
            deltaRow = X[j,:] - A 
            K[j] = deltaRow*deltaRow.T 
            K = exp(K/(-1*kTup[1]**2)) #返回生成的结果 
    else: 
        raise NameError('Houston We Have a Problem -- That Kernel is not recognized') 
    return K

class optStruct: 
    def __init__(self,dataMatIn, classLabels, C, toler, kTup): # 存储各类参数 
        self.X = dataMatIn #数据特征 
        self.labelMat = classLabels #数据类别 
        self.C = C #软间隔参数C，参数越大，非线性拟合能力越强 
        self.tol = toler #停止阀值 
        self.m = shape(dataMatIn)[0] #数据行数 
        self.alphas = mat(zeros((self.m,1))) 
        self.b = 0 #初始设为0 
        self.eCache = mat(zeros((self.m,2))) #缓存 
        self.K = mat(zeros((self.m,self.m))) #核函数的计算结果 
        for i in range(self.m): 
            self.K[:,i] = kernelTrans(self.X, self.X[i,:], kTup)

def calcEk(oS, k): #计算Ek（参考《统计学习方法》p127公式7.105） 
    fXk = float(multiply(oS.alphas,oS.labelMat).T*oS.K[:,k] + oS.b) 
    Ek = fXk - float(oS.labelMat[k]) 
    return Ek

#随机选取aj，并返回其E值 
def selectJ(i, oS, Ei): 
    maxK = -1 
    maxDeltaE = 0 
    Ej = 0 
    oS.eCache[i] = [1,Ei] 
    validEcacheList = nonzero(oS.eCache[:,0].A)[0] #返回矩阵中的非零位置的行数 
    if (len(validEcacheList)) > 1: 
        for k in validEcacheList: 
            if k == i: 
                continue 
            Ek = calcEk(oS, k)
            deltaE = abs(Ei - Ek) 
            if (deltaE > maxDeltaE): #返回步长最大的aj 
                maxK = k 
                maxDeltaE = deltaE 
                Ej = Ek 
        return maxK, Ej 
    else: 
        j = selectJrand(i, oS.m) 
        Ej = calcEk(oS, j) 
    return j, Ej

def updateEk(oS, k): #更新os数据
    Ek = calcEk(oS, k)
    oS.eCache[k] = [1,Ek]
    
#首先检验ai是否满足KKT条件，如果不满足，随机选择aj进行优化，更新ai,aj,b值
def innerL(i, oS): #输入参数i和所有参数数据
    Ei = calcEk(oS, i) #计算E值
    if ((oS.labelMat[i]*Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or ((oS.labelMat[i]*Ei > oS.tol) and (oS.alphas[i] > 0)): 
        #检验这行数据是否符合KKT条件 参考《统计学习方法》p128公式7.111-113 
        j,Ej = selectJ(i, oS, Ei) #随机选取aj，并返回其E值 
        alphaIold = oS.alphas[i].copy() 
        alphaJold = oS.alphas[j].copy()
        if (oS.labelMat[i] != oS.labelMat[j]): #以下代码的公式参考《统计学习方法》p126 
            L = max(0, oS.alphas[j] - oS.alphas[i]) 
            H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])
        else:
            L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
            H = min(oS.C, oS.alphas[j] + oS.alphas[i])
        if L==H:
            print("L==H")
            return 0

        eta = 2.0 * oS.K[i,j] - oS.K[i,i] - oS.K[j,j] #参考《统计学习方法》p127公式7.107 
        if eta >= 0: 
            print("eta>=0") 
            return 0
        oS.alphas[j] -= oS.labelMat[j]*(Ei - Ej)/eta #参考《统计学习方法》p127公式7.106 
        oS.alphas[j] = clipAlpha(oS.alphas[j],H,L) #参考《统计学习方法》p127公式7.108 
        updateEk(oS, j)
        if (abs(oS.alphas[j] - alphaJold) < oS.tol): #alpha变化大小阀值（自己设定）
            print("j not moving enough")
            return 0
        oS.alphas[i] += oS.labelMat[j]*oS.labelMat[i]*(alphaJold - oS.alphas[j])#参考《统计学习方法》p127公式7.109 
        updateEk(oS, i) #更新数据 #以下求解b的过程，参考《统计学习方法》p129公式7.114-7.116 
        b1 = oS.b - Ei- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i,i] - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[i,j] 
        b2 = oS.b - Ej- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i,j]- oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[j,j] 
        if (0 < oS.alphas[i]<oS.C): 
            oS.b = b1
        elif (0 < oS.alphas[j]<oS.C): 
            oS.b = b2 
        else: 
            oS.b = (b1 + b2)/2.0 
        return 1 
    else: 
        return 0

#SMO函数，用于快速求解出alpha 
def smoP(dataMatIn, classLabels, C, toler, maxIter,kTup=('lin', 0)): 
    #输入参数：数据特征，数据类别，参数C，阀值toler，最大迭代次数，核函数（默认线性核） 
    oS = optStruct(mat(dataMatIn),mat(classLabels).transpose(),C,toler, kTup) 
    iter = 0 
    entireSet = True 
    alphaPairsChanged = 0 
    while (iter < maxIter) and ((alphaPairsChanged > 0) or (entireSet)):
        alphaPairsChanged = 0 
        if entireSet: 
            for i in range(oS.m): #遍历所有数据 
                alphaPairsChanged += innerL(i,oS) 
                print("fullSet, iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged)) #显示第多少次迭代，那行特征数据使alpha发生了改变，这次改变了多少次alpha 
            iter += 1
        else: 
            nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0] 
            for i in nonBoundIs: #遍历非边界的数据 
                alphaPairsChanged += innerL(i,oS) 
                print("non-bound, iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged)) 
            iter += 1
        if entireSet: 
            entireSet = False 
        elif (alphaPairsChanged == 0): 
            entireSet = True 
        print("iteration number: %d" % iter) 
    return oS.b,oS.alphas

def testRbf(data_train,data_y): 
    dataArr,labelArr = data_train, data_y #读取训练数据 
    b,alphas = smoP(dataArr, labelArr, 200, 0.0001, 10, ('rbf', 1.3)) #通过SMO算法得到b和alpha 
    datMat=mat(dataArr) 
    labelMat = mat(labelArr).transpose() 
    svInd=nonzero(alphas)[0] #选取不为0数据的行数（也就是支持向量） 
    sVs=datMat[svInd] #支持向量的特征数据 
    labelSV = labelMat[svInd] #支持向量的类别（1或-1）
    print("there are %d Support Vectors" % shape(sVs)[0]) #打印出共有多少的支持向量 
    m,n = shape(datMat) #训练数据的行列数 
    errorCount = 0 
    for i in range(m): 
        kernelEval = kernelTrans(sVs,datMat[i,:],('rbf', 1.3)) #将支持向量转化为核函数 
        predict=kernelEval.T * multiply(labelSV,alphas[svInd]) + b #这一行的预测结果（代码来源于《统计学习方法》p133里面最后用于预测的公式）注意最后确定的分离平面只有那些支持向量决定。 
        if sign(predict)!=sign(labelArr[i]): #sign函数 -1 if x < 0, 0 if x==0, 1 if x > 0 
            errorCount += 1
    print("the training error rate is: %f" % (float(errorCount)/m)) #打印出错误率 
#     dataArr_test,labelArr_test = loadDataSet(data_test) #读取测试数据 
#     errorCount_test = 0 
#     datMat_test=mat(dataArr_test) 
#     labelMat = mat(labelArr_test).transpose() 
#     m,n = shape(datMat_test)
#     for i in range(m): #在测试数据上检验错误率 
#         kernelEval = kernelTrans(sVs,datMat_test[i,:],('rbf', 1.3)) 
#         predict=kernelEval.T * multiply(labelSV,alphas[svInd]) + b 
#         if sign(predict)!=sign(labelArr_test[i]): 
#             errorCount_test += 1 
#     print("the test error rate is: %f" % (float(errorCount_test)/m)) 


In [88]:
X_ = 10*np.random.rand(1000)
tmp = []
y = []
for i in range(len(X_)):
    if np.random.rand(1)[0]>=0.5:
        tmp.append(2*X_[i]+5+5*np.random.rand(1)[0]+0.1)
        y.append(1)
    else:
        tmp.append(2*X_[i]+5-5*np.random.rand(1)[0]-0.1)
        y.append(-1)
X = np.concatenate((X_[np.newaxis, :].T, np.array(tmp)[np.newaxis, :].T), axis=1).tolist()
testRbf(X, y)

L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H


L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
L==H
