A self-implemented SVM classifier with similar APIs as in sklearn.svm.svc

In [15]:
from scipy import optimize
import numpy as np

In [80]:
# get iris data from testing purpose
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

df=pd.read_csv('iris.data')
df.tail()

# extract setosa and versicolor
y = df.iloc[0:100,4].values
y = np.where(y =='Iris-setosa', -1,1)
#y[60]=-y[60] # flip a sample such that its not linear separable
# extract sepal length and petal length
X = df.iloc[0:100,[0,2]].values

X_std = np.copy(X)
X_std[:,0] = (X[:,0]-X[:,0].mean()) /X[:,0].std()
X_std[:,1] = (X[:,1]-X[:,1].mean()) /X[:,1].std()

In [81]:
X =X_std

def kernal_matrix(X,Y,opt):
    """Need to add kernel options, currently only support linear kernal"""
    return np.dot(X,Y.T)
        

In [167]:
def loss(z):
    """Define the dual form loss function """
    K = kernal_matrix(X,X,'linear')
    zz = z*y
    return -np.dot(np.ones(z.shape),z)+0.5*np.dot(np.dot(zz.T,K),zz)

def jac(z):
    """The jacobian of loss function"""
    K = kernal_matrix(X,X,'linear')
    zz = z*y
    return np.ones(z.shape) - np.dot(zz.T,K)


In [168]:
cons = {'type':'eq','fun':lambda x: np.dot(x,y),'jac':lambda x: y }
opt = {'disp':False}
C = 5000
x0 = np.random.normal(loc=0.0,scale=0.01,size=len(y))
bnds = tuple((0,C) for C in C*np.ones(y.shape))

In [169]:
QP_res=optimize.minimize(loss,x0,jac=jac,constraints=cons,method='SLSQP',options=opt,bounds=bnds)

In [170]:
lambdas_all=QP_res.x
# pick out the support vectors
idx = lambdas_all !=0
lambdas_s = lambdas_all[idx]
ys = y[idx]
Xs = X_std[idx,:] 
theta_hat = np.dot(lambdas_s*ys,Xs)
theta0 = (ys-np.dot(Xs,theta_hat)).sum()/len(idx)

In [171]:
# predict
def predict(x,lambdas_s,ys,Xs,theta0):
    k = kernal_matrix(x,Xs,'linear')
    return np.dot(k,lambdas_s*ys)+theta0

In [172]:
predict(X_std,lambdas_s,ys,Xs,theta0)

array([-0.23400843, -0.28218956, -0.28294258, -0.21458538, -0.10888811,
       -0.2922776 , -0.20525036, -0.33112371, -0.22467342, -0.12755814,
       -0.23476146, -0.25343149, -0.37855181, -0.07787097, -0.06928898,
       -0.14622817, -0.19516232, -0.05061895, -0.18582731, -0.10888811,
       -0.18582731, -0.32961766, -0.16715728, -0.20675641, -0.19591535,
       -0.19591535, -0.16640425, -0.17573927, -0.25418451, -0.23476146,
       -0.12755814, -0.16640425, -0.1174701 , -0.22467342, -0.23325541,
       -0.12680512, -0.22467342, -0.34045872, -0.18582731, -0.22392039,
       -0.32103567, -0.34045872, -0.19591535, -0.14848725, -0.25343149,
       -0.17649229, -0.2922776 , -0.1469812 , -0.21458538,  0.48193121,
        0.34672285,  0.48117818,  0.12524028,  0.37548092,  0.21076146,
        0.34596982, -0.05664315,  0.39490397,  0.0576361 , -0.01855007,
        0.22160253,  0.22235555,  0.30712371,  0.10732328,  0.395657  ,
        0.19133841,  0.19284446,  0.30787674,  0.13532832,  0.27

see SVM.py for the final classifier 