In [55]:
''' This is a multi-class, multi-feature SVM implementation using a linear kernel.
    It is tested on the iris flower data set.
    The four features are sepal length, sepal width, petal length and petal width.
    The three classes are Versicolour (class 0), Virginica (class 1) and Setosa (class 2).
    There are 100 training samples and 50 testing samples.
'''
import numpy as np
import pandas as pd
import cvxopt
import cvxopt.solvers
from sklearn.preprocessing import StandardScaler

class SVM(object):
    """Binary soft-margin support vector machine with a linear kernel.

    Training solves the dual quadratic program with ``cvxopt.solvers.qp``.
    Targets given to ``fit`` are expected to be +1.0 / -1.0: the intercept
    formula used below (``b = y_n - w . x_n``) only holds for such labels.

    Attributes set by ``fit`` (all ``None`` until then):
        a:    Lagrange multipliers of the support vectors.
        sv:   the support vectors (rows of the training matrix).
        sv_y: targets of the support vectors.
        w:    weight vector of the separating hyperplane.
        b:    intercept of the separating hyperplane.
    """

    def __init__(self, min_lagmult=1e-6, C=0.1):
        """Store the hyper-parameters.

        Args:
            min_lagmult: multipliers strictly above this value mark a sample
                as a support vector.
            C: upper bound on every multiplier (soft margin).  ``None``
                removes the upper bound (hard margin).
        """
        self.C = None if C is None else float(C)
        self.min_lagmult = min_lagmult

        # Fitted state.  Initialised here so that project() can tell an
        # unfitted model apart from a fitted one.
        self.a = None
        self.sv = None
        self.sv_y = None
        self.w = None
        self.b = None

    @staticmethod
    def _linear_gram(X):
        """Return the linear-kernel Gram matrix ``K[i, j] = X[i] . X[j]``."""
        X = np.asarray(X, dtype=float)
        return np.dot(X, X.T)

    def _extract_model(self, X, y, multipliers, K):
        """Derive support vectors, intercept and weights from a QP solution.

        Args:
            X: training samples, one row per sample.
            y: training targets (+1.0 / -1.0), one per sample.
            multipliers: Lagrange multipliers returned by the solver.
            K: Gram matrix of ``X``.

        Raises:
            ValueError: if no multiplier exceeds ``min_lagmult``, in which
                case the intercept average would divide by zero.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        multipliers = np.asarray(multipliers, dtype=float).ravel()
        K = np.asarray(K, dtype=float)

        # Support vectors have non-zero Lagrange multipliers.
        sv = multipliers > self.min_lagmult
        if not sv.any():
            raise ValueError(
                "no Lagrange multiplier exceeds min_lagmult=%r; "
                "cannot build a model without support vectors"
                % (self.min_lagmult,)
            )

        self.a = multipliers[sv]
        self.sv = X[sv]
        self.sv_y = y[sv]

        coef = self.a * self.sv_y

        # Intercept: mean over support vectors n of
        #   y_n - sum_m a_m * y_m * K[n, m]
        # NOTE(review): the average runs over every support vector, including
        # any whose multiplier sits at the upper bound C; confirm whether only
        # multipliers strictly inside (0, C) should contribute.
        self.b = np.mean(self.sv_y - np.dot(K[sv][:, sv], coef))

        # Weight vector: sum_n a_n * y_n * x_n
        self.w = np.dot(coef, self.sv)

    def fit(self, X, y):
        """Train the classifier on samples ``X`` with targets ``y``.

        Builds the QP

            min (1/2) * x^T * P * x + q^T * x
            s.t. G * x <= h,  A * x = b

        hands it to ``cvxopt.solvers.qp`` and stores the resulting model on
        the instance.  Prints the number of support vectors found.

        Args:
            X: 2-D array-like, one row per sample.
            y: array-like of +1.0 / -1.0 targets, one per sample.
        """
        # cvxopt needs double-precision data; a flat y keeps the boolean
        # masking and element-wise products below well defined.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        n_samples, n_features = X.shape

        K = self._linear_gram(X)

        P = cvxopt.matrix(np.outer(y, y) * K)
        q = cvxopt.matrix(-np.ones(n_samples))
        A = cvxopt.matrix(y, (1, n_samples))
        b = cvxopt.matrix(0.0)

        # Inequality constraints: always a_i >= 0, plus a_i <= C when a
        # finite C is configured.
        non_negative = -np.identity(n_samples)
        if self.C is None:
            G = cvxopt.matrix(non_negative)
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            G = cvxopt.matrix(np.vstack((non_negative, np.identity(n_samples))))
            h = cvxopt.matrix(
                np.hstack((np.zeros(n_samples), np.full(n_samples, self.C)))
            )

        # Solve the QP problem; result['x'] holds the Lagrange multipliers.
        result = cvxopt.solvers.qp(P, q, G, h, A, b)
        multipliers = np.ravel(result['x'])

        self._extract_model(X, y, multipliers, K)
        print ("%d support vectors out of %d points" % (len(self.a), n_samples))

    def project(self, X):
        """Return the signed decision value ``w . x + b`` for each row of X.

        When ``w`` is unavailable, the value is computed from the stored
        support vectors instead.

        Raises:
            AttributeError: if the model has not been fitted.
        """
        X = np.asarray(X, dtype=float)
        if self.w is not None:
            return np.dot(X, self.w) + self.b

        if self.a is None or self.sv is None or self.sv_y is None:
            raise AttributeError(
                "SVM instance is not fitted yet; call fit() first"
            )

        # Dual form: sum_n a_n * y_n * (x . sv_n) + b
        coef = np.asarray(self.a, dtype=float) * np.asarray(self.sv_y, dtype=float)
        kernel = np.dot(X, np.asarray(self.sv, dtype=float).T)
        return np.dot(kernel, coef) + self.b

    def predict(self, X):
        """Return the raw decision values for ``X`` (not thresholded labels)."""
        return self.project(X)

# Fetch the data and preprocess it.

iris_x_train = pd.read_csv('C:/jupyternotebook/iris_X_train.csv')
iris_y_train = pd.read_csv('C:/jupyternotebook/iris_y_train.csv')
iris_x_test = pd.read_csv('C:/jupyternotebook/iris_X_test.csv')
iris_y_test = pd.read_csv('C:/jupyternotebook/iris_y_test.csv')

iris_x_test = np.array(iris_x_test)
iris_x_train = np.array(iris_x_train)
iris_y_test = np.squeeze(np.array(iris_y_test))

# Class labels of the training samples as loaded from the CSV
# (compared against 0, 1 and 2 below).
train_labels = np.squeeze(np.array(iris_y_train))

# Build one +1.0 / -1.0 target vector per class: +1.0 where the sample
# belongs to the class, -1.0 everywhere else.
iris_y_train = np.where(train_labels == 0, 1.0, -1.0)
iris_y_train1 = np.where(train_labels == 1, 1.0, -1.0)
iris_y_train2 = np.where(train_labels == 2, 1.0, -1.0)


# Train the SVM with the one-vs-rest method: one binary classifier per class,
# each scoring every test sample.  The same SVM object is refitted each time,
# so its decision values are captured before the next fit overwrites it.
clf = SVM()

class_scores = []
for targets in (iris_y_train, iris_y_train1, iris_y_train2):
    clf.fit(iris_x_train, targets)
    y_predict = clf.predict(iris_x_test)
    class_scores.append(y_predict)
class0, class1, class2 = class_scores

# One row per test sample, one column per class.
b = np.column_stack((class0, class1, class2))

# The predicted class is the one whose classifier gives the largest score.
result = np.argmax(b, axis=1).astype(int)

print(result)
print(iris_y_test)

correct = np.sum(result == iris_y_test)
print ("%d out of %d predictions correct" % (correct, len(iris_y_test)))

     pcost       dcost       gap    pres   dres
 0: -3.6937e+01 -2.2388e+01  6e+02  3e+01  3e-14
 1: -6.1861e+00 -2.1307e+01  2e+01  4e-01  5e-14
 2: -6.0846e+00 -7.9512e+00  2e+00  4e-03  8e-15
 3: -6.6184e+00 -7.0236e+00  4e-01  5e-04  8e-15
 4: -6.7782e+00 -6.8419e+00  6e-02  6e-05  9e-15
 5: -6.8062e+00 -6.8211e+00  1e-02  1e-05  1e-14
 6: -6.8143e+00 -6.8157e+00  1e-03  4e-07  1e-14
 7: -6.8148e+00 -6.8151e+00  3e-04  9e-08  9e-15
 8: -6.8149e+00 -6.8150e+00  1e-04  1e-08  9e-15
 9: -6.8150e+00 -6.8150e+00  1e-06  1e-10  9e-15
Optimal solution found.
72 support vectors out of 100 points
     pcost       dcost       gap    pres   dres
 0: -1.9669e+01 -1.8685e+01  5e+02  2e+01  3e-14
 1: -3.7174e+00 -1.7276e+01  3e+01  7e-01  4e-14
 2: -3.0274e+00 -8.7555e+00  9e+00  2e-01  1e-14
 3: -2.4772e+00 -3.8517e+00  2e+00  3e-02  7e-15
 4: -2.6786e+00 -3.0021e+00  4e-01  6e-03  6e-15
 5: -2.7480e+00 -2.8489e+00  1e-01  2e-03  6e-15
 6: -2.7739e+00 -2.8023e+00  3e-02  3e-04  9e-15
 7: -2.784