In [1]:
from libsvm.svmutil import *
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import cvxopt
from cvxopt import matrix,solvers
import random
import time

In [2]:
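#C: regularization parameter (upper bound on each Lagrange multiplier α)
#tol: numerical tolerance used when checking the KKT conditions
#max_passes: maximum number of consecutive passes over the α's in which no α changes before training stops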


In [3]:
def lin_ker(Xi,Xj):
    """Linear kernel: return the dot product of ``Xi`` and ``Xj`` via ``np.dot``."""
    kernel_value = np.dot(Xi, Xj)
    return kernel_value

In [4]:
def lin_class(lam, x, X, Y, b):
    """Evaluate the linear decision value f(x) = sum_i lam[i] * Y[i] * <X[i], x> + b.

    Parameters
    ----------
    lam : sequence of multipliers; only the first ``len(Y)`` entries are used.
    x   : the point at which the decision function is evaluated.
    X   : training samples, indexed per sample (row ``i`` pairs with ``Y[i]``).
    Y   : training labels.
    b   : bias term added to the weighted sum.

    Returns
    -------
    The scalar decision value (``b`` itself when ``Y`` is empty).
    """
    m = len(Y)
    if m == 0:
        # No training samples contribute: the sum is empty and only the bias remains.
        return 0 + b

    # Per-sample weights lam[i] * Y[i]; the explicit slice keeps the "first len(Y)"
    # contract of the previous element-wise loop.
    weights = np.asarray(lam)[:m] * np.asarray(Y)
    # One matrix-vector product yields every <X[i], x> at once instead of a Python loop.
    projections = np.dot(np.asarray(X)[:m], x)
    return np.dot(weights, projections) + b

In [5]:
def findL(Yi,Yj,lami,lamj,C):
    """Lower bound L for the second multiplier.

    Same labels:      max(0, lami + lamj - C)
    Different labels: max(0, lamj - lami)
    """
    if Yi == Yj:
        candidate = lami + lamj - C
    else:
        candidate = lamj - lami
    return max(0, candidate)

In [6]:
def findH(Yi,Yj,lami,lamj,C):
    """Upper bound H for the second multiplier.

    Same labels:      min(C, lami + lamj)
    Different labels: min(C, C - lami + lamj)
    """
    candidate = lami + lamj if Yi == Yj else C - lami + lamj
    return min(C, candidate)

In [7]:
def neta(Xi,Xj):
    """Return eta = 2<Xi, Xj> - <Xi, Xi> - <Xj, Xj>, using ``np.inner`` for each product."""
    cross_term = np.inner(Xi, Xj)
    norm_i = np.inner(Xi, Xi)
    norm_j = np.inner(Xj, Xj)
    return 2 * cross_term - norm_i - norm_j

In [8]:
def lin_error(Xi, Yi, lam, X,Y,b):
    """Prediction error for one sample: ``lin_class`` evaluated at ``Xi`` minus its label ``Yi``."""
    return lin_class(lam, Xi, X, Y, b) - Yi


In [9]:
def compute_lamj(Yj,lamj, Ei, Ej, neta):
    """Unclipped update of the second multiplier: lamj - Yj * (Ei - Ej) / neta."""
    step = (Yj * (Ei - Ej)) / neta
    return lamj - step

In [10]:
def clip_lamj(lamj,L,H):
    """Clip ``lamj`` to the interval [L, H]; the upper bound ``H`` is checked first."""
    if lamj > H:
        return H
    return L if lamj < L else lamj
    

In [11]:
def compute_lami(lami, Yi, Yj, lamj_old, lamj):
    """Update the first multiplier: lami + Yi * Yj * (lamj_old - lamj)."""
    label_product = Yi * Yj
    delta_j = lamj_old - lamj
    return lami + label_product * delta_j

In [12]:
def findbs(b, Xi, Yi, lami, lamj, Ei, Ej, Xj, Yj, lami_old, lamj_old):
    """Compute the two candidate bias values after a pair update.

    b1 = b - Ei - Yi*(lami - lami_old)*<Xi, Xi> - Yj*(lamj - lamj_old)*<Xi, Xj>
    b2 = b - Ej - Yi*(lami - lami_old)*<Xi, Xj> - Yj*(lamj - lamj_old)*<Xj, Xj>

    Returns the tuple ``(b1, b2)``.
    """
    # Signed change of each multiplier, shared by both candidates.
    shift_i = Yi * (lami - lami_old)
    shift_j = Yj * (lamj - lamj_old)

    first = (b - Ei) - (shift_i * np.inner(Xi, Xi)) - (shift_j * np.inner(Xi, Xj))
    second = (b - Ej) - (shift_i * np.inner(Xi, Xj)) - (shift_j * np.inner(Xj, Xj))
    return first, second

def compute_b(b1, b2, lami, lamj, C):
    """Choose the new bias from the two candidates.

    Returns ``b1`` when ``lami`` lies strictly inside (0, C), otherwise ``b2`` when
    ``lamj`` does, otherwise the average of ``b1`` and ``b2``.
    """
    if 0 < lami < C:
        return b1
    if 0 < lamj < C:
        return b2
    return (b1 + b2) / 2

In [13]:
def gen_j(m, i):
    """Pick a random index ``j`` in ``[0, m-1]`` that differs from ``i``.

    The global ``random`` generator is used as-is. It is deliberately NOT reseeded
    here: reseeding from the wall clock on every call overrides any seed the caller
    set (so runs cannot be reproduced) and, when several calls fall within one
    clock tick, restarts the generator from the same state and yields the same
    ``j`` repeatedly.

    Raises
    ------
    ValueError
        If no valid index exists (``m < 1``, or ``m == 1`` with ``i == 0``).
        Without this check the rejection loop below would never terminate for
        ``m == 1``.
    """
    if m < 1 or (m == 1 and i == 0):
        raise ValueError("gen_j needs at least one index in range(m) different from i")

    j = random.randint(0, m-1)
    # Rejection sampling: redraw until the index differs from i.
    while i == j:
        j = random.randint(0, m-1)

    return j

In [20]:
def SMO(C, tol, max_passes, X, Y, min_step=1e-3):
    """Train a linear SVM with the simplified SMO procedure.

    Parameters
    ----------
    C          : regularization parameter; every multiplier is kept in [0, C].
    tol        : tolerance used in the KKT-violation test for sample ``i``.
    max_passes : number of consecutive full passes without any multiplier change
                 after which training stops.
    X, Y       : training samples (indexed per sample) and their labels.
    min_step   : smallest change of the second multiplier that counts as progress
                 (defaults to the previously hard-coded 1e-3).

    Returns
    -------
    (lams, b) : array of multipliers (one per sample) and the bias term.

    Notes
    -----
    A candidate value for ``lams[j]`` is written back only when the step is
    accepted. Previously the clipped value was stored before the step-size test,
    so a rejected step still changed ``lams[j]`` while ``lams[i]`` and ``b`` kept
    their old values, breaking the pairing between the two multipliers.
    """
    m = len(Y)
    b = 0
    lams = np.zeros(m)
    passes = 0

    while passes < max_passes:
        changedLam = 0
        for i in range(m):
            Ei = lin_error(X[i], Y[i], lams, X, Y, b)

            # Only optimise samples that violate the KKT conditions by more than tol.
            violates_kkt = ((Y[i] * Ei < -tol and lams[i] < C)
                            or (Y[i] * Ei > tol and lams[i] > 0))
            if not violates_kkt:
                continue

            # Second index is chosen at random among the other samples.
            j = gen_j(m, i)
            Ej = lin_error(X[j], Y[j], lams, X, Y, b)

            # Scalar copies of the current multipliers (indexing yields values, not views).
            lami_old, lamj_old = lams[i], lams[j]

            L = findL(Y[i], Y[j], lami_old, lamj_old, C)
            H = findH(Y[i], Y[j], lami_old, lamj_old, C)
            if L == H:
                # Feasible interval is a single point: nothing to optimise.
                continue

            eta = neta(X[i], X[j])
            if eta >= 0:
                # The update formula divides by eta and requires it to be negative.
                continue

            lamj_new = clip_lamj(compute_lamj(Y[j], lamj_old, Ei, Ej, eta), L, H)
            if abs(lamj_new - lamj_old) < min_step:
                # Step too small: leave lams[j] untouched so the pair stays consistent.
                continue

            lams[j] = lamj_new
            lams[i] = compute_lami(lami_old, Y[i], Y[j], lamj_old, lamj_new)

            b1, b2 = findbs(b, X[i], Y[i], lams[i], lams[j], Ei, Ej,
                            X[j], Y[j], lami_old, lamj_old)
            b = compute_b(b1, b2, lams[i], lams[j], C)

            changedLam += 1

        # Count consecutive passes with no change; any change resets the counter.
        if changedLam == 0:
            passes += 1
        else:
            passes = 0

    return lams, b


In [21]:
def predict(lam,b,test_data, X_train=None, Y_train=None):
    """Compute raw decision values for ``test_data`` from the trained multipliers.

    For every test row ``t`` this returns
    ``sum_k lam[k] * Y_train[k] * <t, X_train[k]> + b`` over the samples with
    ``lam[k] > 0`` (the support vectors).

    Parameters
    ----------
    lam       : multipliers, one per training sample.
    b         : bias term.
    test_data : samples to score, one per row.
    X_train   : training samples the multipliers refer to. When omitted, the
                notebook-level variable ``train_data`` is used (previous behaviour).
    Y_train   : training labels. When omitted, the notebook-level variable
                ``label_train`` is used (previous behaviour).

    Returns
    -------
    1-D float array with one decision value per test row (not yet passed through
    ``np.sign``).
    """
    # Fall back to the notebook globals only when the caller did not supply the
    # training set explicitly; passing them in removes the hidden-state dependency.
    if X_train is None:
        X_train = train_data
    if Y_train is None:
        Y_train = label_train

    n_test = len(test_data)
    if n_test == 0:
        return np.zeros(0) + b

    lam = np.asarray(lam)
    sv_mask = lam > 0
    sv_lambda = lam[sv_mask]                              # multipliers of the support vectors
    sv_fea = np.asarray(X_train, dtype=float)[sv_mask]    # support vectors themselves
    sv_label = np.asarray(Y_train)[sv_mask]               # their labels

    # <test_row, support_vector> for every pair, then weight by lam * y and sum.
    gram = np.dot(np.asarray(test_data, dtype=float), sv_fea.T)
    weights = np.asarray(sv_lambda * sv_label, dtype=float)
    return np.dot(gram, weights) + b

In [22]:
# Load the headerless CSV and name the columns Column0, Column1, ...
# The `prefix=` argument of read_csv was deprecated in pandas 1.4 and removed in 2.0,
# so the prefix is applied afterwards with add_prefix, which works on all versions.
df = pd.read_csv('C:/Users/surya/Downloads/2019EE10481/2019EE10481.csv', header=None).add_prefix('Column')
labelsAll = df['Column25'].unique()  # unique labels, in order of first appearance
labels = labelsAll[0:2]  # first two labels, used for binary classification

# Keep only the rows whose label is one of the two selected classes.
df1 = df[(df.Column25 == labels[0]) | (df.Column25 == labels[1])]

In [23]:
# Encode the class column as +1 / -1: entries equal to 2.0 become 1.0, all others -1.0.
df3 = df1.Column25

label = np.array(df3)
is_positive = label == 2.0
label[is_positive] = 1.0
label[~is_positive] = -1.0

# Fixed positional split: first 177 rows for training, rows 177..576 for testing.
label_train = label[:177]
label_test = label[177:577]


In [24]:
# Keep the first 25 columns as features and split by position,
# using the same row boundaries as the label split (177 / 577).
df1 = df1.iloc[:, :25]
data = df1.values  # DataFrame -> numpy array
train_data = data[:177]
test_data = data[177:577]

In [33]:
# Train on the training split, then classify the test split by the sign of the decision value.
lam, b = SMO(1/(2**(-5.63)), 1e-7, 25, train_data, label_train)
y_predict = predict(lam, b, test_data)
y_predict = np.sign(y_predict)
label_test = np.array(label_test)

# Element-wise hit/miss table and the number of correct predictions.
Accuracy_table = np.ravel(label_test == y_predict)
count = sum(1 for is_correct in Accuracy_table if is_correct)

# Accuracy in percent. Dividing by the actual number of test samples (instead of the
# constant 4, which is only right for exactly 400 samples) keeps the figure correct
# when the test split has a different size.
n_test = len(label_test)
print(100 * count / n_test if n_test else float('nan'))

94.25
