# Comparing Classifiers
## Firstly
This part I'm just defining classes described and defined [here](https://github.com/jonchar/ml-python/blob/master/SVM.ipynb)

So probably just scroll down a bit

In [1]:
import numpy as np
from numpy import array,zeros,vstack,repeat,ones,eye,ndarray
from cvxopt import *
import pylab as pl
from sklearn import preprocessing as pp
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import PCA
from timeit import default_timer as timer

In [2]:
class Linear():
    def __call__(self, a, b):
        x = np.array(a)
        y = np.array(b)
        y = np.transpose(y)
        return np.dot(x, y)

class Polynomial():
    def __call__(self, a, b, p=2):
        x = np.array(a)
        y = np.array(b)
        y = np.transpose(y)
        return (1 + np.dot(x, y)) ** p

class Gaussian():
    def __call__(self, a, b, sigma=5.0):
        x = np.array(a)
        y = np.array(b)
        y = np.transpose(y)
        return np.exp(-np.linalg.norm(x-y)**2 / (2 * (sigma ** 2)))

In [3]:
class svm_problem():
    def __init__(self, C=1.0, gamma=1.0, delta=1.0, kernel=Gaussian()):
        self.C = C
        self.gamma = gamma
        self.delta = delta
        self.kernel = kernel

    def set_variables(self, X, Xstar, Y):
        if(isinstance(X, ndarray)):
            self.X = X
        else:
            self.X = array(X)
        if(isinstance(Xstar, ndarray)):
            self.Xstar = Xstar
        else:
            self.Xstar = array(Xstar)
        if(isinstance(Y, ndarray)):
            self.Y = Y
        else:
            self.Y = array(Y)
        self.num = len(self.X)
        self.dimensions = len(self.X[0])
        self.xi_xj = self.gram_matrix(self.X, self.X)
        self.xstari_xstarj = self.gram_matrix(self.Xstar, self.Xstar)
        self.yi_yj = self.gram_matrix(self.Y, self.Y)

    def gram_matrix(self, X1, X2):
        K = zeros((len(X1), len(X1)))
        for i in range(len(X1)):
            for j in range(len(X1)):
                K[i,j] = self.kernel(X1[i], X2[j])
        return K
    
    def gram_matrix2(self, X1, X2):
        kernel2 = Polynomial()
        K = zeros((len(X1), len(X1)))
        for i in range(len(X1)):
            for j in range(len(X1)):
                K[i,j] = kernel2(X1[i], X2[j])
        return K

In [4]:
class classifier():

    def __init__(self):
        self.w = 0
        self.b = 0
        self.alphas = []
        self.support_vectors = []

    def predict(self, x):
        return np.sign(np.dot(self.w,x)+self.b)
    
    def f_star(self, x, y): # This won't make sense now, but we come back to it later
        return y*(np.dot(self.w,x)+self.b)

In [5]:
class SVM():
    def get_name(self):
        return "SVM"
    def train(self, x, prob : svm_problem):
        x = x
        y = prob.Y
        C = prob.C

        NUM = x.shape[0]
        DIM = x.shape[1]

        K = y[:, None] * x # Yeah, this is a bit different so that it can work on x and x*
        K = np.dot(K, K.T)
        P = matrix(K, tc='d')
        q = matrix(-np.ones((NUM, 1)), tc='d')
        G1 = -np.eye(NUM)
        G2 = np.eye(NUM)
        G = np.vstack((G1, G2))
        G = matrix(G, tc='d')
        h1 = np.zeros(NUM).reshape(-1,1)
        h2 = np.repeat(C, NUM).reshape(-1,1)
        h = np.vstack((h1, h2))
        h = matrix(h, tc='d')
        A = matrix(y.reshape(1, -1), tc='d')
        b = matrix(np.zeros(1), tc='d')
        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)
        alphas = np.array(sol['x'])
        w = np.sum(alphas * y[:, None] * x, axis = 0)
        bacond1 = (alphas > 1e-8)
        bacond2 = (alphas < (C))
        bcond = np.array([a and b for a, b in zip(bacond1, bacond2)]).flatten()
        yS = y[bcond]
        xS = x[bcond]
        aS = alphas[bcond]
        sumTotal = 0
        for s in range(len(yS)):
            innerTotal = 0
            for m in range(len(yS)):
                am = aS[m]
                ym = yS[m]
                xm_xs = prob.kernel(xS[m], xS[s])
                innerTotal += am*ym*xm_xs
            sumTotal += yS[s] - innerTotal
        bias = sumTotal/len(yS)
        clf = classifier()
        clf.w = w
        clf.b = bias[0]
        clf.alphas = alphas
        clf.support_vectors = x[bacond1.flatten()]
        return clf

In [6]:
class SVMp():
    '''
    def get_name(self):
        return "SVM+"
    def train(self, prob : svm_problem):
        self.prob = prob
        self.C = self.prob.C

        self.L = self.prob.num

        self.x = self.prob.X
        self.xStar = self.prob.Xstar
        self.y = self.prob.Y

        self.gamma = self.prob.gamma

        P1 = (self.prob.xi_xj * self.prob.yi_yj) + self.gamma*(self.prob.xstari_xstarj)
        P2 = self.gamma*(self.prob.xstari_xstarj)
        P11 = np.hstack((P1, P2))
        P22 = np.hstack((P2, P2))
        P = np.vstack((P11, P22))

        q = np.hstack((np.repeat(-1, self.L),np.zeros(self.L)))

        positiveEye = np.eye(self.L, dtype='d')
        negativeEye = -np.eye(self.L, dtype='d')
        zeros = np.zeros((self.L, self.L))
        g1 = np.hstack((negativeEye, zeros))
        g2 = np.hstack((zeros, negativeEye))

        G = np.vstack((g1,g2))

        h1 = np.zeros(((self.L),1))
        h2 = np.repeat(self.C, (self.L)).reshape(-1,1)
        h = np.vstack((h1, h2))

        A1 = np.repeat(1, 2*self.L)
        A2 = np.hstack((self.y, np.zeros(self.L)))
        A = np.vstack((A1, A2))

        b = np.zeros(2)
        b = b.reshape(-1,1)

        P = matrix(P, tc='d')
        q = matrix(q, tc='d')
        G = matrix(G, tc='d')
        h = matrix(h, tc='d')
        A = matrix(A, tc='d')
        b = matrix(b, tc='d')

        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)
        alphasAndDeltas = np.array(sol['x'])
        self.alphas = alphasAndDeltas[:self.L]
        self.deltas = alphasAndDeltas[self.L:]

        # Not really writing about it, w as before        
        self.w = np.sum(self.alphas * self.y[:, None] * self.x, axis = 0)
        self.wStar = (1/self.gamma)*np.sum((self.alphas + self.deltas) * self.prob.Xstar, axis = 0)
        print("w* = ", self.wStar)
        #bias = self.getB()
        
        bacond = (self.alphas > 1e-5)
        bdcond = (self.deltas + self.C > 1e-5)

        bcond = np.array([a and b for a, b in zip(bacond, bdcond)]).flatten()
        ####
        
        clf = classifier()
        clf.w = self.w
        clf.b = self.getB()
        clf.alphas = self.alphas
        clf.support_vectors = self.x[(self.alphas > 1e-5).flatten()]
        clf1 = classifier()
        clf1.w = self.wStar
        clf1.b = self.getBstar()
        clf1.alphas = self.alphas
        clf1.support_vectors = self.prob.Xstar[bcond]
        return clf
    
    def F(self, i):
        runningTotal = 0
        for j in range(self.L):
            runningTotal += self.alphas[j] * self.y[j] * self.prob.kernel(self.x[i], self.x[j])
        return runningTotal[0]
    
    def f(self, i):
        runningTotal = 0
        for j in range(self.L):
            runningTotal += (self.alphas[j] + self.deltas[j]) * self.prob.kernel(self.xStar[i], self.xStar[j])
            if (self.alphas[j] + self.deltas[j] > -1e-5) and (self.alphas[j] + self.deltas[j] < 1e-5):
                print("This makes a < C ",j)
        return runningTotal[0]
    
    def sPos(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == 1:
                runningTotal += 1-(self.f(i)/self.gamma)-self.F(i)
        return runningTotal
    
    def sNeg(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == -1:
                runningTotal += 1-(self.f(i)/self.gamma)-self.F(i)
        return runningTotal
    
    def nPos(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == 1:
                runningTotal += 1
        return runningTotal
    
    def nNeg(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == -1:
                runningTotal += 1
        return runningTotal
    
    def getB(self):
        print(np.dot(self.alphas.T, self.y), np.sum((self.alphas + self.deltas), axis = 0))
        print(self.bPlusbStar())
        print(self.nPos())
        print(self.bMinusbStar())
        print(self.nNeg())
        return ((self.bPlusbStar()/self.nPos())+(self.bMinusbStar()/self.nNeg()))/2
    
    def getBstar(self):
        print(np.dot(self.alphas.T, self.y), np.sum((self.alphas + self.deltas), axis = 0))
        print(self.bPlusbStar())
        print(self.nPos())
        print(self.bMinusbStar())
        print(self.nNeg())
        return ((self.bPlusbStar()/self.nPos())-(self.bMinusbStar()/self.nNeg()))/2
    
    def bPlusbStar(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == 1:
                runningTotal += 1 - np.dot(self.wStar, self.xStar[i]) - np.dot(self.w, self.x[i])
        return runningTotal
    
    
    def bMinusbStar(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == -1:
                runningTotal += -1 + np.dot(self.wStar, self.xStar[i]) - np.dot(self.w, self.x[i])
        return runningTotal
    
    class SVMp():
    '''
    def get_name(self):
        return "SVM+"
    def train(self, prob : svm_problem):
        self.prob = prob
        self.C = self.prob.C

        self.L = self.prob.num

        self.x = self.prob.X
        self.xStar = self.prob.Xstar
        self.y = self.prob.Y

        self.gamma = self.prob.gamma

        P1 = (self.prob.xi_xj * self.prob.yi_yj) + self.gamma*(self.prob.xstari_xstarj)
        P2 = self.gamma*(self.prob.xstari_xstarj)
        P11 = np.hstack((P1, P2))
        P22 = np.hstack((P2, P2))
        P = np.vstack((P11, P22))

        q = np.hstack((np.repeat(-1, self.L),np.zeros(self.L)))

        positiveEye = np.eye(self.L, dtype='d')
        negativeEye = -np.eye(self.L, dtype='d')
        zeros = np.zeros((self.L, self.L))
        g1 = np.hstack((negativeEye, zeros))
        g2 = np.hstack((zeros, negativeEye))

        G = np.vstack((g1,g2))

        h1 = np.zeros(((self.L),1))
        h2 = np.repeat(self.C, (self.L)).reshape(-1,1)
        h = np.vstack((h1, h2))

        A1 = np.repeat(1, 2*self.L)
        A2 = np.hstack((self.y, np.zeros(self.L)))
        A = np.vstack((A1, A2))

        b = np.zeros(2)
        b = b.reshape(-1,1)

        P = matrix(P, tc='d')
        q = matrix(q, tc='d')
        G = matrix(G, tc='d')
        h = matrix(h, tc='d')
        A = matrix(A, tc='d')
        b = matrix(b, tc='d')

        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)
        alphasAndDeltas = np.array(sol['x'])
        self.alphas = alphasAndDeltas[:self.L]
        self.deltas = alphasAndDeltas[self.L:]

        # Not really writing about it, w as before        
        self.w = np.sum(self.alphas * self.y[:, None] * self.x, axis = 0)
        self.wStar = (1/self.gamma)*np.sum((self.alphas + self.deltas) * self.prob.Xstar, axis = 0)
        #bias = self.getB()
        
        bacond = (self.alphas > 1e-5)
        bdcond = (self.deltas + self.C > 1e-5)

        bcond = np.array([a and b for a, b in zip(bacond, bdcond)]).flatten()
        ####
        
        clf = classifier()
        clf.w = self.w
        clf.b = self.getB()
        clf.alphas = self.alphas
        clf.support_vectors = self.x[(self.alphas > 1e-5).flatten()]
        clf1 = classifier()
        clf1.w = self.wStar
        clf1.b = self.getBstar()
        clf1.alphas = self.alphas
        clf1.support_vectors = self.prob.Xstar[bcond]
        return clf #, clf1
    
    def F(self, i):
        runningTotal = 0
        for j in range(self.L):
            runningTotal += self.alphas[j] * self.y[j] * self.prob.kernel(self.x[i], self.x[j])
        return runningTotal[0]
    
    def f(self, i):
        runningTotal = 0
        for j in range(self.L):
            runningTotal += (self.alphas[j] + self.deltas[j]) * self.prob.kernel(self.xStar[i], self.xStar[j])
            if (self.alphas[j] + self.deltas[j] > -1e-5) and (self.alphas[j] + self.deltas[j] < 1e-5):
                print("This makes a < C ",j)
        return runningTotal[0]
    
    def sPos(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == 1:
                runningTotal += 1-(self.f(i)/self.gamma)-self.F(i)
        return runningTotal
    
    def sNeg(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == -1:
                runningTotal += 1-(self.f(i)/self.gamma)-self.F(i)
        return runningTotal
    
    def nPos(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == 1:
                runningTotal += 1
        return runningTotal
    
    def nNeg(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == -1:
                runningTotal += 1
        return runningTotal
    
    def getB(self):
        return ((self.bPlusbStar()/self.nPos())+(self.bMinusbStar()/self.nNeg()))/2
    
    def getBstar(self):
        return ((self.bPlusbStar()/self.nPos())-(self.bMinusbStar()/self.nNeg()))/2
    
    def bPlusbStar(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == 1:
                runningTotal += 1 - np.dot(self.wStar, self.xStar[i]) - np.dot(self.w, self.x[i])
        return runningTotal
    
    
    def bMinusbStar(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.y[i] == -1:
                runningTotal += -1 + np.dot(self.wStar, self.xStar[i]) - np.dot(self.w, self.x[i])
        return runningTotal

In [7]:
class SVMdp_simp():
    def get_name(self):
        return "SVMd+ - simplified approach"
    def train(self, prob : svm_problem):
        x = prob.X
        xStar = prob.Xstar
        y = prob.Y
        C = prob.C

        NUM = x.shape[0]
        DIM = x.shape[1]
        
        svm = SVM()
        xStar_clf = svm.train(xStar, prob)
        
        xi_star_amended = np.zeros(prob.num)
        for i in range(prob.num):
            output = (1- prob.Y[i]*(np.dot(xStar_clf.w,prob.Xstar[i])+xStar_clf.b))
            xi_star_amended[i] = max(0, output)

        Ky = prob.yi_yj
        Kx = prob.xi_xj
        K = Ky*Kx
        P = matrix(K, tc='d')
        q = matrix(-np.ones((NUM, 1)), tc='d')
        G1 = -np.eye(NUM)
        G2 = np.eye(NUM)
        G3 = xi_star_amended.reshape(1,-1)
        G = np.vstack((G1, G2))
        G = np.vstack((G, G3))
        G = matrix(G, tc='d')
        h1 = np.zeros(NUM).reshape(-1,1)
        h2 = np.repeat(C, NUM).reshape(-1,1)
        h3 = sum(xi_star_amended)*C
        h = np.vstack((h1, h2))
        h = np.vstack((h, h3))
        h = matrix(h, tc='d')
        A = matrix(y.reshape(1, -1), tc='d')
        b = matrix(np.zeros(1), tc='d')
        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)
        alphas = np.array(sol['x'])
        w = np.sum(alphas * y[:, None] * x, axis = 0)

        bacond1 = (alphas > 1e-8)
        bacond2 = (alphas < C)
        bcond = np.array([a and b for a, b in zip(bacond1, bacond2)]).flatten()

        yS = y[bcond]
        xS = x[bcond]
        aS = alphas[bcond]

        sumTotal = 0
        for s in range(len(yS)):
            innerTotal = 0
            for m in range(len(yS)):
                am = aS[m]
                ym = yS[m]
                xm_xs = prob.kernel(xS[m], xS[s])
                innerTotal += am*ym*xm_xs
            sumTotal += yS[s] - innerTotal

        bias = sumTotal/len(yS)

        clf = classifier()
        clf.w = w
        clf.b = bias
        clf.alphas = alphas
        clf.support_vectors = prob.X[bacond1.flatten()]
        return clf

In [8]:
class SVMdp():
    def get_name(self):
        return "SVMd+"
    def train(self, prob : svm_problem):
        self.prob = prob
        self.kernel = self.prob.kernel
        self.C = self.prob.C

        self.L = self.prob.num
        self.M = self.prob.dimensions

        self.x = self.prob.X
        self.y = self.prob.Y

        self.gamma = self.prob.gamma
        self.delta = self.prob.delta
        
        kernel = prob.kernel
        C = prob.C

        L = prob.num
        M = prob.dimensions

        x = prob.X
        y = prob.Y

        H11 = (prob.xi_xj * prob.yi_yj) + self.gamma*(prob.xstari_xstarj * prob.yi_yj)
        H12 = self.gamma*(prob.xstari_xstarj * prob.yi_yj)
        H1 = np.hstack((H11, H12))
        H2 = np.hstack((H12, H12))
        H = np.vstack((H1, H2))

        f = np.hstack((np.repeat(-1, L),np.zeros(L)))

        positiveEye = np.eye(L, dtype='d')
        negativeEye = -np.eye(L, dtype='d')
        zeros = np.zeros((L, L))
        g1 = np.hstack((zeros, negativeEye))
        g2 = np.hstack((negativeEye, zeros))
        g3 = np.hstack((positiveEye, positiveEye))

        G = np.vstack((g1,g2))
        G = np.vstack((G,g3))

        h1 = np.repeat(C, (L)).reshape(-1,1)
        h2 = np.zeros(((L),1))
        h2 = np.vstack((h1, h2))
        h3 = np.repeat((self.delta*C), L).reshape(-1,1)
        h = np.vstack((h2, h3))

        Aeq1 = np.hstack((prob.Y, np.zeros(L)))
        Aeq2 = np.hstack((prob.Y, prob.Y))
        Aeq = np.vstack((Aeq1, Aeq2))

        beq = np.zeros(2)
        beq = beq.reshape(-1,1)

        P = matrix(H, tc='d')
        q = matrix(f, tc='d')
        G = matrix(G, tc='d')
        h = matrix(h, tc='d')
        A = matrix(Aeq, tc='d')
        b = matrix(beq, tc='d')

        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)
        alphasAndDeltas = np.array(sol['x'])
        self.alphas = alphasAndDeltas[:L]
        self.deltas = alphasAndDeltas[L:]
        
        self.w = np.sum(self.alphas * self.y[:, None] * self.x, axis = 0)
        self.wStar = (1/self.gamma)*np.sum((self.alphas + self.deltas) * self.y[:, None] * self.prob.Xstar, axis = 0)
        
        bacond = (self.alphas > 1e-5)
        bdcond = (self.deltas + self.C > 1e-5)

        bcond = np.array([a and b for a, b in zip(bacond, bdcond)]).flatten()
        
        clf = classifier()
        clf.w = self.w
        self.b = self.getB()
        clf.b = self.b
        clf.alphas = self.alphas
        clf.support_vectors = self.x[bacond.flatten()]
        
        priv_clf = classifier()
        priv_clf.w = self.wStar
        priv_clf.b = self.getBstar()
        priv_clf.support_vectors = self.prob.Xstar[np.array(bacond).flatten()]
        return clf #, priv_clf
    
    def F(self, i):
        runningTotal = 0
        for j in range(self.L):
            runningTotal += self.alphas[j] * self.y[j] * self.prob.kernel(self.x[i], self.x[j])
        return runningTotal[0]
    
    def f(self, i):
        runningTotal = 0
        for j in range(self.L):
            runningTotal += (self.alphas[j] + self.deltas[j]) * self.prob.kernel(self.xStar[i], self.xStar[j])
            if (self.alphas[j] + self.deltas[j] > -1e-5) and (self.alphas[j] + self.deltas[j] < 1e-5):
                print("This makes a < C ",j)
        return runningTotal[0]
    
    def sPos(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.deltas[i] -self.C < 1e-5 and self.y[i] == 1:
                runningTotal += 1-np.dot(self.w, self.prob.X[i])
        return runningTotal
    
    def sNeg(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.deltas[i] -self.C < 1e-5 and self.y[i] == -1:
                runningTotal += -1 - np.dot(self.w, self.prob.X[i])
        return runningTotal
    
    def nPos(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.deltas[i] -self.C < 1e-5 and self.y[i] == 1:
                runningTotal += 1
        return runningTotal
    
    def nNeg(self):
        runningTotal = 0
        for i in range(self.L):
            if self.alphas[i] > 1e-5 and self.deltas[i] -self.C < 1e-5 and self.y[i] == -1:
                runningTotal += 1
        return runningTotal
    
    def getB(self):
        return ((self.sPos()/self.nPos())+(self.sNeg()/self.nNeg()))/2
    
    def q(self):
        runningTotal = 0
        for i in range(self.L):
            if self.deltas[i] + self.C > 1e-5:
                runningTotal += np.dot(self.w, self.prob.X[i])/2 - np.dot(self.wStar, self.prob.Xstar[i])
        return runningTotal
    
    def getBstar(self):
        return self.q() / self.L

In [9]:
def constructProblem(data, xIndices, xStarIndices, yIndex):
    x = data[:,[xIndices]].astype(np.float)
    x = x[:,0]
    xStar = data[:,[xStarIndices]].astype(np.float)
    xStar = xStar[:,0]
    y = data[:,[yIndex]].astype(np.float).flatten()
    prob = svm_problem()
    prob.set_variables(x, xStar, y)
    return prob

In [10]:
def get_accuracy(tp, fp, fn, tn):
    return (tp+tn)/(tp+fp+fn+tn+0.000001)

In [11]:
def get_error(tp, fp, fn, tn):
    return (fp+fn)/(tp+fp+fn+tn+0.000001)

In [12]:
def get_recall(tp, fp, fn, tn):
    return (tp)/(tp+fn+0.000001)

In [13]:
def get_specificity(tp, fp, fn, tn):
    return (tn)/(fp+tn+0.000001)

In [14]:
def get_precision(tp, fp, fn, tn):
    return (tp)/(tp+fp+0.000001)

In [15]:
def get_prevalence(tp, fp, fn, tn):
    return (tp+fn)/(tp+fp+fn+tn+0.000001)

# Gawd I need to tidy the above up
### But making progress, so just going to power on and come back to this later

In [16]:
def get_array(file):
    return np.load(file)

In [17]:
'''test_labels_1_0 = get_array("Data/Dataset137/tech137-0-0-test_labels.npy")
test_x_1_0 = get_array("Data/Dataset137/tech137-0-0-test_normal.npy")
train_y_1_0 = get_array("Data/Dataset137/tech137-0-0-train_labels.npy")
train_x_1_0 = get_array("Data/Dataset137/tech137-0-0-train_normal.npy")
train_xS_1_0 = get_array("Data/Dataset137/tech137-0-0-train_priv.npy")
prob_1_0 = svm_problem()
prob_1_0.set_variables(train_x_1_0, train_xS_1_0, train_y_1_0)

test_labels_1_1 = get_array("Data/Dataset137/tech137-0-1-test_labels.npy")
test_x_1_1 = get_array("Data/Dataset137/tech137-0-1-test_normal.npy")
train_y_1_1 = get_array("Data/Dataset137/tech137-0-1-train_labels.npy")
train_x_1_1 = get_array("Data/Dataset137/tech137-0-1-train_normal.npy")
train_xS_1_1 = get_array("Data/Dataset137/tech137-0-1-train_priv.npy")
prob_1_1 = svm_problem()
prob_1_1.set_variables(train_x_1_1, train_xS_1_1, train_y_1_1)

test_labels_1_2 = get_array("Data/Dataset137/tech137-0-2-test_labels.npy")
test_x_1_2 = get_array("Data/Dataset137/tech137-0-2-test_normal.npy")
train_y_1_2 = get_array("Data/Dataset137/tech137-0-2-train_labels.npy")
train_x_1_2 = get_array("Data/Dataset137/tech137-0-2-train_normal.npy")
train_xS_1_2 = get_array("Data/Dataset137/tech137-0-2-train_priv.npy")
prob_1_2 = svm_problem()
prob_1_2.set_variables(train_x_1_2, train_xS_1_2, train_y_1_2)

test_labels_1_3 = get_array("Data/Dataset137/tech137-0-3-test_labels.npy")
test_x_1_3 = get_array("Data/Dataset137/tech137-0-3-test_normal.npy")
train_y_1_3 = get_array("Data/Dataset137/tech137-0-3-train_labels.npy")
train_x_1_3 = get_array("Data/Dataset137/tech137-0-3-train_normal.npy")
train_xS_1_3 = get_array("Data/Dataset137/tech137-0-3-train_priv.npy")
prob_1_3 = svm_problem()
prob_1_3.set_variables(train_x_1_3, train_xS_1_3, train_y_1_3)

test_labels_1_4 = get_array("Data/Dataset137/tech137-0-4-test_labels.npy")
test_x_1_4 = get_array("Data/Dataset137/tech137-0-4-test_normal.npy")
train_y_1_4 = get_array("Data/Dataset137/tech137-0-4-train_labels.npy")
train_x_1_4 = get_array("Data/Dataset137/tech137-0-4-train_normal.npy")
train_xS_1_4 = get_array("Data/Dataset137/tech137-0-4-train_priv.npy")
prob_1_4 = svm_problem()
prob_1_4.set_variables(train_x_1_4, train_xS_1_4, train_y_1_4)

test_labels_1_5 = get_array("Data/Dataset137/tech137-0-5-test_labels.npy")
test_x_1_5 = get_array("Data/Dataset137/tech137-0-5-test_normal.npy")
train_y_1_5 = get_array("Data/Dataset137/tech137-0-5-train_labels.npy")
train_x_1_5 = get_array("Data/Dataset137/tech137-0-5-train_normal.npy")
train_xS_1_5 = get_array("Data/Dataset137/tech137-0-5-train_priv.npy")
prob_1_5 = svm_problem()
prob_1_5.set_variables(train_x_1_5, train_xS_1_5, train_y_1_5)

test_labels_1_6 = get_array("Data/Dataset137/tech137-0-6-test_labels.npy")
test_x_1_6 = get_array("Data/Dataset137/tech137-0-6-test_normal.npy")
train_y_1_6 = get_array("Data/Dataset137/tech137-0-6-train_labels.npy")
train_x_1_6 = get_array("Data/Dataset137/tech137-0-6-train_normal.npy")
train_xS_1_6 = get_array("Data/Dataset137/tech137-0-6-train_priv.npy")
prob_1_6 = svm_problem()
prob_1_6.set_variables(train_x_1_6, train_xS_1_6, train_y_1_6)

test_labels_1_7 = get_array("Data/Dataset137/tech137-0-7-test_labels.npy")
test_x_1_7 = get_array("Data/Dataset137/tech137-0-7-test_normal.npy")
train_y_1_7 = get_array("Data/Dataset137/tech137-0-7-train_labels.npy")
train_x_1_7 = get_array("Data/Dataset137/tech137-0-7-train_normal.npy")
train_xS_1_7 = get_array("Data/Dataset137/tech137-0-7-train_priv.npy")
prob_1_7 = svm_problem()
prob_1_7.set_variables(train_x_1_7, train_xS_1_7, train_y_1_7)

test_labels_1_8 = get_array("Data/Dataset137/tech137-0-8-test_labels.npy")
test_x_1_8 = get_array("Data/Dataset137/tech137-0-8-test_normal.npy")
train_y_1_8 = get_array("Data/Dataset137/tech137-0-8-train_labels.npy")
train_x_1_8 = get_array("Data/Dataset137/tech137-0-8-train_normal.npy")
train_xS_1_8 = get_array("Data/Dataset137/tech137-0-8-train_priv.npy")
prob_1_8 = svm_problem()
prob_1_8.set_variables(train_x_1_8, train_xS_1_8, train_y_1_8)

test_labels_1_9 = get_array("Data/Dataset137/tech137-0-9-test_labels.npy")
test_x_1_9 = get_array("Data/Dataset137/tech137-0-9-test_normal.npy")
train_y_1_9 = get_array("Data/Dataset137/tech137-0-9-train_labels.npy")
train_x_1_9 = get_array("Data/Dataset137/tech137-0-9-train_normal.npy")
train_xS_1_9 = get_array("Data/Dataset137/tech137-0-9-train_priv.npy")
prob_1_9 = svm_problem()
prob_1_9.set_variables(train_x_1_9, train_xS_1_9, train_y_1_9)

prob_1 = [prob_1_0, prob_1_1, prob_1_2, prob_1_3, prob_1_4, prob_1_5, prob_1_6, prob_1_7, prob_1_8, prob_1_9]

test_labels_2 = get_array("Data/Dataset174/tech174-0-0-test_labels.npy")
test_x_2 = get_array("Data/Dataset174/tech174-0-0-test_normal.npy")
train_y_2 = get_array("Data/Dataset174/tech174-0-0-train_labels.npy")
train_x_2 = get_array("Data/Dataset174/tech174-0-0-train_normal.npy")
train_xS_2 = get_array("Data/Dataset174/tech174-0-0-train_priv.npy")
prob_2 = svm_problem()
prob_2.set_variables(train_x_2, train_xS_2, train_y_2)

test_labels_3 = get_array("Data/Dataset197/tech197-0-0-test_labels.npy")
test_x_3 = get_array("Data/Dataset197/tech197-0-0-test_normal.npy")
train_y_3 = get_array("Data/Dataset197/tech197-0-0-train_labels.npy")
train_x_3 = get_array("Data/Dataset197/tech197-0-0-train_normal.npy")
train_xS_3 = get_array("Data/Dataset197/tech197-0-0-train_priv.npy")
prob_3 = svm_problem()
prob_3.set_variables(train_x_3, train_xS_3, train_y_3)

test_labels_4 = get_array("Data/Dataset219/tech219-0-0-test_labels.npy")
test_x_4 = get_array("Data/Dataset219/tech219-0-0-test_normal.npy")
train_y_4 = get_array("Data/Dataset219/tech219-0-0-train_labels.npy")
train_x_4 = get_array("Data/Dataset219/tech219-0-0-train_normal.npy")
train_xS_4 = get_array("Data/Dataset219/tech219-0-0-train_priv.npy")
prob_4 = svm_problem()
prob_4.set_variables(train_x_4, train_xS_4, train_y_4)

test_labels_5 = get_array("Data/Dataset254/tech254-0-0-test_labels.npy")
test_x_5 = get_array("Data/Dataset254/tech254-0-0-test_normal.npy")
train_y_5 = get_array("Data/Dataset254/tech254-0-0-train_labels.npy")
train_x_5 = get_array("Data/Dataset254/tech254-0-0-train_normal.npy")
train_xS_5 = get_array("Data/Dataset254/tech254-0-0-train_priv.npy")
prob_5 = svm_problem()
prob_5.set_variables(train_x_5, train_xS_5, train_y_5) '''

'test_labels_1_0 = get_array("Data/Dataset137/tech137-0-0-test_labels.npy")\ntest_x_1_0 = get_array("Data/Dataset137/tech137-0-0-test_normal.npy")\ntrain_y_1_0 = get_array("Data/Dataset137/tech137-0-0-train_labels.npy")\ntrain_x_1_0 = get_array("Data/Dataset137/tech137-0-0-train_normal.npy")\ntrain_xS_1_0 = get_array("Data/Dataset137/tech137-0-0-train_priv.npy")\nprob_1_0 = svm_problem()\nprob_1_0.set_variables(train_x_1_0, train_xS_1_0, train_y_1_0)\n\ntest_labels_1_1 = get_array("Data/Dataset137/tech137-0-1-test_labels.npy")\ntest_x_1_1 = get_array("Data/Dataset137/tech137-0-1-test_normal.npy")\ntrain_y_1_1 = get_array("Data/Dataset137/tech137-0-1-train_labels.npy")\ntrain_x_1_1 = get_array("Data/Dataset137/tech137-0-1-train_normal.npy")\ntrain_xS_1_1 = get_array("Data/Dataset137/tech137-0-1-train_priv.npy")\nprob_1_1 = svm_problem()\nprob_1_1.set_variables(train_x_1_1, train_xS_1_1, train_y_1_1)\n\ntest_labels_1_2 = get_array("Data/Dataset137/tech137-0-2-test_labels.npy")\ntest_x_1_

In [18]:
def t(data, model, test_x, test_y):
    svm = model
    
    if isinstance(svm, SVM):
        clf = svm.train(data.X, data)
    else:
        clf = svm.train(data)
    predictions = []
    for test_point in test_x:
        predictions.append(clf.predict(test_point))
        
    tp = 0
    fp = 0
    fn = 0
    tn = 0
    for i in range(len(test_y)):
        if test_y[i] == 1 and predictions[i] == 1:
            tp += 1
        if test_y[i] == -1 and predictions[i] == 1:
            fp += 1
        if test_y[i] == 1 and predictions[i] == -1:
            fn += 1
        if test_y[i] == -1 and predictions[i] == -1:
            tn += 1
    return (tp, fp, fn, tn)

def comp(clf, prob, test_x, test_y):
    tp = 0
    fp = 0
    fn = 0
    tn = 0
    average_time = 0
    #for fold in probs:
    start = timer()
    a, b, c, d = t(prob, clf, test_x, test_y)
    average_time += timer() - start
    tp += a
    fp += b
    fn += c
    tn += d
    return tp,fp,fn,tn,average_time

def grid_search(C, Delta, Gamma, dataset):
    clfs = [SVM(), SVMdp_simp(), SVMdp(), SVMp()]

    prob_data = []
    for i in range(10):
        test_labels = get_array("Data/Dataset"+str(dataset)+"/tech"+str(dataset)+"-0-"+str(i)+"-test_labels.npy")
        test_x = get_array("Data/Dataset"+str(dataset)+"/tech"+str(dataset)+"-0-"+str(i)+"-test_normal.npy")
        train_y = get_array("Data/Dataset"+str(dataset)+"/tech"+str(dataset)+"-0-"+str(i)+"-train_labels.npy")
        train_x = get_array("Data/Dataset"+str(dataset)+"/tech"+str(dataset)+"-0-"+str(i)+"-train_normal.npy")
        train_xS = get_array("Data/Dataset"+str(dataset)+"/tech"+str(dataset)+"-0-"+str(i)+"-train_priv.npy")
        prob_data.append([test_labels, test_x, train_y, train_x, train_xS])
        
    for c_ in C:
        for delta_ in Delta:
            for gamma_ in Gamma:
                svm_tp = 0
                svm_fp = 0
                svm_fn = 0
                svm_tn = 0
                svm_avg_time = 0
                
                svmdsa_tp = 0
                svmdsa_fp = 0
                svmdsa_fn = 0
                svmdsa_tn = 0
                svmdsa_avg_time = 0
                
                svmdp_tp = 0
                svmdp_fp = 0
                svmdp_fn = 0
                svmdp_tn = 0
                svmdp_avg_time = 0
                
                svmp_tp = 0
                svmp_fp = 0
                svmp_fn = 0
                svmp_tn = 0
                svmp_avg_time = 0
                
                for data in prob_data:
                    prob = svm_problem(C=c_, delta=delta_, gamma=gamma_)
                    prob.set_variables(data[3], data[4], data[2])
                    
                    for clf in clfs:
                        tp = 0
                        fp = 0
                        fn = 0
                        tn = 0
                        avg_time = 0

                        tp_,fp_,fn_,tn_,avg_time_ = comp(clf, prob, data[1], data[0])
                        
                        if isinstance(clf, SVM):
                            svm_tp += tp_
                            svm_fp += fp_
                            svm_fn += fn_
                            svm_tn += tn_
                            svm_avg_time += avg_time
                        elif isinstance(clf, SVMdp_simp):
                            svmdsa_tp += tp_
                            svmdsa_fp += fp_
                            svmdsa_fn += fn_
                            svmdsa_tn += tn_
                            svmdsa_avg_time += avg_time
                        elif isinstance(clf, SVMdp):
                            svmdp_tp += tp_
                            svmdp_fp += fp_
                            svmdp_fn += fn_
                            svmdp_tn += tn_
                            svmdp_avg_time += avg_time
                        else: 
                            svmp_tp += tp_
                            svmp_fp += fp_
                            svmp_fn += fn_
                            svmp_tn += tn_
                            svmp_avg_time += avg_time
                
            for clf in clfs:
                tp=0
                fp=0
                fn=0
                fn=0
                avg_time=0
                if isinstance(clf, SVM):
                        tp = svm_tp
                        fp = svm_fp
                        fn = svm_fn
                        tn = svm_tn
                        avg_time = svm_avg_time
                elif isinstance(clf, SVMdp_simp):
                    tp = svmdsa_tp
                    fp = svmdsa_fp
                    fn = svmdsa_fn
                    tn = svmdsa_tn
                    avg_time = svmdsa_avg_time
                elif isinstance(clf, SVMdp):
                    tp = svmdp_tp
                    fp = svmdp_fp
                    fn = svmdp_fn
                    tn = svmdp_tn
                    avg_time = svmdp_avg_time
                else:
                    tp = svmp_tp
                    fp = svmp_fp
                    fn = svmp_fn
                    tn = svmp_tn
                    avg_time = svmp_avg_time

                accuracy = get_accuracy(tp, fp, fn, tn)
                error = get_error(tp, fp, fn, tn)
                recall = get_recall(tp, fp, fn, tn)
                specificity = get_specificity(tp, fp, fn, tn)
                precision = get_precision(tp, fp, fn, tn)
                prevalence = get_prevalence(tp, fp, fn, tn)
                print(clf.get_name())
                print("C = ", c_, " Delta = ", delta_, " Gamma = ", gamma_)
                print("=====================================")
                print("|          |  Pred: YES |  Pred: NO |")
                print("+----------+------------+-----------+")
                print("| Act: YES |  ", '{:7d}'.format(tp), " | ", '{:7d}'.format(fn), " |")
                print("+----------+------------+-----------+")
                print("| Act: NO  |  ", '{:7d}'.format(fp), " | ", '{:7d}'.format(tn), " |")
                print("+----------+------------+-----------+")

                print("accuracy = ", accuracy)
                print("error = ", error)
                print("recall = ", recall)
                print("specificity = ", specificity)
                print("precision = ", precision)
                print("prevalence = ", prevalence)
                print("average time to train classifier = ", avg_time, "\n")

    

In [19]:
grid_search(C=[0.1, 1.0, 10], Delta=[1.0], Gamma=[1.0], dataset=137)

SVM
C =  0.1  Delta =  1.0  Gamma =  1.0
|          |  Pred: YES |  Pred: NO |
+----------+------------+-----------+
| Act: YES |        82  |        6  |
+----------+------------+-----------+
| Act: NO  |        10  |       81  |
+----------+------------+-----------+
accuracy =  0.9106145200524328
error =  0.089385474360975
recall =  0.931818171229339
specificity =  0.8901098803284628
precision =  0.8913043381379964
prevalence =  0.4916201089853625
average time to train classifier =  0 

SVMd+ - simplified approach
C =  0.1  Delta =  1.0  Gamma =  1.0
|          |  Pred: YES |  Pred: NO |
+----------+------------+-----------+
| Act: YES |        88  |        0  |
+----------+------------+-----------+
| Act: NO  |        13  |       78  |
+----------+------------+-----------+
accuracy =  0.9273742964951157
error =  0.0726256979182922
recall =  0.9999999886363637
specificity =  0.857142847723705
precision =  0.8712871200862662
prevalence =  0.4916201089853625
average time to train class

In [20]:
#comp(prob_1, test_x_1, test_labels_1)

In [21]:
#comp(prob_2, test_x_2, test_labels_2)

In [22]:
#comp(prob_3, test_x_3, test_labels_3)

In [23]:
#comp(prob_4, test_x_4, test_labels_4)

In [24]:
#comp(prob_5, test_x_5, test_labels_5)