In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [1]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
import itertools
import cvxopt
import math

cvxopt.solvers.options['show_progress'] = False

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
# Six hand-entered feature vectors, 30 values each; every vector is one training sample.
# NOTE(review): the weights look like TF-IDF scores — confirm where they were computed.
vec1 = [1.690,0.991,1.690,1.389,1.690,1.690,1.690,1.690,1.690,1.690,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
vec2 = [0,0.991,0,0,0,0,0,0,0,0,1.690,1.690,0,0,0,0,0,0,0,0,0,1.389,0,0,0,0,0,0,0,0]
vec3 = [0,0.991,0,0,0,0,0,0,0,0,0,0,0,1.690,1.690,1.690,1.690,0,0,0,0,0,0,0,0,0,0,0,0,0]
vec4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.690,1.690,1.690,1.690,1.389,1.690,1.690,1.690,1.690,1.690,0,0,0]
vec5 = [0,1.982,0,0,0,0,0,0,0,0,0,0,1.690,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
vec6 = [0,0.991,0,1.389,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.690,1.690,1.690]

In [3]:
# Training set: samples in order vec1..vec6, with labels +1 for the first
# three samples and -1 for the last three.
all_vec = [vec1, vec2, vec3, vec4, vec5, vec6]
all_y = [1,1,1,-1,-1,-1]

In [7]:
def linear_kernel(**kwargs):
    """Build the linear kernel ``k(x1, x2) = <x1, x2>``.

    Any keyword arguments are accepted and ignored, so this factory can be
    called with the same ``power``/``gamma``/``coef`` keywords as the other
    kernel factories.

    Returns:
        A two-argument function returning ``np.inner(x1, x2)``.
    """
    return lambda x1, x2: np.inner(x1, x2)

def polynomial_kernel(power, coef, **kwargs):
    """Build the polynomial kernel ``k(x1, x2) = (<x1, x2> + coef) ** power``.

    Args:
        power: Exponent applied to the shifted inner product.
        coef: Constant added to the inner product before exponentiation.
        **kwargs: Ignored; lets callers pass keywords meant for other kernels.

    Returns:
        A two-argument function evaluating the kernel on one pair of samples.
    """
    def poly(x1, x2):
        shifted = np.inner(x1, x2) + coef
        return shifted ** power

    return poly

def rbf_kernel(gamma, **kwargs):
    """Build the Gaussian (RBF) kernel ``k(x1, x2) = exp(-gamma * ||x1 - x2||^2)``.

    Args:
        gamma: Scale applied to the squared Euclidean distance.
        **kwargs: Ignored; lets callers pass keywords meant for other kernels.

    Returns:
        A two-argument function evaluating the kernel on one pair of samples.
        The samples may be numpy arrays or plain sequences (lists, tuples).
    """
    def f(x1, x2):
        # Coerce first: subtracting two plain Python lists raises TypeError,
        # whereas the other kernels already accept lists through np.inner.
        difference = np.asarray(x1) - np.asarray(x2)
        distance = np.linalg.norm(difference) ** 2
        return np.exp(-gamma * distance)
    return f


class SupportVectorMachine(object):
    """Kernel support vector classifier trained by solving the dual QP with cvxopt.

    Parameters
    ----------
    C : number
        Upper bound placed on every Lagrange multiplier.
    kernel : callable
        Kernel *factory*. ``fit`` calls it with the keyword arguments
        ``power``, ``gamma`` and ``coef`` and expects a function
        ``k(x1, x2)`` back. After ``fit`` the ``kernel`` attribute holds that
        built function.
    power, coef : number
        Forwarded to the kernel factory.
    gamma : number or None
        Forwarded to the kernel factory; a falsy value is replaced by
        ``1 / n_features`` the first time ``fit`` runs.

    Attributes set by ``fit``
    -------------------------
    alpha : multipliers larger than 1e-7.
    support_vectors, support_vector_labels : the matching rows of X and y.
    b : bias term.
    """

    # Separator printed under every section title of the training report.
    _RULE = "--------------------------------------"

    def __init__(self, C=1, kernel=polynomial_kernel, power=2, gamma=None, coef=1):
        self.C = C
        self.kernel = kernel
        self.power = power
        self.gamma = gamma
        self.coef = coef
        self.alpha = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.b = None
        # Bookkeeping that lets fit() be called more than once (see _build_kernel).
        self._kernel_factory = None
        self._kernel_fn = None

    def matprint(self, mat, fmt="g"):
        """Print a 2-D array as right-aligned columns.

        Every column is padded to the width of its longest entry when
        formatted with ``fmt``; each row is printed on its own line.
        """
        col_maxes = [max([len(("{:"+fmt+"}").format(x)) for x in col]) for col in mat.T]
        for x in mat:
            for i, y in enumerate(x):
                print(("{:"+str(col_maxes[i])+fmt+"}").format(y), end="  ")
            print("")

    def _print_matrix(self, title, mat):
        """Print ``title``, the separator rule, ``mat`` via ``matprint`` and a blank gap."""
        print(title)
        print(self._RULE)
        self.matprint(mat)
        print("\n")

    def _print_items(self, title, items):
        """Print ``title``, the separator rule, one item per line and a blank gap."""
        print(title)
        print(self._RULE)
        for item in items:
            print(item)
        print("\n")

    def _build_kernel(self):
        """Instantiate the kernel function from the kernel factory.

        ``self.kernel`` is overwritten with the built function after every
        fit. The factory is therefore remembered separately; otherwise a
        second ``fit`` would call the built function with
        ``power=/gamma=/coef=`` keywords and fail with ``TypeError``.
        """
        fitted = getattr(self, "_kernel_fn", None)
        if fitted is None or self.kernel is not fitted:
            # First fit, or the caller assigned a new factory to self.kernel.
            self._kernel_factory = self.kernel
        self._kernel_fn = self._kernel_factory(
            power=self.power,
            gamma=self.gamma,
            coef=self.coef)
        self.kernel = self._kernel_fn
        return self._kernel_fn

    def fit(self, X, y):
        """Train on samples ``X`` (n_samples x n_features) with labels ``y``.

        Solves, with ``cvxopt.solvers.qp``::

            minimise    1/2 a^T P a + q^T a      P = (y y^T) * K,  q = -1
            subject to  0 <= a_i <= C,  y^T a = 0

        and keeps the samples whose multiplier exceeds 1e-7 as support
        vectors. The bias is the average, over the support vectors, of
        ``y_n - sum_m a_m y_m K(x_n, x_m)``.

        Every intermediate matrix is printed. May be called repeatedly on the
        same instance.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        self._print_matrix("Matrix X data latih", X)

        n_samples, n_features = np.shape(X)

        # A falsy gamma (None or 0) means "not chosen": default to 1 / n_features.
        if not self.gamma:
            self.gamma = 1 / n_features

        kernel = self._build_kernel()

        # Gram matrix of the training samples.
        kernel_matrix = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                kernel_matrix[i, j] = kernel(X[i], X[j])

        self._print_matrix("Hasil kernel polynomial terhadap matrix X", np.array(kernel_matrix))

        # Objective and equality constraint of the dual problem.
        P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d')
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        A = cvxopt.matrix(y, (1, n_samples), tc='d')
        b = cvxopt.matrix(0, tc='d')

        # Box constraint written as G a <= h: first -a <= 0, then a <= C.
        G_max = np.identity(n_samples) * -1
        G_min = np.identity(n_samples)
        G = cvxopt.matrix(np.vstack((G_max, G_min)))
        h_max = cvxopt.matrix(np.zeros(n_samples))
        h_min = cvxopt.matrix(np.ones(n_samples) * self.C)
        h = cvxopt.matrix(np.vstack((h_max, h_min)))

        self._print_matrix("Persamaan P Quadratic", np.array(P))
        self._print_matrix("Persamaan q Quadratic", np.array(q))
        self._print_matrix("Persamaan A Quadratic", np.array(A))
        self._print_matrix("Persamaan b Quadratic", np.array(b))
        self._print_matrix("Persamaan G Quadratic", np.array(G))
        self._print_matrix("Persamaan h Quadratic", np.array(h))

        minimization = cvxopt.solvers.qp(P, q, G, h, A, b)

        alpha = np.ravel(minimization['x'])

        # Multipliers above the threshold identify the support vectors.
        idx = alpha > 1e-7
        ind = np.flatnonzero(idx)
        self.alpha = alpha[idx]
        self.support_vectors = X[idx]
        self.support_vector_labels = y[idx]

        self._print_items("Total Alpha", alpha)
        self._print_items("Alpha Valid", self.alpha)
        self._print_items("Support Vector", self.support_vectors)
        self._print_items("Support Vector Label", self.support_vector_labels)

        # Average the per-support-vector bias estimates.
        self.b = 0
        for n in range(len(self.alpha)):
            self.b += self.support_vector_labels[n]
            self.b -= np.sum(self.alpha * self.support_vector_labels * kernel_matrix[ind[n], idx])

        self.b /= len(self.alpha)
        print("Bias\n--------------------------------------\n", self.b, "\n")

    def p(self, X):
        """Return the raw decision value of every sample in ``X``.

        For a sample x this is ``sum_m a_m y_m K(x, sv_m) + b`` over the
        support vectors stored by ``fit``. The result is a 1-D float array.
        """
        X = np.asarray(X)
        y_predict = np.zeros(len(X))
        for i in range(len(X)):
            y_predict[i] = sum(
                a * sv_y * self.kernel(X[i], sv)
                for a, sv_y, sv in zip(self.alpha, self.support_vector_labels, self.support_vectors))
        return y_predict + self.b

    def predict_proba(self, x):
        """Return a two-entry score ``[negative, positive]`` for every sample.

        The decision value s is clipped to [-1, 1] and mapped linearly:
        ``negative = (1 - s) / 2`` and ``positive = 1 - negative``. So s = -1
        gives ``[1, 0]`` and s = 1 gives ``[0, 1]``. The result is a plain
        list of two-element lists.
        """
        scores = np.clip(self.p(x), -1, 1)
        val = []
        for s in scores:
            negative = (1.0 - s) / 2.0
            val.append([negative, 1 - negative])
        return val

    def predict(self, X):
        """Return the sign of the decision value of every sample in ``X``."""
        return np.sign(self.p(X))

In [8]:
# Classifier with the constructor defaults: C=1, polynomial kernel, power=2, coef=1.
v = SupportVectorMachine()

In [9]:
# Train on the six samples; fit() prints every intermediate matrix of the QP.
v.fit(all_vec, all_y)

Matrix X data latih
--------------------------------------
1.69  0.991  1.69  1.389  1.69  1.69  1.69  1.69  1.69  1.69     0     0     0     0     0     0     0     0     0     0     0      0     0     0     0     0     0     0     0     0  
   0  0.991     0      0     0     0     0     0     0     0  1.69  1.69     0     0     0     0     0     0     0     0     0  1.389     0     0     0     0     0     0     0     0  
   0  0.991     0      0     0     0     0     0     0     0     0     0     0  1.69  1.69  1.69  1.69     0     0     0     0      0     0     0     0     0     0     0     0     0  
   0      0     0      0     0     0     0     0     0     0     0     0     0     0     0     0     0  1.69  1.69  1.69  1.69  1.389  1.69  1.69  1.69  1.69  1.69     0     0     0  
   0  1.982     0      0     0     0     0     0     0     0     0     0  1.69     0     0     0     0     0     0     0     0      0     0     0     0     0     0     0     0     0  
   0  0.991     0  1.

In [200]:
# Predicted labels for the training samples themselves.
v.predict(all_vec)

array([ 1.,  1.,  1., -1., -1., -1.])

In [201]:
# Two-entry scores per training sample, derived from the clipped decision value.
v.predict_proba(all_vec)

[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]

In [245]:
# Scratch cell: small 2x2 array whose transpose is inspected in the next cell.
a = np.array([[1,2], [5,6]])

In [246]:
# Transpose of the scratch array: rows become columns.
a.T

array([[1, 5],
       [2, 6]])