In [2]:
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.datasets import load_iris
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from numpy import linalg as LA
from scipy.linalg import eigh
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# hyperparameters: Kernel, Regularization, Gamma

### Sacharias testing things out

In [38]:
def accuracy(t, y):
    """Return the fraction of positions where ``t`` and ``y`` hold equal values.

    Positions ``0 .. len(t) - 1`` are compared one by one, so ``y`` must be
    indexable at each of them. The result is always a float.
    """
    total = len(t)
    # Start the sum at 0.0 so the running count is a float throughout.
    matches = sum((1 for idx in range(total) if t[idx] == y[idx]), 0.0)
    return matches / total

def rbf_kernel(X, sigma, diag=1):
    """Build the pairwise Gaussian (RBF) affinity matrix of the rows of ``X``.

    Off-diagonal entries are ``exp(-||x_i - x_j||^2 / sigma**2)``. Note that the
    squared distance is divided by ``sigma**2``, not by the ``2 * sigma**2``
    used in some other RBF parameterisations. Every diagonal entry is
    overwritten with ``diag``.

    Parameters
    ----------
    X : 2-D array, one sample per row.
    sigma : kernel width.
    diag : value written on the diagonal (default 1).

    Returns
    -------
    K : float ndarray of shape ``(N, N)`` where ``N`` is the number of rows of ``X``.
    """
    # A single cdist call yields all pairwise squared distances at once,
    # replacing the per-pair Python loop.
    K = np.exp(-cdist(X, X, 'sqeuclidean') / (sigma ** 2))
    np.fill_diagonal(K, diag)
    return K

def make_D_matrix(K):
    """Return the diagonal matrix whose i-th diagonal entry is the sum of row i of ``K``."""
    row_totals = np.sum(K, axis=1)
    return np.diag(row_totals)

def make_L_matrix(K, D):
    """Return the symmetrically normalised matrix ``D^(-1/2) K D^(-1/2)``.

    Only the diagonal of ``D`` is read. A zero diagonal entry yields
    non-finite values in the corresponding row and column of the result.

    Parameters
    ----------
    K : square 2-D array.
    D : square 2-D array of the same size; its diagonal holds the scaling values.

    Returns
    -------
    L : ndarray with ``L[i, j] = K[i, j] / sqrt(D[i, i] * D[j, j])``.
    """
    inv_sqrt_deg = np.diag(D) ** -0.5
    # Scaling rows and then columns by broadcasting is equivalent to the two
    # matrix products with the diagonal matrix, without building it.
    return inv_sqrt_deg[:, None] * np.asarray(K) * inv_sqrt_deg[None, :]

def step_transfer(L, k=2):
    """Apply a step transfer function to the spectrum of ``L``.

    Eigenvalues greater than or equal to the k-th largest one are replaced
    by 1 and all others by 0. The matrix is then rebuilt and rescaled so that
    the returned kernel has a unit diagonal.

    Parameters
    ----------
    L : square matrix handed to ``scipy.linalg.eigh`` (which expects a
        symmetric / Hermitian input).
    k : number of leading eigenvalues that defines the cut-off; must be >= 1.

    Returns
    -------
    (L_hat, D_hat, K_hat) where ``L_hat`` is the rebuilt matrix,
    ``D_hat = diag(1 / diag(L_hat))`` and
    ``K_hat = D_hat^(1/2) L_hat D_hat^(1/2)``.

    Raises
    ------
    ValueError : if ``k < 1``.
    IndexError : if ``k`` exceeds the number of eigenvalues.
    """
    if k < 1:
        # w[-0] is w[0]: without this guard k=0 would keep every eigenvalue.
        raise ValueError("k must be at least 1")

    w, v = eigh(L)  # eigenvalues come back in ascending order
    lambda_cut = w[-k]
    kept = v[:, w >= lambda_cut]

    # With 0/1 weights, v @ diag(weights) @ v.T reduces to kept @ kept.T.
    L_hat = kept @ kept.T

    inv_diag = 1 / np.diag(L_hat)
    D_hat = np.diag(inv_diag)
    scale = np.sqrt(inv_diag)
    K_hat = scale[:, None] * L_hat * scale[None, :]

    return L_hat, D_hat, K_hat

def linear_step_transfer(L, k=2):
    """Apply a linear-step transfer function to the spectrum of ``L``.

    Eigenvalues greater than or equal to the k-th largest one are kept
    unchanged and all others are set to 0. The matrix is then rebuilt and
    rescaled so that the returned kernel has a unit diagonal.

    Parameters
    ----------
    L : square matrix handed to ``scipy.linalg.eigh`` (which expects a
        symmetric / Hermitian input).
    k : number of leading eigenvalues that defines the cut-off; must be >= 1.

    Returns
    -------
    (L_hat, D_hat, K_hat) where ``L_hat`` is the rebuilt matrix,
    ``D_hat = diag(1 / diag(L_hat))`` and
    ``K_hat = D_hat^(1/2) L_hat D_hat^(1/2)``.

    Raises
    ------
    ValueError : if ``k < 1``.
    IndexError : if ``k`` exceeds the number of eigenvalues.
    """
    if k < 1:
        # w[-0] is w[0]: without this guard k=0 would keep every eigenvalue.
        raise ValueError("k must be at least 1")

    w, v = eigh(L)  # eigenvalues come back in ascending order
    lambda_cut = w[-k]
    kept_values = np.where(w >= lambda_cut, w, 0)

    # Scaling the eigenvector columns is equivalent to v @ diag(kept_values).
    L_hat = (v * kept_values) @ v.T

    inv_diag = 1 / np.diag(L_hat)
    D_hat = np.diag(inv_diag)
    scale = np.sqrt(inv_diag)
    K_hat = scale[:, None] * L_hat * scale[None, :]

    return L_hat, D_hat, K_hat

def polynomial_transfer(L, D, K, t):
    """Apply a polynomial transfer function: raise ``L`` to the matrix power ``t``.

    ``L_hat`` is the matrix power ``L^t`` (not an element-wise power), and the
    kernel is normalised the same way as in the other transfer functions:
    ``K_hat = D_hat^(1/2) L_hat D_hat^(1/2)`` with
    ``D_hat = diag(1 / diag(L_hat))``.

    When ``L = D^(-1/2) K D^(-1/2)``, the matrix power satisfies
    ``L^t = D^(1/2) (D^(-1) K)^t D^(-1/2)``, so ``L_hat`` can be computed from
    ``L`` alone. ``D`` and ``K`` are accepted to keep the call signature but
    are not read.

    The result is returned without column standardisation: rescaling the
    columns of a kernel matrix would make it non-symmetric.

    Parameters
    ----------
    L : square 2-D array.
    D, K : unused; kept for signature compatibility.
    t : integer-valued exponent.

    Returns
    -------
    (L_hat, D_hat, K_hat)

    Raises
    ------
    ValueError : if ``t`` is not integer-valued.
    """
    if int(t) != t:
        raise ValueError("t must be an integer")

    L_hat = LA.matrix_power(np.asarray(L), int(t))

    inv_diag = 1 / np.diag(L_hat)
    D_hat = np.diag(inv_diag)
    scale = np.sqrt(inv_diag)
    K_hat = scale[:, None] * L_hat * scale[None, :]

    return L_hat, D_hat, K_hat


def apply_transfer_func(L, D, K, type="linear", **kwargs):
    """Dispatch to the transfer function selected by ``type``.

    Parameters
    ----------
    L, D, K : matrices passed through to the selected transfer function.
    type : one of
        ``"linear"``      - return ``(L, D, K)`` unchanged;
        ``"step"``        - ``step_transfer(L, k)``, requires keyword ``k``;
        ``"linear_step"`` - ``linear_step_transfer(L, k)``, keyword ``k`` is
                            optional and falls back to that function's default;
        ``"polynomial"``  - ``polynomial_transfer(L, D, K, t)``, requires keyword ``t``.
    **kwargs : per-type parameters as listed above.

    Returns
    -------
    The ``(L, D, K)`` triple produced by the selected transfer function.

    Raises
    ------
    KeyError : if a required keyword (``k`` for "step", ``t`` for "polynomial") is missing.
    ValueError : if ``type`` is not one of the names above.
    """
    if type == "linear":
        return L, D, K
    if type == "step":
        return step_transfer(L, kwargs['k'])
    if type == "linear_step":
        # Forward the caller's k instead of silently using the default.
        if 'k' in kwargs:
            return linear_step_transfer(L, kwargs['k'])
        return linear_step_transfer(L)
    if type == "polynomial":
        return polynomial_transfer(L, D, K, kwargs['t'])

    raise ValueError("wrong argument")


In [40]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9)

def run_SVM_test(sigma=1, k=3, n_labeled=30, C=1, seed=40):
    """Train and evaluate a precomputed-kernel SVM on a two-class iris subset.

    Steps:
      1. Load iris, keep the first two features and drop the samples with
         target 0.
      2. Shuffle the samples with a fixed seed.
      3. Build the RBF affinity matrix over *all* samples (labelled and
         held-out alike), normalise it and apply the "step" transfer function.
      4. Fit an SVC on the first ``n_labeled`` samples and predict the rest.
      5. Print the accuracy on the held-out samples.

    Parameters
    ----------
    sigma : width passed to ``rbf_kernel``.
    k : cut-off index passed to the step transfer function.
    n_labeled : number of (shuffled) samples used for training; the
        remainder is the test set.
    C : SVC regularisation parameter.
    seed : seed for the global NumPy random state used for shuffling.

    Returns
    -------
    None. The accuracy is printed.
    """
    # Load data
    iris = load_iris()
    X = iris.data[:, :2]
    y = iris.target

    keep = y != 0
    X = X[keep]
    y = y[keep]

    # Shuffle data
    np.random.seed(seed)
    order = np.random.permutation(len(X))
    X = X[order]
    # The np.float alias was removed from NumPy; the builtin float gives the same dtype.
    y = y[order].astype(float)

    # Build the kernel over every sample before splitting
    K = rbf_kernel(X, sigma)
    D = make_D_matrix(K)
    L = make_L_matrix(K, D)

    L, D, K = apply_transfer_func(L, D, K, "step", k=k)

    # Train on the labelled block. The test kernel holds the similarities
    # between test rows and training columns.
    K_train = K[:n_labeled, :n_labeled]
    y_train = y[:n_labeled]

    K_test = K[n_labeled:, :n_labeled]
    y_test = y[n_labeled:]

    # Run kernel SVM
    clf = SVC(kernel="precomputed", C=C)
    clf.fit(K_train, y_train)

    y_pred = clf.predict(K_test)
    print("accuracy:", accuracy(y_pred, y_test))

# Run the experiment; it prints the accuracy on the held-out samples.
run_SVM_test()


accuracy: 0.6714285714285714
