In [15]:
from custom_kernel_example import NodeLabelKernel
import os
import sys
import time
from grakel.datasets import fetch_dataset
from tudataset.tud_benchmark.auxiliarymethods import kernel_evaluation
import numpy as np


In [2]:
# Fetch the ENZYMES dataset; prefer_attr_nodes=True asks for the node
# attribute vectors (all node attributes).
print("Loading ENZYMES dataset")
ENZYMES_attr = fetch_dataset(
    "ENZYMES",
    prefer_attr_nodes=True,
    verbose=False,
)
# Alternative call that loads the node labels instead:
# ENZYMES_attr = fetch_dataset("ENZYMES", with_classes=True, verbose=False)

# Keep the graphs and their targets under separate names for later cells.
G = ENZYMES_attr.data
classes = ENZYMES_attr.target

Loading ENZYMES dataset


In [5]:
def generate_hyperkernel_feature_matrix(weights, attr_index_list, G):
    """Collapse selected node attributes into one weighted-sum label per node.

    For every graph in ``G`` a new three-element list is built: the first
    element of the input graph is reused as-is, every node's value becomes
    ``sum(weights[k] * attributes[attr_index_list[k]])`` and the third element
    is an empty dict.

    Parameters
    ----------
    weights : sequence of numbers
        ``weights[k]`` scales the attribute found at ``attr_index_list[k]``.
        Entries beyond ``len(attr_index_list)`` are ignored.
    attr_index_list : sequence of int
        Positions to read from each node's attribute vector.
    G : iterable of graphs
        ``graph[0]`` is passed through untouched (presumably the edge
        structure -- confirm against the loader) and ``graph[1]`` maps a node
        id to an indexable attribute vector.

    Returns
    -------
    list
        One ``[graph[0], {node_id: weighted_sum}, {}]`` entry per input graph.
    """
    hyperkernel_graphs = []
    for graph in G:
        # Walk the node ids actually stored in this graph instead of a running
        # global counter, so subsets, reordered graph lists and non-contiguous
        # node ids are handled instead of raising KeyError.
        node_labels = {
            node_id: sum(
                weights[position] * node_attributes[attribute_index]
                for position, attribute_index in enumerate(attr_index_list)
            )
            for node_id, node_attributes in graph[1].items()
        }
        # The third slot is deliberately emptied, as in the original layout.
        hyperkernel_graphs.append([graph[0], node_labels, {}])
    return hyperkernel_graphs

In [154]:
def generate_attribute_data(attribute_index, G):
    """Reduce every node's attribute vector to a single chosen attribute.

    Parameters
    ----------
    attribute_index : int
        Position to read from each node's attribute vector.
    G : iterable of graphs
        ``graph[0]`` is passed through untouched (presumably the edge
        structure -- confirm against the loader) and ``graph[1]`` maps a node
        id to an indexable attribute vector.

    Returns
    -------
    list
        One ``[graph[0], {node_id: attributes[attribute_index]}, {}]`` entry
        per input graph; the third slot is always an empty dict.
    """
    single_attribute_graphs = []
    for graph in G:
        # Iterate over the node ids present in this graph rather than a
        # running global counter, so subsets, reordered graph lists and
        # non-contiguous node ids no longer raise KeyError.
        node_labels = {
            node_id: node_attributes[attribute_index]
            for node_id, node_attributes in graph[1].items()
        }
        single_attribute_graphs.append([graph[0], node_labels, {}])
    return single_attribute_graphs

In [137]:
# Collect one normalized NodeLabelKernel Gram matrix per node attribute.
all_matrices = []

for attr_index in range(18):  # 18 node attributes
    # Relabel every node with just the attribute at attr_index.
    ENZYMES_attr_subset = generate_attribute_data(attr_index, G)
    G_subseted = ENZYMES_attr_subset
    classes = ENZYMES_attr.target

    # A fresh kernel instance is fitted for each attribute.
    label_kernel = NodeLabelKernel(normalize=True)
    label_kernel_gram = label_kernel.fit_transform(G_subseted)
    all_matrices.append(label_kernel_gram)

In [133]:
# Measure the evaluation with a monotonic, high-resolution clock so the
# reported duration cannot be skewed by system clock adjustments.
start = time.perf_counter()

# Kernel SVM evaluation over the per-attribute Gram matrices in all_matrices.
# The recorded run of this cell reported roughly 492 s of elapsed time.
print("Starting kernel svm evaluation...")
num_reps = 10
accuracy, std_10, std_100 = kernel_evaluation.kernel_svm_evaluation(
    all_matrices, classes, num_repetitions=num_reps, all_std=True
)

print("accuracy:", accuracy)
print("standard deviations of all 10-CV runs:", std_10)
print("standard deviations of all 100 runs:", std_100)

end = time.perf_counter()
print("Elapsed time: ", end - start)

Starting kernel svm evaluation...
accuracy: 63.71666666666666
standard deviations of all 10-CV runs: 1.0699169230469352
standard deviations of all 100 runs: 4.7775807452912575
Elapsed time:  491.98106694221497


In [None]:
# weights[k] is the factor applied to the node attribute at attr_index_list[k].
weights = [1, 1, 1, 1, 0.12, 0.12, 0.12, 0.4, 0.13, 0.14, 0.13]
attr_index_list = [0, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17]

# Replace each node's attribute vector by the weighted sum of the selection.
hyper_attr = generate_hyperkernel_feature_matrix(weights, attr_index_list, G)

In [14]:
# Use the weighted-sum node labels (hyper_attr) as the graph collection.
G_subseted, classes = hyper_attr, ENZYMES_attr.target
# initialize kernel
hyper_kernel = NodeLabelKernel(normalize=True)
hyper_kernel_gram = hyper_kernel.fit_transform(G_subseted)

# Measure the evaluation with a monotonic, high-resolution clock so the
# reported duration cannot be skewed by system clock adjustments.
start = time.perf_counter()

# Kernel SVM evaluation on the single hyperkernel Gram matrix.
# The recorded run of this cell reported roughly 6 s of elapsed time.
print("Starting kernel svm evaluation...")
num_reps = 10
accuracy, std_10, std_100 = kernel_evaluation.kernel_svm_evaluation(
    [hyper_kernel_gram], classes, num_repetitions=num_reps, all_std=True
)

print("accuracy:", accuracy)
print("standard deviations of all 10-CV runs:", std_10)
print("standard deviations of all 100 runs:", std_100)

end = time.perf_counter()
print("Elapsed time: ", end - start)

[[1. 1. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 7. 0.]
 [0. 0. 0. ... 1. 8. 1.]]
Starting kernel svm evaluation...
accuracy: 66.16666666666666
standard deviations of all 10-CV runs: 0.9036961141150666
standard deviations of all 100 runs: 6.383572667401852
Elapsed time:  5.808809041976929


In [17]:
# Persist the hyperkernel Gram matrix as a ';'-separated text file.
output_path = "./precomputed_kernels/without_labels/ENZYMES_gram_matrix_hyperkernel_v1.csv"
# np.savetxt does not create missing directories, so make sure the nested
# target folder exists before writing.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
np.savetxt(output_path, hyper_kernel_gram, delimiter=";")