In [1]:
from auxiliarymethods import datasets as dp
from auxiliarymethods.reader import tud_to_networkx
import auxiliarymethods.auxiliary_methods as aux
import os
import numpy as np
from lib import *
import pickle


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hyperparameters (every value below is also embedded in pickle file names).
iterations = 5  # Weisfeiler-Lehman iterations; selects the gram-matrix CSV
k = 120  # partitioning parameter `k`
alpha = 0.85  # NOTE(review): only appears in pickle file names in this notebook
# Thresholds carrying the same names as build_hierarchy's keyword arguments.
delta_affine, delta_dist = 1.5, 0.5
min_samples = 2  # presumably the minimum cluster size for clustering - TODO confirm

In [3]:
# Pickling: directory that holds the cached intermediate results.
# Precomputed files can be found here:
# https://ucloud.univie.ac.at/index.php/s/pjLEBg8rCJWdaJ2
pickle_path = "pickles"

In [4]:
# Utility functions
def load_csv(path):
    """Read a semicolon-delimited numeric text file into a NumPy array.

    Args:
        path: Location of the file; forwarded unchanged to ``np.loadtxt``.

    Returns:
        The array produced by ``np.loadtxt(path, delimiter=";")``.
    """
    values = np.loadtxt(path, delimiter=";")
    return values

In [5]:
# Data set to analyse and the directory containing its precomputed kernel files.
ds_name = "IMDB-BINARY"
base_path = os.path.join("kernels", "without_labels")


In [None]:

# Load the class labels and the graphs of the data set selected by `ds_name`
# in the configuration cell. `ds_name` is deliberately NOT reassigned here:
# a second assignment would silently override the configured value for every
# later cell (e.g. the gram-matrix file name).
classes = dp.get_dataset(ds_name)
G = tud_to_networkx(ds_name)
print(f"Number of graphs in data set is {len(G)}")
print(f"Number of classes {len(set(classes.tolist()))}")

In [6]:
# Gram matrix of the Weisfeiler-Lehman subtree kernel: read the precomputed
# CSV for the chosen data set and iteration count, then normalize it.
gram = aux.normalize_gram_matrix(
    load_csv(os.path.join(base_path, f"{ds_name}_gram_matrix_wl{iterations}.csv"))
)

In [None]:
# partitioning
# Pass the `k` hyperparameter instead of a literal so the computed partitions
# always correspond to the `k` embedded in the pickle file names.
# NOTE(review): `alpha` is part of those file names too but is not passed
# here - confirm whether make_partitions should receive it.
point_info, partitions = make_partitions(gram, k=k)

In [None]:
# writing
# Both files use the same `{iterations}_{k}_{alpha}` suffix; the partitions
# file previously omitted `iterations`, so the reading cell could not find it.
# `with` ensures each file is flushed and closed once the dump is done.
with open(os.path.join(pickle_path, f'point_info_{iterations}_{k}_{alpha}.p'), 'wb') as fh:
    pickle.dump(point_info, fh)
with open(os.path.join(pickle_path, f'partitions_{iterations}_{k}_{alpha}.p'), 'wb') as fh:
    pickle.dump(partitions, fh)

In [8]:
# reading
# Restore the cached partitioning results for the current hyperparameters.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
# `with` closes each file handle as soon as it has been read.
with open(os.path.join(pickle_path, f'point_info_{iterations}_{k}_{alpha}.p'), 'rb') as fh:
    point_info = pickle.load(fh)
with open(os.path.join(pickle_path, f'partitions_{iterations}_{k}_{alpha}.p'), 'rb') as fh:
    partitions = pickle.load(fh)

In [9]:
# clustering
# Use the configured hyperparameters instead of repeating their literal
# values, so the results always match the values embedded in the pickle
# file names written afterwards.
# NOTE(review): positional order (delta_affine, delta_dist, min_samples) is
# inferred from the former literals 1.5, .5, 2 - confirm against
# cluster_partitions' signature.
models, clusters = cluster_partitions(
    gram, partitions, point_info, delta_affine, delta_dist, min_samples
)
cluster_info = compute_cluster_list(clusters, gram)


In [10]:
# writing
# Cache the clustering results; every file name shares the same
# `{iterations}_{delta_affine}_{delta_dist}_{min_samples}` suffix.
# `with` ensures each file is flushed and closed once the dump is done.
cluster_suffix = f'{iterations}_{delta_affine}_{delta_dist}_{min_samples}.p'
with open(os.path.join(pickle_path, f'models_{cluster_suffix}'), 'wb') as fh:
    pickle.dump(models, fh)
with open(os.path.join(pickle_path, f'clusters_{cluster_suffix}'), 'wb') as fh:
    pickle.dump(clusters, fh)
with open(os.path.join(pickle_path, f'cluster_info_{cluster_suffix}'), 'wb') as fh:
    pickle.dump(cluster_info, fh)


In [None]:
# reading
# Restore the cached clustering results for the current hyperparameters.
# The third artifact is loaded into `cluster_info` from `cluster_info_...`,
# which is the name the writing cell produces and the hierarchy cell consumes
# (it was previously `clusters_info`, a file that is never written).
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open(os.path.join(pickle_path, f'models_{iterations}_{delta_affine}_{delta_dist}_{min_samples}.p'), 'rb') as fh:
    models = pickle.load(fh)
with open(os.path.join(pickle_path, f'clusters_{iterations}_{delta_affine}_{delta_dist}_{min_samples}.p'), 'rb') as fh:
    clusters = pickle.load(fh)
with open(os.path.join(pickle_path, f'cluster_info_{iterations}_{delta_affine}_{delta_dist}_{min_samples}.p'), 'rb') as fh:
    cluster_info = pickle.load(fh)

In [13]:
# hierarchy
# Forward the configured thresholds rather than repeating their literal
# values, so the hierarchy always matches the values embedded in its
# pickle file name.
hierarchy = build_hierarchy(cluster_info, delta_affine=delta_affine, delta_dist=delta_dist)

In [14]:
# writing
# Cache the hierarchy; `with` ensures the file is flushed and closed once
# the dump is done.
with open(os.path.join(pickle_path, f'hierarchy_{iterations}_{delta_affine}_{delta_dist}.p'), 'wb') as fh:
    pickle.dump(hierarchy, fh)

In [None]:
# reading
# Restore the cached hierarchy for the current hyperparameters.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open(os.path.join(pickle_path, f'hierarchy_{iterations}_{delta_affine}_{delta_dist}.p'), 'rb') as fh:
    hierarchy = pickle.load(fh)