In [1]:
import sys
# Put the parent directory first on the import path so the local `wlkernel`
# package is importable. The membership check keeps the cell idempotent:
# re-running it no longer piles duplicate '../' entries onto sys.path.
# NOTE: '../' is relative, so it is resolved against the kernel's current
# working directory when the import happens.
if '../' not in sys.path:
    sys.path.insert(0, '../')

In [2]:
from collections import Counter

import wlkernel
import rdflib
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score

In [3]:
# Read the N3-serialized dataset into a fresh rdflib graph.
rdf_graph = rdflib.Graph().parse(
    source='../data/aifbfixed_complete.n3',
    format='n3',
)

In [4]:
# Tally the objects of every triple whose predicate mentions 'affiliation';
# the resulting Counter is the cell's displayed value.
Counter(
    str(obj)
    for _subj, pred, obj in rdf_graph
    if 'affiliation' in str(pred)
)

Counter({'http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id1instance': 73,
         'http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id3instance': 60,
         'http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id4instance': 16,
         'http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id2instance': 28,
         'http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id5instance': 1})

In [5]:
# Map each affiliated subject URI to the URI it is affiliated with (the class
# label). 'id5instance' is skipped: the affiliation tally displayed earlier
# shows it has only a single instance (presumably too few to be split across
# cross-validation folds -- confirm).
instances_class_map = dict(
    (str(subj), str(obj))
    for subj, pred, obj in rdf_graph
    if 'affiliation' in str(pred) and 'id5instance' not in str(obj)
)
len(instances_class_map)

177

In [6]:
# The labelled instance URIs, in the dict's insertion order.
instances = [*instances_class_map]
len(instances)

177

In [7]:
# The distinct class labels that remain after filtering.
classes = {label for label in instances_class_map.values()}
len(classes)

4

In [8]:
# Keep every triple, as plain strings, except those whose predicate contains
# one of the listed substrings. Presumably these predicates encode the
# affiliation (or its inverse) and would leak the label -- confirm.
triples = [
    (str(subj), str(pred), str(obj))
    for subj, pred, obj in rdf_graph
    if not any(
        term in str(pred)
        for term in ('affiliation', 'employs', 'member', 'head')
    )
]
len(triples)

28699

In [9]:
# Build the wlkernel graph structure from the filtered triples and the
# labelled instance URIs.
# NOTE(review): max_depth=2 presumably bounds the neighbourhood extracted
# around each instance -- confirm against wlkernel.WLRDFGraph.
wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=2)
# Sanity check: the number of instance nodes should equal len(instances).
len(wlrdf_graph.instance_nodes)

177

In [156]:
# Compute the pairwise kernel matrix for the labelled instances.
# NOTE(review): rows/columns presumably follow the order of `instances`, and
# iterations=1 presumably sets the number of relabelling rounds -- confirm
# against wlkernel.wlrdf_kernel_matrix.
# NOTE(review): the recorded execution counter jumps from 9 to 156 at this
# cell, so the saved outputs may depend on out-of-order runs; verify with
# Restart Kernel -> Run All.
kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=1)
# One row per instance is expected.
len(kernel_matrix)

177

In [157]:
# Instance URIs, kept under the name `X` for any later cell that uses it.
X = list(instances_class_map)
# Build the labels by walking `instances`, the same sequence that was passed
# to the kernel-matrix computation, so label i belongs to kernel row i by
# construction. The previous check compared the dict's values with lookups of
# its own keys, which can never fail and said nothing about alignment with
# `instances`. A stale or modified `instances` now raises KeyError here
# instead of silently mislabelling rows.
y = [instances_class_map[instance] for instance in instances]
assert len(y) == len(kernel_matrix), (
    'label count does not match the number of kernel matrix rows'
)
len(y)

177

In [160]:
# kernel='precomputed' tells the SVC that the training input is a kernel
# (Gram) matrix rather than a feature matrix. fit() returns the estimator
# itself, so the chained call binds the fitted classifier.
classifier = svm.SVC(kernel='precomputed').fit(kernel_matrix, y)
classifier

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='precomputed', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [161]:
# 10-fold cross-validation on the precomputed kernel matrix; the mean of the
# per-fold scores is the cell's displayed value.
scores = cross_val_score(
    estimator=classifier,
    X=np.array(kernel_matrix),
    y=y,
    cv=10,
)
np.mean(scores)

0.6530559971736442