"""generate_yago_gen.py

Emit the "yago-gen-test-<nex>-<i>" benchmark learning tasks from a YAGO OWL
file.

Usage:
    python generate_yago_gen.py path-to-sml-bench-learningtasks path-to-yago-owlfile
"""
import random
import sys
from spell.benchmark_tools import (concept2sparql, concept2string,
create_materialized_tdb_dir,
create_restricted_owl, emit_sml_benchmark,
get_reachable_inds, owlname2tdbname,
parse_concept, query_tdbdir)
def _draw_examples(pool, positives, negatives, sample_size):
    """Draw one random example set that contains at least one positive.

    Picks ``sample_size`` individuals from ``pool`` uniformly at random, with
    replacement (so the returned lists may contain duplicates), and splits
    them into those found in ``positives`` and those found in ``negatives``.
    The whole sample is redrawn until it has at least one positive.

    Returns:
        A ``(positive_examples, negative_examples)`` pair of lists.
    """
    while True:
        drawn = [random.choice(pool) for _ in range(sample_size)]
        pos = [ind for ind in drawn if ind in positives]
        if pos:  # At least one positive example, otherwise ELTL complains
            return pos, [ind for ind in drawn if ind in negatives]


def _is_eltl_safe(ind):
    """Return True unless ``ind`` contains a substring ELTL has trouble with."""
    troublesome = (".png", ".svg", ".jpg", ".jpeg", ".JPG", "geo.com")
    return not any(marker in ind for marker in troublesome)


def generate_yago_generalization_bench(sml_bench_dir, owlfile):
    """Emit the "yago-gen-test-<nex>-<i>" benchmark learning tasks.

    Positive examples are the answers to the target concept (an actor
    successor that has alumniOf, award, children and deathPlace successors);
    negative examples are the remaining answers to "has an actor successor".
    For every sample size ``nex`` in ``range(5, 80, 5)`` twenty random example
    sets are drawn, the ontology is restricted to the individuals reachable
    from those examples (written to ``tmp.owl``), and one task per example set
    is emitted through ``emit_sml_benchmark``.

    Args:
        sml_bench_dir: Target directory handed to ``emit_sml_benchmark``.
        owlfile: Path of the OWL file to query and to restrict.

    Raises:
        ValueError: If the target query has no answers in ``owlfile``; without
            a positive individual the sampling could never succeed.
    """
    c4 = parse_concept(
        "\\exists <http://schema.org/actor> ( \\exists <http://schema.org/alumniOf> \\top \\sqcap \\exists <http://schema.org/award> \\top \\sqcap \\exists <http://schema.org/children> \\top \\sqcap \\exists <http://schema.org/deathPlace> \\top )"
    )
    nc = parse_concept("\\exists <http://schema.org/actor> \\top")

    # Use the OWL file supplied by the caller; it used to be overwritten here
    # with a hard-coded path, which made the parameter meaningless.
    tdbdir = owlname2tdbname(owlfile)
    create_materialized_tdb_dir(owlfile, tdbdir)

    P = set(query_tdbdir(tdbdir, concept2sparql(c4)))
    N = set(query_tdbdir(tdbdir, concept2sparql(nc))) - P
    if not P:
        raise ValueError(
            "Target query has no answers in {}; cannot sample positive examples".format(
                owlfile
            )
        )
    pool = list(P | N)

    tmp_owl = "tmp.owl"
    for nex in range(5, 80, 5):
        Ps = []
        Ns = []
        starts = set()
        for _ in range(20):
            Pl, Nl = _draw_examples(pool, P, N, nex)
            starts.update(Pl, Nl)
            Ps.append(Pl)
            Ns.append(Nl)

        print("== Collecting reachable individuals for this benchmark")
        relevant_inds = get_reachable_inds(owlfile, list(starts))
        # Remove things that eltl has trouble with
        rinds = [ind for ind in relevant_inds if _is_eltl_safe(ind)]

        print("== Creating reachable fragment of {}".format(owlfile))
        create_restricted_owl(owlfile, rinds, tmp_owl)

        # All twenty tasks of this sample size share the same restricted file.
        for i, (Pl, Nl) in enumerate(zip(Ps, Ns)):
            emit_sml_benchmark(
                sml_bench_dir,
                "yago-gen-test-{}-{}".format(nex, i),
                tmp_owl,
                Pl,
                Nl,
                ["Target query: {}".format(concept2string(c4))],
            )
def main():
    """Command-line entry point.

    Reads the benchmark output directory from ``sys.argv[1]`` and the OWL file
    path from ``sys.argv[2]`` and passes both to
    ``generate_yago_generalization_bench``. Any further arguments are ignored.
    When fewer than two arguments are given, a usage message is printed and
    the function returns without doing anything else.
    """
    if len(sys.argv) < 3:
        print(
            "Requires arguments: path-to-sml-bench-learningtasks path-to-yago-owlfile"
        )
        return

    sml_bench_path, owlfile = sys.argv[1], sys.argv[2]
    generate_yago_generalization_bench(sml_bench_path, owlfile)
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()