Skip to content

Commit 4695370

Browse files
authored
[core][fix] Merge edge and vertex and unfold in code (#2217)
1 parent 4ba56c1 commit 4695370

File tree

3 files changed

+59
-41
lines changed

3 files changed

+59
-41
lines changed

fixcore/fixcore/db/arango_query.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969

7070
allowed_first_merge_part = Part(AllTerm())
7171
unset_props = json.dumps(["flat"])
72+
edge_unset_props = json.dumps(["_rev", "hash", "refs"])
7273
# This list of delimiters is also used in the arango delimiter index.
7374
# If the definition is changed, the index needs to be changed as well!
7475
fulltext_delimiter = [" ", "_", "-", "@", ":", "/", "."]
@@ -890,26 +891,27 @@ def inout(
890891
unique = "uniqueEdges: 'path'" if with_edges else "uniqueVertices: 'global'"
891892
dir_bound = "OUTBOUND" if direction == Direction.outbound else "INBOUND"
892893

893-
# the path array contains the whole path from the start node.
894-
# in the case of start > 0, we need to slice the array to get the correct part
895-
def slice_or_all(in_p_part: str) -> str:
896-
return f"SLICE({in_path}.{in_p_part}, {start})" if start > 0 else f"{in_path}.{in_p_part}"
897-
898-
# Edge filter: decision to include the source element is not possible while traversing it.
894+
# Edge filter: the decision to include the source element is not possible while traversing it.
899895
# When the target node is reached and edge properties are available, the decision can be made.
900896
# In case the filter succeeds, we need to select all vertices and edges on the path.
901-
# No filter but with_edges: another nested for loop required to return the node and edge
902-
# No filter and no with_edges: only the node is returned
897+
# No filter but with_edges: merge the edge into the vertex
898+
# No filter and not with_edges: only the node is returned
903899
if edge_filter:
904-
# walk the path and return all vertices (and possibly edges)
900+
# walk the path and return all/sliced vertices.
905901
# this means intermediate nodes are returned multiple times and have to be made distinct
906-
# since we return nodes first, the edges can always be resolved
907-
walk_array = slice_or_all("vertices")
908-
walk_array = f'APPEND({walk_array}, {slice_or_all("edges")})' if with_edges else walk_array
909-
inout_result = f"FOR {in_r} in {walk_array} RETURN DISTINCT({in_r})"
902+
if with_edges:
903+
pv = f"{in_path}.vertices[{in_r}]"
904+
pe = f"{in_path}.edges[{in_r}]"
905+
pv_with_pe = f"MERGE({pv}, {{_edge:UNSET({pe}, {edge_unset_props})}})"
906+
inout_result = (
907+
f"FOR {in_r} in {start}..LENGTH({in_path}.vertices)-1 "
908+
f"RETURN DISTINCT({pe}!=null ? {pv_with_pe} : {pv})"
909+
)
910+
else:
911+
slice_or_all = f"SLICE({in_path}.vertices, {start})" if start > 0 else f"{in_path}.vertices"
912+
inout_result = f"FOR {in_r} in {slice_or_all} RETURN DISTINCT({in_r})"
910913
elif with_edges:
911-
# return the node and edge via a nested for loop
912-
inout_result = f"FOR {in_r} in [{in_c}, {in_edge}] FILTER {in_r}!=null RETURN DISTINCT({in_r})"
914+
inout_result = f"RETURN DISTINCT(MERGE({in_c}, {{_edge:UNSET({in_edge}, {edge_unset_props})}}))"
913915
else:
914916
# return only the node
915917
inout_result = f"RETURN DISTINCT {in_c}"

fixcore/fixcore/db/async_arangodb.py

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def __init__(
5353
self.cursor_exhausted = False
5454
self.trafo: Callable[[Json], Optional[Any]] = trafo if trafo else identity # type: ignore
5555
self.vt_len: Optional[int] = None
56+
self.on_hold: Optional[Json] = None
5657
self.get_next: Callable[[], Awaitable[Optional[Json]]] = (
5758
self.next_filtered if flatten_nodes_and_edges else self.next_element
5859
)
@@ -61,7 +62,11 @@ async def __anext__(self) -> Any:
6162
# if there is an on-hold element: unset and return it
6263
# background: a graph node contains vertex and edge information.
6364
# since this method can only return one element at a time, the edge is put on-hold for vertex+edge data.
64-
if self.cursor_exhausted:
65+
if self.on_hold:
66+
res = self.on_hold
67+
self.on_hold = None
68+
return res
69+
elif self.cursor_exhausted:
6570
return await self.next_deferred_edge()
6671
else:
6772
try:
@@ -94,35 +99,44 @@ async def next_element(self) -> Optional[Json]:
9499

95100
async def next_filtered(self) -> Optional[Json]:
96101
element = await self.next_from_db()
102+
vertex: Optional[Json] = None
103+
edge: Optional[Json] = None
97104
try:
98-
if (from_id := element.get("_from")) and (to_id := element.get("_to")) and (node_id := element.get("_id")):
99-
if node_id not in self.visited_edge:
100-
self.visited_edge.add(node_id)
101-
if not self.vt_len:
102-
self.vt_len = len(re.sub("/.*$", "", from_id)) + 1
103-
edge = {
104-
"type": "edge",
105-
# example: vertex_name/node_id -> node_id
106-
"from": from_id[self.vt_len :], # noqa: E203
107-
# example: vertex_name/node_id -> node_id
108-
"to": to_id[self.vt_len :], # noqa: E203
109-
# example: vertex_name_default/edge_id -> default
110-
"edge_type": re.sub("/.*$", "", node_id[self.vt_len :]), # noqa: E203
111-
}
112-
if reported := element.get("reported"):
113-
edge["reported"] = reported
114-
# make sure that both nodes of the edge have been visited already
115-
if from_id not in self.visited_node or to_id not in self.visited_node:
116-
self.deferred_edges.append(edge)
117-
return None
118-
else:
119-
return edge
120-
elif key := element.get("_key"):
105+
if ep := element.get("_edge"):
106+
if (from_id := ep.get("_from")) and (to_id := ep.get("_to")) and (node_id := ep.get("_id")):
107+
if node_id not in self.visited_edge:
108+
self.visited_edge.add(node_id)
109+
if not self.vt_len:
110+
self.vt_len = len(re.sub("/.*$", "", from_id)) + 1
111+
edge = {
112+
"type": "edge",
113+
# example: vertex_name/node_id -> node_id
114+
"from": from_id[self.vt_len :], # noqa: E203
115+
# example: vertex_name/node_id -> node_id
116+
"to": to_id[self.vt_len :], # noqa: E203
117+
# example: vertex_name_default/edge_id -> default
118+
"edge_type": re.sub("/.*$", "", node_id[self.vt_len :]), # noqa: E203
119+
}
120+
if reported := ep.get("reported"):
121+
edge["reported"] = reported
122+
# make sure that both nodes of the edge have been visited already
123+
if from_id not in self.visited_node or to_id not in self.visited_node:
124+
self.deferred_edges.append(edge)
125+
edge = None
126+
if key := element.get("_key"):
121127
if key not in self.visited_node:
122128
self.visited_node.add(key)
123-
return self.trafo(element)
129+
vertex = self.trafo(element)
130+
else:
131+
vertex = element
132+
# if the vertex is not returned: return the edge
133+
# otherwise return the vertex and remember the edge
134+
if vertex:
135+
self.on_hold = edge
136+
return vertex
124137
else:
125-
return element
138+
return edge
139+
126140
except Exception as ex:
127141
log.warning(f"Could not read element {element}: {ex}. Ignore.")
128142
return None

fixcore/tests/fixcore/db/graphdb_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,8 @@ async def assert_result(query: str, nodes: int, edges: int) -> None:
490490
await assert_result("is(foo) and reported.id==9 <-delete[0:]-", 11, 10)
491491
await assert_result("is(foo) and reported.id==9 <-default[0:]-", 4, 3)
492492
await assert_result("is(foo) and reported.id==9 -delete[0:]->", 1, 0)
493+
await assert_result("is(foo) and reported.id==9 -[0:]-> is(foo, bla)", 11, 10)
494+
await assert_result("is(foo) and reported.id==9 -[0:]{not_existent==null}-> is(foo, bla)", 11, 10)
493495

494496

495497
@mark.asyncio

0 commit comments

Comments
 (0)