Skip to content

Commit cdd1c34

Browse files
authored
[core][feat] Rewrite queries to make them more efficient (#2093)
1 parent 39d5500 commit cdd1c34

File tree

21 files changed

+301
-179
lines changed

21 files changed

+301
-179
lines changed

fixcore/fixcore/db/arango_query.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
from fixcore.constants import less_greater_then_operations as lgt_ops, arangodb_matches_null_ops
1313
from fixcore.db import EstimatedSearchCost, EstimatedQueryCostRating as Rating
14+
from fixcore.db.arango_query_rewrite import rewrite_query
1415
from fixcore.db.arangodb_functions import as_arangodb_function
1516
from fixcore.db.model import QueryModel
1617
from fixcore.model.graph_access import Section, Direction
@@ -110,7 +111,7 @@ def to_query(
110111
id_column: str = "_key",
111112
) -> Tuple[str, Json]:
112113
ctx = ArangoQueryContext()
113-
query = query_model.query
114+
query = rewrite_query(query_model)
114115
start = from_collection or f"`{db.graph_vertex_name()}`"
115116
cursor, query_str = query_string(db, query, query_model, start, with_edges, ctx, id_column=id_column)
116117
last_limit = f" LIMIT {ll.offset}, {ll.length}" if (ll := query.current_part.limit) else ""
fixcore/fixcore/db/arango_query_rewrite.py

Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,86 @@
1+
from typing import List, cast
2+
3+
from attr import evolve
4+
5+
from fixcore.db.model import QueryModel
6+
from fixcore.model.model import predefined_kinds_by_name
7+
from fixcore.query.model import (
8+
Query,
9+
Predicate,
10+
FulltextTerm,
11+
Term,
12+
ContextTerm,
13+
MergeTerm,
14+
CombinedTerm,
15+
IsTerm,
16+
NotTerm,
17+
)
18+
19+
20+
def add_is_term(query_model: QueryModel) -> Query:
    """
    Restrict the first part of the query to the kinds that own the filtered properties.

    For the predicates found in the term, the owning kinds of every property path are looked up
    via `query_model.owners`. If at least one kind is found, an `IsTerm` with those kinds is
    and-combined with the term. Kinds listed in `predefined_kinds_by_name` and the kind
    `resource` are never used for the restriction.
    An `and` combination where an `IsTerm` is found on either side is returned unchanged.

    :param query_model: the query together with the model used to look up the property owners.
    :return: a copy of the query, where the last entry of `parts` is replaced by the rewritten first part.
    """
    model = query_model.model

    def is_kind_filter(term: Term) -> bool:
        return isinstance(term, IsTerm)

    def is_predicate(term: Term) -> bool:
        return isinstance(term, Predicate)

    def is_context(term: Term) -> bool:
        return isinstance(term, ContextTerm)

    def negation_or_and(term: Term) -> bool:
        # selector handed to `find_term`: matches negations and `and` combinations
        return isinstance(term, NotTerm) or (isinstance(term, CombinedTerm) and term.op == "and")

    def outside_context(term: Term) -> bool:
        # selector handed to `find_terms`: matches everything except merge and context terms
        return not isinstance(term, (MergeTerm, ContextTerm))

    def with_owner_kinds(term: Term, predicates: List[Predicate]) -> Term:
        # collect all non-predefined kinds that own one of the predicate properties
        owner_kinds = {
            owner.fqn
            for predicate in predicates
            for owner in query_model.owners(predicate.name)
            if owner.fqn not in predefined_kinds_by_name
        }
        owner_kinds.discard("resource")  # all resources have this base kind - ignore it
        if not owner_kinds:
            return term
        return IsTerm(kinds=sorted(owner_kinds)).and_term(term)

    def rewrite(term: Term) -> Term:
        if isinstance(term, CombinedTerm) and term.op == "or":
            # every side of an `or` is rewritten on its own
            return evolve(term, left=rewrite(term.left), right=rewrite(term.right))
        if isinstance(term, CombinedTerm) and term.op == "and":
            left_is = term.left.find_term(is_kind_filter, negation_or_and)
            right_is = term.right.find_term(is_kind_filter, negation_or_and)
            if left_is is not None or right_is is not None:
                # an IsTerm has been found already: nothing to add
                return term
            # NOTE(review): the owners of all predicates (also negated ones) are united here.
            # For a term that consists of negations only, the added IsTerm might exclude
            # resources that matched before - confirm this is intended.
            direct = cast(List[Predicate], term.find_terms(is_predicate, outside_context))
            contexts = cast(List[ContextTerm], term.find_terms(is_context, outside_context))
            from_contexts = [pred for ctx in contexts for pred in ctx.visible_predicates()]
            return with_owner_kinds(term, direct + from_contexts)
        if isinstance(term, NotTerm):
            return NotTerm(rewrite(term.term))
        if isinstance(term, Predicate):
            return with_owner_kinds(term, [term])
        if isinstance(term, ContextTerm):
            return with_owner_kinds(term, term.visible_predicates())
        if isinstance(term, MergeTerm):
            pre = rewrite(term.pre_filter)
            post = rewrite(term.post_filter) if term.post_filter else None
            # merge queries are complete queries: rewrite them with the same model
            queries = [evolve(mq, query=add_is_term(QueryModel(mq.query, model))) for mq in term.merge]
            return MergeTerm(pre_filter=pre, post_filter=post, merge=queries)
        return term

    query = query_model.query
    first_part = query.first_part
    rewritten_part = evolve(first_part, term=rewrite(first_part.term))
    return evolve(query, parts=query.parts[:-1] + [rewritten_part])
69+
70+
71+
def rewrite_query(
    query_model: QueryModel,
) -> Query:
    """
    Rewrite the query of the given query model, so it can be executed more efficiently.

    One of two rewrites is applied to the first part of the query:
    - A single predicate on `reported.tags.*` with operation `==` or `in` is and-combined
      with a `FulltextTerm` that holds the searched value(s).
    - Otherwise `add_is_term` is used to add an `IsTerm` where possible.

    :param query_model: the query and the related model.
    :return: the rewritten query.
    """
    q = query_model.query
    p = q.first_part
    term = p.term

    # check for single tags predicate. use fulltext index: tags.foo==bar --> "bar" and tags.foo==bar
    if isinstance(term, Predicate) and term.name.startswith("reported.tags.") and term.op in ("==", "in"):
        values = term.value if isinstance(term.value, list) else [term.value]
        # A null value would be rendered as the literal text "None":
        # skip the fulltext rewrite for such predicates.
        if all(v is not None for v in values):
            # Stringify every element: str.join raises a TypeError for non-string list items.
            # NOTE(review): multiple values are joined into one fulltext text - confirm that the
            # fulltext search matches the `in` semantics (any of the values) for such a text.
            text = " ".join(str(v) for v in values)
            # An empty list or an empty string does not provide any text to search for.
            if text.strip():
                part = evolve(p, term=FulltextTerm(text).and_term(term))
                return evolve(q, parts=q.parts[:-1] + [part])

    # try to add an IsTerm if not already provided
    return add_is_term(query_model)

fixcore/fixcore/db/model.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
from __future__ import annotations
22

33
from abc import ABC
4-
from typing import Dict, Any, Optional, Tuple
4+
from typing import Dict, Any, Optional, Tuple, List
55

66
from attr import define
77

88
from fixcore.model.graph_access import Section
9-
from fixcore.model.model import Model, ResolvedPropertyPath
9+
from fixcore.model.model import Model, ResolvedPropertyPath, ComplexKind
1010
from fixcore.model.resolve_in_graph import GraphResolver
1111
from fixcore.query.model import Query
1212
from fixcore.util import first
@@ -30,15 +30,23 @@ def is_set(self, name: str) -> bool:
3030
return value.lower() in ["1", "true", "yes", "y"]
3131
return False
3232

33-
def prop_kind(self, path: str) -> Tuple[ResolvedPropertyPath, Optional[str]]: # prop, merge_name
33+
def __prop(self, path: str) -> Tuple[str, Optional[str]]:
    """
    Split a property path into the path used for the model lookup and the merge name.

    :param path: the property path as used in the query.
    :return: tuple of the path without merge name and section and the matching merge name (if any).
    """

    def is_prefix_of_path(name: str) -> bool:
        return path.startswith(name + ".")

    # merge names of the query take precedence over the ancestor merges
    merge_name = first(is_prefix_of_path, self.query.merge_names) or first(is_prefix_of_path, ancestor_merges)
    # remove merge_name and section part (if existent) from the local_path
    if merge_name:
        local_path = path[len(merge_name) + 1 :]  # noqa: E203
    else:
        local_path = path
    return Section.without_section(local_path), merge_name
40+
41+
def prop_kind(self, path: str) -> Tuple[ResolvedPropertyPath, Optional[str]]:  # prop, merge_name
    """
    Resolve the property for the given path.

    :param path: the property path as used in the query.
    :return: tuple of the resolved property and the merge name the path starts with (if any).
    """
    local_path, merge_name = self.__prop(path)
    return self.model.property_by_path(local_path), merge_name
4145

46+
def owners(self, path: str) -> List[ComplexKind]:
    """
    Look up the complex kinds that own the property with the given path.

    :param path: the property path as used in the query.
    :return: the result of the model's `owners_by_path` lookup for the path without merge name and section.
    """
    local_path = self.__prop(path)[0]  # the merge name is not relevant for this lookup
    return self.model.owners_by_path(local_path)
49+
4250

4351
@define(repr=True, eq=True)
4452
class GraphUpdate(ABC):

fixcore/fixcore/model/model.py

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import sys
77
import textwrap
88
from abc import ABC, abstractmethod
9+
from collections import defaultdict
910
from datetime import datetime, timezone, date
1011
from functools import lru_cache
1112
from json import JSONDecodeError
@@ -920,6 +921,7 @@ def __init__(
920921
self.__resolved_props: Dict[str, Tuple[Property, Kind]] = {}
921922
self.__resolved_direct_props: Dict[str, Tuple[Property, Kind]] = {}
922923
self.__resolved_bases: Dict[str, ComplexKind] = {}
924+
self.__owner_lookup: Dict[PropertyPath, ComplexKind] = {}
923925
self.__all_props: List[Property] = list(self.properties)
924926
self.__resolved_hierarchy: Set[str] = {fqn}
925927
self.__property_by_path: List[ResolvedPropertyPath] = []
@@ -988,7 +990,7 @@ def resolve(self, model: Dict[str, Kind]) -> None:
988990
self.__resolved_hierarchy.update(base.__resolved_hierarchy)
989991

990992
# property path -> kind
991-
self.__property_by_path = ComplexKind.resolve_properties(self, model)
993+
self.__property_by_path, self.__owner_lookup = ComplexKind.resolve_properties(self, model)
992994
self.__synthetic_props = [p for p in self.__property_by_path if p.prop.synthetic]
993995

994996
# resolve predecessor kinds
@@ -1051,6 +1053,9 @@ def is_a(self, kind: str) -> bool:
10511053
def resolved_property_paths(self) -> List[ResolvedPropertyPath]:
10521054
return self.__property_by_path
10531055

1056+
def owned_paths(self) -> Dict[PropertyPath, ComplexKind]:
    """Return the lookup from property path to the complex kind recorded as its owner."""
    owner_by_path = self.__owner_lookup
    return owner_by_path
1058+
10541059
def resolved_bases(self) -> Dict[str, ComplexKind]:
10551060
return self.__resolved_bases
10561061

@@ -1244,16 +1249,19 @@ def walk_element(
12441249

12451250
@staticmethod
12461251
def resolve_properties(
1247-
complex_kind: ComplexKind,
1248-
model: Dict[str, Kind],
1249-
from_path: PropertyPath = EmptyPath,
1250-
maybe_visited: Optional[Dict[str, PropertyPath]] = None,
1251-
) -> List[ResolvedPropertyPath]:
1252-
visited = maybe_visited or {}
1252+
complex_kind: ComplexKind, model: Dict[str, Kind]
1253+
) -> Tuple[List[ResolvedPropertyPath], Dict[PropertyPath, ComplexKind]]:
1254+
visited: Dict[str, PropertyPath] = {}
12531255
result: List[ResolvedPropertyPath] = []
1256+
owner_lookup: Dict[PropertyPath, ComplexKind] = {}
12541257

12551258
def path_for(
1256-
prop: Property, kind: Kind, path: PropertyPath, array: bool = False, add_prop_to_path: bool = True
1259+
owner: ComplexKind,
1260+
prop: Property,
1261+
kind: Kind,
1262+
path: PropertyPath,
1263+
array: bool = False,
1264+
add_prop_to_path: bool = True,
12571265
) -> None:
12581266
prop_name = f"{prop.name}[]" if array else prop.name
12591267
# Detect object cycles: remember the path when we have visited this property.
@@ -1267,27 +1275,35 @@ def path_for(
12671275
kind.resolve(model)
12681276
if isinstance(kind, SimpleKind):
12691277
result.append(ResolvedPropertyPath(relative, prop, kind))
1278+
owner_lookup[relative] = owner
12701279
elif isinstance(kind, ArrayKind):
12711280
if name := relative.last_part:
12721281
result.append(ResolvedPropertyPath(relative, Property(name, kind.fqn), kind))
1273-
path_for(prop, kind.inner, path, True)
1282+
owner_lookup[relative] = owner
1283+
path_for(owner, prop, kind.inner, path, True)
12741284
elif isinstance(kind, DictionaryKind):
12751285
child = relative.child(None)
12761286
if name := relative.last_part:
12771287
result.append(ResolvedPropertyPath(relative, Property(name, kind.fqn), kind))
1288+
owner_lookup[relative] = owner
12781289
# Any child path accessing this dictionary will get a property of value kind.
12791290
value = kind.value_kind
12801291
result.append(ResolvedPropertyPath(child, Property("any", value.fqn), value))
1281-
path_for(prop, kind.value_kind, child, add_prop_to_path=False)
1292+
owner_lookup[child] = owner
1293+
path_for(owner, prop, kind.value_kind, child, add_prop_to_path=False)
12821294
elif isinstance(kind, ComplexKind):
12831295
if name := relative.last_part:
12841296
result.append(ResolvedPropertyPath(relative, Property(name, kind.fqn), kind))
1285-
result.extend(ComplexKind.resolve_properties(kind, model, relative, visited))
1297+
owner_lookup[relative] = owner
1298+
for_complex_kind(owner, kind, relative)
12861299

1287-
for x in complex_kind.all_props():
1288-
path_for(x, complex_kind.__resolved_props[x.name][1], from_path)
1300+
def for_complex_kind(owner: ComplexKind, current: ComplexKind, relative: PropertyPath) -> None:
1301+
for cpx in list(current.resolved_bases().values()) + [current]:
1302+
for prop in cpx.properties:
1303+
path_for(owner, prop, cpx.__resolved_props[prop.name][1], relative)
12891304

1290-
return result
1305+
for_complex_kind(complex_kind, complex_kind, PropertyPath([], ""))
1306+
return result, owner_lookup
12911307

12921308

12931309
string_kind = StringKind("string")
@@ -1362,29 +1378,39 @@ def path_for(
13621378
class Model:
13631379
@staticmethod
13641380
def empty() -> Model:
1365-
return Model({}, [])
1381+
return Model({}, [], {})
13661382

13671383
@staticmethod
13681384
def from_kinds(kinds: List[Kind]) -> Model:
13691385
all_kinds = kinds + predefined_kinds
13701386
kind_dict = {kind.fqn: kind for kind in all_kinds}
13711387
for kind in all_kinds:
13721388
kind.resolve(kind_dict)
1373-
resolved = list(
1374-
# several complex kinds might have the same property
1375-
# reduce the list by hash over the path.
1376-
{
1377-
r.path: r
1378-
for c in all_kinds
1379-
if isinstance(c, ComplexKind) and c.aggregate_root
1380-
for r in c.resolved_property_paths()
1381-
}.values()
1382-
)
1383-
return Model(kind_dict, resolved)
1389+
# several complex kinds might have the same property
1390+
# reduce the list by hash over the path.
1391+
prop_kinds_by_path = {}
1392+
# lookup map to get the aggregate root that defined a specific property path
1393+
# Example: instance_cores
1394+
complex_by_path_distinct: Dict[PropertyPath, Dict[str, ComplexKind]] = defaultdict(dict)
1395+
for c in all_kinds:
1396+
if isinstance(c, ComplexKind) and c.aggregate_root:
1397+
for r in c.resolved_property_paths():
1398+
prop_kinds_by_path[r.path] = r
1399+
for path, cpl in c.owned_paths().items():
1400+
complex_by_path_distinct[path][cpl.fqn] = cpl
1401+
1402+
complex_by_path = {k: list(v.values()) for k, v in complex_by_path_distinct.items()}
1403+
return Model(kind_dict, list(prop_kinds_by_path.values()), complex_by_path)
13841404

1385-
def __init__(self, kinds: Dict[str, Kind], property_kind_by_path: List[ResolvedPropertyPath]):
1405+
def __init__(
    self,
    kinds: Dict[str, Kind],
    property_kind_by_path: List[ResolvedPropertyPath],
    complex_kinds_by_path: Dict[PropertyPath, List[ComplexKind]],
):
    """
    Create a model from already resolved data.

    :param kinds: all kinds of this model by their fully qualified name.
    :param property_kind_by_path: the resolved property paths of this model.
    :param complex_kinds_by_path: lookup from property path to the complex kinds that own this path.
    """
    # lookup structures are private: they are accessed via the related lookup methods
    self.__complex_kinds_by_path: Dict[PropertyPath, List[ComplexKind]] = complex_kinds_by_path
    self.__property_kind_by_path: List[ResolvedPropertyPath] = property_kind_by_path
    self.kinds = kinds
13881414

13891415
def __contains__(self, name_or_object: Union[str, Json]) -> bool:
13901416
if isinstance(name_or_object, str):
@@ -1434,6 +1460,10 @@ def property_by_path(self, path_: Union[str, List[str]]) -> ResolvedPropertyPath
14341460
def kind_by_path(self, path: Union[str, List[str]]) -> Kind:
14351461
return self.property_by_path(path).kind
14361462

1463+
def owners_by_path(self, path_: Union[str, List[str]]) -> List[ComplexKind]:
    """
    Look up the complex kinds that own the given property path.

    :param path_: the property path, either as string or as list of path parts.
    :return: the owning complex kinds or an empty list, if the path is not known.
    """
    if isinstance(path_, str):
        path = PropertyPath.from_string(path_)
    else:
        path = PropertyPath.from_list(path_)
    return self.__complex_kinds_by_path.get(path, [])
1466+
14371467
def coerce(self, js: Json) -> Json:
14381468
try:
14391469
kind: Kind = self[js["kind"]]
@@ -1619,7 +1649,7 @@ def all_predecessor_kinds(kind: ComplexKind) -> Dict[EdgeType, List[str]]:
16191649
)
16201650
else:
16211651
result[kind.fqn] = kind
1622-
return Model(result, self.__property_kind_by_path)
1652+
return Model(result, self.__property_kind_by_path, self.__complex_kinds_by_path)
16231653

16241654
def filter_complex(
16251655
self, filter_fn: Callable[[ComplexKind], bool], with_bases: bool = True, with_prop_types: bool = True
@@ -1649,7 +1679,7 @@ def add_kind(cpl: ComplexKind) -> None:
16491679
if isinstance(kind, ComplexKind) and filter_fn(kind):
16501680
add_kind(kind)
16511681

1652-
return Model(kinds, self.__property_kind_by_path)
1682+
return Model(kinds, self.__property_kind_by_path, self.__complex_kinds_by_path)
16531683

16541684
def complete_path(
16551685
self,

0 commit comments

Comments
 (0)