Skip to content

Commit

Permalink
[fix] external bangs: don't overwrite Bangs in data trie
Browse files Browse the repository at this point in the history
Bangs with a `*` suffix (e.g. `!!d*`) overwrite Bangs with the same
prefix (e.g. `!!d`) [1].  This can be avoided when a non-printable character is
used to tag a LEAF_KEY.

[1] searxng#740 (comment)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
  • Loading branch information
return42 committed Jan 12, 2022
1 parent 6d7e86e commit ce97de2
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 18 deletions.
6 changes: 4 additions & 2 deletions searx/external_bang.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from searx.data import EXTERNAL_BANGS

LEAF_KEY = chr(16)


def get_node(external_bangs_db, bang):
node = external_bangs_db['trie']
Expand All @@ -26,8 +28,8 @@ def get_bang_definition_and_ac(external_bangs_db, bang):
if k.startswith(after):
bang_ac_list.append(before + k)
elif isinstance(node, dict):
bang_definition = node.get('*')
bang_ac_list = [before + k for k in node.keys() if k != '*']
bang_definition = node.get(LEAF_KEY)
bang_ac_list = [before + k for k in node.keys() if k != LEAF_KEY]
elif isinstance(node, str):
bang_definition = node
bang_ac_list = []
Expand Down
36 changes: 20 additions & 16 deletions searxng_extra/update/update_external_bangs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import httpx

from searx import searx_dir # pylint: disable=E0401 C0413

from searx.external_bang import LEAF_KEY

# from https://duckduckgo.com/newbang
URL_BV1 = 'https://duckduckgo.com/bv1.js'
Expand All @@ -51,18 +51,22 @@ def fetch_ddg_bangs(url):
def merge_when_no_leaf(node):
"""Minimize the number of nodes
A -> B -> C
B is child of A
C is child of B
``A -> B -> C``
- ``B`` is child of ``A``
- ``C`` is child of ``B``
If no ``C`` equals ``<LEAF_KEY>``, then each ``C`` is merged
into ``A``. For example (5 nodes)::
d -> d -> g -> <LEAF_KEY> (ddg)
-> i -> g -> <LEAF_KEY> (dig)
becomes (3 nodes)::
If there are no C equals to '*', then each C are merged into A
d -> dg -> <LEAF_KEY>
-> ig -> <LEAF_KEY>
For example:
d -> d -> g -> * (ddg*)
-> i -> g -> * (dig*)
becomes
d -> dg -> *
-> ig -> *
"""
restart = False
if not isinstance(node, dict):
Expand All @@ -72,12 +76,12 @@ def merge_when_no_leaf(node):
keys = list(node.keys())

for key in keys:
if key == '*':
if key == LEAF_KEY:
continue

value = node[key]
value_keys = list(value.keys())
if '*' not in value_keys:
if LEAF_KEY not in value_keys:
for value_key in value_keys:
node[key + value_key] = value[value_key]
merge_when_no_leaf(node[key + value_key])
Expand All @@ -94,8 +98,8 @@ def optimize_leaf(parent, parent_key, node):
if not isinstance(node, dict):
return

if len(node) == 1 and '*' in node and parent is not None:
parent[parent_key] = node['*']
if len(node) == 1 and LEAF_KEY in node and parent is not None:
parent[parent_key] = node[LEAF_KEY]
else:
for key, value in node.items():
optimize_leaf(node, key, value)
Expand Down Expand Up @@ -138,7 +142,7 @@ def parse_ddg_bangs(ddg_bangs):
t = bang_trie
for bang_letter in bang:
t = t.setdefault(bang_letter, {})
t = t.setdefault('*', bang_def_output)
t = t.setdefault(LEAF_KEY, bang_def_output)

# optimize the trie
merge_when_no_leaf(bang_trie)
Expand Down

0 comments on commit ce97de2

Please sign in to comment.