[mod] Add engine for Emojipedia
Emojipedia is an emoji reference website that documents the meaning and
common usage of emoji characters in the Unicode Standard. It has been owned
by Zedge since 2021 and is a voting member of the Unicode Consortium.[1]

Cherry-picked from @james-still [2][3] and slightly modified to fit SearXNG's
quality gates.

[1] https://en.wikipedia.org/wiki/Emojipedia
[2] obfuscated-loop/searx@2fc01eb
[3] searx/searx#3278
Jay authored and return42 committed Jul 15, 2022
1 parent cf01f9e commit 10edcbe
Showing 2 changed files with 73 additions and 0 deletions.
67 changes: 67 additions & 0 deletions searx/engines/emojipedia.py
@@ -0,0 +1,67 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Emojipedia
Emojipedia is an emoji reference website that documents the meaning and
common usage of emoji characters in the Unicode Standard. It has been owned
by Zedge since 2021 and is a voting member of the Unicode Consortium.[1]
[1] https://en.wikipedia.org/wiki/Emojipedia
"""

from urllib.parse import urlencode
from lxml import html

from searx.utils import (
    eval_xpath_list,
    eval_xpath_getindex,
    extract_text,
)

about = {
    "website": 'https://emojipedia.org',
    "wikidata_id": 'Q22908129',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

categories = []
paging = False
time_range_support = False

base_url = 'https://emojipedia.org'
search_url = base_url + '/search/?{query}'


def request(query, params):
    params['url'] = search_url.format(
        query=urlencode({'q': query}),
    )
    return params


def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    for result in eval_xpath_list(dom, "/html/body/div[2]/div[1]/ol/li"):

        extracted_desc = extract_text(eval_xpath_getindex(result, './/p', 0))

        if 'No results found.' in extracted_desc:
            break

        link = eval_xpath_getindex(result, './/h2/a', 0)

        url = base_url + link.attrib.get('href')
        title = extract_text(link)
        content = extracted_desc

        res = {'url': url, 'title': title, 'content': content}

        results.append(res)

    return results
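The engine above follows SearXNG's two-hook pattern: `request()` fills in the URL to fetch, and `response()` walks the result list with XPath. The following is a minimal standalone sketch of that flow, runnable outside SearXNG. The HTML snippet is a hypothetical stand-in for Emojipedia's markup (the real page layout may differ), and `build_request_url` is an illustrative helper, not part of the engine API.

```python
from urllib.parse import urlencode

from lxml import html

base_url = 'https://emojipedia.org'
search_url = base_url + '/search/?{query}'


def build_request_url(query):
    # Mirrors the engine's request(): only a 'q' parameter is sent.
    return search_url.format(query=urlencode({'q': query}))


# Hypothetical markup shaped to match the engine's XPath
# "/html/body/div[2]/div[1]/ol/li" -- not a captured Emojipedia page.
sample = """
<html><body>
  <div></div>
  <div><div><ol>
    <li>
      <h2><a href="/thumbs-up/">Thumbs Up</a></h2>
      <p>A thumbs-up gesture indicating approval.</p>
    </li>
  </ol></div></div>
</body></html>
"""

dom = html.fromstring(sample)
results = []
for li in dom.xpath("/html/body/div[2]/div[1]/ol/li"):
    # Same extraction as response(): link gives url + title, <p> gives content.
    link = li.xpath('.//h2/a')[0]
    results.append({
        'url': base_url + link.attrib['href'],
        'title': link.text_content().strip(),
        'content': li.xpath('.//p')[0].text_content().strip(),
    })

print(build_request_url('thumbs up'))  # https://emojipedia.org/search/?q=thumbs+up
print(results)
```

The sketch uses plain `lxml` XPath where the engine uses SearXNG's `eval_xpath_list`/`eval_xpath_getindex` wrappers; those wrappers add error handling but evaluate the same expressions.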
6 changes: 6 additions & 0 deletions searx/settings.yml
@@ -542,6 +542,12 @@ engines:
    timeout: 3.0
    disabled: true

  - name: emojipedia
    engine: emojipedia
    timeout: 4.0
    shortcut: em
    disabled: true

  - name: tineye
    engine: tineye
    shortcut: tin
