Skip to content
This repository has been archived by the owner on Sep 7, 2023. It is now read-only.

[enh] add checker #2419

Merged
merged 12 commits into from
Jan 13, 2021
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ RUN apk upgrade --no-cache \
openssl-dev \
tar \
git \
protoc \
protobuf-dev \
&& apk add --no-cache \
ca-certificates \
su-exec \
Expand All @@ -53,6 +55,7 @@ RUN apk upgrade --no-cache \
uwsgi \
uwsgi-python3 \
brotli \
protobuf \
&& pip3 install --upgrade pip \
&& pip3 install --no-cache -r requirements.txt \
&& apk del build-dependencies \
Expand Down
3 changes: 3 additions & 0 deletions dockerfiles/uwsgi.ini
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ static-map = /static=/usr/local/searx/searx/static
static-expires = /* 864000
static-gzip-all = True
offload-threads = %k

# Cache
cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ pygments==2.1.3
python-dateutil==2.8.1
pyyaml==5.3.1
requests[socks]==2.25.1
pycld3==0.20
65 changes: 5 additions & 60 deletions searx/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
from searx.search.models import EngineRef, SearchQuery
from searx.search.processors import processors, initialize as initialize_processors
from searx.search.checker import initialize as initialize_checker


logger = logger.getChild('search')
Expand All @@ -45,68 +47,11 @@
sys.exit(1)


def initialize(settings_engines=None):
def initialize(settings_engines=None, enable_checker=False):
settings_engines = settings_engines or settings['engines']
initialize_processors(settings_engines)


class EngineRef:
    """Reference to an engine: its name plus the category it is queried in.

    Two references are equal when both ``name`` and ``category`` match.
    Because ``__eq__`` is defined without ``__hash__``, instances are not
    hashable.
    """

    __slots__ = 'name', 'category'

    def __init__(self, name: str, category: str):
        self.name = name
        self.category = category

    def __repr__(self):
        return "EngineRef({!r}, {!r})".format(self.name, self.category)

    def __eq__(self, other):
        # A foreign type has no name/category attributes; returning
        # NotImplemented lets Python fall back to its default comparison
        # (so ``ref == "google"`` is False) instead of raising AttributeError.
        if not isinstance(other, EngineRef):
            return NotImplemented
        return self.name == other.name and self.category == other.category


class SearchQuery:
    """container for all the search parameters (query, language, etc...)"""

    __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\
        'timeout_limit', 'external_bang'

    def __init__(self,
                 query: str,
                 engineref_list: "typing.List[EngineRef]",
                 lang: str = 'all',
                 safesearch: int = 0,
                 pageno: int = 1,
                 time_range: typing.Optional[str] = None,
                 timeout_limit: typing.Optional[float] = None,
                 external_bang: typing.Optional[str] = None):
        self.query = query
        self.engineref_list = engineref_list
        self.lang = lang
        self.safesearch = safesearch
        self.pageno = pageno
        self.time_range = time_range
        self.timeout_limit = timeout_limit
        self.external_bang = external_bang

    @property
    def categories(self):
        """Distinct categories of the referenced engines (order is not defined)."""
        return list({engineref.category for engineref in self.engineref_list})

    def __repr__(self):
        return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\
            format(self.query, self.engineref_list, self.lang, self.safesearch,
                   self.pageno, self.time_range, self.timeout_limit, self.external_bang)

    def __eq__(self, other):
        # Comparing with a foreign type must not raise AttributeError:
        # NotImplemented makes Python fall back to its default comparison.
        if not isinstance(other, SearchQuery):
            return NotImplemented
        # every slot is a search parameter, so equality is slot-wise equality
        return all(getattr(self, slot) == getattr(other, slot) for slot in self.__slots__)
if enable_checker:
initialize_checker()


class Search:
Expand Down
4 changes: 4 additions & 0 deletions searx/search/checker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

from .impl import Checker
from .background import initialize, get_result
94 changes: 94 additions & 0 deletions searx/search/checker/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

import sys
import io
import os
import argparse
import logging

import searx.search
import searx.search.checker
from searx.search import processors
from searx.engines import engine_shortcuts


# configure logging: replace the handlers installed on the root logger
# with a single handler that writes to stdout
root = logging.getLogger()
handler = logging.StreamHandler(sys.stdout)
# iterate over a copy: removeHandler() mutates root.handlers, and removing
# items from the list being iterated would skip every other handler
for h in list(root.handlers):
    root.removeHandler(h)
root.addHandler(handler)

# color only for a valid terminal
if sys.stdout.isatty() and os.environ.get('TERM') not in ['dumb', 'unknown']:
    RESET_SEQ = "\033[0m"
    COLOR_SEQ = "\033[1;%dm"
    BOLD_SEQ = "\033[1m"
    # ANSI foreground colors 30..37, in bold
    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = (COLOR_SEQ % (30 + i) for i in range(8))
else:
    # no escape sequence at all when the output is not a capable terminal
    RESET_SEQ = ""
    COLOR_SEQ = ""
    BOLD_SEQ = ""
    BLACK = RED = GREEN = YELLOW = BLUE = MAGENTA = CYAN = WHITE = ""

# equivalent of 'python -u' (unbuffered stdout, stderr)
stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)
stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True)


# iterator of processors
def iter_processor(engine_name_list):
    """Yield ``(name, processor)`` pairs for the requested engines.

    Each item of *engine_name_list* is an engine name or a shortcut; shortcuts
    are resolved through ``engine_shortcuts``. A name without a processor is
    reported on ``stdout`` and skipped. An empty list selects every processor.
    """
    if not engine_name_list:
        yield from processors.items()
        return
    for name in engine_name_list:
        name = engine_shortcuts.get(name, name)
        processor = processors.get(name)
        if processor is not None:
            yield name, processor
        else:
            # end the line, like every other status message of this script
            stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RED}Engine does not exist{RESET_SEQ}\n')


# actual check & display
def run(engine_name_list, verbose):
    """Check every selected engine and write a report to ``stdout``.

    For each engine a "Checking" line is written first (and mirrored on
    ``stderr`` when ``sys.stdout`` is not a terminal), followed by either an
    OK line or an Error line. With *verbose* the found languages are listed,
    and on failure every log entry of every test is listed as well; without
    it a failure is summarized as one list of ``test name: error`` strings.
    """
    searx.search.initialize()
    for name, processor in iter_processor(engine_name_list):
        stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}Checking\n')
        if not sys.stdout.isatty():
            stderr.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}Checking\n')

        checker = searx.search.checker.Checker(processor)
        checker.run()
        results = checker.test_results

        def write_languages():
            found = " ".join(sorted(results.languages))
            stdout.write(f' {"found languages":15}: {found}\n')

        # "succesfull" (sic) is the attribute name exposed by the test results
        if results.succesfull:
            stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{GREEN}OK{RESET_SEQ}\n')
            if verbose:
                write_languages()
            continue

        # failure: the status line is left open and finished below
        stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RESET_SEQ}{RED}Error{RESET_SEQ}')
        if not verbose:
            errors = [test_name + ': ' + error for test_name, error in results]
            stdout.write(f'{RED}Error {str(errors)}{RESET_SEQ}\n')
            continue

        stdout.write('\n')
        write_languages()
        for test_name, logs in results.logs.items():
            for log in logs:
                stdout.write(f' {test_name:15}: {RED}{" ".join(log)}{RESET_SEQ}\n')


# called by setup.py
def main():
    """Parse the command line, then check the requested engines.

    Positional arguments are engine names or shortcuts (none means all
    engines); ``--verbose`` / ``-v`` turns on the detailed report.
    """
    parser = argparse.ArgumentParser(description='Check searx engines.')
    parser.add_argument(
        'engine_name_list',
        metavar='engine name',
        type=str,
        nargs='*',
        help='engines name or shortcut list. Empty for all engines.',
    )
    parser.add_argument(
        '--verbose', '-v',
        dest='verbose',
        action='store_true',
        default=False,
        help='Display details about the test results',
    )
    options = parser.parse_args()
    run(options.engine_name_list, options.verbose)


if __name__ == '__main__':
main()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a makefile target which uses python from ./local
If you think it is valuable, cherry-pick it from 190fa23.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are different options/parameters, isn't a Makefile target rather inconvenient? For example:

python -m searx.search.checker duckduckgo "google images"

What about an entry point?

searx/setup.py

Lines 52 to 56 in 14a395a

entry_points={
'console_scripts': [
'searx-run = searx.webapp:run'
]
},

122 changes: 122 additions & 0 deletions searx/search/checker/background.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

import json
import random
import time
import threading
import os
import signal

from searx import logger, settings, searx_debug
from searx.exceptions import SearxSettingsException
from searx.search.processors import processors
from searx.search.checker import Checker
from searx.shared import schedule, storage


# key under which the JSON-serialized checker result is kept in the shared storage
CHECKER_RESULT = 'CHECKER_RESULT'
# held while a check is in progress; run() returns immediately when it is already taken
running = threading.Lock()


def _get_interval(every, error_msg):
if isinstance(every, int):
every = (every, every)
if not isinstance(every, (tuple, list))\
or len(every) != 2\
or not isinstance(every[0], int)\
or not isinstance(every[1], int):
raise SearxSettingsException(error_msg, None)
return every


def _get_every():
    """Return the ``checker.scheduling.every`` setting as a validated interval.

    Falls back to ``(300, 1800)`` when the setting is absent.
    """
    checker_settings = settings.get('checker', {})
    scheduling = checker_settings.get('scheduling', {})
    configured = scheduling.get('every', (300, 1800))
    return _get_interval(configured, 'checker.scheduling.every is not a int or list')


def get_result():
    """Return the last stored checker result, or None when nothing is stored."""
    stored = storage.get_str(CHECKER_RESULT)
    if stored is None:
        return None
    return json.loads(stored)


def _set_result(result):
    """Stamp *result* and store it, JSON-serialized, in the shared storage.

    The ``timestamp`` key is set on the dictionary passed in (it is modified
    in place); its value is the current time truncated to the whole hour.
    """
    now = time.time()
    result['timestamp'] = int(now / 3600) * 3600
    serialized = json.dumps(result)
    storage.set_str(CHECKER_RESULT, serialized)


def run():
    """Check every processor and store the aggregated result.

    Returns immediately when another check is already in progress (the
    ``running`` lock is taken). Otherwise each processor is checked and the
    result is stored with status ``ok`` and, per engine, either
    ``{'success': True}`` or ``{'success': False, 'errors': ...}``.
    If anything raises, the exception is logged and a result with status
    ``error`` is stored instead.
    """
    if not running.acquire(blocking=False):
        return
    try:
        logger.info('Starting checker')
        result = {
            'status': 'ok',
            'engines': {}
        }
        for name, processor in processors.items():
            logger.debug('Checking %s engine', name)
            checker = Checker(processor)
            checker.run()
            # "succesfull" (sic) is the attribute name exposed by the test results
            if checker.test_results.succesfull:
                result['engines'][name] = {'success': True}
            else:
                result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}

        _set_result(result)
        logger.info('Check done')
    except Exception:
        # Log first: if the failure came from the storage itself, writing the
        # error status may raise again and the traceback would be lost.
        logger.exception('Error while running the checker')
        _set_result({'status': 'error'})
    finally:
        running.release()


def _run_with_delay():
    """Sleep for a random part of the configured interval, then run a check.

    The delay is drawn between 0 and ``max - min`` seconds of the
    ``checker.scheduling.every`` interval.
    """
    shortest, longest = _get_every()
    delay = random.randint(0, longest - shortest)
    logger.debug('Start checker in %i seconds', delay)
    time.sleep(delay)
    run()


def _start_scheduling():
    """Register the delayed check with the scheduler, then run one check now.

    ``_run_with_delay`` is handed to ``schedule`` together with the lower
    bound of the ``checker.scheduling.every`` interval.
    """
    period = _get_every()[0]
    schedule(period, _run_with_delay)
    run()


def _signal_handler(signum, frame):
    """Signal handler: start a check in a daemon thread and return at once."""
    worker = threading.Thread(target=run, daemon=True)
    worker.start()


def initialize():
    """Install the signal trigger and, when configured, start the scheduler.

    The stored result starts as ``disabled``. It stays so when debug mode is
    on (unless ``checker.off_when_debug`` is false) or when
    ``checker.scheduling`` is absent or empty. Otherwise the status becomes
    ``unknown`` and a daemon timer starts the scheduling after a random delay
    taken from ``checker.scheduling.start_after`` (default ``(300, 1800)``).

    Raises SearxSettingsException when ``start_after`` is not a valid interval.
    """
    # SIGUSR1 is not defined on every platform (e.g. Windows): skip the
    # manual trigger there instead of failing with AttributeError
    if hasattr(signal, 'SIGUSR1'):
        logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
        signal.signal(signal.SIGUSR1, _signal_handler)

    # disabled by default
    _set_result({'status': 'disabled'})

    # special case when debug is activated
    if searx_debug and settings.get('checker', {}).get('off_when_debug', True):
        logger.info('debug mode: checker is disabled')
        return

    # an absent or empty checker.scheduling section disables the scheduler
    scheduling = settings.get('checker', {}).get('scheduling', None)
    if not scheduling:
        logger.info('Checker scheduler is disabled')
        return

    # the scheduler is enabled but no check has been run yet
    _set_result({'status': 'unknown'})

    start_after = scheduling.get('start_after', (300, 1800))
    start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list')
    delay = random.randint(start_after[0], start_after[1])
    logger.info('Start checker in %i seconds', delay)
    t = threading.Timer(delay, _start_scheduling)
    t.daemon = True
    t.start()
Loading