Skip to content
This repository has been archived by the owner on Sep 7, 2023. It is now read-only.

Commit

Permalink
[mod] Quart: use httpx
Browse files Browse the repository at this point in the history
see https://github.com/encode/httpx
disable HTTP2 (from h2 project): see encode/httpx#381
working image proxy (with stream)
  • Loading branch information
dalf committed Oct 20, 2019
1 parent 28df4f7 commit e688620
Show file tree
Hide file tree
Showing 18 changed files with 94 additions and 1,473 deletions.
5 changes: 2 additions & 3 deletions requirements.txt
Expand Up @@ -8,7 +8,6 @@ lxml==4.3.3
pygments==2.1.3
python-dateutil==2.8.0
pyyaml==5.1
pycurl==7.43.0.3
cchardet==2.1.4
distro==1.4.0
httpx==0.7.5
brotlipy==0.7.0
uvloop==0.13.0
4 changes: 4 additions & 0 deletions searx/__init__.py
Expand Up @@ -75,6 +75,10 @@ def check_settings_yml(file_name):

if searx_debug:
logging.basicConfig(level=logging.DEBUG)
for l in ('httpx.config', 'hpack.hpack', 'hpack.table',
'httpx.dispatch.connection_pool',
'httpx.dispatch.http2', 'httpx.dispatch.http11'):
logging.getLogger(l).setLevel(logging.WARNING)
else:
logging.basicConfig(level=logging.WARNING)

Expand Down
2 changes: 1 addition & 1 deletion searx/engines/__init__.py
Expand Up @@ -24,7 +24,7 @@
from flask_babel import gettext
from operator import itemgetter
from json import loads
from searx.httpclient.requests import get
from searx.httpclient import get
from searx import settings
from searx import logger
from searx.utils import load_module, match_language, get_engine_from_settings
Expand Down
1 change: 1 addition & 0 deletions searx/engines/btdigg.py
Expand Up @@ -12,6 +12,7 @@

from operator import itemgetter
from urllib.parse import quote, urljoin
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size, html_fromstring

Expand Down
2 changes: 1 addition & 1 deletion searx/engines/dictzone.py
Expand Up @@ -59,7 +59,7 @@ async def response(resp):
to_results.append(to_result.text_content())

results.append({
'url': urljoin(resp.url, '?%d' % k),
'url': resp.url.join('?%d' % k),
'title': from_result.text_content(),
'content': '; '.join(to_results)
})
Expand Down
2 changes: 1 addition & 1 deletion searx/engines/duckduckgo_images.py
Expand Up @@ -20,7 +20,7 @@
_fetch_supported_languages, supported_languages_url,
get_region_code, language_aliases
)
from searx.httpclient.requests import get
from searx.httpclient import get
from searx.url_utils import urlencode

# engine dependent config
Expand Down
5 changes: 2 additions & 3 deletions searx/engines/google.py
Expand Up @@ -212,11 +212,10 @@ async def response(resp):
results = []

# detect google sorry
resp_url = urlparse(resp.url)
if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect':
if resp.url.host == 'sorry.google.com' or resp.url.path == '/sorry/IndexRedirect':
raise RuntimeWarning('sorry.google.com')

if resp_url.path.startswith('/sorry'):
if resp.url.path.startswith('/sorry'):
raise RuntimeWarning(gettext('CAPTCHA required'))

# which hostname ?
Expand Down
62 changes: 49 additions & 13 deletions searx/httpclient/requests.py → searx/httpclient.py
@@ -1,17 +1,48 @@
# -*- coding: utf-8 -*-

import asyncio
import time
from searx.httpclient.asynciosessions import AsyncioSession
from searx.httpclient.exceptions import TimeoutError
import logging
import httpx
import httpx.models
import httpx.config

from searx import settings, logger
from httpx import (Request, Response)
from httpx.exceptions import (HTTPError, Timeout, ConnectTimeout, ReadTimeout, WriteTimeout, PoolTimeout, ProxyError,
ProtocolError, DecodingError, RedirectError, TooManyRedirects, RedirectBodyUnavailable,
RedirectLoop, NotRedirectResponse, StreamError, StreamConsumed, ResponseNotRead,
ResponseClosed, InvalidURL, CookieConflict)


logger = logger.getChild('httpclient')
SESSION = None


max_host_connections = settings['outgoing'].get('pool_maxsize', 10)
max_total_connections = max_host_connections * settings['outgoing'].get('pool_connections', 100)
SESSION = AsyncioSession(share_cookies=False,
http2=True,
max_total_connections=max_total_connections,
max_host_connections=max_host_connections)
def baseresponse_ok(self):
    """requests-compatible success flag.

    Returns True when ``raise_for_status()`` completes without raising an
    ``HTTPError``, False otherwise.  Intended to be monkey-patched onto
    ``httpx.models.BaseResponse`` so response objects expose ``ok`` like
    the requests library does.
    """
    try:
        self.raise_for_status()
        return True
    except HTTPError:
        return False


async def initialize():
    """Create the shared httpx client used for all outgoing requests.

    Reads connection-pool sizes and proxy configuration from
    ``settings['outgoing']`` and stores the resulting ``httpx.AsyncClient``
    in the module-level ``SESSION``.  Also monkey-patches a
    requests-compatible ``ok`` accessor onto httpx responses.
    """
    global SESSION

    # monkey patch: expose requests-style ``resp.ok``.
    # ``ok`` must be installed as a property: callers migrated from the
    # requests API use attribute access (``if resp.ok:``), and a plain
    # bound method is always truthy, which would make the check useless.
    setattr(httpx.models.BaseResponse, 'ok', property(baseresponse_ok))

    # FIXME: pool_maxsize, pool_connections names don't match soft and hard limits
    soft_limit = settings['outgoing'].get('pool_maxsize', 10)
    hard_limit = settings['outgoing'].get('pool_connections', 100)
    pool_limits = httpx.config.PoolLimits(soft_limit=soft_limit, hard_limit=hard_limit, pool_timeout=10.0)

    # proxies (None when not configured, so httpx uses a direct connection)
    proxies = settings['outgoing'].get('proxies') or None

    # no HTTP2, see https://github.com/encode/httpx/issues/381
    SESSION = httpx.AsyncClient(proxies=proxies, http_versions="HTTP/1.1", pool_limits=pool_limits)


def _get_context():
Expand All @@ -34,9 +65,6 @@ async def request(method, url, **kwargs):
time_before_request = time.time()
context = _get_context()

# proxies
kwargs['proxies'] = settings['outgoing'].get('proxies') or None

# timeout
if 'timeout' in kwargs:
timeout = kwargs['timeout']
Expand All @@ -47,17 +75,25 @@ async def request(method, url, **kwargs):

# do request
response = await SESSION.request(method, url, **kwargs)

time_after_request = time.time()

# debug
if logger.isEnabledFor(logging.DEBUG):
if hasattr(response, "_content"):
size = str(len(response._content))
else:
size = "??"
logger.debug("\"{0} {1} {2}\" {3} {4}".format(
response.request.method, response.url, response.http_version, response.status_code, size))

# is there a timeout for this engine ?
if timeout is not None:
timeout_overhead = 0.2 # seconds
# start_time = when the user request started
start_time = getattr(context, 'start_time', time_before_request)
search_duration = time_after_request - start_time
if search_duration > timeout + timeout_overhead:
raise TimeoutError(response=response)
raise httpx.exceptions.Timeout(response=response)

if hasattr(context, 'total_time'):
context.total_time += time_after_request - time_before_request
Expand Down
21 changes: 0 additions & 21 deletions searx/httpclient/__init__.py

This file was deleted.

41 changes: 0 additions & 41 deletions searx/httpclient/asynciosessions.py

This file was deleted.

97 changes: 0 additions & 97 deletions searx/httpclient/exceptions.py

This file was deleted.

50 changes: 0 additions & 50 deletions searx/httpclient/misc.py

This file was deleted.

0 comments on commit e688620

Please sign in to comment.