Skip to content

Commit

Permalink
Remove doc limit for stats query (#1597)
Browse files Browse the repository at this point in the history
## Remove doc limit for stats query
Two minor changes to the search lambda:
1. Remove the documents-per-shard limit for the stats query only, which can take advantage of cached summary statistics inside Elasticsearch.
2. Allow MAX_DOCUMENTS_PER_SHARD to be set in the environment.

Small test fix:
Reading environment variables at initialization doesn't work (at least in pytest), so those reads are moved into the body of the lambda handler.


Co-authored-by: Kevin Moore <kevin@quiltdata.io>
  • Loading branch information
kevinemoore and Kevin Moore committed Apr 15, 2020
1 parent dcdcdd4 commit 33a4d8f
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
13 changes: 8 additions & 5 deletions lambdas/search/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
from t4_lambda_shared.decorator import api
from t4_lambda_shared.utils import get_default_origins, make_json_response

INDEX_OVERRIDES = os.getenv('INDEX_OVERRIDES', '')
MAX_QUERY_DURATION = '15s'
MAX_DOCUMENTS_PER_SHARD = 10000
NUM_PREVIEW_IMAGES = 100
NUM_PREVIEW_FILES = 100
IMG_EXTS = [
Expand Down Expand Up @@ -46,8 +44,10 @@ def lambda_handler(request):
"""
Proxy the request to the elastic search.
"""

action = request.args.get('action')
indexes = request.args.get('index')
terminate_after = os.getenv('MAX_DOCUMENTS_PER_SHARD')

if action == 'search':
query = request.args.get('query', '')
Expand Down Expand Up @@ -75,6 +75,8 @@ def lambda_handler(request):
}
size = 0
_source = []
# Consider all documents when computing counts, etc.
terminate_after = None
elif action == 'images':
body = {
'query': {'terms': {'ext': IMG_EXTS}},
Expand Down Expand Up @@ -118,7 +120,8 @@ def lambda_handler(request):

es_host = os.environ['ES_HOST']
region = os.environ['AWS_REGION']

index_overrides = os.getenv('INDEX_OVERRIDES', '')

auth = BotoAWSRequestsAuth(
aws_host=es_host,
aws_region=region,
Expand All @@ -133,13 +136,13 @@ def lambda_handler(request):
connection_class=RequestsHttpConnection
)

to_search = f"{indexes},{INDEX_OVERRIDES}" if INDEX_OVERRIDES else indexes
to_search = f"{indexes},{index_overrides}" if index_overrides else indexes
result = es_client.search(
to_search,
body,
_source=_source,
size=size,
terminate_after=MAX_DOCUMENTS_PER_SHARD,
terminate_after=terminate_after,
timeout=MAX_QUERY_DURATION
)

Expand Down
4 changes: 2 additions & 2 deletions lambdas/search/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def setUp(self):
'AWS_ACCESS_KEY_ID': 'test_key',
'AWS_SECRET_ACCESS_KEY': 'test_secret',
'AWS_REGION': 'ng-north-1',
'ES_HOST': 'www.example.com'
'ES_HOST': 'www.example.com',
'MAX_DOCUMENTS_PER_SHARD': '10000',
})
self.env_patcher.start()

Expand Down Expand Up @@ -86,7 +87,6 @@ def _callback(request):
def test_stats(self):
url = 'https://www.example.com:443/bucket/_search?' + urlencode(dict(
timeout='15s',
terminate_after=10000,
size=0,
_source = '',
))
Expand Down

0 comments on commit 33a4d8f

Please sign in to comment.