From 173f2015cd18ec02e62dbc1b15bc213b3acf3248 Mon Sep 17 00:00:00 2001
From: Taras Semenenko
Date: Tue, 8 Jul 2014 19:56:37 +0400
Subject: [PATCH] Filter Elasticsearch search results by score

Fix #250
---
 pootle/apps/pootle_store/models.py |  3 +--
 pootle/core/tmserver.py            | 40 +++++++++++++++++++-----------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/pootle/apps/pootle_store/models.py b/pootle/apps/pootle_store/models.py
index cbddc638abe..edde1376bda 100644
--- a/pootle/apps/pootle_store/models.py
+++ b/pootle/apps/pootle_store/models.py
@@ -861,8 +861,7 @@ def update_tmserver(self):
         update_tmserver(self.store.translation_project.language.code, obj)
 
     def get_tm_suggestions(self):
-        return get_tmsuggestions(self.store.translation_project.language.code,
-                                 self.source)
+        return get_tmsuggestions(self)
 
 
 ##################### TranslationUnit ############################
diff --git a/pootle/core/tmserver.py b/pootle/core/tmserver.py
index 1d86476c4df..35e8700e037 100644
--- a/pootle/core/tmserver.py
+++ b/pootle/core/tmserver.py
@@ -27,6 +27,10 @@
 from django.conf import settings
 
 
+# Elasticsearch filter settings
+MIN_SCORE = 0.1
+
+
 def get_params():
     params = getattr(settings, 'POOTLE_TM_SERVER', None)
 
@@ -51,26 +55,34 @@ def update(language, obj):
                  body=obj, id=obj['id'])
 
 
-def search(language, source):
+
+def is_valuable_hit(unit, hit):
+    if hit['_score'] < MIN_SCORE or str(unit.id) == hit['_id']:
+        return False
+
+    return True
+
+
+def search(unit):
     if es is not None:
         res = []
+        language = unit.store.translation_project.language.code
         es_res = es.search(index=es_params['INDEX_NAME'],
                            doc_type=language,
-                           body={"query": {"match": {'source': source}}})
+                           body={"query": {"match": {'source': unit.source}}})
 
-        max_score = es_res['hits']['max_score']
         for hit in es_res['hits']['hits']:
-            res.append({
-                'unit_id': ['_id'],
-                'quality': 100 * hit['_score'] / max_score,
-                'source': hit['_source']['source'],
-                'target': hit['_source']['target'],
-                'project': hit['_source']['project'],
-                'path': hit['_source']['path'],
-                'username': hit['_source']['username'],
-                'fullname': hit['_source']['fullname'],
-                'email_md5': hit['_source']['email_md5'],
-            })
+            if is_valuable_hit(unit, hit):
+                res.append({
+                    'unit_id': hit['_id'],
+                    'source': hit['_source']['source'],
+                    'target': hit['_source']['target'],
+                    'project': hit['_source']['project'],
+                    'path': hit['_source']['path'],
+                    'username': hit['_source']['username'],
+                    'fullname': hit['_source']['fullname'],
+                    'email_md5': hit['_source']['email_md5'],
+                })
 
         return res
 