Skip to content
This repository has been archived by the owner on Sep 7, 2023. It is now read-only.

Commit

Permalink
Merge pull request #1446 from MarcAbonce/language_aliases_fix
Browse files Browse the repository at this point in the history
[fix] Fix queries in Hebrew and Norwegian so they give results in the right language
  • Loading branch information
kvch committed Jan 7, 2019
2 parents 35d82ed + 1a850cf commit 491792c
Show file tree
Hide file tree
Showing 18 changed files with 18,975 additions and 18,710 deletions.
37,606 changes: 18,935 additions & 18,671 deletions searx/data/engines_languages.json

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions searx/engines/__init__.py
Expand Up @@ -113,8 +113,7 @@ def load_engine(engine_data):
iso_lang not in getattr(engine, 'supported_languages'):
language_aliases[iso_lang] = engine_lang

if language_aliases:
setattr(engine, 'language_aliases', language_aliases)
setattr(engine, 'language_aliases', language_aliases)

# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
Expand Down
2 changes: 1 addition & 1 deletion searx/engines/bing_images.py
Expand Up @@ -55,7 +55,7 @@ def request(query, params):
query=urlencode({'q': query}),
offset=offset)

language = match_language(params['language'], supported_languages).lower()
language = match_language(params['language'], supported_languages, language_aliases).lower()

params['cookies']['SRCHHPGUSR'] = \
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
Expand Down
2 changes: 1 addition & 1 deletion searx/engines/bing_videos.py
Expand Up @@ -48,7 +48,7 @@ def request(query, params):
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

# language cookie
language = match_language(params['language'], supported_languages).lower()
language = match_language(params['language'], supported_languages, language_aliases).lower()
params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'

# query and paging
Expand Down
8 changes: 4 additions & 4 deletions searx/engines/google.py
Expand Up @@ -166,7 +166,7 @@ def extract_text_from_dom(result, xpath):
def request(query, params):
offset = (params['pageno'] - 1) * 10

language = match_language(params['language'], supported_languages)
language = match_language(params['language'], supported_languages, language_aliases)
language_array = language.split('-')
if params['language'].find('-') > 0:
country = params['language'].split('-')[1]
Expand Down Expand Up @@ -381,10 +381,10 @@ def attributes_to_html(attributes):
def _fetch_supported_languages(resp):
    """Scrape Google's preferences page for the supported search languages.

    resp -- HTTP response whose body is the preferences HTML; each language
    is an ``<input name="lr" value="lang_<code>" data-name="...">`` inside
    the ``#langSec`` element.

    Returns a dict mapping language code (e.g. ``"zh-CN"``) to
    ``{"name": <title-cased display name>}``.
    """
    supported_languages = {}
    dom = html.fromstring(resp.text)
    options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]')
    for option in options:
        # value is "lang_<code>"; keep only the part after the underscore
        code = option.xpath('./@value')[0].split('_')[-1]
        # <input> elements carry no text, so read the display name from
        # the data-name attribute instead of option.text
        name = option.xpath('./@data-name')[0].title()
        supported_languages[code] = {"name": name}

    return supported_languages
2 changes: 1 addition & 1 deletion searx/engines/google_news.py
Expand Up @@ -51,7 +51,7 @@ def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
search_options=urlencode(search_options))

language = match_language(params['language'], supported_languages).split('-')[0]
language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
if language:
params['url'] += '&lr=lang_' + language

Expand Down
2 changes: 1 addition & 1 deletion searx/engines/qwant.py
Expand Up @@ -46,7 +46,7 @@ def request(query, params):
offset=offset)

# add language tag
language = match_language(params['language'], supported_languages)
language = match_language(params['language'], supported_languages, language_aliases)
params['url'] += '&locale=' + language.replace('-', '_').lower()

return params
Expand Down
2 changes: 1 addition & 1 deletion searx/engines/swisscows.py
Expand Up @@ -36,7 +36,7 @@

# do search-request
def request(query, params):
region = match_language(params['language'], supported_languages)
region = match_language(params['language'], supported_languages, language_aliases)
ui_language = region.split('-')[0]

search_path = search_string.format(
Expand Down
2 changes: 1 addition & 1 deletion searx/engines/wikidata.py
Expand Up @@ -68,7 +68,7 @@ def response(resp):
html = fromstring(resp.text)
search_results = html.xpath(wikidata_ids_xpath)

language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0]

# TODO: make requests asynchronous to avoid timeout when result_count > 1
for search_result in search_results[:result_count]:
Expand Down
2 changes: 1 addition & 1 deletion searx/engines/wikipedia.py
Expand Up @@ -31,7 +31,7 @@

# set language in base_url
def url_lang(lang):
    """Resolve *lang* to the base language code used in the wikipedia URL.

    Matches the request language against the engine's supported languages
    (honouring language aliases) and strips any region suffix, e.g.
    "en-US" -> "en".
    """
    matched = match_language(lang, supported_languages, language_aliases)
    return matched.split('-')[0]


# do search-request
Expand Down
1 change: 1 addition & 0 deletions tests/unit/engines/test_bing_images.py
Expand Up @@ -9,6 +9,7 @@ class TestBingImagesEngine(SearxTestCase):

def test_request(self):
bing_images.supported_languages = ['fr-FR', 'en-US']
bing_images.language_aliases = {}
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
Expand Down
1 change: 1 addition & 0 deletions tests/unit/engines/test_bing_videos.py
Expand Up @@ -9,6 +9,7 @@ class TestBingVideosEngine(SearxTestCase):

def test_request(self):
bing_videos.supported_languages = ['fr-FR', 'en-US']
bing_videos.language_aliases = {}
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
Expand Down
39 changes: 15 additions & 24 deletions tests/unit/engines/test_google.py
Expand Up @@ -15,7 +15,8 @@ def mock_response(self, text):
return response

def test_request(self):
google.supported_languages = ['en', 'fr', 'zh-CN']
google.supported_languages = ['en', 'fr', 'zh-CN', 'iw']
google.language_aliases = {'he': 'iw'}

query = 'test_query'
dicto = defaultdict(dict)
Expand All @@ -41,6 +42,12 @@ def test_request(self):
self.assertIn('zh-CN', params['url'])
self.assertIn('zh-CN', params['headers']['Accept-Language'])

dicto['language'] = 'he'
params = google.request(query, dicto)
self.assertIn('google.com', params['url'])
self.assertIn('iw', params['url'])
self.assertIn('iw', params['headers']['Accept-Language'])

def test_response(self):
self.assertRaises(AttributeError, google.response, None)
self.assertRaises(AttributeError, google.response, [])
Expand Down Expand Up @@ -198,29 +205,13 @@ def test_fetch_supported_languages(self):
html = u"""
<html>
<body>
<table>
<tbody>
<tr>
<td>
<font>
<label>
<span id="ten">English</span>
</label>
</font>
</td>
<td>
<font>
<label>
<span id="tzh-CN">中文 (简体)</span>
</label>
<label>
<span id="tzh-TW">中文 (繁體)</span>
</label>
</font>
</td>
</tr>
</tbody>
</table>
<div id="langSec">
<div>
<input name="lr" data-name="english" value="lang_en" />
<input name="lr" data-name="中文 (简体)" value="lang_zh-CN" />
<input name="lr" data-name="中文 (繁體)" value="lang_zh-TW" />
</div>
</div>
</body>
</html>
"""
Expand Down
1 change: 1 addition & 0 deletions tests/unit/engines/test_google_news.py
Expand Up @@ -10,6 +10,7 @@ class TestGoogleNewsEngine(SearxTestCase):

def test_request(self):
google_news.supported_languages = ['en-US', 'fr-FR']
google_news.language_aliases = {}
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
Expand Down
1 change: 1 addition & 0 deletions tests/unit/engines/test_qwant.py
Expand Up @@ -8,6 +8,7 @@ class TestQwantEngine(SearxTestCase):

def test_request(self):
qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR']
qwant.language_aliases = {}
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
Expand Down
1 change: 1 addition & 0 deletions tests/unit/engines/test_swisscows.py
Expand Up @@ -8,6 +8,7 @@ class TestSwisscowsEngine(SearxTestCase):

def test_request(self):
swisscows.supported_languages = ['de-AT', 'de-DE']
swisscows.language_aliases = {}
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
Expand Down
1 change: 1 addition & 0 deletions tests/unit/engines/test_wikidata.py
Expand Up @@ -27,6 +27,7 @@ def test_response(self):
self.assertRaises(AttributeError, wikidata.response, '[]')

wikidata.supported_languages = ['en', 'es']
wikidata.language_aliases = {}
response = mock.Mock(text='<html></html>', search_params={"language": "en"})
self.assertEqual(wikidata.response(response), [])

Expand Down
9 changes: 7 additions & 2 deletions tests/unit/engines/test_wikipedia.py
Expand Up @@ -8,7 +8,8 @@
class TestWikipediaEngine(SearxTestCase):

def test_request(self):
wikipedia.supported_languages = ['fr', 'en']
wikipedia.supported_languages = ['fr', 'en', 'no']
wikipedia.language_aliases = {'nb': 'no'}

query = 'test_query'
dicto = defaultdict(dict)
Expand All @@ -25,9 +26,13 @@ def test_request(self):
self.assertIn('Test_Query', params['url'])
self.assertNotIn('test_query', params['url'])

dicto['language'] = 'nb'
params = wikipedia.request(query, dicto)
self.assertIn('no.wikipedia.org', params['url'])

dicto['language'] = 'xx'
params = wikipedia.request(query, dicto)
self.assertIn('en', params['url'])
self.assertIn('en.wikipedia.org', params['url'])

def test_response(self):
dicto = defaultdict(dict)
Expand Down

0 comments on commit 491792c

Please sign in to comment.