Skip to content

Commit

Permalink
[solr tests] Testing that solr picks up on the flexible tags.
Browse files Browse the repository at this point in the history
  • Loading branch information
icmurray committed Nov 15, 2011
1 parent 081bc0c commit 15345e3
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 8 deletions.
12 changes: 6 additions & 6 deletions ckan/lib/create_test_data.py
Expand Up @@ -535,7 +535,7 @@ def get_all_data(cls):
search_items = [{'name':'gils',
'title':'Government Information Locator Service',
'url':'',
'tags':'registry country-usa government federal gov workshop-20081101 penguin',
'tags':'registry,country-usa,government,federal,gov,workshop-20081101,penguin'.split(','),
'resources':[{'url':'http://www.dcsf.gov.uk/rsgateway/DB/SFR/s000859/SFR17_2009_tables.xls',
'format':'XLS',
'last_modified': datetime.datetime(2005,10,01),
Expand All @@ -559,7 +559,7 @@ def get_all_data(cls):
'title':'U.S. Government Photos and Graphics',
'url':'http://www.usa.gov/Topics/Graphics.shtml',
'download_url':'http://www.usa.gov/Topics/Graphics.shtml',
'tags':'images graphics photographs photos pictures us usa america history wildlife nature war military todo-split gov penguin',
'tags':'images,graphics,photographs,photos,pictures,us,usa,america,history,wildlife,nature,war,military,todo split,gov,penguin'.split(','),
'groups':'ukgov test1 penguin',
'license':'other-open',
'notes':'''## About
Expand All @@ -575,7 +575,7 @@ def get_all_data(cls):
'title':'Text of US Federal Cases',
'url':'http://bulk.resource.org/courts.gov/',
'download_url':'http://bulk.resource.org/courts.gov/',
'tags':'us courts case-law us courts case-law gov legal law access-bulk penguins penguin',
'tags':'us,courts,case-law,us,courts,case-law,gov,legal,law,access-bulk,penguins,penguin'.split(','),
'groups':'ukgov test2 penguin',
'license':'cc-zero',
'notes':'''### Description
Expand All @@ -590,7 +590,7 @@ def get_all_data(cls):
},
{'name':'uk-government-expenditure',
'title':'UK Government Expenditure',
'tags':'workshop-20081101 uk gov expenditure finance public funding penguin',
'tags':'workshop-20081101,uk,gov,expenditure,finance,public,funding,penguin'.split(','),
'groups':'ukgov penguin',
'notes':'''Discussed at [Workshop on Public Information, 2008-11-02](http://okfn.org/wiki/PublicInformation).
Expand All @@ -601,7 +601,7 @@ def get_all_data(cls):
'title':'Sweden - Government Offices of Sweden - Publications',
'url':'http://www.sweden.gov.se/sb/d/574',
'groups':'penguin',
'tags':'country-sweden format-pdf access-www documents publications government eutransparency penguin',
'tags':u'country-sweden,format-pdf,access-www,documents,publications,government,eutransparency,penguin,CAPITALS,surprise!,greek omega \u03a9,strange character \u0489'.split(','),
'license':'',
'notes':'''### About
Expand All @@ -617,7 +617,7 @@ def get_all_data(cls):
'groups':'penguin',
'url':'http://www.opengov.se/',
'download_url':'http://www.opengov.se/data/open/',
'tags':'country-sweden government data penguin',
'tags':'country-sweden,government,data,penguin'.split(','),
'license':'cc-by-sa',
'notes':'''### About
Expand Down
57 changes: 55 additions & 2 deletions ckan/tests/lib/test_solr_package_search.py
Expand Up @@ -118,12 +118,49 @@ def test_tags_field(self):
result = search.query_for(model.Package).run({'q': u'country-sweden'})
assert self._check_entity_names(result, ['se-publications', 'se-opengov']), self._pkg_names(result)

def test_tags_field_split_word(self):
    """Run the free-text query 'todo split' and check the matched packages.

    The result is compared against ['us-gov-images'] by
    `_check_entity_names`; on failure the names from `_pkg_names` are
    reported as the assertion message.
    """
    searcher = search.query_for(model.Package)
    found = searcher.run({'q': u'todo split'})
    assert self._check_entity_names(found, ['us-gov-images']), self._pkg_names(found)

def test_tags_field_with_capitals(self):
    """Run the free-text query 'CAPITALS' and check the matched packages.

    The result is compared against ['se-publications'] by
    `_check_entity_names`; on failure the names from `_pkg_names` are
    reported as the assertion message.
    """
    searcher = search.query_for(model.Package)
    found = searcher.run({'q': u'CAPITALS'})
    assert self._check_entity_names(found, ['se-publications']), self._pkg_names(found)

def test_tags_field_with_the_special_character_exclamation_mark(self):
    """
    Check that searching for "surprise\\!" finds the package tagged "surprise!".

    '!' is a special character in the solr query syntax, so it has to be
    escaped by the caller.  The package search cannot escape special
    characters automatically, because arbitrary solr queries must remain
    runnable through it and such characters may be part of a genuine
    solr query expression.

    This test therefore serves two purposes:

    1. It shows that a package with a special character in one of its
       tags can be found, provided the query is written correctly.

    2. It documents that special solr characters must be escaped in the
       query.
    """
    searcher = search.query_for(model.Package)
    found = searcher.run({'q': u'surprise\\!'})
    assert self._check_entity_names(found, ['se-publications']), self._pkg_names(found)

def dont_test_tags_field_with_basic_unicode(self):
    """Run a free-text query containing U+03A9 and check the matched packages.

    The result is compared against ['se-publications'] by
    `_check_entity_names`.
    """
    # NOTE(review): the `dont_` prefix looks intended to keep this out of
    # the test run -- confirm against the runner's collection pattern
    # (some runners also match `_test` in the middle of a name).
    searcher = search.query_for(model.Package)
    found = searcher.run({'q': u'greek omega \u03a9'})
    assert self._check_entity_names(found, ['se-publications']), self._pkg_names(found)

def test_tags_token_simple(self):
    """Run two single `tags:` token queries and check each result.

    'tags:country-sweden' is checked against both Swedish packages via
    `_check_entity_names`; 'tags:wildlife' must make `_pkg_names` return
    exactly 'us-gov-images'.
    """
    sweden_hits = search.query_for(model.Package).run(
        {'q': u'tags:country-sweden'})
    assert self._check_entity_names(sweden_hits, ['se-publications', 'se-opengov']), self._pkg_names(sweden_hits)

    wildlife_hits = search.query_for(model.Package).run(
        {'q': u'tags:wildlife'})
    assert self._pkg_names(wildlife_hits) == 'us-gov-images', self._pkg_names(wildlife_hits)

def test_tags_token_with_multi_word_tag(self):
    """Run a `tags:` token query for the quoted tag "todo split".

    The result is compared against ['us-gov-images'] by
    `_check_entity_names`.
    """
    searcher = search.query_for(model.Package)
    found = searcher.run({'q': u'tags:"todo split"'})
    assert self._check_entity_names(found, ['us-gov-images']), self._pkg_names(found)

def test_tags_token_simple_with_deleted_tag(self):
# registry has been deleted
result = search.query_for(model.Package).run({'q': u'tags:registry'})
Expand All @@ -132,11 +169,25 @@ def test_tags_token_simple_with_deleted_tag(self):
def test_tags_token_multiple(self):
    """Run queries with two `tags:` tokens and check `_pkg_names` for each.

    Each query is paired with the exact string `_pkg_names` must return
    for its result.
    """
    cases = (
        (u'tags:country-sweden tags:format-pdf', 'se-publications'),
        (u'tags:"todo split" tags:war', 'us-gov-images'),
    )
    for query_text, expected_names in cases:
        found = search.query_for(model.Package).run({'q': query_text})
        assert self._pkg_names(found) == expected_names, self._pkg_names(found)

def test_tags_token_complicated(self):
    """Query two `tags:` tokens, one of which is 'somethingrandom'.

    `_pkg_names` must return the empty string for the result.
    """
    searcher = search.query_for(model.Package)
    found = searcher.run({'q': u'tags:country-sweden tags:somethingrandom'})
    assert self._pkg_names(found) == '', self._pkg_names(found)

def test_tags_token_with_capitals(self):
    """Run a `tags:` token query for the quoted tag "CAPITALS".

    The result is compared against ['se-publications'] by
    `_check_entity_names`.
    """
    searcher = search.query_for(model.Package)
    found = searcher.run({'q': u'tags:"CAPITALS"'})
    assert self._check_entity_names(found, ['se-publications']), self._pkg_names(found)

def test_tags_token_with_punctuation(self):
    """Run a `tags:` token query for the quoted tag "surprise!".

    Here the '!' is passed inside double quotes, with no backslash.
    The result is compared against ['se-publications'] by
    `_check_entity_names`.
    """
    searcher = search.query_for(model.Package)
    found = searcher.run({'q': u'tags:"surprise!"'})
    assert self._check_entity_names(found, ['se-publications']), self._pkg_names(found)

def dont_test_tags_token_with_basic_unicode(self):
    """Run a `tags:` token query for a quoted tag containing U+03A9.

    The result is compared against ['se-publications'] by
    `_check_entity_names`.
    """
    # NOTE(review): the `dont_` prefix looks intended to keep this out of
    # the test run -- confirm against the runner's collection pattern
    # (some runners also match `_test` in the middle of a name).
    searcher = search.query_for(model.Package)
    found = searcher.run({'q': u'tags:"greek omega \u03a9"'})
    assert self._check_entity_names(found, ['se-publications']), self._pkg_names(found)

def test_pagination(self):
# large search
all_results = search.query_for(model.Package).run({'q': self.q_all})
Expand Down Expand Up @@ -300,6 +351,8 @@ def test_overall(self):
self._check_search_results('groups:david', 2)
self._check_search_results('groups:roger', 1)
self._check_search_results('groups:lenny', 0)
self._check_search_results('tags:"russian"', 2)
self._check_search_results(u'tags:"Flexible \u0489!"', 2)


class TestGeographicCoverage(TestController):
Expand Down Expand Up @@ -419,10 +472,10 @@ def setup_class(cls):
setup_test_search_index()
init_data = [{'name':u'test1-penguin-canary',
'title':u'penguin',
'tags':u'canary goose squirrel wombat wombat'},
'tags':u'canary goose squirrel wombat wombat'.split()},
{'name':u'test2-squirrel-squirrel-canary-goose',
'title':u'squirrel goose',
'tags':u'penguin wombat'},
'tags':u'penguin wombat'.split()},
]
CreateTestData.create_arbitrary(init_data)
cls.pkg_names = [
Expand Down

0 comments on commit 15345e3

Please sign in to comment.