Skip to content

Commit

Permalink
[tagname search] Made /tag search case-insensitive.
Browse files Browse the repository at this point in the history
 - This affects tagname autocomplete
 - Also fixed up a bug where '_' and '%' weren't being escaped in SQL LIKE expressions
  • Loading branch information
icmurray committed Nov 22, 2011
1 parent 3775845 commit d074ef2
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 19 deletions.
4 changes: 3 additions & 1 deletion ckan/logic/action/get.py
Expand Up @@ -6,6 +6,7 @@
import ckan
from ckan.logic import NotFound
from ckan.logic import check_access
from ckan.model import misc
from ckan.plugins import (PluginImplementations,
IGroupController,
IPackageController)
Expand Down Expand Up @@ -789,7 +790,8 @@ def tag_search(context, data_dict):
return

for term in terms:
q = q.filter(model.Tag.name.contains(term))
escaped_term = misc.escape_sql_like_special_characters(term, escape='\\')
q = q.filter(model.Tag.name.ilike('%' + escaped_term + '%'))

count = q.count()
q = q.offset(offset)
Expand Down
17 changes: 17 additions & 0 deletions ckan/model/misc.py
@@ -0,0 +1,17 @@
"""
Contains miscelaneous set of DB-related functions
"""

import re

_special_characters = '%_'
def escape_sql_like_special_characters(term, escape='\\'):
"""
Escapes characters that are special to the the sql LIKE expression.
In particular, for both postgres and sqlite this means '%' and '_'.
"""
for ch in escape + _special_characters:
term = term.replace(ch, escape+ch)
return term

27 changes: 13 additions & 14 deletions ckan/tests/functional/api/test_action.py
Expand Up @@ -663,13 +663,6 @@ def test_15_tag_autocomplete_tag_with_punctuation(self):
def test_15_tag_autocomplete_tag_with_capital_letters(self):
"""
Asserts autocomplete finds tags that contain capital letters
Note : the only reason this test passes for now is that the
search is for a lower cases substring. If we had searched
for "CAPITAL" then it would fail. This is because currently
the search API lower cases all search terms. There's a
sister test (`test_15_tag_autocomplete_search_with_capital_letters`)
that tests that functionality.
"""

CreateTestData.create_arbitrary([{
Expand Down Expand Up @@ -738,13 +731,6 @@ def test_15_tag_autocomplete_search_with_punctuation(self):
def test_15_tag_autocomplete_search_with_capital_letters(self):
"""
Asserts that a search term containing capital letters works correctly
NOTE - this test FAILS. This is because I haven't implemented the
search side of flexible tags yet.
NOTE - when this test is fixed, remove the NOTE in
`test_15_tag_autocomplete_tag_with_capital_letters` that
references this one.
"""

CreateTestData.create_arbitrary([{
Expand All @@ -759,6 +745,19 @@ def test_15_tag_autocomplete_search_with_capital_letters(self):
assert res_obj['success']
assert 'CAPITAL idea old chap' in res_obj['result'], res_obj['result']

def test_15_tag_autocomplete_is_case_insensitive(self):
CreateTestData.create_arbitrary([{
'name': u'package-with-tag-that-has-a-capital-letter-3',
'tags': [u'MIX of CAPITALS and LOWER case'],
'license': 'never_heard_of_it',
}])

postparams = '%s=1' % json.dumps({'q':u'lower case'})
res = self.app.post('/api/action/tag_autocomplete', params=postparams)
res_obj = json.loads(res.body)
assert res_obj['success']
assert 'MIX of CAPITALS and LOWER case' in res_obj['result'], res_obj['result']

def test_16_user_autocomplete(self):
#Empty query
postparams = '%s=1' % json.dumps({})
Expand Down
7 changes: 5 additions & 2 deletions ckan/tests/lib/test_solr_package_search.py
Expand Up @@ -164,10 +164,10 @@ def test_tags_token_with_punctuation(self):
result = search.query_for(model.Package).run({'q': u'tags:"surprise."'})
assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result)

def dont_test_tags_token_with_basic_unicode(self):
def test_tags_token_with_basic_unicode(self):
result = search.query_for(model.Package).run({'q': u'tags:"greek omega \u03a9"'})
assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result)

def test_pagination(self):
# large search
all_results = search.query_for(model.Package).run({'q': self.q_all})
Expand Down Expand Up @@ -333,6 +333,9 @@ def test_overall(self):
self._check_search_results('groups:lenny', 0)
self._check_search_results('tags:"russian"', 2)
self._check_search_results(u'tags:"Flexible \u30a1"', 2)
self._check_search_results(u'Flexible \u30a1', 2)
self._check_search_results(u'Flexible', 2)
self._check_search_results(u'flexible', 2)


class TestGeographicCoverage(TestController):
Expand Down
4 changes: 2 additions & 2 deletions ckan/tests/lib/test_tag_search.py
Expand Up @@ -51,9 +51,9 @@ def test_search_with_unicode_in_search_query(self):
result = search.query_for(model.Tag).run(query=u' \u30a1')
assert u'Flexible \u30a1' in result['results']

def test_search_is_case_sensitive(self):
def test_search_is_case_insensitive(self):
result = search.query_for(model.Tag).run(query=u'flexible')
assert u'Flexible \u30a1' not in result['results']
assert u'Flexible \u30a1' in result['results']


def test_good_search_fields(self):
Expand Down
45 changes: 45 additions & 0 deletions ckan/tests/models/test_misc.py
@@ -1,5 +1,8 @@
from nose.tools import assert_equal

from ckan.tests import *
import ckan.model as model
from ckan.model.misc import escape_sql_like_special_characters

class TestRevisionExtraAttributes:
@classmethod
Expand All @@ -23,3 +26,45 @@ def test_revision_user(self):
assert rev.user is not None, rev
assert rev.user.name == rev.author

_sql_escape = escape_sql_like_special_characters
class TestEscapeSqlLikeCharacters(object):
"""
Tests for model.misc.escape_sql_like_special_characters
"""

def test_identity(self):
"""Asserts that it escapes nothing if nothing needs escaping"""
terms = ['',
'word',
'two words']
for term, expected_term in zip(terms, terms):
assert_equal(_sql_escape(term), expected_term)

def test_escape_chararacter_is_escaped(self):
"""Asserts that the escape character is escaped"""
term = r'backslash \ character'
assert_equal (_sql_escape(term, escape='\\'),
r'backslash \\ character')

term = 'surprise!'
assert_equal (_sql_escape(term, escape='!'),
r'surprise!!')

def test_default_escape_character_is_a_backslash(self):
"""Asserts that the default escape character is the backslash"""
term = r'backslash \ character'
assert_equal (_sql_escape(term),
r'backslash \\ character')

def test_sql_like_special_characters_are_escaped(self):
"""Asserts that '%' and '_' are escaped correctly"""
terms = [
(r'percents %', r'percents \%'),
(r'underscores _', r'underscores \_'),
(r'backslash \ ', r'backslash \\ '),
(r'all three \ _%', r'all three \\ \_\%'),
]

for term, expected_result in terms:
assert_equal(_sql_escape(term), expected_result)

0 comments on commit d074ef2

Please sign in to comment.