Skip to content
This repository
Fetching contributors…

Cannot retrieve contributors at this time

file 148 lines (112 sloc) 4.385 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
import re
import warnings
from django.utils.encoding import force_unicode


class BaseInput(object):
    """
The base input type. Doesn't do much. You want ``Raw`` instead.
"""
    input_type_name = 'base'
    post_process = True

    def __init__(self, query_string, **kwargs):
        self.query_string = query_string
        self.kwargs = kwargs

    def __repr__(self):
        return u"<%s '%s'>" % (self.__class__.__name__, self.__unicode__().encode('utf8'))

    def __unicode__(self):
        return force_unicode(self.query_string)

    def prepare(self, query_obj):
        return self.query_string


class Raw(BaseInput):
    """
An input type for passing a query directly to the backend.

Prone to not being very portable.
"""
    input_type_name = 'raw'
    post_process = False


class PythonData(BaseInput):
    """
Represents a bare Python non-string type.

Largely only for internal use.
"""
    input_type_name = 'python_data'


class Clean(BaseInput):
    """
An input type for sanitizing user/untrusted input.
"""
    input_type_name = 'clean'

    def prepare(self, query_obj):
        query_string = super(Clean, self).prepare(query_obj)
        return query_obj.clean(query_string)


class Exact(BaseInput):
    """
An input type for making exact matches.
"""
    input_type_name = 'exact'

    def prepare(self, query_obj):
        query_string = super(Exact, self).prepare(query_obj)

        if self.kwargs.get('clean', False):
            # We need to clean each part of the exact match.
            exact_bits = [Clean(bit).prepare(query_obj) for bit in query_string.split(' ') if bit]
            query_string = u' '.join(exact_bits)

        return query_obj.build_exact_query(query_string)


class Not(Clean):
    """
An input type for negating a query.
"""
    input_type_name = 'not'

    def prepare(self, query_obj):
        query_string = super(Not, self).prepare(query_obj)
        return query_obj.build_not_query(query_string)


class AutoQuery(BaseInput):
    """
A convenience class that handles common user queries.

In addition to cleaning all tokens, it handles double quote bits as
exact matches & terms with '-' in front as NOT queries.
"""
    input_type_name = 'auto_query'
    post_process = False
    exact_match_re = re.compile(r'"(?P<phrase>.*?)"')

    def prepare(self, query_obj):
        query_string = super(AutoQuery, self).prepare(query_obj)
        exacts = self.exact_match_re.findall(query_string)
        tokens = []
        query_bits = []

        for rough_token in self.exact_match_re.split(query_string):
            if not rough_token:
                continue
            elif not rough_token in exacts:
                # We have something that's not an exact match but may have more
                # than on word in it.
                tokens.extend(rough_token.split(' '))
            else:
                tokens.append(rough_token)

        for token in tokens:
            if not token:
                continue
            if token in exacts:
                query_bits.append(Exact(token, clean=True).prepare(query_obj))
            elif token.startswith('-') and len(token) > 1:
                # This might break Xapian. Check on this.
                query_bits.append(Not(token[1:]).prepare(query_obj))
            else:
                query_bits.append(Clean(token).prepare(query_obj))

        return u' '.join(query_bits)


class AltParser(BaseInput):
    """
If the engine supports it, this input type allows for submitting a query
that uses a different parser.
"""
    input_type_name = 'alt_parser'
    post_process = False
    use_parens = False

    def __init__(self, parser_name, query_string='', **kwargs):
        self.parser_name = parser_name
        self.query_string = query_string
        self.kwargs = kwargs

    def __repr__(self):
        return u"<%s '%s' '%s' '%s'>" % (self.__class__.__name__, self.parser_name, self.query_string, self.kwargs)

    def prepare(self, query_obj):
        if not hasattr(query_obj, 'build_alt_parser_query'):
            warnings.warn("Use of 'AltParser' input type is being ignored, as the '%s' backend doesn't support them." % query_obj)
            return ''

        return query_obj.build_alt_parser_query(self.parser_name, self.query_string, **self.kwargs)
Something went wrong with that request. Please try again.