/
models.py
359 lines (273 loc) · 13.2 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
# -*- coding: utf-8 -*-
from itertools import repeat
import six
from django.db import models, connections
from django.db.models.query import QuerySet
from django.utils.encoding import smart_text
from djorm_pgfulltext.utils import adapt
# Compatibility import and fixes section.
try:
    from django.db.transaction import atomic
except ImportError:
    # Django < 1.6 has no ``atomic``. This shim wraps the older transaction
    # management functions behind the same context-manager interface. It is
    # not a full emulation of ``atomic`` (no savepoints, no decorator form);
    # it only offers the part of the interface this module uses.
    from django.db import transaction

    class atomic(object):
        """Minimal stand-in for ``django.db.transaction.atomic`` on old Django.

        :param using: alias of the database whose transaction is managed;
            ``None`` is forwarded as-is to the ``transaction`` functions.
        """

        def __init__(self, using=None):
            self.using = using

        def __enter__(self):
            # Only enter transaction management if nobody else already did,
            # and remember whether we are the ones who must leave it again.
            if not transaction.is_managed(using=self.using):
                transaction.enter_transaction_management(using=self.using)
                self.forced_managed = True
            else:
                self.forced_managed = False

        def __exit__(self, *args, **kwargs):
            # The context-manager protocol passes (exc_type, exc_value, tb);
            # a non-None exc_type means the ``with`` body raised.
            failed = bool(args) and args[0] is not None
            try:
                if failed:
                    # Do not persist partial work when the body failed.
                    if self.forced_managed:
                        transaction.rollback(using=self.using)
                    else:
                        transaction.rollback_unless_managed(using=self.using)
                elif self.forced_managed:
                    transaction.commit(using=self.using)
                else:
                    transaction.commit_unless_managed(using=self.using)
            finally:
                if self.forced_managed:
                    transaction.leave_transaction_management(using=self.using)
def auto_update_search_field_handler(sender, instance, *args, **kwargs):
    """Signal receiver that refreshes the search field of ``instance``.

    Calls ``instance.update_search_field()`` with no arguments; ``sender`` and
    any extra signal arguments are accepted but ignored.
    """
    refresh = instance.update_search_field
    refresh()
class SearchManagerMixIn(object):
    """
    A mixin to create a Manager with a 'search' method that may do a full text search
    on the model.

    The manager is set up with a list of one or more model's fields that will be searched.
    It can be a list of field names, or a list of tuples (field_name, weight). It can also
    be None, in that case every CharField and TextField in the model will be searched.

    You can also give a 'search_field', a VectorField into where the values of the searched
    fields are copied and normalized. If you give it, the searches will be made on this
    field; if not, they will be made directly in the searched fields.

    When using search_field, if auto_update = True, Django signals will be used to
    automatically synchronize the search_field with the searched fields every time instances
    are saved. If not, you can call the 'update_search_field' method in model instances to do this.
    If search_field is not used, both auto_update and update_search_field do nothing. Alternatively,
    you can create a postgresql trigger to do the synchronization at database level, see this:

    http://www.postgresql.org/docs/9.1/interactive/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS

    In both cases, you should create a text search index, on either the searched fields or
    the compound search_field, like explained here:

    http://www.postgresql.org/docs/9.1/interactive/textsearch-tables.html#TEXTSEARCH-TABLES-INDEX

    Finally, you can give a 'config', the Postgres text search configuration that will be used
    to normalize the search_field and the queries. How to create a configuration:

    http://www.postgresql.org/docs/9.1/interactive/textsearch-configuration.html

    Note that 'config' can be a tuple as in ('pg_catalog.english', 'pg_catalog.simple').
    In this case, fields are tokenized using each of the tokenizers specified in 'config'
    and the result is concatenated. This allows you to create tsvector with multiple configs.

    To do all those actions in database, create a setup sql script for Django:

    https://docs.djangoproject.com/en/1.4/howto/initial-data/#providing-initial-sql-data
    """

    def __init__(self,
                 fields=None,
                 search_field='search_index',
                 config='pg_catalog.english',
                 auto_update_search_field=False):
        self.search_field = search_field
        # Weight applied to fields given without an explicit weight.
        self.default_weight = 'D'
        self.config = config
        self.auto_update_search_field = auto_update_search_field
        self._fields = fields

        super(SearchManagerMixIn, self).__init__()

    def contribute_to_class(self, cls, name):
        '''
        Called automatically by Django when setting up the model class.

        For non-abstract models this attaches the manager as ``_fts_manager``,
        adds an ``update_search_field`` instance method (unless the class
        already has one) and, when ``auto_update_search_field`` is set,
        connects the post_save handler.
        '''
        if not cls._meta.abstract:
            # Attach this manager as _fts_manager in the model class.
            if not getattr(cls, '_fts_manager', None):
                cls._fts_manager = self

            # Add 'update_search_field' instance method, that calls manager's update_search_field.
            if not getattr(cls, 'update_search_field', None):
                def update_search_field(self, search_field=None, fields=None, using=None, config=None, extra=None):
                    self._fts_manager.update_search_field(
                        pk=self.pk, search_field=search_field, fields=fields, using=using, config=config, extra=extra
                    )

                setattr(cls, 'update_search_field', update_search_field)

            if self.auto_update_search_field:
                models.signals.post_save.connect(auto_update_search_field_handler, sender=cls)

        super(SearchManagerMixIn, self).contribute_to_class(cls, name)

    def get_queryset(self):
        """Return a ``SearchQuerySet`` bound to this manager's model and database."""
        return SearchQuerySet(model=self.model, using=self._db)

    def search(self, *args, **kwargs):
        """Shortcut for ``get_queryset().search(...)``; arguments are forwarded unchanged."""
        return self.get_queryset().search(*args, **kwargs)

    def update_search_field(self, pk=None, search_field=None, fields=None, config=None, using=None, extra=None):
        """
        Update the search_field of one instance, or a list of instances, or
        all instances in the table (pk is one key, a list of keys or none).

        If there is no search_field, this function does nothing. An empty
        collection of keys also does nothing.

        :param pk: Primary key of instance, a collection of keys, or None for all rows
        :param search_field: search_field which will be updated
        :param fields: fields from which we update the search_field
        :param config: config of full text search
        :param using: DB we are using
        :param extra: opaque value forwarded to the ``_convert_field_to_db`` hooks
        """
        if not search_field:
            search_field = self.search_field

        if not search_field:
            return

        params = []
        if pk is not None:
            if isinstance(pk, (list, tuple, set, frozenset)):
                params = list(pk)
            else:
                params = [pk]

            # "IN ()" is not valid SQL; no keys means there is nothing to update.
            if not params:
                return

        if fields is None:
            fields = self._fields

        if not config:
            config = self.config

        if using is None:
            using = self.db

        connection = connections[using]
        qn = connection.ops.quote_name

        where_sql = ''
        if params:
            where_sql = "WHERE %s IN (%s)" % (
                qn(self.model._meta.pk.column),
                ','.join(repeat("%s", len(params)))
            )

        search_vector = self._get_search_vector(config, using, fields=fields, extra=extra)
        sql = "UPDATE %s SET %s = %s %s;" % (
            qn(self.model._meta.db_table),
            qn(search_field),
            # With no source fields the vector expression is empty; store an empty tsvector.
            search_vector or "''",
            where_sql
        )

        # The transaction must be opened on the same database the cursor uses.
        with atomic(using=using):
            cursor = connection.cursor()
            cursor.execute(sql, params)

    def _find_text_fields(self):
        """Return ``(name, None)`` for every CharField and TextField of the model."""
        return [(f.name, None) for f in self.model._meta.fields
                if isinstance(f, (models.CharField, models.TextField))]

    def _parse_fields(self, fields):
        """
        Parse fields list into a correct format needed by this manager.
        If any field does not exist, raise ValueError.

        :param fields: list/tuple whose items are field names or
            ``(field_name, weight)`` pairs (both forms may be mixed); any
            other value selects every text field of the model.
        :return: set of ``(field_name, weight)`` tuples; weight is None when
            not given.
        """
        parsed_fields = set()

        if fields is not None and isinstance(fields, (list, tuple)):
            # Normalize every item on its own so that names and pairs can be
            # mixed, and pairs given as lists become hashable tuples.
            for item in fields:
                if isinstance(item, (list, tuple)):
                    parsed_fields.add(tuple(item))
                else:
                    parsed_fields.add((item, None))

            # Does not support field.attname.
            field_names = set(field.name for field in self.model._meta.fields if not field.primary_key)
            non_model_fields = set(x[0] for x in parsed_fields).difference(field_names)
            if non_model_fields:
                raise ValueError("The following fields do not exist in this"
                                 " model: {0}".format(", ".join(sorted(str(x) for x in non_model_fields))))
        else:
            parsed_fields.update(self._find_text_fields())

        return parsed_fields

    def _get_search_vector(self, configs, using, fields=None, extra=None):
        """
        Build the SQL expression for the combined search vector.

        One vector expression is produced per (config, field) combination
        and all of them are joined with ``||``.

        :param configs: a single config name or an iterable of config names
        :param using: database alias passed on to the per-field conversion
        :param fields: fields to use; None means the manager's own fields
        :param extra: opaque value forwarded to the conversion hooks
        :return: SQL string, empty when there are no fields
        """
        vector_fields = self._parse_fields(self._fields if fields is None else fields)

        if isinstance(configs, six.string_types):
            configs = [configs]

        return ' || '.join(
            self._get_vector_for_field(field_name, weight=weight, config=config, using=using, extra=extra)
            for config in configs
            for field_name, weight in vector_fields
        )

    def _get_vector_for_field(self, field_name, weight=None, config=None, using=None, extra=None):
        """
        Return the SQL vector expression for a single model field.

        The model may define its own ``_convert_field_to_db`` hook; when it
        is absent or returns None, the manager's default conversion is used.
        """
        if not weight:
            weight = self.default_weight

        if not config:
            config = self.config

        if using is None:
            using = self.db

        field = self.model._meta.get_field(field_name)

        ret = None

        if hasattr(self.model, '_convert_field_to_db'):
            ret = self.model._convert_field_to_db(field, weight, config, using, extra=extra)

        if ret is None:
            ret = self._convert_field_to_db(field, weight, config, using, extra=extra)

        return ret

    @staticmethod
    def _convert_field_to_db(field, weight, config, using, extra=None):
        """
        Default conversion of a field into a weighted ``to_tsvector`` expression.

        ``extra`` is accepted for signature parity with model-level hooks and
        is not used here. ``config`` and ``weight`` are interpolated into the
        SQL text as-is, so they must come from trusted code.
        """
        connection = connections[using]
        qn = connection.ops.quote_name

        return "setweight(to_tsvector('%s', coalesce(%s.%s, '')), '%s')" % \
               (config, qn(field.model._meta.db_table), qn(field.column), weight)
class SearchQuerySet(QuerySet):
    """QuerySet that adds a PostgreSQL full text ``search`` method."""

    @property
    def manager(self):
        """The manager stored on the model as ``_fts_manager``."""
        return self.model._fts_manager

    @property
    def db(self):
        """Database alias of this queryset, falling back to the manager's alias."""
        return self._db or self.manager.db

    def search(self, query, rank_field=None, rank_function='ts_rank', config=None,
               rank_normalization=32, raw=False, using=None, fields=None,
               headline_field=None, headline_document=None):
        '''
        Convert query with to_tsquery or plainto_tsquery, depending on raw is
        `True` or `False`, and return a QuerySet with the filter.

        If `rank_field` is not `None`, a field with this name will be added
        containing the search rank of the instances, and the queryset will be
        ordered by it. The rank_function and normalization are explained here:

        http://www.postgresql.org/docs/9.1/interactive/textsearch-controls.html#TEXTSEARCH-RANKING

        If an empty query is given, no filter is made so the QuerySet will
        return all model instances.

        If `fields` is not `None`, the filter is made with these fields instead
        of the ones defined in the manager's constructor.

        If `headline_field` and `headline_document` are not `None`, a field with
        this `headline_field` name will be added containing the headline of the
        instances, which will be searched inside `headline_document`.

        Search headlines are explained here:
        http://www.postgresql.org/docs/9.1/static/textsearch-controls.html#TEXTSEARCH-HEADLINE

        Raises `ValueError` when no `fields` are given and the manager has no
        `search_field`.

        NOTE: `config`, `rank_function` and `headline_document` are interpolated
        into the SQL text unescaped; never pass user-supplied values for them.
        '''
        if not config:
            config = self.manager.config

        db_alias = using if using is not None else self.db
        connection = connections[db_alias]
        qn = connection.ops.quote_name

        qs = self
        if using is not None:
            qs = qs.using(using)

        # An empty query applies no filter at all.
        if not query:
            return qs

        function = "to_tsquery" if raw else "plainto_tsquery"
        ts_query = smart_text(
            "%s('%s', %s)" % (function, config, adapt(query))
        )

        # if fields is passed, obtain a vector expression with
        # these fields. Otherwise use the search_field, if it exists.
        if fields:
            # Use the resolved alias so the vector is built for the same
            # database the query runs on, even when ``using`` was not given.
            search_vector = self.manager._get_search_vector(config, db_alias, fields=fields)
        else:
            if not self.manager.search_field:
                raise ValueError("search_field is not specified")

            # Quote the column only here: search_field may legitimately be
            # empty when explicit fields are used.
            search_vector = "%s.%s" % (
                qn(self.model._meta.db_table),
                qn(self.manager.search_field)
            )

        where = " (%s) @@ (%s)" % (search_vector, ts_query)
        select_dict, order = {}, []

        if rank_field:
            select_dict[rank_field] = '%s(%s, %s, %d)' % (
                rank_function,
                search_vector,
                ts_query,
                rank_normalization
            )
            order = ['-%s' % (rank_field,)]

        if headline_field is not None and headline_document is not None:
            select_dict[headline_field] = "ts_headline('%s', %s, %s)" % (
                config,
                headline_document,
                ts_query
            )

        return qs.extra(select=select_dict, where=[where], order_by=order)
class SearchManager(SearchManagerMixIn, models.Manager):
    """Concrete manager combining ``SearchManagerMixIn`` with ``models.Manager``."""