-
Notifications
You must be signed in to change notification settings - Fork 87
/
queries.py
486 lines (402 loc) · 17.2 KB
/
queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
"""
Query backends that allow us to run search queries for patients and episodes
"""
import datetime
import operator
from collections import defaultdict
from functools import reduce
from django.contrib.contenttypes.models import ContentType
from django.db import models as djangomodels
from django.db.models import Q, Max
from django.conf import settings
from opal import models
from opal.core import fields, subrecords
from opal.utils import stringport
from opal.core.search.search_rule import SearchRule
def get_model_name_from_column_name(column_name):
    """
    Normalise COLUMN_NAME into the lower case form used in queryset
    lookups, with every space and underscore removed.
    """
    without_spaces = column_name.replace(' ', '')
    without_underscores = without_spaces.replace('_', '')
    return without_underscores.lower()
def get_model_from_api_name(column_name):
    """
    Return the model class that COLUMN_NAME refers to.

    "tagging" is special cased to the Tagging model; every other name is
    looked up via subrecords.get_subrecord_from_api_name.
    """
    if column_name != "tagging":
        return subrecords.get_subrecord_from_api_name(column_name)
    return models.Tagging
class PatientSummary(object):
    """
    A flattened, per-patient view of a collection of episodes, suitable
    for serialising into search results.
    """

    def __init__(self, patient, episodes):
        """
        Build the summary for PATIENT from EPISODES.

        start is the earliest truthy episode start and end is the latest
        truthy episode end; either is None when no episode supplies one.
        Name, hospital number and date of birth are copied from the first
        entry of the patient's demographics_set.
        """
        demographics = patient.demographics_set.all()[0]
        self.patient_id = patient.id
        self.first_name = demographics.first_name
        self.surname = demographics.surname
        self.hospital_number = demographics.hospital_number
        self.date_of_birth = demographics.date_of_birth

        started = [episode.start for episode in episodes if episode.start]
        self.start = min(started) if started else None

        ended = [episode.end for episode in episodes if episode.end]
        self.end = max(ended) if ended else None

        # Unique category names, in sorted order
        category_names = {episode.category_name for episode in episodes}
        self.categories = sorted(category_names)
        self.count = len(episodes)

    def to_dict(self):
        """
        Return the summary as a plain dictionary.
        """
        return {
            "patient_id": self.patient_id,
            "start": self.start,
            "end": self.end,
            "first_name": self.first_name,
            "surname": self.surname,
            "hospital_number": self.hospital_number,
            "date_of_birth": self.date_of_birth,
            "categories": self.categories,
            "count": self.count,
        }
def episodes_for_user(episodes, user):
    """
    Filter an iterable of EPISODES down to a list of those that USER has
    permission to know about, as decided by each episode's visible_to.
    """
    permitted = []
    for episode in episodes:
        if episode.visible_to(user):
            permitted.append(episode)
    return permitted
class QueryBackend(object):
    """
    The interface that search implementations inherit from.

    Subclasses supply the query methods; only patients_as_json has a
    default implementation.
    """

    def __init__(self, user, query):
        """
        Store the USER running the search and the QUERY to be run.
        """
        self.query = query
        self.user = user

    def fuzzy_query(self):
        """
        To be implemented by subclasses.
        """
        raise NotImplementedError

    def get_episodes(self):
        """
        To be implemented by subclasses.
        """
        raise NotImplementedError

    def description(self):
        """
        To be implemented by subclasses.
        """
        raise NotImplementedError

    def get_patients(self):
        """
        To be implemented by subclasses.
        """
        raise NotImplementedError

    def get_patient_summaries(self):
        """
        To be implemented by subclasses.
        """
        raise NotImplementedError

    def patients_as_json(self):
        """
        Return a list with the to_dict serialisation, for our user, of
        every patient returned by get_patients.
        """
        serialised = []
        for patient in self.get_patients():
            serialised.append(patient.to_dict(self.user))
        return serialised
class DatabaseQuery(QueryBackend):
    """
    The default built in query backend for Opal allows advanced search
    criteria building.

    Every criterion in the query is resolved to the episodes that match
    it. These are then folded together, in order, using the and/or/not
    combinator attached to each criterion, leaving a set of unique
    episodes.

    Finally we filter the episodes down to those visible to the user.
    """
    patient_summary_class = PatientSummary

    def fuzzy_query(self):
        """
        Fuzzy queries break apart the query string by spaces and search a
        number of fields based on the underlying tokens.

        We then search hospital number, first name and surname by those
        fields and order by the occurrences.

        So if you put in Anna Lisa, even though this is a first name split,
        because Anna and Lisa will both be found, this will rank higher
        than an Anna or a Lisa, although both of those will also be found.

        The matching itself is delegated to Patient.objects.search. We
        return the patients ordered by their highest episode id, most
        recent first.
        """
        patients = models.Patient.objects.search(self.query)
        patients = patients.annotate(
            max_episode_id=Max('episode__id')
        )
        return patients.order_by("-max_episode_id")

    def _episodes_for_filter_kwargs(self, filter_kwargs, model):
        """
        For a given MODEL, return the Episodes that match for FILTER_KWARGS,
        understanding how to handle both EpisodeSubrecord and
        PatientSubrecord appropriately.

        Returns None when MODEL is neither kind of subrecord.
        """
        if issubclass(model, models.EpisodeSubrecord):
            return models.Episode.objects.filter(**filter_kwargs)
        elif issubclass(model, models.PatientSubrecord):
            pats = models.Patient.objects.filter(**filter_kwargs)
            return models.Episode.objects.filter(
                patient__in=pats
            )
        return None

    def _episodes_for_boolean_fields(self, query, field, contains):
        """
        Returns episodes where the boolean FIELD of the subrecord named by
        QUERY['column'] equals the query value.

        The query value 'true' means True, anything else means False.
        CONTAINS is not used by this handler.
        """
        model = get_model_from_api_name(query['column'])
        model_name = get_model_name_from_column_name(query['column'])
        val = query['query'] == 'true'
        kw = {'{0}__{1}'.format(model_name, field): val}
        return self._episodes_for_filter_kwargs(kw, model)

    def _episodes_for_number_fields(self, query, field, contains):
        """
        Returns episodes where the numeric FIELD of the subrecord named by
        QUERY['column'] is greater than or less than the query value,
        according to QUERY['queryType'].

        Any other query type is treated as an exact match, mirroring what
        we do for date fields. CONTAINS is not used by this handler.
        """
        model = get_model_from_api_name(query['column'])
        model_name = get_model_name_from_column_name(query['column'])
        lookups = {'Greater Than': '__gt', 'Less Than': '__lt'}
        # Fall back to equality rather than failing on an unbound name
        qtype = lookups.get(query['queryType'], '')
        kw = {'{0}__{1}{2}'.format(model_name, field, qtype): query['query']}
        return self._episodes_for_filter_kwargs(kw, model)

    def _episodes_for_date_fields(self, query, field, contains):
        """
        Returns episodes where the date FIELD of the subrecord named by
        QUERY['column'] is before (inclusive), after (inclusive) or equal
        to the query value, according to QUERY['queryType'].

        The query value must be a dd/mm/YYYY string; strptime raises a
        ValueError otherwise. Any unrecognised query type is treated as
        'Equals'. CONTAINS is not used by this handler.
        """
        model = get_model_from_api_name(query['column'])
        model_name = get_model_name_from_column_name(query['column'])
        val = datetime.datetime.strptime(query['query'], "%d/%m/%Y")
        lookups = {'Before': '__lte', 'After': '__gte', 'Equals': ''}
        qtype = lookups.get(query['queryType'], '')
        kw = {'{0}__{1}{2}'.format(model_name, field, qtype): val}
        return self._episodes_for_filter_kwargs(kw, model)

    def _get_lookuplist_names_for_query_string(
            self, lookuplist, query_string, contains):
        """
        Returns a list of canonical terms from a given LOOKUPLIST that match
        QUERY_STRING respecting CONTAINS - which will be one of:
        '__iexact'
        '__icontains'

        The match is made against the names of the Synonyms of LOOKUPLIST;
        what we return is the name of the entry each synonym points at.
        """
        content_type = ContentType.objects.get_for_model(lookuplist)
        filter_key_words = dict(content_type=content_type)
        filter_key_words["name{0}".format(contains)] = query_string
        synonyms = models.Synonym.objects.filter(**filter_key_words)
        return [synonym.content_object.name for synonym in synonyms]

    def _episodes_matching_any(self, Mod, q_objects):
        """
        Returns the distinct episodes that match at least one of Q_OBJECTS.

        The Q objects are applied to Episode when MOD is an
        EpisodeSubrecord. When MOD is a PatientSubrecord they are applied to
        Patient and we return all episodes of the matching patients.

        Raises a ValueError if MOD is neither.
        """
        any_of = reduce(operator.or_, q_objects)
        if issubclass(Mod, models.EpisodeSubrecord):
            return models.Episode.objects.filter(any_of).distinct()
        if issubclass(Mod, models.PatientSubrecord):
            patients = models.Patient.objects.filter(any_of).distinct()
            return models.Episode.objects.filter(
                patient__in=patients
            ).distinct()
        raise ValueError(
            "{0} is neither an episode nor a patient subrecord".format(Mod)
        )

    def _episodes_for_fkft_many_to_many_fields(
        self, query, field, contains, Mod
    ):
        """
        Returns episodes that match QUERY.

        We are dealing with Django ManyToMany fields that link a subrecord
        to an Opal Lookuplist.

        We need to construct a database query that will match episodes where:
        1) The .name attribute of the FK target matches the query string
        2) A synonym of the FK target matches the query string
        """
        related_query_name = Mod._meta.model_name
        lookuplist = getattr(Mod, field).field.related_model
        lookuplist_names = self._get_lookuplist_names_for_query_string(
            lookuplist, query['query'], contains
        )
        name_lookup = '{0}__{1}__name'.format(related_query_name, field)

        # 1)
        q_objects = [Q(**{name_lookup + contains: query['query']})]

        # 2)
        # A 'Contains' query adds every entry that has a synonym containing
        # the query expression. Otherwise we are after an exact match and
        # only take the first entry, on the assumption that at most one
        # lookuplist entry can have matched.
        if query["queryType"] != "Contains":
            lookuplist_names = lookuplist_names[:1]
        for name in lookuplist_names:
            q_objects.append(Q(**{name_lookup: name}))

        return self._episodes_matching_any(Mod, q_objects)

    def _episodes_for_fkorft_fields(self, query, field, contains, Mod):
        """
        Returns episodes that match QUERY.

        We are dealing with the Opal FreeTextOrForeignKey field.

        We need to construct a database query that will match episodes where:
        1) The free text value matches the query string
        2) The name of the foreign key value matches the query string
        - 2.1) This may be the canonical form (the .name attribute)
        - 2.2) This may be a synonymous form (a Synonym with a content_type)
        that matches FIELD.foreign_model
        """
        related_query_name = Mod._meta.model_name
        free_text_lookup = '{0}__{1}_ft{2}'.format(
            related_query_name, field, contains
        )
        name_lookup = '{0}__{1}_fk__name'.format(related_query_name, field)

        # Get all synonyms. If this is an 'Equals' query we expect at most
        # a single name, otherwise it's the names of all entries that have
        # synonyms containing the query.
        lookuplist_names = self._get_lookuplist_names_for_query_string(
            getattr(Mod, field).foreign_model, query['query'], contains
        )

        q_objects = [
            # 2.1)
            Q(**{name_lookup + contains: query['query']}),
            # 1)
            Q(**{free_text_lookup: query['query']}),
        ]

        # 2.2)
        # A 'Contains' query adds every entry that has a synonym containing
        # the query expression. Otherwise we are after an exact match and
        # only take the first entry, on the assumption that at most one
        # lookuplist entry can have matched.
        if query["queryType"] != "Contains":
            lookuplist_names = lookuplist_names[:1]
        for name in lookuplist_names:
            q_objects.append(Q(**{name_lookup: name}))

        return self._episodes_matching_any(Mod, q_objects)

    def episodes_for_criteria(self, criteria):
        """
        Given one set of criteria, return episodes that match it.

        CRITERIA is a dictionary with 'queryType', 'column', 'field' and
        'query' keys. If a SearchRule is registered for the column it
        answers the query, otherwise we dispatch on the type of the field
        being searched.
        """
        query = criteria
        contains = '__iexact'
        if query['queryType'] == 'Contains':
            contains = '__icontains'

        column_name = query['column']

        search_rule = SearchRule.get(column_name)
        if search_rule:
            return search_rule().query(query)

        field = query['field'].replace(' ', '_').lower()
        Mod = get_model_from_api_name(column_name)

        # The concrete Django field called FIELD, if there is exactly one
        named_fields = [f for f in Mod._meta.fields if f.name == field]
        model_field = named_fields[0] if len(named_fields) == 1 else None

        if isinstance(model_field, djangomodels.BooleanField):
            return self._episodes_for_boolean_fields(query, field, contains)

        if isinstance(model_field, djangomodels.DateField):
            return self._episodes_for_date_fields(query, field, contains)

        if model_field is not None and fields.is_numeric(model_field):
            return self._episodes_for_number_fields(query, field, contains)

        if hasattr(Mod, field) and isinstance(
            getattr(Mod, field), fields.ForeignKeyOrFreeText
        ):
            return self._episodes_for_fkorft_fields(
                query, field, contains, Mod
            )

        if hasattr(Mod, field) and isinstance(
            Mod._meta.get_field(field), djangomodels.ManyToManyField
        ):
            return self._episodes_for_fkft_many_to_many_fields(
                query, field, contains, Mod
            )

        if Mod == models.Tagging:
            # For tagging the 'field' is the name of the tag
            if query['field'] == "mine":
                # 'mine' is per user, so restrict to our user's tags
                tags = models.Tagging.objects.filter(
                    value="mine",
                    user=self.user
                )
                return models.Episode.objects.filter(
                    tagging__in=tags
                )
            tag_name = query['field'].replace(" ", "_").title()
            return models.Episode.objects.filter(
                tagging__value__iexact=tag_name
            )

        # Anything else is matched as text, on the episode or on the
        # patient as appropriate, with a single query.
        model_name = get_model_name_from_column_name(query['column'])
        queryset_path = '{0}__{1}{2}'.format(model_name, field, contains)
        kw = {queryset_path: query['query']}
        return self._episodes_for_filter_kwargs(kw, Mod)

    def get_aggregate_patients_from_episodes(self, episodes):
        """
        Group EPISODES by patient and return a list with the dictionary
        form of a patient_summary_class instance for each patient.
        """
        patient_to_episodes = defaultdict(set)

        for episode in episodes:
            patient_to_episodes[episode.patient].add(episode)

        return [
            self.patient_summary_class(patient, patient_episodes).to_dict()
            for patient, patient_episodes in patient_to_episodes.items()
        ]

    def _episodes_without_restrictions(self):
        """
        Return the episodes that match the whole query, before any check
        of what the user is allowed to see.

        The episodes of each criterion are merged into the running result
        according to that criterion's 'combine' value:
        'and' keeps only the episodes that are in both
        'or' keeps the episodes that are in either
        'not' removes the criterion's episodes from the running result

        The 'combine' value of the first criterion is not used. We return
        an empty list for an empty query, otherwise a set. An unknown
        'combine' value raises a KeyError.
        """
        methods = {
            'and': 'intersection',
            'or': 'union',
            'not': 'difference'
        }
        all_matches = [
            (q['combine'], self.episodes_for_criteria(q))
            for q in self.query
        ]
        if not all_matches:
            return []

        working = set(all_matches[0][1])

        for combine, episodes in all_matches[1:]:
            # The running result has to be the receiver: difference is not
            # symmetric, the other way round 'not' would keep the episodes
            # we are trying to exclude.
            working = getattr(working, methods[combine])(episodes)

        return working

    def get_episodes(self):
        """
        Return a list of the episodes matching the query that our user is
        allowed to see.
        """
        return episodes_for_user(
            self._episodes_without_restrictions(), self.user)

    def get_patient_summaries(self):
        """
        Return a list of summary dictionaries, one for each patient that
        has an episode matching the query.

        The summary covers all of the patient's episodes that our user is
        allowed to see, not only the ones that matched.
        """
        eps = self._episodes_without_restrictions()
        episode_ids = [e.id for e in eps]

        # get all episodes of patients, that have episodes that
        # match the criteria. The join repeats an episode for every
        # matching sibling, hence the distinct.
        all_eps = models.Episode.objects.filter(
            patient__episode__in=episode_ids
        ).distinct()
        filtered_eps = episodes_for_user(all_eps, self.user)
        return self.get_aggregate_patients_from_episodes(filtered_eps)

    def get_patients(self):
        """
        Return a list of the unique patients of the episodes returned by
        get_episodes.
        """
        patients = set(e.patient for e in self.get_episodes())
        return list(patients)

    def description(self):
        """
        Provide a textual description of the current search
        """
        filteritem = "{combine} {column} {field} {queryType} {query}"
        filters = "\n".join(
            filteritem.format(**f) for f in self.query
        )
        template = (
            "{username} ({date})\n"
            "Searching for:\n"
            "{filters}\n"
        )
        return template.format(
            username=self.user.username,
            date=datetime.datetime.now(),
            filters=filters
        )
def create_query(user, criteria):
    """
    Return the search backend to use for USER and CRITERIA.

    When settings.OPAL_SEARCH_BACKEND is defined, the backend it names is
    loaded with stringport; otherwise we use DatabaseQuery.

    This gives us a level of indirection to select the search backend
    we're going to use. Without it we can get import errors if the
    backend's module is loaded after this module.
    """
    if not hasattr(settings, "OPAL_SEARCH_BACKEND"):
        return DatabaseQuery(user, criteria)

    backend_class = stringport(settings.OPAL_SEARCH_BACKEND)
    return backend_class(user, criteria)