Skip to content

Commit

Permalink
Merge pull request #312 from lookit/feature/141-add-checkboxes-for-ci…
Browse files Browse the repository at this point in the history
…rcumstances

Query DSL replacing EligibilityModel workflow
  • Loading branch information
Datamance committed Aug 8, 2019
2 parents f6b52fc + 107c844 commit 046c82a
Show file tree
Hide file tree
Showing 17 changed files with 784 additions and 179 deletions.
4 changes: 4 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ pgcli = "*"
pyopenssl = "*"
pyinstrument = "*"
importanize = "*"
autopep8 = "*"
docformatter = "*"
yapf = "*"

[packages]
djangorestframework-jsonapi = "==2.7.0"
Expand Down Expand Up @@ -57,6 +60,7 @@ psycopg2 = "==2.7.5"
google-cloud-storage = "==1.14.0"
docker = "==3.7.2"
django-bitfield = "==1.9.3"
lark-parser = "==0.7.2"

[requires]
python_version = "3.6"
240 changes: 142 additions & 98 deletions Pipfile.lock

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions accounts/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,14 @@ class Meta:
"birthday": "Birthday",
"gestational_age_at_birth": "Gestational Age at Birth",
"additional_information": "Any additional information you'd like us to know",
"existing_conditions": "Characteristics and conditions",
"languages_spoken": "Languages this child is exposed to at home, school, or with another caregiver.",
}

help_texts = {
"given_name": "This lets you select the correct child to participate in a particular study. A nickname or initials are fine! We may include your child's name in email to you (for instance, \"There's a new study available for Molly!\") but will never publish names or use them in our research.",
"additional_information": "for instance, diagnosed developmental disorders or vision or hearing problems",
"additional_information": "For instance, diagnosed developmental disorders or vision or hearing problems",
"gestational_age_at_birth": "Please round down to the nearest full week of pregnancy completed",
}

widgets = {
Expand Down Expand Up @@ -248,11 +251,13 @@ class Meta:
"birthday": "Birthday",
"gestational_age_at_birth": "Gestational Age at Birth",
"additional_information": "Any additional information you'd like us to know",
"existing_conditions": "Characteristics and conditions",
"languages_spoken": "Languages this child is exposed to at home, school, or with another caregiver.",
}

help_texts = {
"given_name": "This lets you select the correct child to participate in a particular study. A nickname or initials are fine! We may include your child's name in email to you (for instance, \"There's a new study available for Molly!\") but will never publish names or use them in our research.",
"additional_information": "for instance, diagnosed developmental disorders or vision or hearing problems",
"additional_information": "For instance, diagnosed developmental disorders or vision or hearing problems",
}

widgets = {
Expand Down
3 changes: 3 additions & 0 deletions accounts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from model_utils import Choices
from multiselectfield import MultiSelectField

from accounts.queries import BitfieldQuerySet
from accounts.utils import build_org_group_name
from project.fields.datetime_aware_jsonfield import DateTimeAwareJSONField
from project.settings import EMAIL_FROM_ADDRESS
Expand Down Expand Up @@ -322,6 +323,8 @@ class Child(models.Model):
"accounts.User", related_name="children", related_query_name="children"
)

objects = BitfieldQuerySet.as_manager()

def __str__(self):
return f"<Child: {self.given_name}>"

Expand Down
304 changes: 304 additions & 0 deletions accounts/queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,304 @@
"""Constructs for different kinds of queries and managers."""

import ast
import operator
from datetime import date
from functools import reduce
from itertools import chain

from django.db import models
from django.db.models import F, Q
from lark import Lark, Transformer, v_args

from studies.fields import CONDITIONS, LANGUAGES


CONST_MAPPING = {"true": True, "false": False, "null": None}

GENDER_MAPPING = {"male": "m", "female": "f", "other": "o"}

CONDITION_FIELDS = {condition_tuple[0] for condition_tuple in CONDITIONS}

LANGUAGE_FIELDS = {f"speaks_{language_tuple[0]}" for language_tuple in LANGUAGES}

QUERY_GRAMMAR = """
?start: bool_expr
?bool_expr: bool_term ("OR" bool_term)*
?bool_term: bool_factor ("AND" bool_factor)*
?bool_factor: not_bool_factor
| "(" bool_expr ")"
| relation_expr
?relation_expr: gender_comparison
| gestational_age_comparison
| age_in_days_comparison
| language_comparison
| condition_comparison
| language_count_comparison
not_bool_factor: "NOT" bool_factor
gender_comparison: "gender" (EQ | NE) gender_target
// 24 to 40 weeks
gestational_age_comparison: "gestational_age_in_weeks" comparator GESTATIONAL_AGE_AS_WEEKS
age_in_days_comparison: "age_in_days" comparator INT
language_count_comparison: ("n_languages" | "num_languages") comparator INT
comparator: EQ | NE | LT | LTE | GT | GTE
gender_target: MALE | FEMALE | OTHER_GENDER | UNSPECIFIED_GENDER
language_comparison: LANGUAGE_TARGET
condition_comparison: CONDITION_TARGET
// TERMINALS
LANGUAGE_TARGET: {language_targets}
CONDITION_TARGET: {condition_targets}
GESTATIONAL_AGE_AS_WEEKS: /(2[4-9]|3[0-9]|40)/
EQ: "="
NE: "!="
LT: "<"
LTE: "<="
GT: ">"
GTE: ">="
TRUE: "true"i
FALSE: "false"i
NULL: "null"i
MALE: "male"i | "m"i
FEMALE: "female"i | "f"i
OTHER_GENDER: "other"i | "o"i
UNSPECIFIED_GENDER: "na"i
%import common.INT
%import common.WS
%ignore WS
""".format(
language_targets=" | ".join([f'"{target}"' for target in LANGUAGE_FIELDS]),
condition_targets=" | ".join([f'"{target}"' for target in CONDITION_FIELDS]),
)

QUERY_DSL_PARSER = Lark(QUERY_GRAMMAR, parser="earley")


def get_child_eligibility_for_study(child_obj, study_obj):
return get_child_eligibility(child_obj, study_obj.criteria_expression)


def get_child_eligibility(child_obj, criteria_expr):
if criteria_expr:
compiled_tester_func = compile_expression(criteria_expr)

expanded_child = _get_expanded_child(child_obj)

return bool(compiled_tester_func(expanded_child))
else:
return True


def compile_expression(boolean_algebra_expression: str):
"""Compiles a boolean algebra expression into a python function.
Args:
boolean_algebra_expression: a string boolean algebra expression.
Returns:
A function.
Raises:
lark.exceptions.ParseError: in case we cannot parse the boolean algebra.
"""
parse_tree = QUERY_DSL_PARSER.parse(boolean_algebra_expression)

func_body = FunctionTransformer().transform(parse_tree)

func_text = " ".join(["def property_tester(child_obj): return", func_body])

code_object = ast.parse(func_text, mode="exec")

new_func = compile(code_object, filename="temp.py", mode="exec")

temp_namespace = {}

exec(new_func, temp_namespace)

return temp_namespace["property_tester"]


def _get_expanded_child(child_object):
"""Expands a child object such that it can be evaluated easily.
The output of this method should be such that _compile_expression
can evaluate it; i.e. all keys are first-level.
Args:
child_object: a accounts.models.Child instance.
Returns:
A dict representing the child.
"""
expanded_child = _to_dict(child_object)

# 1) Change birthday to age in days.
age_delta = date.today() - expanded_child.pop("birthday")
expanded_child["age_in_days"] = age_delta.days

# 2) Expand existing conditions in-place.
expanded_conditions = dict(expanded_child.pop("existing_conditions").items())
expanded_child.update(expanded_conditions)

# 3) Expand languages in place.
expanded_languages = {
f"speaks_{langcode}": boolean
for langcode, boolean in expanded_child.pop("languages_spoken").items()
}
expanded_child.update(expanded_languages)

ga_enum = expanded_child.pop("gestational_age_at_birth")

gestational_age_in_weeks = _gestational_age_enum_value_to_weeks(ga_enum)

expanded_child["gestational_age_in_weeks"] = gestational_age_in_weeks

return expanded_child


def _to_dict(model_instance):
"""Better version of django.forms.models.model_to_dict.
Args:
model_instance: A django model instance.
Returns:
A dictionary formed from a model instance.
"""
opts = model_instance._meta
data = {}
for f in chain(opts.concrete_fields, opts.private_fields):
data[f.name] = f.value_from_object(model_instance)
return data


def _gestational_age_enum_value_to_weeks(enum_value: int):
"""Convert enum value on child object to actual # of weeks.
This enables us to directly query the expanded child object with a
scalar value. 0 == "under 24 weeks"; 17 = "Over 40 weeks". To see
enumerated values, please reference studies/fields.py.
"""
return min(max(23, enum_value + 23), 40) if enum_value else None


@v_args(inline=True)
class FunctionTransformer(Transformer):
def bool_expr(self, bool_term, *others):
or_clauses = " ".join(f"or {other}" for other in others)
return f"({bool_term} {or_clauses})"

def bool_term(self, bool_factor, *others):
and_clauses = " ".join(f"and {other}" for other in others)
return f"({bool_factor} {and_clauses})"

def gender_comparison(self, comparator, target_gender):
return f"child_obj.get('gender') {'==' if comparator == '=' else comparator} {target_gender}"

def gestational_age_comparison(self, comparator, num_weeks):
return f"child_obj.get('gestational_age_in_weeks') {comparator} {num_weeks}"

def age_in_days_comparison(self, comparator, num_days):
return f"child_obj.get('age_in_days') {comparator} {num_days}"

def language_comparison(self, lang_target):
return f"child_obj.get('{lang_target}', False)"

def condition_comparison(self, condition_target):
return f"child_obj.get('{condition_target}', False)"

def language_count_comparison(self, comparator, num_langs):
return (
f"len({{k: v for k, v in child_obj.items() if k.startswith('speaks_') and v}}) "
f"{comparator} {num_langs}"
)

def gender_target(self, gender):
gender = gender.lower()
return f"'{GENDER_MAPPING.get(gender, gender)}'"

def comparator(self, relation):
return "==" if relation == "=" else relation

def not_bool_factor(self, bool_factor):
return f"not {bool_factor}"


class BitfieldQuerySet(models.QuerySet):
"""A QuerySet that can handle bitwise queries intelligently.
The trick is in constructs like this: F(field_name) +
F(field_name).bitand(reduce(operator.or_, bitmasks, 0)) which might
produce a SQL query like so:
WHERE ...
"accounts_child"."existing_conditions" <
(("accounts_child"."existing_conditions" + (1 * ("accounts_child"."existing_conditions" & 12))))
This is a "bit hack" that relies on the fact that a bit state ANDed with a mask will give us a result that
is greater than zero if ~any~ of the bits match between the mask and the state. So what we are saying is,
"give me rows from this table where my_field is less than my_field + (my_field AND some_mask). This will only
ever be true if there are matching set bits between my_field and some_mask.
For has_one_of, we take all the bits we care about and OR them into a single mask (e.g., 01101)
For has_all_of, we split the individual bits we care about (e.g. 01000, 00100, 00001 - only powers of 2 in decimal)
and split them across AND filters in the where clause of our SQL query.
"""

def has_one_of(self, field_name: str, bitmasks: list):
"""Check to see that field_name has at least one of the bits in
bitmasks.
Args:
field_name: The field which we will be querying against - usually a BigInt
bitmasks: the list of integers which will serve as bitmasks
Returns:
A filtered queryset.
"""
filter_dict = {
f"{field_name}__gt": 0,
# field value contains one of supplied field bits
f"{field_name}__lt": F(field_name)
+ F(field_name).bitand(reduce(operator.or_, bitmasks, 0)),
}

return self.filter(**filter_dict)

def has_all_of(self, field_name: str, bitmasks: list):
"""Check to see that field_name has all of the bits in bitmasks.
Args:
field_name: The field which we will be querying against - usually a BigInt
bitmasks: the list of integers which will serve as bitmasks
Returns:
A filtered queryset.
"""

def make_query_dict(specific_mask):
return {
f"{field_name}__lt": F(field_name) + F(field_name).bitand(specific_mask)
}

has_each = map(lambda c: Q(**make_query_dict(c)), bitmasks)

filter_query = reduce(operator.and_, has_each, Q(**{f"{field_name}__gt": 0}))

return self.filter(filter_query)
Loading

0 comments on commit 046c82a

Please sign in to comment.