Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backend text filtering #879

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
260 changes: 211 additions & 49 deletions db/filters/base.py

Large diffs are not rendered by default.

45 changes: 21 additions & 24 deletions db/filters/operations/deserialize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any
from db.filters.base import (
Predicate, Leaf, SingleParameter, MultiParameter, NoParameter, Branch, get_predicate_subclass_by_type_str, BadFilterFormat
Predicate, Leaf, SingleParameter, MultiParameter, NoParameter, Branch, get_predicate_subclass_by_id_str, BadFilterFormat
)


Expand All @@ -9,28 +9,25 @@ def get_first_dict_key(dict: dict) -> Any:
return next(iter(dict))
if not isinstance(spec, dict):
raise BadFilterFormat("Parsing of Mathesar filter specification failed.")
predicate_type_str = get_first_dict_key(spec)
predicate_subclass = get_predicate_subclass_by_type_str(predicate_type_str)
predicate_body = spec[predicate_type_str]
if issubclass(predicate_subclass, Leaf):
columnName = predicate_body['column']
if issubclass(predicate_subclass, SingleParameter):
return predicate_subclass(column=columnName, parameter=predicate_body['parameter'])
elif issubclass(predicate_subclass, MultiParameter):
return predicate_subclass(column=columnName, parameters=predicate_body['parameters'])
elif issubclass(predicate_subclass, NoParameter):
return predicate_subclass(column=columnName)
predicate_id_str = get_first_dict_key(spec)
try:
predicate_subclass = get_predicate_subclass_by_id_str(predicate_id_str)
predicate_body = spec[predicate_id_str]
if issubclass(predicate_subclass, Leaf):
return predicate_subclass(**predicate_body)
elif issubclass(predicate_subclass, Branch):
if issubclass(predicate_subclass, SingleParameter):
parameter_predicate = get_predicate_from_MA_filter_spec(predicate_body)
return predicate_subclass(parameter=parameter_predicate)
elif issubclass(predicate_subclass, MultiParameter):
parameter_predicates = \
[get_predicate_from_MA_filter_spec(parameter) for parameter in predicate_body]
return predicate_subclass(parameters=parameter_predicates)
else:
raise Exception("This should never happen.")
else:
raise Exception("This should never happen.")
elif issubclass(predicate_subclass, Branch):
if issubclass(predicate_subclass, SingleParameter):
parameter_predicate = get_predicate_from_MA_filter_spec(predicate_body)
return predicate_subclass(parameter=parameter_predicate)
elif issubclass(predicate_subclass, MultiParameter):
parameter_predicates = \
[get_predicate_from_MA_filter_spec(parameter) for parameter in predicate_body]
return predicate_subclass(parameters=parameter_predicates)
else:
raise Exception("This should never happen.")
else:
raise Exception("This should never happen.")
except (TypeError, KeyError) as e:
# Raised when the objects in the spec don't have the right fields (e.g. column or parameter).
raise BadFilterFormat from e

24 changes: 10 additions & 14 deletions db/filters/operations/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,17 @@

def get_SA_filter_spec_from_predicate(pred: Predicate) -> dict:
if isinstance(pred, Leaf):
if isinstance(pred, SingleParameter):
return {'field': pred.column, 'op': pred.saId(), 'value': pred.parameter}
elif isinstance(pred, MultiParameter):
return {'field': pred.column, 'op': pred.saId(), 'value': pred.parameters}
elif isinstance(pred, NoParameter):
return {'field': pred.column, 'op': pred.saId()}
else:
raise Exception("This should never happen.")
sa_spec = {'field': pred.column, 'op': pred.saId}
sa_parameter = pred.sa_parameter
if sa_parameter:
sa_spec['value'] = sa_parameter
return sa_spec
elif isinstance(pred, Branch):
if isinstance(pred, SingleParameter):
subject = get_SA_filter_spec_from_predicate(pred.parameter)
return {pred.saId(): [subject]}
elif isinstance(pred, MultiParameter):
subjects = [get_SA_filter_spec_from_predicate(subject) for subject in pred.parameters]
return {pred.saId(): subjects}
sa_parameter = pred.sa_parameter
# A branch predicate will always be parametrized (with another predicate)
if sa_parameter is not None:
sa_parameters = [get_SA_filter_spec_from_predicate(sub_pred) for sub_pred in sa_parameter]
return {pred.saId: sa_parameters}
else:
raise Exception("This should never happen.")
else:
Expand Down
55 changes: 39 additions & 16 deletions db/tests/filters/operations/test_serialize.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,46 @@
from db.filters.operations.serialize import get_SA_filter_spec_from_predicate
from db.filters.base import And, Or, Not, Equal, Empty, In
import pytest

from db.filters.operations.serialize import get_SA_filter_spec_from_predicate
from db.filters.base import And, Or, Not, Equal, Empty, In, StartsWith, EndsWith, Contains, EmailDomainEquals

def test_serialization():
predicate = And([
valid_cases = [
[
And([
Or([
In(column="col3", parameters=["value31", "value32"]),
Equal(column="col2", parameter="value2"),
]),
Not(
Empty(column="col1")
),
]),
{'and': [
{'or': [
{'field': 'col3', 'op': 'in', 'value': ['value31', 'value32']},
{'field': 'col2', 'op': 'eq', 'value': 'value2'}
]},
{'not': [{'field': 'col1', 'op': 'is_null'}]}
]}
],
[
# NOTE: escaping of _, % and \ is tested too:
Or([
In(column="col3", parameters=["value31", "value32"]),
Equal(column="col2", parameter="value2"),
StartsWith(column="col1", parameter="start_"),
EndsWith(column="col1", parameter="end%"),
# NOTE: below line tests case sensitivity setting
Contains(column="col1", parameter="contained\\", case_sensitive=False),
EmailDomainEquals(column="col1", parameter="domain.com"),
]),
Not(
Empty(column="col1")
),
])
expected_SA_filter_spec = {'and': [
{'or': [
{'field': 'col3', 'op': 'in', 'value': ['value31', 'value32']},
{'field': 'col2', 'op': 'eq', 'value': 'value2'}
]},
{'not': [{'field': 'col1', 'op': 'is_null'}]}
]}
{'field': 'col1', 'op': 'like', 'value': 'start\\_%'},
{'field': 'col1', 'op': 'like', 'value': '%end\\%'},
{'field': 'col1', 'op': 'ilike', 'value': '%contained\\\\%'},
{'field': 'col1', 'op': 'ilike', 'value': '%@domain.com'},
]}
],
]

@pytest.mark.parametrize("predicate, expected_SA_filter_spec", valid_cases)
def test_serialization(predicate, expected_SA_filter_spec):
    """Each predicate in valid_cases must serialize to the SA filter spec paired with it."""
    assert get_SA_filter_spec_from_predicate(predicate) == expected_SA_filter_spec
11 changes: 9 additions & 2 deletions db/tests/filters/test_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
from db.filters.base import (
all_predicates, Leaf, Branch, MultiParameter, SingleParameter, NoParameter, Empty, BadFilterFormat
all_predicates, Leaf, Branch, MultiParameter, SingleParameter, NoParameter, Empty, BadFilterFormat, ReliesOnLike
)


Expand Down Expand Up @@ -37,6 +37,10 @@ def instantiate_subclass(subclass, column=None, parameter=None):
'single': {
'valid': [1],
'invalid': [None, [], someLeafPredicates[0]],
'based_on_like': {
'valid': ["abc"],
'invalid': [None, [], someLeafPredicates[0], 1],
},
},
},
'branch': {
Expand All @@ -58,7 +62,10 @@ def get_spec_params(predicate_subclass, valid):
if issubclass(predicate_subclass, MultiParameter):
return parametersSpec['leaf']['multi'][validityKey]
elif issubclass(predicate_subclass, SingleParameter):
return parametersSpec['leaf']['single'][validityKey]
if issubclass(predicate_subclass, ReliesOnLike):
return parametersSpec['leaf']['single']['based_on_like'][validityKey]
else:
return parametersSpec['leaf']['single'][validityKey]
elif issubclass(predicate_subclass, Branch):
if issubclass(predicate_subclass, MultiParameter):
return parametersSpec['branch']['multi'][validityKey]
Expand Down
18 changes: 14 additions & 4 deletions db/tests/records/operations/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sqlalchemy_filters.exceptions import BadFilterFormat, FilterFieldNotFound

from db.records.operations.select import get_records
from db.filters.base import MultiParameter, SingleParameter, get_predicate_subclass_by_type_str
from db.filters.base import MultiParameter, SingleParameter, get_predicate_subclass_by_id_str


def test_get_records_filters_using_col_str_names(roster_table_obj):
Expand All @@ -26,6 +26,7 @@ def test_get_records_filters_using_col_str_names(roster_table_obj):
)


# TODO no: remove these tests
@pytest.mark.skip(reason="should this be implemented?")
def test_get_records_filters_using_col_objects(roster_table_obj):
roster, engine = roster_table_obj
Expand Down Expand Up @@ -98,13 +99,16 @@ def _ilike(x, v):
# "like": _like,
# "ilike": _ilike,
# "not_ilike": lambda x, v: not _ilike(x, v),
"starts_with": lambda x, v: x.startswith(v),
"ends_with": lambda x, v: x.endswith(v),
"contains": lambda x, v: x.find(v) != -1,
"in": lambda x, v: x in v,
"not_in": lambda x, v: x not in v,
# "any": lambda x, v: v in x,
# "not_any": lambda x, v: v not in x,
"and": lambda x: all(x),
"or": lambda x: any(x),
"not": lambda x: not x[0]
"not": lambda x: not x[0],
}


Expand Down Expand Up @@ -147,8 +151,14 @@ def _ilike(x, v):
("date", "lesser_or_equal", "2099-01-01", 100),
# like
# ("varchar", "like", "%1", 10),
# ends_with
("varchar", "ends_with", "1", 10),
# ilike
# ("varchar", "ilike", "STRING1%", 12),
# starts_with
("varchar", "starts_with", "string1", 12),
# contains
("varchar", "contains", "g1", 12),
# not_ilike
# ("varchar", "not_ilike", "STRING1%", 88),
# in
Expand All @@ -169,7 +179,7 @@ def test_get_records_filters_ops(
filter_sort_table_obj, column, predicate_id, parameter, res_len
):
filter_sort, engine = filter_sort_table_obj
predicate = get_predicate_subclass_by_type_str(predicate_id)
predicate = get_predicate_subclass_by_id_str(predicate_id)
if issubclass(predicate, MultiParameter):
filters = {predicate_id: {"column": column, "parameters": parameter}}
elif issubclass(predicate, SingleParameter):
Expand Down Expand Up @@ -236,7 +246,7 @@ def test_get_records_filters_boolean_ops(
):
filter_sort, engine = filter_sort_table_obj

predicate = get_predicate_subclass_by_type_str(op)
predicate = get_predicate_subclass_by_id_str(op)
if issubclass(predicate, SingleParameter):
filters = {op: [
{"equal": {"column": column, "parameter": parameter}}
Expand Down
44 changes: 40 additions & 4 deletions mathesar/api/filters.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from dataclasses import fields as dataclass_fields
from typing import Optional, Type, List

from django_filters import BooleanFilter, DateTimeFromToRangeFilter, OrderingFilter
from django_property_filter import PropertyFilterSet, PropertyBaseInFilter, PropertyCharFilter, PropertyOrderingFilter

from db.filters.base import all_predicates
from db.filters.base import all_predicates, Predicate, ReliesOnLike

from mathesar.database.types import is_ma_type_supported_by_predicate
from mathesar.models import Schema, Table, Database
Expand All @@ -16,19 +19,52 @@ def get_filter_options_for_database(database):
]
return [
{
"identifier": predicate.type.value,
"identifier": predicate.id.value,
"name": predicate.name,
"position": predicate.super_type.value,
"position": predicate.position.value,
"parameter_count": predicate.parameter_count.value,
"ma_types": [
ma_type.value
for ma_type in supported_ma_types
if is_ma_type_supported_by_predicate(ma_type, predicate)
]
],
"settings": _get_settings_for_predicate(predicate),
} for predicate in all_predicates
]


def _get_type_name(type) -> str:
return type.__name__


def _get_human_name_for_field(field) -> str:
return field.name.replace("_", " ").capitalize()


def _get_settings_for_predicate(predicate_class: Type[Predicate]) -> Optional[List[dict]]:
    """
    Describe the optional settings a predicate class accepts, or return None
    when it has none.

    Only ReliesOnLike subclasses are handled for now, and the single setting
    reported for them (case_sensitive) is hardcoded here. This could instead
    be automated by discovering any non-standard dataclass fields and
    reporting their defaults and types.
    """
    if not issubclass(predicate_class, ReliesOnLike):
        return None
    matching_fields = [
        candidate
        for candidate in dataclass_fields(predicate_class)
        if candidate.name == "case_sensitive"
    ]
    # Indexing raises IndexError when the class declares no case_sensitive field.
    setting_field = matching_fields[0]
    setting_description = {
        "identifier": setting_field.name,
        "name": _get_human_name_for_field(setting_field),
        "default": setting_field.default,
        "python_type": _get_type_name(setting_field.type),
    }
    return [setting_description]


class CharInFilter(PropertyBaseInFilter, PropertyCharFilter):
    """Combines PropertyBaseInFilter with PropertyCharFilter; adds no behavior of its own."""

Expand Down
1 change: 1 addition & 0 deletions mathesar/api/serializers/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@ class FilterSerializer(serializers.Serializer):
position = serializers.CharField()
parameter_count = serializers.CharField()
ma_types = serializers.ListField(child=serializers.CharField())
settings = serializers.ListField(child=serializers.DictField(), allow_null=True)
21 changes: 20 additions & 1 deletion mathesar/database/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from enum import Enum

from db.types.base import PostgresType, MathesarCustomType, get_available_types, get_qualified_name, get_db_type_name
from db.filters.base import Predicate, relies_on_comparability
from db.filters.base import Predicate, relies_on_comparability, relies_on_like, applies_only_to_email

from typing import Type

Expand Down Expand Up @@ -37,9 +37,28 @@ def _is_ma_type_comparable(ma_type: MathesarTypeIdentifier) -> bool:
return ma_type in comparable_mathesar_types


# Mathesar types offered for predicates that rely on LIKE
# (consulted by _is_ma_type_supported_by_like).
supported_by_like_mathesar_types = {
    MathesarTypeIdentifier.TEXT,
    MathesarTypeIdentifier.EMAIL,
    MathesarTypeIdentifier.URI,
}


def _is_ma_type_supported_by_like(ma_type: MathesarTypeIdentifier) -> bool:
    """Tell whether ma_type is listed in supported_by_like_mathesar_types."""
    is_listed = ma_type in supported_by_like_mathesar_types
    return is_listed


def _is_ma_type_an_email_string(ma_type: MathesarTypeIdentifier) -> bool:
    """Tell whether ma_type is the EMAIL member of MathesarTypeIdentifier (identity check)."""
    email_type = MathesarTypeIdentifier.EMAIL
    return ma_type is email_type


def is_ma_type_supported_by_predicate(ma_type: MathesarTypeIdentifier, predicate: Type[Predicate]):
    """
    Decide whether a predicate class may be offered for the given Mathesar type.

    The predicate is tested against each rule in order and the first rule
    that matches decides the answer; a predicate matching no rule is
    supported for every type.
    """
    # Order is significant: the email-only rule is consulted before the
    # generic LIKE rule.
    # NOTE(review): presumably email-only predicates also rely on LIKE -- confirm in db.filters.base.
    rules = (
        (relies_on_comparability, _is_ma_type_comparable),
        (applies_only_to_email, _is_ma_type_an_email_string),
        (relies_on_like, _is_ma_type_supported_by_like),
    )
    for predicate_matches, type_is_supported in rules:
        if predicate_matches(predicate):
            return type_is_supported(ma_type)
    return True

Expand Down