Skip to content

Commit

Permalink
Merge pull request #19 from tomcounsell/query-values
Browse files Browse the repository at this point in the history
query for specific values. if all are key fields, performance increases greatly
  • Loading branch information
tomcounsell committed Jun 4, 2022
2 parents c6ba7e6 + 0fbe40b commit ce255fb
Show file tree
Hide file tree
Showing 13 changed files with 191 additions and 58 deletions.
34 changes: 31 additions & 3 deletions docs/query.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,33 @@ Animal.query.filter(name__startswith="S")[0].name
>>> "salamander"
```

## Values

## Order By {field_name}
Returns dictionaries, rather than model instances. Each of those dictionaries represents an object, with the keys corresponding to the attribute names of model objects.
Specify the fields with a tuple of field names. Each dictionary will contain only the field keys/values for the fields you specify.

``` python
Animal.query.filter(values=("name", "color"))
>>> [{"name": "salamander", "color": "green"}, ...]
```

Pro Tip: If _all_ the fields specified are _Key_ fields, then query performance will be at least 2x faster compared to a query without any specified values.

Results are ordered by the value of a given field. Ascending order is implied.

## Order By field_name

Results are ordered by the value of a given field.

``` python
Movies.query.filter(order_by="-release_date")
Movies.query.filter(order_by="name")
```

the above will return movies ordered by name alphabetically
The negative sign in front of "-release_date" indicates descending order. Ascending order is implied.
The second query will return movies ordered by name alphabetically.
Ordering works for these field types: `str`, `int`, `float`, `decimal`, `time`, `date`, `datetime`.


## Limit Number of Results

returns first 100 objects
Expand All @@ -77,6 +92,19 @@ both are valid and will return the same list of objects.
If `order_by` is used, the results are ordered before the limit is applied.


## Values

Returns dictionaries, rather than model instances.
Each of those dictionaries represents an object, with the keys corresponding to the attribute names of model objects.
`values` requires a tuple of field names.
Example:

``` python
Movies.query.filter(name="Life Of Pi", values=("name",))
>>> [{"name": "Life Of Pi"}, ]
```


## KeyField query filters

`{field_name}=`: exact match
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = popoto
version = 0.7.4
version = 0.8.0
author = Tom Counsell
author_email = other@tomcounsell.com
description = A Python Redis ORM
Expand Down
4 changes: 2 additions & 2 deletions src/popoto/fields/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ def format_value_pre_save(self, field_value):
return field_value

@classmethod
def get_special_use_field_db_key(cls, model: 'Model', field_name: str) -> DB_key:
def get_special_use_field_db_key(cls, model: 'Model', *field_names) -> DB_key:
"""
For use by child class when implementing additional Redis data structures
Children implementing more than one new structure will need to augment this.
"""
return DB_key(cls.field_class_key, model._meta.db_class_key, field_name)
return DB_key(cls.field_class_key, model._meta.db_class_key, *field_names)


@classmethod
Expand Down
5 changes: 5 additions & 0 deletions src/popoto/fields/shortcuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .key_field_mixin import KeyFieldMixin
from .auto_field_mixin import AutoFieldMixin
from .sorted_field_mixin import SortedFieldMixin
from ..exceptions import ModelException


class IntField(Field):
Expand Down Expand Up @@ -94,7 +95,11 @@ def __init__(self, *args, **kwargs):

class UniqueKeyField(KeyField):
def __init__(self, *args, **kwargs):
if kwargs.get('unique') is False:
raise ModelException(f"you may not set unique=False on this field type")
kwargs['unique'] = True
if kwargs.get('null') is True:
raise ModelException(f"you may not set null=True on this field type")
kwargs['null'] = False
super().__init__(**kwargs)

Expand Down
30 changes: 19 additions & 11 deletions src/popoto/fields/sorted_field_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ def convert_to_numeric(cls, field, field_value):
raise ValueError("SortedField received non-numeric value.")

@classmethod
def get_sortedset_db_key(cls, model, field_name) -> DB_key:
return cls.get_special_use_field_db_key(model, field_name)
def get_sortedset_db_key(cls, model, field_name, *partition_field_names) -> DB_key:
return cls.get_special_use_field_db_key(model, field_name, *partition_field_names)

@classmethod
def get_partitioned_sortedset_db_key(cls, model_instance, field_name) -> DB_key:
Expand All @@ -109,7 +109,8 @@ def get_partitioned_sortedset_db_key(cls, model_instance, field_name) -> DB_key:
try:
sortedset_db_key.append(str(getattr(model_instance, partition_field_name)))
except KeyError:
raise QueryException(f"{field_name} filter requires partition_on field values")
raise QueryException(f"{field_name} field is partitioned. "
f"Queries must also contain a filter for the partitioned fields")
return sortedset_db_key

@classmethod
Expand Down Expand Up @@ -159,14 +160,21 @@ def filter_query(cls, model_class: 'Model', field_name: str, **query_params) ->
else:
raise QueryException(f"Query filters provided are not compatible with this field {field_name}")

sortedset_db_key = cls.get_sortedset_db_key(model_class, field_name)

# use field names and query values partition_on fields to extend sortedset_db_key
for field_name in model_class._meta.fields[field_name].partition_on:
try:
sortedset_db_key += f":{str(query_params[field_name])}"
except KeyError:
raise QueryException(f"{field_name} filter requires partition_on field values")
try:
# use field names and query values partition_on fields to extend sortedset_db_key
sortedset_db_key = cls.get_sortedset_db_key(
model_class,
field_name,
*[
str(query_params[partition_field_name])
for partition_field_name in model_class._meta.fields[field_name].partition_on
]
)
except KeyError:
raise QueryException(
f"{field_name} field is partitioned on {', '.join(model_class._meta.fields[field_name].partition_on)}. "
f"Query filter must also specify a value for {', '.join(model_class._meta.fields[field_name].partition_on)}"
)

redis_db_keys_list = POPOTO_REDIS_DB.zrangebyscore(
sortedset_db_key.redis_key, value_range['min'], value_range['max']
Expand Down
6 changes: 4 additions & 2 deletions src/popoto/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ def __init__(self, model_name):
self.base_meta = None

def add_field(self, field_name: str, field: Field):
if field_name in ['limit', 'order_by', 'values']:
if not field_name[0] == "_" and not field_name[0].islower():
raise ModelException(f"{field_name} field name must start with a lowercase letter.")
elif field_name in ['limit', 'order_by', 'values']:
raise ModelException(f"{field_name} is a reserved field name. "
f"See https://popoto.readthedocs.io/en/latest/fields/#reserved-field-names")
elif field_name.startswith("_") and field_name not in self.hidden_fields:
Expand Down Expand Up @@ -229,7 +231,7 @@ def db_key(self) -> DB_key:
OR both
"""
return DB_key(self._meta.db_class_key, [
getattr(self, key_field_name) or "None"
str(getattr(self, key_field_name, "None"))
for key_field_name in sorted(self._meta.key_field_names)
])

Expand Down
21 changes: 17 additions & 4 deletions src/popoto/models/db_key.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections.abc import Iterable

from ..redis_db import POPOTO_REDIS_DB
from ..redis_db import POPOTO_REDIS_DB, ENCODING


class DB_key(list):
Expand All @@ -15,12 +15,25 @@ def flatten(yet_flat):
super().__init__(flatten(key_partials))

@classmethod
def clean(cls, value: str, ignore_colons: bool = False) -> str:
def from_redis_key(cls, redis_key):
if isinstance(redis_key, bytes):
redis_key = redis_key.decode(ENCODING)
return cls([DB_key.unclean(partial) for partial in redis_key.split(":")])

@classmethod
def clean(cls, value: str) -> str:
value = value.replace('/', '//')
for char in "'?*^[]-":
value = value.replace(char, f"/{char}")
if not ignore_colons:
value = value.replace(':', '_')
value = value.replace(':', '{:}')
return value

@classmethod
def unclean(cls, value: str) -> str:
value = value.replace('{:}', ':')
for char in "'?*^[]-":
value = value.replace(f"/{char}", char)
value = value.replace('//', '/',)
return value

def __str__(self):
Expand Down
14 changes: 8 additions & 6 deletions src/popoto/models/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,13 @@ def encode_popoto_model_obj(obj: 'Model') -> dict:
return encoded_hashmap


def decode_popoto_model_hashmap(model_class: 'Model', redis_hash: dict) -> 'Model':
def decode_popoto_model_hashmap(model_class: 'Model', redis_hash: dict, fields_only=False) -> 'Model':
if len(redis_hash):
return model_class(**{
key_b.decode(ENCODING): decode_custom_types(msgpack.unpackb(value_b))
model_attrs = {
key_b.decode(ENCODING) if not fields_only else key_b:
decode_custom_types(msgpack.unpackb(value_b))
for key_b, value_b in redis_hash.items()
})
else:
return None
}
return model_attrs if fields_only else model_class(**model_attrs)

return None
84 changes: 68 additions & 16 deletions src/popoto/models/query.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

from .db_key import DB_key
from ..redis_db import POPOTO_REDIS_DB
from ..redis_db import POPOTO_REDIS_DB, ENCODING

logger = logging.getLogger('POPOTO.Query')

Expand Down Expand Up @@ -63,7 +63,8 @@ def keys(self, catchall=False, clean=False, **kwargs) -> list:
def all(self, **kwargs) -> list:
redis_db_keys_list = self.keys()
return self.prepare_results(
Query.get_many_objects(self.model_class, set(redis_db_keys_list)), **kwargs
Query.get_many_objects(self.model_class, set(redis_db_keys_list), values=kwargs.get('values', None)),
**kwargs
)

def filter_for_keys_set(self, **kwargs) -> set:
Expand Down Expand Up @@ -107,24 +108,37 @@ def filter(self, **kwargs) -> list:
Run query using the given paramters
return a list of model_class objects
"""
limit: int = int(kwargs.pop('limit')) if 'limit' in kwargs else None
order_by_attr_name: str = str(kwargs.pop('order_by')) if 'order_by' in kwargs else None
# values_attr_names: tuple = kwargs.pop('values') if 'values' in kwargs else None

db_keys_set = self.filter_for_keys_set(**kwargs)
if not len(db_keys_set):
return []

return self.prepare_results(
Query.get_many_objects(self.model_class, db_keys_set, limit=limit), **kwargs
Query.get_many_objects(
self.model_class, db_keys_set,
order_by_attr_name=kwargs.get('order_by', None),
limit=kwargs.get('limit', None),
values=kwargs.get('values', None),
),
**kwargs
)

def prepare_results(self, objects, order_by: str = "", values: tuple = (), limit: int = None, **kwargs):
reverse_order = False
if order_by and order_by.startswith("-"):
reverse_order = True
order_by = order_by[1:]
if order_by:
order_by_attr_name = order_by
if (not isinstance(order_by_attr_name, str)) or order_by_attr_name not in self.model_class._meta.fields:
raise QueryException(f"order_by={order_by_attr_name} must be a field name (str)")
attr_type = self.model_class._meta.fields[order_by_attr_name].type
objects.sort(key=lambda item: getattr(item, order_by_attr_name) or attr_type())
if values and order_by_attr_name not in values:
raise QueryException("field must be included in values=(fieldnames) in order to use order_by")
elif values:
objects.sort(key=lambda item: item.get(order_by_attr_name))
else:
objects.sort(key=lambda item: getattr(item, order_by_attr_name) or attr_type())
objects = list(reversed(objects))[:limit] if reverse_order else objects[:limit]

if limit and len(objects) > limit:
objects = objects[:limit]
Expand All @@ -137,17 +151,55 @@ def count(self, **kwargs) -> int:
return len(self.filter_for_keys_set(**kwargs)) # maybe possible to refactor to use redis.SINTERCARD

@classmethod
def get_many_objects(cls, model: 'Model', db_keys: set, order_by_attr_name: str = "", limit: int = None) -> list:
def get_many_objects(cls, model: 'Model', db_keys: set,
order_by_attr_name: str = "", limit: int = None, values: tuple = None) -> list:
from .encoding import decode_popoto_model_hashmap
pipeline = POPOTO_REDIS_DB.pipeline()
for db_key in db_keys:
pipeline.hgetall(db_key)
hashes_list = pipeline.execute()
reverse_order = False
# order the hashes list or objects before applying limit
if order_by_attr_name and order_by_attr_name.startswith("-"):
order_by_attr_name = order_by_attr_name[1:]
reverse_order = True

if order_by_attr_name and order_by_attr_name in model._meta.key_field_names:
field_position = model._meta.get_db_key_index_position(order_by_attr_name)
db_keys = list(db_keys)
db_keys.sort(key=lambda key: key.split(b":")[field_position])
db_keys = list(reversed(db_keys))[:limit] if reverse_order else db_keys[:limit]

if values:
if not isinstance(values, tuple):
raise QueryException("values takes a tuple. eg. query.filter(values=('name',))")
elif set(values).issubset(model._meta.key_field_names):
db_keys = [DB_key.from_redis_key(db_key) for db_key in db_keys]
return [
{
field_name: model._meta.fields[field_name].type(
db_key[model._meta.get_db_key_index_position(field_name)]
) if db_key[model._meta.get_db_key_index_position(field_name)] else None
for field_name in values
}
for db_key in db_keys
]
else:
[pipeline.hmget(db_key, values) for db_key in db_keys]
value_lists = pipeline.execute()
hashes_list = [
{
field_name: result[i]
for i, field_name in enumerate(values)
}
for result in value_lists
]

else:
[pipeline.hgetall(db_key) for db_key in db_keys]
hashes_list = pipeline.execute()

if {} in hashes_list:
logger.error("one or more redis keys points to missing objects")
logger.error("one or more redis keys points to missing objects. Debug with Model.query.keys(clean=True")

# todo: order the hashes list or objects before applying limit
return [
decode_popoto_model_hashmap(model, redis_hash)
for redis_hash in hashes_list[:limit] if redis_hash
decode_popoto_model_hashmap(model, redis_hash, fields_only=bool(values))
for redis_hash in hashes_list if redis_hash
]
12 changes: 12 additions & 0 deletions tests/all_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from .test_common_models import *
from .test_field_types import *
from .test_geofield import *
from .test_key_fields import *
from .test_kitchen_sink import *
from .test_model_errors import *
from .test_pubsub import *
from .test_queries import *
from .test_query_results import *
from .test_relationship import *
from .test_sortedfield import *
from .test_timeseries import *
2 changes: 0 additions & 2 deletions tests/test_model_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(SCRIPT_DIR))

from src.popoto.redis_db import POPOTO_REDIS_DB
from src import popoto

try:
Expand All @@ -16,7 +15,6 @@ class KeyValueModel(popoto.Model):

KeyValueModel()
except ModelException as e:
print(e)
assert "null" in str(e)
else:
raise Exception("expected null error on AutoKeyField")

0 comments on commit ce255fb

Please sign in to comment.