diff --git a/docs/source/analysis_objects.rst b/docs/source/analysis_objects.rst index bb2adcfc5..52a9d33f3 100644 --- a/docs/source/analysis_objects.rst +++ b/docs/source/analysis_objects.rst @@ -27,18 +27,5 @@ your app can always be verified. These hashes exist on the following attributes: - ``configuration_values_hash`` - ``configuration_manifest_hash`` -If an input or configuration attribute is ``None``, so will its hash attribute be. For ``Manifests``, some metadata -about the ``Datafiles`` and ``Datasets`` within them, and about the ``Manifest`` itself, is included when calculating -the hash: - -- For a ``Datafile``, the content of its on-disk file is hashed, along with the following metadata: - - - ``name`` - - ``cluster`` - - ``sequence`` - - ``timestamp`` - - ``tags`` - -- For a ``Dataset``, the hashes of its ``Datafiles`` are included, along with its ``tags``. - -- For a ``Manifest``, the hashes of its ``Datasets`` are included, along with its ``keys``. +If a strand is ``None``, so will its corresponding hash attribute be. The hash of a datafile is the hash of +its file, while the hash of a manifest or dataset is the cumulative hash of the files it refers to. diff --git a/docs/source/child_services.rst b/docs/source/child_services.rst index 665a105de..4cf8bb3eb 100644 --- a/docs/source/child_services.rst +++ b/docs/source/child_services.rst @@ -104,13 +104,13 @@ The children field must also be present in the ``twine.json`` file: "key": "wind_speed", "purpose": "A service that returns the average wind speed for a given latitude and longitude.", "notes": "Some notes.", - "filters": "tags:wind_speed" + "filters": "labels:wind_speed" }, { "key": "elevation", "purpose": "A service that returns the elevation for a given latitude and longitude.", "notes": "Some notes.", - "filters": "tags:elevation" + "filters": "labels:elevation" } ], ... 
diff --git a/docs/source/cloud_storage.rst b/docs/source/cloud_storage.rst index 6f68bac75..367957c6d 100644 --- a/docs/source/cloud_storage.rst +++ b/docs/source/cloud_storage.rst @@ -12,7 +12,7 @@ in Octue SDK, please join the discussion `in this issue. , bucket_name=, path_in_bucket=, - metadata={"tags": ["blah", "glah", "jah"], "cleaned": True, "id": 3} + metadata={"id": 3, "labels": ["blah", "glah", "jah"], "cleaned": True, "colour": "blue"} ) storage_client.upload_from_string( string='[{"height": 99, "width": 72}, {"height": 12, "width": 103}]', bucket_name=, path_in_bucket=, - metadata={"tags": ["dimensions"], "cleaned": True, "id": 96} + metadata={"id": 96, "labels": ["dimensions"], "cleaned": True, "colour": "red", "size": "small"} ) **Downloading** @@ -61,7 +61,7 @@ to any of these methods. bucket_name=, path_in_bucket=, ) - >>> {"tags": ["dimensions"], "cleaned": True, "id": 96} + >>> {"id": 96, "labels": ["dimensions"], "cleaned": True, "colour": "red", "size": "small"} **Deleting** diff --git a/docs/source/datafile.rst b/docs/source/datafile.rst index 6971341ab..42a11880e 100644 --- a/docs/source/datafile.rst +++ b/docs/source/datafile.rst @@ -10,7 +10,8 @@ the following main attributes: - ``path`` - the path of this file, which may include folders or subfolders, within the dataset. 
- ``cluster`` - the integer cluster of files, within a dataset, to which this belongs (default 0) - ``sequence`` - a sequence number of this file within its cluster (if sequences are appropriate) -- ``tags`` - a space-separated string or iterable of tags relevant to this file +- ``tags`` - key-value pairs of metadata relevant to this file +- ``labels`` - a space-separated string or iterable of labels relevant to this file - ``timestamp`` - a posix timestamp associated with the file, in seconds since epoch, typically when it was created but could relate to a relevant time point for the data @@ -43,7 +44,7 @@ Example A bucket_name = "my-bucket", datafile_path = "path/to/data.csv" - with Datafile.from_cloud(project_name, bucket_name, datafile_path, mode="r") as datafile, f: + with Datafile.from_cloud(project_name, bucket_name, datafile_path, mode="r") as (datafile, f): data = f.read() new_metadata = metadata_calculating_function(data) @@ -51,6 +52,7 @@ Example A datafile.cluster = new_metadata["cluster"] datafile.sequence = new_metadata["sequence"] datafile.tags = new_metadata["tags"] + datafile.labels = new_metadata["labels"] Example B @@ -76,7 +78,8 @@ Example B datafile.timestamp = datetime.now() datafile.cluster = 0 datafile.sequence = 3 - datafile.tags = {"manufacturer:Vestas", "output:1MW"} + datafile.tags = {"manufacturer": "Vestas", "output": "1MW"} + datafile.labels = {"new"} datafile.to_cloud() # Or, datafile.update_cloud_metadata() @@ -122,10 +125,11 @@ For creating new data in a new local file: sequence = 2 - tags = {"cleaned:True", "type:linear"} + tags = {"cleaned": True, "type": "linear"} + labels = {"Vestas"} - with Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags, mode="w") as datafile, f: + with Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags, labels=labels, mode="w") as (datafile, f): f.write("This is some cleaned data.") datafile.to_cloud(project_name="my-project", bucket_name="my-bucket", 
path_in_bucket="path/to/data.dat") @@ -139,7 +143,8 @@ For existing data in an existing local file: sequence = 2 - tags = {"cleaned:True", "type:linear"} + tags = {"cleaned": True, "type": "linear"} + labels = {"Vestas"} - datafile = Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags) + datafile = Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags, labels=labels) datafile.to_cloud(project_name="my-project", bucket_name="my-bucket", path_in_bucket="path/to/data.dat") diff --git a/docs/source/dataset.rst b/docs/source/dataset.rst index 79390a941..f41c979ea 100644 --- a/docs/source/dataset.rst +++ b/docs/source/dataset.rst @@ -8,9 +8,10 @@ A ``Dataset`` contains any number of ``Datafiles`` along with the following meta - ``name`` - ``tags`` +- ``labels`` The files are stored in a ``FilterSet``, meaning they can be easily filtered according to any attribute of the -:doc:`Datafile ` instances it contains. +:doc:`Datafile ` instances contained. -------------------------------- @@ -23,23 +24,26 @@ You can filter a ``Dataset``'s files as follows: dataset = Dataset( files=[ - Datafile(timestamp=time.time(), path="path-within-dataset/my_file.csv", tags="one a:2 b:3 all"), - Datafile(timestamp=time.time(), path="path-within-dataset/your_file.txt", tags="two a:2 b:3 all"), - Datafile(timestamp=time.time(), path="path-within-dataset/another_file.csv", tags="three all"), + Datafile(path="path-within-dataset/my_file.csv", labels=["one", "a", "b" "all"]), + Datafile(path="path-within-dataset/your_file.txt", labels=["two", "a", "b", "all"), + Datafile(path="path-within-dataset/another_file.csv", labels=["three", "all"]), ] ) - dataset.files.filter(filter_name="name__ends_with", filter_value=".csv") + dataset.files.filter(name__ends_with=".csv") >>> , })> - dataset.files.filter("tags__contains", filter_value="a:2") + dataset.files.filter(labels__contains="a") >>> , })> -You can also chain filters indefinitely: +You can also chain filters 
indefinitely, or specify them all at the same time: .. code-block:: python - dataset.files.filter(filter_name="name__ends_with", filter_value=".csv").filter("tags__contains", filter_value="a:2") + dataset.files.filter(name__ends_with=".csv").filter(labels__contains="a") + >>> })> + + dataset.files.filter(name__ends_with=".csv", labels__contains="a") >>> })> Find out more about ``FilterSets`` :doc:`here `, including all the possible filters available for each type of object stored on diff --git a/docs/source/filter_containers.rst b/docs/source/filter_containers.rst index 3bed20e3d..59f988c6f 100644 --- a/docs/source/filter_containers.rst +++ b/docs/source/filter_containers.rst @@ -4,43 +4,61 @@ Filter containers ================= -A filter container is just a regular python container that has some extra methods for filtering or ordering its +A filter container is just a regular python container that has some extra methods for filtering and ordering its elements. It has the same interface (i.e. attributes and methods) as the primitive python type it inherits from, with these extra methods: - ``filter`` - ``order_by`` -There are two types of filter containers currently implemented: +There are three types of filter containers currently implemented: - ``FilterSet`` - ``FilterList`` +- ``FilterDict`` -``FilterSets`` are currently used in: +``FilterSets`` are currently used in ``Dataset.files`` to store ``Datafiles`` and make them filterable, which is useful +for dealing with a large number of datasets, while ``FilterList`` is returned when ordering any filter container. -- ``Dataset.files`` to store ``Datafiles`` -- ``TagSet.tags`` to store ``Tags`` - -You can see filtering in action on the files of a ``Dataset`` :doc:`here `. +You can see an example of filtering of a ``Dataset``'s files :doc:`here `. --------- Filtering --------- -Filters are named as ``"__"``, and any attribute of a member of the -``FilterSet`` whose type or interface is supported can be filtered. 
+Key points: + +* Any attribute of a member of a filter container whose type or interface is supported can be used when filtering +* Filters are named as ``"__"`` +* Multiple filters can be specified at once for chained filtering +* ```` can be a single attribute name or a double-underscore-separated string of nested attribute names +* Nested attribute names work for real attributes as well as dictionary keys (in any combination and to any depth) .. code-block:: python filter_set = FilterSet( - {Datafile(timestamp=time.time(), path="my_file.csv"), Datafile(timestamp=time.time(), path="your_file.txt"), Datafile(timestamp=time.time(), path="another_file.csv")} + { + Datafile(path="my_file.csv", cluster=0, tags={"manufacturer": "Vestas"}), + Datafile(path="your_file.txt", cluster=1, tags={"manufacturer": "Vergnet"}), + Datafile(path="another_file.csv", cluster=2, tags={"manufacturer": "Enercon"}) + } ) - filter_set.filter(filter_name="name__ends_with", filter_value=".csv") + # Single filter, non-nested attribute. + filter_set.filter(name__ends_with=".csv") >>> , })> -The following filters are implemented for the following types: + # Two filters, non-nested attributes. + filter_set.filter(name__ends_with=".csv", cluster__gt=1) + >>> })> + + # Single filter, nested attribute. 
+ filter_set.filter(tags__manufacturer__startswith("V")) + >>> , })> + + +These filters are currently available for the following types: - ``bool``: @@ -73,19 +91,20 @@ The following filters are implemented for the following types: * ``is`` * ``is_not`` -- ``TagSet``: +- ``LabelSet``: * ``is`` * ``is_not`` * ``equals`` * ``not_equals`` - * ``any_tag_contains`` - * ``not_any_tag_contains`` - * ``any_tag_starts_with`` - * ``not_any_tag_starts_with`` - * ``any_tag_ends_with`` - * ``not_any_tag_ends_with`` - + * ``contains`` + * ``not_contains`` + * ``any_label_contains`` + * ``not_any_label_contains`` + * ``any_label_starts_with`` + * ``not_any_label_starts_with`` + * ``any_label_ends_with`` + * ``not_any_label_ends_with`` Additionally, these filters are defined for the following *interfaces* (duck-types). : @@ -118,14 +137,31 @@ list of filters. -------- Ordering -------- -As sets are inherently orderless, ordering a ``FilterSet`` results in a new ``FilterList``, which has the same extra -methods and behaviour as a ``FilterSet``, but is based on the ``list`` type instead - meaning it can be ordered and -indexed etc. A ``FilterSet`` or ``FilterList`` can be ordered by any of the attributes of its members: +As sets and dictionaries are inherently orderless, ordering any filter container results in a new ``FilterList``, which +has the same methods and behaviour but is based on ``list`` instead, meaning it can be ordered and indexed etc. A +filter container can be ordered by any of the attributes of its members: .. code-block:: python filter_set.order_by("name") >>> , , ])> + filter_set.order_by("cluster") + >>> , , ])> + The ordering can also be carried out in reverse (i.e. descending order) by passing ``reverse=True`` as a second argument to the ``order_by`` method. + + +-------------- +``FilterDict`` +-------------- +The keys of a ``FilterDict`` can be anything, but each value must be a ``Filterable``. 
Hence, a ``FilterDict`` is +filtered and ordered by its values' attributes; when ordering, its items (key-value tuples) are returned in a +``FilterList``. + +----------------------- +Using for your own data +----------------------- +If using filter containers for your own data, all the members must inherit from ``octue.mixins.filterable.Filterable`` +to be filterable and orderable. diff --git a/octue/cloud/storage/client.py b/octue/cloud/storage/client.py index 1d93b33fc..5ee6377ff 100644 --- a/octue/cloud/storage/client.py +++ b/octue/cloud/storage/client.py @@ -1,4 +1,5 @@ import base64 +import json import logging from google.cloud import storage from google.cloud.storage.constants import _DEFAULT_TIMEOUT @@ -6,6 +7,8 @@ from octue.cloud.credentials import GCPCredentialsManager from octue.cloud.storage.path import split_bucket_name_from_gs_path +from octue.utils.decoders import OctueJSONDecoder +from octue.utils.encoders import OctueJSONEncoder logger = logging.getLogger(__name__) @@ -59,12 +62,12 @@ def upload_file( """Upload a local file to a Google Cloud bucket at gs:///. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str local_path: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param dict metadata: - :param float timeout: + :param str local_path: path to local file + :param str|None cloud_path: full cloud path to upload file to (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket to store file in + :param str|None path_in_bucket: path to upload file to (e.g. 
`path/to/file.csv`) + :param dict metadata: key-value pairs to associate with the cloud file as metadata + :param float timeout: time in seconds to allow for the upload to complete :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) @@ -73,7 +76,7 @@ def upload_file( blob.crc32c = self._compute_crc32c_checksum(f.read()) blob.upload_from_filename(filename=local_path, timeout=timeout) - self._update_metadata(blob, metadata) + self._overwrite_blob_custom_metadata(blob, metadata) logger.info("Uploaded %r to Google Cloud at %r.", local_path, blob.public_url) def upload_from_string( @@ -82,33 +85,67 @@ def upload_from_string( """Upload serialised data in string form to a file in a Google Cloud bucket at gs:///. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str string: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param dict metadata: - :param float timeout: + :param str string: string to upload as file + :param str|None cloud_path: full cloud path to upload as file to (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket to store as file in + :param str|None path_in_bucket: path to upload as file to (e.g. `path/to/file.csv`) + :param dict metadata: key-value pairs to associate with the cloud file as metadata + :param float timeout: time in seconds to allow for the upload to complete :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) blob.crc32c = self._compute_crc32c_checksum(string) blob.upload_from_string(data=string, timeout=timeout) - self._update_metadata(blob, metadata) + self._overwrite_blob_custom_metadata(blob, metadata) logger.info("Uploaded data to Google Cloud at %r.", blob.public_url) - def update_metadata(self, metadata, cloud_path=None, bucket_name=None, path_in_bucket=None): - """Update the metadata for the given cloud file. 
Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must - be provided. + def get_metadata(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): + """Get the metadata of the given file in the given bucket. Either (`bucket_name` and `path_in_bucket`) or + `cloud_path` must be provided. - :param dict metadata: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: + :param str|None cloud_path: full cloud path to file (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket where cloud file is located + :param str|None path_in_bucket: path to cloud file (e.g. `path/to/file.csv`) + :param float timeout: time in seconds to allow for the request to complete + :return dict: + """ + if cloud_path: + bucket_name, path_in_bucket = split_bucket_name_from_gs_path(cloud_path) + + bucket = self.client.get_bucket(bucket_or_name=bucket_name) + blob = bucket.get_blob(blob_name=self._strip_leading_slash(path_in_bucket), timeout=timeout) + + if blob is None: + return None + + custom_metadata = blob.metadata or {} + + return { + "custom_metadata": {key: json.loads(value, cls=OctueJSONDecoder) for key, value in custom_metadata.items()}, + "crc32c": blob.crc32c, + "size": blob.size, + "updated": blob.updated, + "time_created": blob.time_created, + "time_deleted": blob.time_deleted, + "custom_time": blob.custom_time, + "project_name": self.project_name, + "bucket_name": bucket_name, + "path_in_bucket": path_in_bucket, + } + + def overwrite_custom_metadata(self, metadata, cloud_path=None, bucket_name=None, path_in_bucket=None): + """Overwrite the custom metadata for the given cloud file. Either (`bucket_name` and `path_in_bucket`) or + `cloud_path` must be provided. + + :param dict metadata: key-value pairs to set as the new custom metadata + :param str|None cloud_path: full cloud path to file (e.g. 
`gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket where cloud file is located + :param str|None path_in_bucket: path to cloud file (e.g. `path/to/file.csv`) :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) - self._update_metadata(blob, metadata) + self._overwrite_blob_custom_metadata(blob, metadata) def download_to_file( self, local_path, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT @@ -116,11 +153,11 @@ def download_to_file( """Download a file to a file from a Google Cloud bucket at gs:///. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str local_path: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param float timeout: + :param str local_path: path to download to + :param str|None cloud_path: full cloud path to download from (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket cloud file is stored in + :param str|None path_in_bucket: path to download from (e.g. `path/to/file.csv`) + :param float timeout: time in seconds to allow for the download to complete :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) @@ -131,10 +168,10 @@ def download_as_string(self, cloud_path=None, bucket_name=None, path_in_bucket=N """Download a file to a string from a Google Cloud bucket at gs:///. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param float timeout: + :param str|None cloud_path: full cloud path to download from (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket cloud file is stored in + :param str|None path_in_bucket: path to download from (e.g. 
`path/to/file.csv`) + :param float timeout: time in seconds to allow for the download to complete :return str: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) @@ -142,46 +179,14 @@ def download_as_string(self, cloud_path=None, bucket_name=None, path_in_bucket=N logger.info("Downloaded %r from Google Cloud to as string.", blob.public_url) return data.decode() - def get_metadata(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): - """Get the metadata of the given file in the given bucket. Either (`bucket_name` and `path_in_bucket`) or - `cloud_path` must be provided. - - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param float timeout: - :return dict: - """ - if cloud_path: - bucket_name, path_in_bucket = split_bucket_name_from_gs_path(cloud_path) - - bucket = self.client.get_bucket(bucket_or_name=bucket_name) - blob = bucket.get_blob(blob_name=self._strip_leading_slash(path_in_bucket), timeout=timeout) - - if blob is None: - return None - - return { - "custom_metadata": blob.metadata or {}, - "crc32c": blob.crc32c, - "size": blob.size, - "updated": blob.updated, - "time_created": blob.time_created, - "time_deleted": blob.time_deleted, - "custom_time": blob.custom_time, - "project_name": self.project_name, - "bucket_name": bucket_name, - "path_in_bucket": path_in_bucket, - } - def delete(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): """Delete the given file from the given bucket. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param float timeout: + :param str|None cloud_path: full cloud path to file to delete (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket cloud file is stored in + :param str|None path_in_bucket: path to file to delete (e.g. 
`path/to/file.csv`) + :param float timeout: time in seconds to allow for the request to complete :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) @@ -192,11 +197,11 @@ def scandir(self, cloud_path=None, bucket_name=None, directory_path=None, filter """Yield the blobs belonging to the given "directory" in the given bucket. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None directory_path: - :param callable filter: - :param float timeout: + :param str|None cloud_path: full cloud path of directory to scan (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket cloud directory is located in + :param str|None directory_path: path of cloud directory to scan (e.g. `path/to/file.csv`) + :param callable filter: blob filter to constrain the yielded results + :param float timeout: time in seconds to allow for the request to complete :yield google.cloud.storage.blob.Blob: """ if cloud_path: @@ -243,13 +248,26 @@ def _compute_crc32c_checksum(self, string): checksum = Checksum(string.encode()) return base64.b64encode(checksum.digest()).decode("utf-8") - def _update_metadata(self, blob, metadata): - """Update the metadata for the given blob. Note that this is synced up with Google Cloud. + def _overwrite_blob_custom_metadata(self, blob, metadata): + """Overwrite the custom metadata for the given blob. Note that this is synced up with Google Cloud. 
- :param google.cloud.storage.blob.Blob blob: - :param dict metadata: + :param google.cloud.storage.blob.Blob blob: Google Cloud Storage blob to update + :param dict metadata: key-value pairs of metadata to overwrite the blob's metadata with :return None: """ - if metadata is not None: - blob.metadata = metadata - blob.patch() + if not metadata: + return None + + blob.metadata = self._encode_metadata(metadata) + blob.patch() + + def _encode_metadata(self, metadata): + """Encode metadata as a dictionary of JSON strings. + + :param dict metadata: + :return dict: + """ + if not isinstance(metadata, dict): + raise TypeError(f"Metadata for Google Cloud storage should be a dictionary; received {metadata!r}") + + return {key: json.dumps(value, cls=OctueJSONEncoder) for key, value in metadata.items()} diff --git a/octue/exceptions.py b/octue/exceptions.py index 09afba66c..95ceeac71 100644 --- a/octue/exceptions.py +++ b/octue/exceptions.py @@ -62,6 +62,10 @@ class InvalidTagException(OctueSDKException, ValueError): """Raise when a tag applied to a data file or dataset""" +class InvalidLabelException(OctueSDKException, ValueError): + """Raise when a label applied to a data file or dataset""" + + class ServiceNotFound(OctueSDKException): """Raise when a Service of the given ID has not been found on the Google Pub/Sub server (i.e. if there is no topic associated with the Service ID). 
diff --git a/octue/mixins/__init__.py b/octue/mixins/__init__.py index 50da35a2b..79166c404 100644 --- a/octue/mixins/__init__.py +++ b/octue/mixins/__init__.py @@ -3,6 +3,7 @@ from .filterable import Filterable from .hashable import Hashable from .identifiable import Identifiable +from .labelable import Labelable from .loggable import Loggable from .pathable import Pathable from .serialisable import Serialisable @@ -14,6 +15,7 @@ "Filterable", "Hashable", "Identifiable", + "Labelable", "Loggable", "MixinBase", "Pathable", diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index 327f38bb8..42dc8fe07 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -2,6 +2,7 @@ import numbers from octue import exceptions +from octue.utils.objects import get_nested_attribute def generate_complementary_filters(name, func): @@ -32,7 +33,7 @@ def generate_complementary_filters(name, func): IN_RANGE_FILTER_ACTIONS = generate_complementary_filters("in_range", lambda item, value: value[0] <= item <= value[1]) ICONTAINS_FILTER_ACTIONS = generate_complementary_filters( - "icontains", lambda item, value: value.lower() in item.lower() + "icontains", lambda item, value: value.casefold() in item.casefold() ) COMPARISON_FILTER_ACTIONS = { @@ -47,7 +48,7 @@ def generate_complementary_filters(name, func): TYPE_FILTERS = { "bool": IS_FILTER_ACTIONS, "str": { - **generate_complementary_filters("iequals", lambda item, value: value.lower() == item.lower()), + **generate_complementary_filters("iequals", lambda item, value: value.casefold() == item.casefold()), **generate_complementary_filters("starts_with", lambda item, value: item.startswith(value)), **generate_complementary_filters("ends_with", lambda item, value: item.endswith(value)), **EQUALS_FILTER_ACTIONS, @@ -58,6 +59,16 @@ def generate_complementary_filters(name, func): **IN_RANGE_FILTER_ACTIONS, }, "NoneType": IS_FILTER_ACTIONS, + "LabelSet": { + **EQUALS_FILTER_ACTIONS, + **CONTAINS_FILTER_ACTIONS, 
+ **IS_FILTER_ACTIONS, + **generate_complementary_filters("any_label_contains", lambda item, value: item.any_label_contains(value)), + **generate_complementary_filters( + "any_label_starts_with", lambda item, value: item.any_label_starts_with(value) + ), + **generate_complementary_filters("any_label_ends_with", lambda item, value: item.any_label_ends_with(value)), + }, "datetime": { **EQUALS_FILTER_ACTIONS, **IS_FILTER_ACTIONS, @@ -84,14 +95,6 @@ def generate_complementary_filters(name, func): "in_date_range": lambda item, value: value[0] <= item.date() <= value[1], "in_time_range": lambda item, value: value[0] <= item.time() <= value[1], }, - "TagSet": { - **generate_complementary_filters("any_tag_contains", lambda item, value: item.any_tag_contains(value)), - **generate_complementary_filters("any_tag_starts_with", lambda item, value: item.any_tag_starts_with(value)), - **generate_complementary_filters("any_tag_ends_with", lambda item, value: item.any_tag_ends_with(value)), - **EQUALS_FILTER_ACTIONS, - **CONTAINS_FILTER_ACTIONS, - **IS_FILTER_ACTIONS, - }, } # Filters for interfaces e.g. iterables or numbers. @@ -112,31 +115,48 @@ def generate_complementary_filters(name, func): class Filterable: - def satisfies(self, filter_name, filter_value): - """ Check that the instance satisfies the given filter for the given filter value. """ + def satisfies(self, raise_error_if_filter_is_invalid=True, **kwargs): + """Check that the instance satisfies the given filter for the given filter value. 
The filter should be provided + as a single keyword argument such as `name__first__equals="Joe"` + + :param bool raise_error_if_filter_is_invalid: + :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whose value is the + value to filter for + :return mixed: + """ + if len(kwargs) != 1: + raise ValueError(f"The satisfies method only takes one keyword argument; received {kwargs!r}.") + + filter_name, filter_value = list(kwargs.items())[0] + attribute_name, filter_action = self._split_filter_name(filter_name) try: - attribute = getattr(self, attribute_name) - except AttributeError: - raise AttributeError(f"An attribute named {attribute_name!r} does not exist on {self!r}.") + attribute = get_nested_attribute(self, attribute_name) + + except AttributeError as error: + if raise_error_if_filter_is_invalid: + raise error + return False filter_ = self._get_filter(attribute, filter_action) + return filter_(attribute, filter_value) def _split_filter_name(self, filter_name): """Split the filter name into the attribute name and filter action, raising an error if it the attribute name and filter action aren't delimited by a double underscore i.e. "__". """ - try: - attribute_name, filter_action = filter_name.split("__", 1) - except ValueError: + *attribute_names, filter_action = filter_name.split("__") + + if not attribute_names: raise exceptions.InvalidInputException( f"Invalid filter name {filter_name!r}. Filter names should be in the form " - f"'__'." + f"'____<...>__' with at least one attribute name " + f"included." 
) - return attribute_name, filter_action + return ".".join(attribute_names), filter_action def _get_filter(self, attribute, filter_action): """Get the filter for the attribute and filter action, raising an error if there is no filter action of that @@ -146,9 +166,8 @@ def _get_filter(self, attribute, filter_action): return self._get_filter_actions_for_attribute(attribute)[filter_action] except KeyError as error: - attribute_type = type(attribute) raise exceptions.InvalidInputException( - f"There is no filter called {error.args[0]!r} for attributes of type {attribute_type}. The options " + f"There is no filter called {error.args[0]!r} for attributes of type {type(attribute)}. The options " f"are {self._get_filter_actions_for_attribute(attribute).keys()!r}" ) diff --git a/octue/mixins/labelable.py b/octue/mixins/labelable.py new file mode 100644 index 000000000..54ee9dc0a --- /dev/null +++ b/octue/mixins/labelable.py @@ -0,0 +1,22 @@ +from octue.resources.label import LabelSet + + +class Labelable: + """A mixin class allowing objects to be labelled.""" + + def __init__(self, *args, labels=None, **kwargs): + self.labels = labels + super().__init__(*args, **kwargs) + + def add_labels(self, *args): + """Add one or more new labels to the object. New labels will be cleaned and validated.""" + self.labels.update(*args) + + @property + def labels(self): + return self._labels + + @labels.setter + def labels(self, labels): + """Overwrite any existing label set and assign new labels.""" + self._labels = LabelSet(labels) diff --git a/octue/mixins/serialisable.py b/octue/mixins/serialisable.py index cac0d4db2..85865077d 100644 --- a/octue/mixins/serialisable.py +++ b/octue/mixins/serialisable.py @@ -1,21 +1,24 @@ import json +from octue.utils.decoders import OctueJSONDecoder from octue.utils.encoders import OctueJSONEncoder class Serialisable: """Mixin class to make resources serialisable to JSON. 
- Objects must have a `.logger` and a `.id` property + The `logger` field is always excluded from serialisation if it is present. """ _SERIALISE_FIELDS = None _EXCLUDE_SERIALISE_FIELDS = ("logger",) def __init__(self, *args, **kwargs): - """Constructor for serialisable mixin""" super().__init__(*args, **kwargs) + if "logger" not in self._EXCLUDE_SERIALISE_FIELDS: + self._EXCLUDE_SERIALISE_FIELDS = (*self._EXCLUDE_SERIALISE_FIELDS, "logger") + @classmethod def deserialise(cls, serialised_object, from_string=False): """Deserialise the given JSON-serialised object. @@ -25,7 +28,7 @@ def deserialise(cls, serialised_object, from_string=False): :return any: """ if from_string: - serialised_object = json.loads(serialised_object) + serialised_object = json.loads(serialised_object, cls=OctueJSONDecoder) return cls(**serialised_object) @@ -35,7 +38,6 @@ def to_file(self, file_name, **kwargs): :parameter str file_name: file to write to, including relative or absolute path and .json extension :return None: """ - self.logger.debug("Writing %s %s to file %s", self.__class__.__name__, self.id, file_name) with open(file_name, "w") as fp: fp.write(self.serialise(**kwargs, to_string=True)) @@ -62,8 +64,6 @@ def __init__(self): :return: json string or dict containing a serialised/primitive version of the resource. 
:rtype: str, dict """ - self.logger.debug("Serialising %s %s", self.__class__.__name__, self.id) - # Get all non-private and non-protected attributes except those excluded specifically names_of_attributes_to_serialise = self._SERIALISE_FIELDS or ( field_name @@ -95,4 +95,4 @@ def __init__(self): if to_string: return string - return json.loads(string) + return json.loads(string, cls=OctueJSONDecoder) diff --git a/octue/mixins/taggable.py b/octue/mixins/taggable.py index 0e4c0fc85..0ca2d6999 100644 --- a/octue/mixins/taggable.py +++ b/octue/mixins/taggable.py @@ -1,17 +1,16 @@ -from octue.resources.tag import TagSet +from octue.resources.tag import TagDict class Taggable: - """ A mixin class allowing objects to be tagged. """ + """A mixin class allowing objects to be tagged.""" def __init__(self, *args, tags=None, **kwargs): - """Constructor for Taggable mixins""" + self.tags = tags super().__init__(*args, **kwargs) - self._tags = TagSet(tags) - def add_tags(self, *args): - """ Adds one or more new tag strings to the object tags. New tags will be cleaned and validated. """ - self._tags.add_tags(*args) + def add_tags(self, tags=None, **kwargs): + """Add one or more new tags to the object. New tags will be cleaned and validated.""" + self.tags.update({**(tags or {}), **kwargs}) @property def tags(self): @@ -19,5 +18,5 @@ def tags(self): @tags.setter def tags(self, tags): - """ Overwrite any existing tag set and assign new tags. 
""" - self._tags = TagSet(tags) + """Overwrite any existing tags and assign the new ones.""" + self._tags = TagDict(tags) diff --git a/octue/resources/analysis.py b/octue/resources/analysis.py index 22ee079d2..20c1da3b7 100644 --- a/octue/resources/analysis.py +++ b/octue/resources/analysis.py @@ -2,7 +2,7 @@ import logging from octue.definitions import OUTPUT_STRANDS -from octue.mixins import Hashable, Identifiable, Loggable, Serialisable, Taggable +from octue.mixins import Hashable, Identifiable, Labelable, Loggable, Serialisable, Taggable from octue.resources.manifest import Manifest from octue.utils.encoders import OctueJSONEncoder from octue.utils.folders import get_file_name_from_strand @@ -23,7 +23,7 @@ CLASS_MAP = {"configuration_manifest": Manifest, "input_manifest": Manifest, "output_manifest": Manifest} -class Analysis(Identifiable, Loggable, Serialisable, Taggable): +class Analysis(Identifiable, Loggable, Serialisable, Labelable, Taggable): """Analysis class, holding references to all input and output data ## The Analysis Instance diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 4f9f08c84..358c8c13f 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -3,13 +3,14 @@ import logging import os import tempfile +import pkg_resources from google_crc32c import Checksum from octue.cloud import storage from octue.cloud.storage import GoogleCloudStorageClient from octue.cloud.storage.path import CLOUD_STORAGE_PROTOCOL from octue.exceptions import AttributeConflict, CloudLocationNotSpecified, FileNotFoundException, InvalidInputException -from octue.mixins import Filterable, Hashable, Identifiable, Loggable, Pathable, Serialisable, Taggable +from octue.mixins import Filterable, Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable, Taggable from octue.mixins.hashable import EMPTY_STRING_HASH_VALUE from octue.utils import isfile from octue.utils.time import convert_from_posix_time, 
convert_to_posix_time @@ -21,14 +22,14 @@ TEMPORARY_LOCAL_FILE_CACHE = {} OCTUE_METADATA_NAMESPACE = "octue" - ID_DEFAULT = None CLUSTER_DEFAULT = 0 SEQUENCE_DEFAULT = None TAGS_DEFAULT = None +LABELS_DEFAULT = None -class Datafile(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable, Filterable): +class Datafile(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable, Filterable): """Class for representing data files on the Octue system. Files in a manifest look like this: @@ -38,7 +39,8 @@ class Datafile(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashabl "cluster": 0, "sequence": 0, "extension": "csv", - "tags": "", + "tags": {}, + "labels": [], "timestamp": datetime.datetime(2021, 5, 3, 18, 15, 58, 298086), "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86", "size_bytes": 59684813, @@ -56,7 +58,8 @@ class Datafile(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashabl :param Pathable path_from: The root Pathable object (typically a Dataset) that this Datafile's path is relative to. 
:param int cluster: The cluster of files, within a dataset, to which this belongs (default 0) :param int sequence: A sequence number of this file within its cluster (if sequences are appropriate) - :param str tags: Space-separated string of tags relevant to this file + :param dict|TagDict tags: key-value pairs with string keys conforming to the Octue tag format (see TagDict) + :param iter(str) labels: Space-separated string of labels relevant to this file :param bool skip_checks: :param str mode: if using as a context manager, open the datafile for reading/editing in this mode (the mode options are the same as for the builtin open function) @@ -72,6 +75,7 @@ class Datafile(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashabl "path", "sequence", "tags", + "labels", "timestamp", "_cloud_metadata", ) @@ -86,6 +90,7 @@ def __init__( cluster=CLUSTER_DEFAULT, sequence=SEQUENCE_DEFAULT, tags=TAGS_DEFAULT, + labels=LABELS_DEFAULT, skip_checks=True, mode="r", update_cloud_metadata=True, @@ -97,6 +102,7 @@ def __init__( immutable_hash_value=kwargs.pop("immutable_hash_value", None), logger=logger, tags=tags, + labels=labels, path=path, path_from=path_from, ) @@ -196,21 +202,23 @@ def from_cloud( if not allow_overwrite: cls._check_for_attribute_conflict(custom_metadata, **kwargs) - timestamp = kwargs.get("timestamp", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__timestamp")) - - if isinstance(timestamp, str): - timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f%z") - datafile._set_id(kwargs.pop("id", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__id", ID_DEFAULT))) - datafile.timestamp = timestamp datafile.immutable_hash_value = datafile._cloud_metadata.get("crc32c", EMPTY_STRING_HASH_VALUE) + datafile.timestamp = kwargs.get("timestamp", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__timestamp")) + datafile.tags = kwargs.pop("tags", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__tags", TAGS_DEFAULT)) + datafile.cluster = 
kwargs.pop( "cluster", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__cluster", CLUSTER_DEFAULT) ) + datafile.sequence = kwargs.pop( "sequence", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__sequence", SEQUENCE_DEFAULT) ) - datafile.tags = kwargs.pop("tags", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__tags", TAGS_DEFAULT)) + + datafile.labels = kwargs.pop( + "labels", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__labels", LABELS_DEFAULT) + ) + datafile._open_attributes = {"mode": mode, "update_cloud_metadata": update_cloud_metadata, **kwargs} return datafile @@ -300,7 +308,7 @@ def update_cloud_metadata(self, project_name=None, cloud_path=None, bucket_name= project_name, cloud_path, bucket_name, path_in_bucket ) - GoogleCloudStorageClient(project_name=project_name).update_metadata( + GoogleCloudStorageClient(project_name=project_name).overwrite_custom_metadata( metadata=self.metadata(), bucket_name=bucket_name, path_in_bucket=path_in_bucket, @@ -522,7 +530,9 @@ def metadata(self, use_octue_namespace=True): "timestamp": self.timestamp, "cluster": self.cluster, "sequence": self.sequence, - "tags": self.tags.serialise(to_string=True), + "labels": self.labels, + "tags": self.tags, + "sdk_version": pkg_resources.get_distribution("octue").version, } if not use_octue_namespace: diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index edc753eb9..cdf7d0583 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -6,11 +6,12 @@ from octue import definitions from octue.cloud import storage from octue.cloud.storage import GoogleCloudStorageClient -from octue.exceptions import BrokenSequenceException, InvalidInputException, UnexpectedNumberOfResultsException -from octue.mixins import Hashable, Identifiable, Loggable, Pathable, Serialisable, Taggable +from octue.exceptions import BrokenSequenceException, InvalidInputException +from octue.mixins import Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable, 
Taggable from octue.resources.datafile import Datafile from octue.resources.filter_containers import FilterSet -from octue.resources.tag import TagSet +from octue.resources.label import LabelSet +from octue.resources.tag import TagDict module_logger = logging.getLogger(__name__) @@ -19,20 +20,18 @@ DATAFILES_DIRECTORY = "datafiles" -class Dataset(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable): - """A representation of a dataset, containing files, tags, etc +class Dataset(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable): + """A representation of a dataset, containing files, labels, etc This is used to read a list of files (and their associated properties) into octue analysis, or to compile a list of output files (results) and their properties that will be sent back to the octue system. """ - _FILTERSET_ATTRIBUTE = "files" _ATTRIBUTES_TO_HASH = ("files",) - _SERIALISE_FIELDS = "files", "name", "tags", "id", "path" + _SERIALISE_FIELDS = "files", "name", "labels", "tags", "id", "path" - def __init__(self, name=None, id=None, logger=None, path=None, path_from=None, tags=None, **kwargs): - """Construct a Dataset""" - super().__init__(name=name, id=id, logger=logger, tags=tags, path=path, path_from=path_from) + def __init__(self, name=None, id=None, logger=None, path=None, path_from=None, tags=None, labels=None, **kwargs): + super().__init__(name=name, id=id, logger=logger, tags=tags, labels=labels, path=path, path_from=path_from) # TODO The decoders aren't being used; utils.decoders.OctueJSONDecoder should be used in twined # so that resources get automatically instantiated. 
@@ -88,7 +87,8 @@ def from_cloud(cls, project_name, cloud_path=None, bucket_name=None, path_to_dat id=serialised_dataset["id"], name=serialised_dataset["name"], path=storage.path.generate_gs_path(bucket_name, path_to_dataset_directory), - tags=TagSet(serialised_dataset["tags"]), + tags=TagDict(serialised_dataset["tags"]), + labels=LabelSet(serialised_dataset["labels"]), files=datafiles, ) @@ -178,7 +178,7 @@ def append(self, *args, **kwargs): ) self.files.add(*args, **kwargs) - def get_files(self, field_lookup, filter_value=None): + def get_files(self, **kwargs): warnings.warn( "The `Dataset.get_files` method has been deprecated and replaced with `Dataset.files.filter`, which has " "the same interface but with the `field_lookup` argument renamed to `filter_name`. Calls to " @@ -186,9 +186,9 @@ def get_files(self, field_lookup, filter_value=None): "in future.", DeprecationWarning, ) - return self.files.filter(filter_name=field_lookup, filter_value=filter_value) + return self.files.filter(**kwargs) - def get_file_sequence(self, filter_name=None, filter_value=None, strict=True): + def get_file_sequence(self, strict=True, **kwargs): """Get an ordered sequence of files matching a criterion Accepts the same search arguments as `get_files`. 
@@ -202,16 +202,13 @@ def get_file_sequence(self, filter_name=None, filter_value=None, strict=True): """ results = self.files - if filter_name is not None: - results = results.filter(filter_name=filter_name, filter_value=filter_value) + if kwargs: + results = results.filter(**kwargs) - results = results.filter("sequence__is_not", None) - - def get_sequence_number(file): - return file.sequence + results = results.filter(sequence__is_not=None) # Sort the results on ascending sequence number - results = sorted(results, key=get_sequence_number) + results = sorted(results, key=lambda file: file.sequence) # Check sequence is unique and sequential if strict: @@ -223,18 +220,11 @@ def get_sequence_number(file): return results - def get_file_by_tag(self, tag_string): - """Gets a data file from a manifest by searching for files with the provided tag(s) - - Gets exclusively one file; if no file or more than one file is found this results in an error. + def get_file_by_label(self, label): + """Get a single datafile from a dataset by filtering for files with the provided label. 
- :param tag_string: if this string appears as an exact match in the tags - :return: DataFile object + :param str label: the label to filter for + :raise octue.exceptions.UnexpectedNumberOfResultsException: if zero or more than one results satisfy the filters + :return octue.resources.datafile.DataFile: """ - results = self.files.filter(filter_name="tags__contains", filter_value=tag_string) - if len(results) > 1: - raise UnexpectedNumberOfResultsException("More than one result found when searching for a file by tag") - elif len(results) == 0: - raise UnexpectedNumberOfResultsException("No files found with this tag") - - return results.pop() + return self.files.one(labels__contains=label) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 62d3bed6e..194c020da 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -1,33 +1,130 @@ +from abc import ABC +from collections import UserDict + from octue import exceptions +from octue.mixins import Filterable +from octue.utils.objects import get_nested_attribute + + +class FilterContainer(ABC): + def filter(self, ignore_items_without_attribute=True, **kwargs): + """Return a new instance containing only the `Filterable`s to which the given filter criteria are `True`. 
+ + :param bool ignore_items_without_attribute: if True, just ignore any members of the container without a filtered-for attribute rather than raising an error + :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the values to filter for + :return octue.resources.filter_containers.FilterContainer: + """ + if any(not isinstance(item, Filterable) for item in self): + raise TypeError(f"All items in a {type(self).__name__} must be of type {Filterable.__name__}.") + + raise_error_if_filter_is_invalid = not ignore_items_without_attribute + + if len(kwargs) == 1: + return type(self)( + ( + item + for item in self + if item.satisfies(raise_error_if_filter_is_invalid=raise_error_if_filter_is_invalid, **kwargs) + ) + ) + + filter_names = list(kwargs) + + for filter_name in filter_names: + filter_value = kwargs.pop(filter_name) + return self.filter(raise_error_if_filter_is_invalid, **{filter_name: filter_value}).filter(**kwargs) + + def order_by(self, attribute_name, reverse=False): + """Order the `Filterable`s in the container by an attribute with the given name, returning them as a new + `FilterList` regardless of the type of filter container begun with. + + :param str attribute_name: name of attribute (optionally nested) to order by e.g. "a", "a.b", "a.b.c" + :param bool reverse: if True, reverse the ordering + :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the container's members + :return FilterList: + """ + try: + return FilterList( + sorted(self, key=lambda item: get_nested_attribute(item, attribute_name), reverse=reverse) + ) + + except AttributeError: + raise exceptions.InvalidInputException( + f"An attribute named {attribute_name!r} does not exist on one or more members of {self!r}." + ) + + def one(self, **kwargs): + """If a single result exists for the given filters, return it. Otherwise, raise an error. 
+ + :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the values to filter for + :raise octue.exceptions.UnexpectedNumberOfResultsException: if zero or more than one results satisfy the filters + :return octue.resources.mixins.filterable.Filterable: + """ + results = self.filter(**kwargs) + + if len(results) > 1: + raise exceptions.UnexpectedNumberOfResultsException(f"More than one result found for filters {kwargs}.") + + if len(results) == 0: + raise exceptions.UnexpectedNumberOfResultsException(f"No results found for filters {kwargs}.") + + if isinstance(self, UserDict): + return results.popitem() + + return results.pop() + + +class FilterSet(FilterContainer, set): + pass + + +class FilterList(FilterContainer, list): + pass + +class FilterDict(FilterContainer, UserDict): + def filter(self, ignore_items_without_attribute=True, **kwargs): + """Return a new instance containing only the Filterables for which the given filter criteria apply are + satisfied. -def _filter(instance, filter_name=None, filter_value=None): - """Returns a new instance containing only the Filterables to which the given filter criteria apply. 
+ :param bool ignore_items_without_attribute: if True, just ignore any members of the container without a filtered-for attribute rather than raising an error + :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the values to filter for + :return FilterDict: + """ + if any(not isinstance(item, Filterable) for item in self.values()): + raise TypeError(f"All values in a {type(self).__name__} must be of type {Filterable.__name__}.") - :param str filter_name: - :param any filter_value: - :return octue.resources.filter_containers.FilterSet: - """ - return instance.__class__((item for item in instance if item.satisfies(filter_name, filter_value))) + raise_error_if_filter_is_invalid = not ignore_items_without_attribute + if len(kwargs) == 1: + return type(self)( + { + key: value + for key, value in self.items() + if value.satisfies(raise_error_if_filter_is_invalid=raise_error_if_filter_is_invalid, **kwargs) + } + ) -def _order_by(instance, attribute_name, reverse=False): - """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList (not a - FilterSet. - """ - try: - return FilterList(sorted(instance, key=lambda item: getattr(item, attribute_name), reverse=reverse)) - except AttributeError: - raise exceptions.InvalidInputException( - f"An attribute named {attribute_name!r} does not exist on one or more members of {instance!r}." - ) + filter_names = list(kwargs) + for filter_name in filter_names: + filter_value = kwargs.pop(filter_name) + return self.filter(raise_error_if_filter_is_invalid, **{filter_name: filter_value}).filter(**kwargs) -class FilterSet(set): - filter = _filter - order_by = _order_by + def order_by(self, attribute_name, reverse=False): + """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList. + :param str attribute_name: name of attribute (optionally nested) to order by e.g. 
"a", "a.b", "a.b.c" + :param bool reverse: if True, reverse the ordering + :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the FilterDict's values + :return FilterList: + """ + try: + return FilterList( + sorted(self.items(), key=lambda item: get_nested_attribute(item[1], attribute_name), reverse=reverse) + ) -class FilterList(list): - filter = _filter - order_by = _order_by + except AttributeError: + raise exceptions.InvalidInputException( + f"An attribute named {attribute_name!r} does not exist on one or more members of {self!r}." + ) diff --git a/octue/resources/label.py b/octue/resources/label.py new file mode 100644 index 000000000..986562994 --- /dev/null +++ b/octue/resources/label.py @@ -0,0 +1,113 @@ +import json +import re +from collections import UserString + +from octue.exceptions import InvalidLabelException +from octue.resources.filter_containers import FilterSet + + +LABEL_PATTERN = re.compile(r"^[a-z0-9][a-z0-9-]*(? 
0: raise InvalidInputException("You cannot `prepare()` a manifest already instantiated with datasets") - for idx, dataset_spec in enumerate(data): + for index, dataset_specification in enumerate(data["datasets"]): - self.keys[dataset_spec["key"]] = idx - # TODO generate a unique name based on the filter key, tag datasets so that the tag filters in the spec + self.keys[dataset_specification["key"]] = index + # TODO generate a unique name based on the filter key, label datasets so that the label filters in the spec # apply automatically and generate a description of the dataset - self.datasets.append(Dataset(logger=self.logger, path_from=self, path=dataset_spec["key"])) + self.datasets.append(Dataset(logger=self.logger, path_from=self, path=dataset_specification["key"])) return self diff --git a/octue/resources/tag.py b/octue/resources/tag.py index 375d2779e..d8ce473d3 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -1,195 +1,58 @@ import json import re -from functools import lru_cache +from collections import UserDict from octue.exceptions import InvalidTagException -from octue.mixins import Filterable, Serialisable -from octue.resources.filter_containers import FilterList, FilterSet +from octue.mixins import Serialisable from octue.utils.encoders import OctueJSONEncoder -TAG_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9:.\-/]*(? other - elif isinstance(other, Tag): - return self.name > other.name - - def __hash__(self): - """ Allow Tags to be contained in a set. """ - return hash(f"{type(self).__name__}{self.name}") - - def __contains__(self, item): - return item in self.name - - def __repr__(self): - return repr(self.name) - - def starts_with(self, value): - """ Does the tag start with the given value? """ - return self.name.startswith(value) - - def ends_with(self, value): - """ Does the tag end with the given value? 
""" - return self.name.endswith(value) - - @staticmethod - def _clean(name): - """ Ensure the tag name is a string and conforms to the tag regex pattern. """ - if not isinstance(name, str): - raise InvalidTagException("Tags must be expressed as a string.") - - cleaned_name = name.strip() - - if not re.match(TAG_PATTERN, cleaned_name): - raise InvalidTagException( - f"Invalid tag '{cleaned_name}'. Tags must contain only characters 'a-z', 'A-Z', '0-9', ':', '.', '/' " - f"and '-'. They must not start with '-', ':', '/' or '.'" - ) - - return cleaned_name - - -class TagSet(Serialisable): - """ Class to handle a set of tags as a string. """ - - _FILTERSET_ATTRIBUTE = "tags" + self._check_tag_format(name) + super().__setitem__(name, value) - def __init__(self, tags=None, *args, **kwargs): - """ Construct a TagSet. """ - # TODO Call the superclass with *args and **kwargs, then update everything to using ResourceBase - tags = tags or FilterSet() + def update(self, tags, **kwargs): + """Add multiple tags to the TagDict from another dictionary or as keyword arguments. - # JSON-encoded list of tag names, or space-delimited string of tag names. - if isinstance(tags, str): - try: - self.tags = FilterSet(Tag(tag) for tag in json.loads(tags)) - except json.decoder.JSONDecodeError: - self.tags = FilterSet(Tag(tag) for tag in tags.strip().split()) - - elif isinstance(tags, TagSet): - self.tags = FilterSet(tags.tags) - - # Tags can be some other iterable than a list, but each tag must be a Tag or string. - elif hasattr(tags, "__iter__"): - self.tags = FilterSet(tag if isinstance(tag, Tag) else Tag(tag) for tag in tags) - - else: - raise InvalidTagException( - "Tags must be expressed as a whitespace-delimited string or an iterable of strings or Tag instances." - ) - - def __eq__(self, other): - """ Does this TagSet have the same tags as another TagSet? 
""" - if not isinstance(other, TagSet): - return False - return self.tags == other.tags - - def __iter__(self): - """ Iterate over the tags in the TagSet. """ - yield from self.tags - - def __len__(self): - return len(self.tags) - - def __contains__(self, tag): - """ Return True if any of the tags exactly matches value, allowing test like `if 'a' in TagSet('a b')`. """ - if isinstance(tag, str): - return Tag(tag) in self.tags - if isinstance(tag, Tag): - return tag in self.tags - - def __repr__(self): - return f"" - - def add_tags(self, *args): - """Adds one or more new tag strings to the object tags. New tags will be cleaned and validated.""" - self.tags |= {Tag(arg) for arg in args} - - def get_subtags(self): - """ Return a new TagSet instance with all the subtags. """ - return TagSet(subtag for tag in self for subtag in tag.subtags) - - def any_tag_starts_with(self, value): - """ Implement a startswith method that returns true if any of the tags starts with value """ - return any(tag.starts_with(value) for tag in self) - - def any_tag_ends_with(self, value): - """ Implement an endswith method that returns true if any of the tags endswith value. """ - return any(tag.ends_with(value) for tag in self) - - def any_tag_contains(self, value): - """ Return True if any of the tags contains value. """ - return any(value in tag for tag in self) + :param dict|TagDict tags: tags to add + :param **kwargs: {str: any} pairs of tags as keyword arguments e.g. `my_tag=7` + :return None: + """ + self._check_tag_format(*tags) + super().update(tags, **kwargs) - def filter(self, filter_name=None, filter_value=None): - """Filter the tags with the given filter for the given value. + def _check_tag_format(self, *tags): + """Check if each tag conforms to the tag name pattern. 
- :param str filter_name: - :param any filter_value: - :return octue.resources.filter_containers.FilterSet: + :param *tags: any number of str items to check + :return: """ - return self.tags.filter(filter_name=filter_name, filter_value=filter_value) + for tag in tags: + if not re.match(TAG_NAME_PATTERN, tag): + raise InvalidTagException( + f"Invalid tag '{tag}'. Tags must contain only characters 'a-z', 'A-Z', '0-9', and '_'. They must " + f"not start with '_'." + ) def serialise(self, to_string=False, **kwargs): - """Serialise to a sorted list of tag names. + """Serialise a TagDict to a JSON dictionary or string. :param bool to_string: - :return list|str: + :return str|dict: """ - string = json.dumps( - sorted(tag.name for tag in self.tags), cls=OctueJSONEncoder, sort_keys=True, indent=4, **kwargs - ) + string = json.dumps(self.data, cls=OctueJSONEncoder, sort_keys=True, indent=4, **kwargs) if to_string: return string return json.loads(string) - - @classmethod - def deserialise(cls, serialised_tagset): - """Deserialise from a sorted list of tag names. 
- - :param list serialised_tagset: - :return TagSet: - """ - return cls(tags=serialised_tagset) diff --git a/octue/templates/template-child-services/parent_service/twine.json b/octue/templates/template-child-services/parent_service/twine.json index ef23b594d..1a6468dfd 100644 --- a/octue/templates/template-child-services/parent_service/twine.json +++ b/octue/templates/template-child-services/parent_service/twine.json @@ -4,13 +4,13 @@ "key": "wind_speed", "purpose": "A service that returns the average wind speed for a given latitude and longitude.", "notes": "Some notes.", - "filters": "tags:wind_speed" + "filters": "labels:wind_speed" }, { "key": "elevation", "purpose": "A service that returns the elevation for a given latitude and longitude.", "notes": "Some notes.", - "filters": "tags:elevation" + "filters": "labels:elevation" } ], "input_values_schema": { diff --git a/octue/templates/template-python-fractal/fractal/fractal.py b/octue/templates/template-python-fractal/fractal/fractal.py index 1b1b2e50b..328099df8 100644 --- a/octue/templates/template-python-fractal/fractal/fractal.py +++ b/octue/templates/template-python-fractal/fractal/fractal.py @@ -41,16 +41,15 @@ def fractal(analysis): "height": analysis.configuration_values["height"], } - # We'll add some tags, which will help to improve searchability and allow - # other apps, reports, users and analyses to automatically find figures and - # use them. + # We'll add some labels and tags, which will help to improve searchability and allow other apps, reports, users and + # analyses to automatically find figures and use them. # - # Get descriptive with tags... they are whitespace-delimited and colons can be - # used to provide subtags. Tags are case insensitive, and accept a-z, 0-9, - # hyphens and underscores (which can be used literally in search and are also - # used to separate words in natural language search). Other special characters - # will be stripped. 
- tags = "contents:fractal:mandelbrot type:figure:surface" + # Get descriptive with labels... they are whitespace-delimited. Labels are case insensitive, and accept a-z, 0-9, + # and hyphens which can be used literally in search and are also used to separate words in natural language search). + # Other special characters will be stripped. Tags are key value pairs where the values can be anything but the keys + # only accept a-z, 0-9, and underscores. + labels = {"complex-figure"} + tags = {"contents": "fractal:mandelbrot"} # Get the output dataset which will be used for storing the figure file(s) output_dataset = analysis.output_manifest.get_dataset("fractal_figure_files") @@ -65,6 +64,7 @@ def fractal(analysis): local_path_prefix=output_dataset.path, # TODO set up for the right paths Destination (root of the output dataset folder on the present machine) skip_checks=True, # We haven't created the actual file yet, so it'll definitely fail checks! tags=tags, + labels=labels, ) # Actually write the contents to the file specified by the Datafile diff --git a/octue/templates/template-python-fractal/twine.json b/octue/templates/template-python-fractal/twine.json index b85156705..03c53ae42 100644 --- a/octue/templates/template-python-fractal/twine.json +++ b/octue/templates/template-python-fractal/twine.json @@ -60,11 +60,12 @@ } } }, - "output_manifest": [ - { - "key": "fractal_figure_files", - "purpose": "A dataset containing .json files containing the output figures", - "filters": "tags:(fractal AND figure) files:(extension:json)" - } - ] + "output_manifest": { + "datasets": [ + { + "key": "fractal_figure_files", + "purpose": "A dataset containing .json files containing the output figures" + } + ] + } } diff --git a/octue/templates/template-using-manifests/app.py b/octue/templates/template-using-manifests/app.py index a9a2796fc..1bb035395 100644 --- a/octue/templates/template-using-manifests/app.py +++ b/octue/templates/template-using-manifests/app.py @@ -35,8 +35,8 @@ 
def run(analysis, *args, **kwargs): # capabilities. Let's get the metadata and the timeseries files, whilst showing off a couple of the filters. # # See the Dataset class help for more. - metadata_file = input_dataset.get_file_by_tag("meta") - timeseries_files = input_dataset.get_file_sequence("tags__contains", filter_value="timeseries") + metadata_file = input_dataset.get_file_by_label("meta") + timeseries_files = input_dataset.get_file_sequence(labels__contains="timeseries") # # We used these because they're special helpers - in this case ensuring that there's only one metadata file and # ensuring that the timeseries files come in a strictly ordered sequence. @@ -45,7 +45,7 @@ def run(analysis, *args, **kwargs): # metadata_files = input_dataset.get_files("name__icontains", filter_value="meta") # # There's generally a few ways to do it. Choose one which is likely to be most consistent - for example if your - # filenames might be subject to change, but you have better control over the tags, rely on those. + # filenames might be subject to change, but you have better control over the labels, rely on those. # At this point it's over to you, to do whatever you want with the contents of these files. # For this example app, we will: @@ -63,16 +63,13 @@ def run(analysis, *args, **kwargs): # course, because we haven't done the processing yet)... output_dataset = analysis.output_manifest.get_dataset("cleaned_met_mast_data") - # We'll add tags to the output dataset, which will help to improve searchability and allow - # other apps, reports, users and analyses to automatically find figures and - # use them. + # We'll add some labels, which will help to improve searchability and allow other apps, reports, users and + # analyses to automatically find figures and use them. # - # Get descriptive with tags... they are whitespace-delimited and colons can be - # used to provide subtags. 
Tags are case insensitive, and accept a-z, 0-9, - # hyphens and underscores (which can be used literally in search and are also - # used to separate words in natural language search). Other special characters - # will be stripped. - output_dataset.tags = "met mast cleaned" + # Get descriptive with labels... they are whitespace-delimited. Labels are case insensitive, and accept a-z, 0-9, + # and hyphens (which can be used literally in search and are also used to separate words in natural language search). + # Other special characters will be stripped. + output_dataset.labels = "met mast cleaned" # Create a Datafile to hold the concatenated, cleaned output data. We could put it in the current directory # (by leaving local_path_prefix unspecified) but it makes sense to put it in a folder specific to this output @@ -83,7 +80,7 @@ def run(analysis, *args, **kwargs): path="cleaned.csv", path_from=output_dataset, # Tells it where it should be stored, in this case the output dataset folder skip_checks=True, # We haven't created the actual file yet, so checks would definitely fail! - tags="timeseries", + labels="timeseries", ) # Write the file (now we know where to write it) @@ -97,5 +94,5 @@ def run(analysis, *args, **kwargs): # all :) # # If you're running this on your local machine, that's it - but when this code runs as an analysis in the cloud, - # The files in the output manifest are copied into the cloud store. Their names and tags are registered in a search + # the files in the output manifest are copied into the cloud store. Their names and labels are registered in a search # index so your colleagues can find the dataset you've produced. 
diff --git a/octue/templates/template-using-manifests/data/input/manifest.json b/octue/templates/template-using-manifests/data/input/manifest.json index 7eb991fc4..fd174c45a 100644 --- a/octue/templates/template-using-manifests/data/input/manifest.json +++ b/octue/templates/template-using-manifests/data/input/manifest.json @@ -7,14 +7,16 @@ { "id": "7ead4669-8162-4f64-8cd5-4abe92509e17", "name": "meteorological mast dataset", - "tags": ["met", "mast", "wind", "location:108346"], + "tags": {"location": 108346}, + "labels": ["met", "mast", "wind"], "files": [ { "path": "08DEC/High Res Meteorological Mast Data - 8 Dec_1.csv", "cluster": 0, "sequence": 0, "extension": "csv", - "tags": ["timeseries"], + "tags": {}, + "labels": ["timeseries"], "timestamp": 1605783547.0, "id": "acff07bc-7c19-4ed5-be6d-a6546eae8e86", "name": "High Res Meteorological Mast Data - 8 Dec_1.csv", @@ -26,7 +28,8 @@ "cluster": 0, "sequence": 1, "extension": "csv", - "tags": ["timeseries"], + "tags": {}, + "labels": ["timeseries"], "timestamp": 1605783547.0, "id": "bdff07bc-7c19-4ed5-be6d-a6546eae8e45", "name": "High Res Meteorological Mast Data - 8 Dec_2.csv", @@ -38,7 +41,8 @@ "cluster": 1, "sequence": 0, "extension": "dat", - "tags": ["meta"], + "tags": {}, + "labels": ["meta"], "timestamp": 1605783547.0, "id": "ceff07bc-7c19-4ed5-be6d-a6546eae8e86", "name": "meta - 8 Dec_1.da", diff --git a/octue/templates/template-using-manifests/twine.json b/octue/templates/template-using-manifests/twine.json index 7bf9e2324..3ce7b182c 100644 --- a/octue/templates/template-using-manifests/twine.json +++ b/octue/templates/template-using-manifests/twine.json @@ -12,18 +12,20 @@ } } }, - "input_manifest": [ - { - "key": "raw_met_mast_data", - "purpose": "A dataset containing .csv files of raw meteorological mast data which we need to clean up", - "filters": "tags:(met AND mast) files:(extension:csv)" - } - ], - "output_manifest": [ - { - "key": "cleaned_met_mast_data", - "purpose": "A dataset containing .csv 
files of cleaned meteorological mast data", - "filters": "tags:(met AND mast AND cleaned) files:(extension:csv)" - } - ] + "input_manifest": { + "datasets": [ + { + "key": "raw_met_mast_data", + "purpose": "A dataset containing .csv files of raw meteorological mast data which we need to clean up" + } + ] + }, + "output_manifest": { + "datasets": [ + { + "key": "cleaned_met_mast_data", + "purpose": "A dataset containing .csv files of cleaned meteorological mast data" + } + ] + } } diff --git a/octue/utils/decoders.py b/octue/utils/decoders.py index 70d2db914..35ecfc9ec 100644 --- a/octue/utils/decoders.py +++ b/octue/utils/decoders.py @@ -1,28 +1,22 @@ +import json from json import JSONDecoder +import dateutil.parser -from octue.resources import Datafile, Dataset, Manifest +class OctueJSONDecoder(JSONDecoder): + """A JSON Decoder to convert default json objects into their Datafile, Dataset or Manifest classes as appropriate""" -def default_object_hook(obj): - """A hook to convert default json objects into their Datafile, Dataset or Manifest class as appropriate""" - - # object hooks are called whenever a json object is created. 
When nested, this is done from innermost (deepest - # nesting) out so it's safe to work at multiple levels here - - if "files" in obj: - files = [Datafile(**df) for df in obj.pop("files")] - return {**obj, "files": files} - - if "datasets" in obj: - datasets = [Dataset(**ds) for ds in obj.pop("datasets")] - return Manifest(**obj, datasets=datasets) + def __init__(self, *args, object_hook=None, **kwargs): + json.JSONDecoder.__init__(self, object_hook=object_hook or self.object_hook, *args, **kwargs) - return obj + def object_hook(self, obj): + if "_type" not in obj: + return obj + if obj["_type"] == "set": + return set(obj["items"]) -class OctueJSONDecoder(JSONDecoder): - """A JSON Decoder to convert default json objects into their Datafile, Dataset or Manifest classes as appropriate""" + if obj["_type"] == "datetime": + return dateutil.parser.parse(obj["value"]) - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.object_hook = self.object_hook or default_object_hook + return obj diff --git a/octue/utils/encoders.py b/octue/utils/encoders.py index 59604bf27..f263839ce 100644 --- a/octue/utils/encoders.py +++ b/octue/utils/encoders.py @@ -1,10 +1,11 @@ import datetime +from collections import UserString from twined.utils import TwinedEncoder class OctueJSONEncoder(TwinedEncoder): - """A JSON Encoder which allows objects having a `serialise()` method to control their own conversion to primitives""" + """A JSON Encoder which allows objects having a `serialise()` method to control their own conversion to primitives.""" def default(self, obj): @@ -12,8 +13,15 @@ def default(self, obj): if hasattr(obj, "serialise"): return obj.serialise() + # Serialise sets as sorted list (JSON doesn't support sets). 
+ if isinstance(obj, set): + return {"_type": "set", "items": sorted(obj)} + + if isinstance(obj, UserString): + return str(obj) + if isinstance(obj, datetime.datetime): - return obj.isoformat() + return {"_type": "datetime", "value": obj.isoformat()} # Otherwise let the base class default method raise the TypeError return TwinedEncoder.default(self, obj) diff --git a/octue/utils/objects.py b/octue/utils/objects.py new file mode 100644 index 000000000..b0b1d4be9 --- /dev/null +++ b/octue/utils/objects.py @@ -0,0 +1,29 @@ +import functools + + +def get_nested_attribute(instance, nested_attribute_name): + """Get the value of a nested attribute from a class instance or dictionary, with each level of nesting being + another dictionary or class instance. + + :param dict|object instance: + :param str nested_attribute_name: dot-separated nested attribute name e.g. "a.b.c", "a.b", or "a" + :return any: + """ + nested_attribute_names = nested_attribute_name.split(".") + return functools.reduce(getattr_or_subscribe, nested_attribute_names, instance) + + +def getattr_or_subscribe(instance, name): + """Get an attribute from a class instance or a value from a dictionary. 
+ + :param dict|object instance: + :param str name: name of attribute or dictionary key + :return any: + """ + try: + return getattr(instance, name) + except AttributeError: + try: + return instance[name] + except TypeError: + raise AttributeError(f"{instance!r} does not have an attribute or key named {name!r}.") diff --git a/requirements-dev.txt b/requirements-dev.txt index 57d3e2115..734379fc3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,7 @@ # Testing # ------------------------------------------------------------------------------ pluggy -gcp-storage-emulator>=2021.3.28 +gcp-storage-emulator>=2021.5.5 tox>=3.23.0 # Code quality diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index f1e8ed902..e5020b6f8 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -1,231 +1,253 @@ from datetime import date, datetime, time +from unittest.mock import Mock from octue import exceptions from octue.mixins.filterable import Filterable -from octue.resources.tag import TagSet +from octue.resources.label import LabelSet +from octue.resources.tag import TagDict from tests.base import BaseTestCase class FilterableSubclass(Filterable): - def __init__(self, name=None, is_alive=None, iterable=None, age=None, owner=None, timestamp=None): - self.name = name - self.is_alive = is_alive - self.iterable = iterable - self.age = age - self.owner = owner - self.timestamp = timestamp + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) class TestFilterable(BaseTestCase): def test_error_raised_when_invalid_filter_name_received(self): """ Ensure an error is raised when an invalid filter name is provided. 
""" with self.assertRaises(exceptions.InvalidInputException): - FilterableSubclass().satisfies(filter_name="invalid_filter_name", filter_value=None) + FilterableSubclass().satisfies(invalid_filter_name=None) def test_error_raised_when_non_existent_attribute_name_received(self): """ Ensure an error is raised when a non-existent attribute name is used in the filter name. """ with self.assertRaises(AttributeError): - FilterableSubclass().satisfies(filter_name="boogaloo__is_a_dance", filter_value=True) + FilterableSubclass().satisfies(boogaloo__is_a_dance=True) def test_error_raised_when_valid_but_non_existent_filter_name_received(self): """ Ensure an error is raised when a valid but non-existent filter name is received. """ with self.assertRaises(exceptions.InvalidInputException): - FilterableSubclass().satisfies(filter_name="age__is_secret", filter_value=True) + FilterableSubclass(age=23).satisfies(age__is_secret=True) def test_error_raised_when_attribute_type_has_no_filters_defined(self): """Ensure an error is raised when a filter for an attribute whose type doesn't have any filters defined is received. """ with self.assertRaises(exceptions.InvalidInputException): - FilterableSubclass(age=lambda: None).satisfies(filter_name="age__equals", filter_value=True) + FilterableSubclass(age=lambda: None).satisfies(age__equals=True) + + def test_error_raised_if_more_than_one_filter_is_provided(self): + """Test that an error is raised if more than one filter is provided to the satisfies method.""" + with self.assertRaises(ValueError): + FilterableSubclass(age=23).satisfies(age__equals=23, age__equals__gt=20) def test_bool_filters(self): """ Test that the boolean filters work as expected. 
""" filterable_thing = FilterableSubclass(is_alive=True) - self.assertTrue(filterable_thing.satisfies("is_alive__is", True)) - self.assertFalse(filterable_thing.satisfies("is_alive__is", False)) - self.assertTrue(filterable_thing.satisfies("is_alive__is_not", False)) - self.assertFalse(filterable_thing.satisfies("is_alive__is_not", True)) + self.assertTrue(filterable_thing.satisfies(is_alive__is=True)) + self.assertFalse(filterable_thing.satisfies(is_alive__is=False)) + self.assertTrue(filterable_thing.satisfies(is_alive__is_not=False)) + self.assertFalse(filterable_thing.satisfies(is_alive__is_not=True)) def test_str_filters(self): """ Test that the string filters work as expected. """ filterable_thing = FilterableSubclass(name="Michael") - self.assertTrue(filterable_thing.satisfies("name__icontains", "m")) - self.assertFalse(filterable_thing.satisfies("name__icontains", "d")) - self.assertTrue(filterable_thing.satisfies("name__not_icontains", "d")) - self.assertFalse(filterable_thing.satisfies("name__not_icontains", "m")) - self.assertTrue(filterable_thing.satisfies("name__contains", "M")) - self.assertFalse(filterable_thing.satisfies("name__contains", "d")) - self.assertTrue(filterable_thing.satisfies("name__ends_with", "l")) - self.assertFalse(filterable_thing.satisfies("name__ends_with", "M")) - self.assertTrue(filterable_thing.satisfies("name__not_ends_with", "M")) - self.assertFalse(filterable_thing.satisfies("name__not_ends_with", "l")) - self.assertTrue(filterable_thing.satisfies("name__starts_with", "M")) - self.assertFalse(filterable_thing.satisfies("name__starts_with", "l")) - self.assertTrue(filterable_thing.satisfies("name__not_starts_with", "l")) - self.assertFalse(filterable_thing.satisfies("name__not_starts_with", "M")) - self.assertTrue(filterable_thing.satisfies("name__equals", "Michael")) - self.assertFalse(filterable_thing.satisfies("name__equals", "Clive")) - self.assertTrue(filterable_thing.satisfies("name__not_equals", "Clive")) - 
self.assertFalse(filterable_thing.satisfies("name__not_equals", "Michael")) - self.assertTrue(filterable_thing.satisfies("name__iequals", "michael")) - self.assertFalse(filterable_thing.satisfies("name__iequals", "James")) - self.assertTrue(filterable_thing.satisfies("name__not_iequals", "James")) - self.assertFalse(filterable_thing.satisfies("name__not_iequals", "michael")) - self.assertTrue(filterable_thing.satisfies("name__is", "Michael")) - self.assertFalse(filterable_thing.satisfies("name__is", "Clive")) - self.assertTrue(filterable_thing.satisfies("name__is_not", "Clive")) - self.assertFalse(filterable_thing.satisfies("name__is_not", "Michael")) - self.assertTrue(filterable_thing.satisfies("name__lt", "Noel")) - self.assertFalse(filterable_thing.satisfies("name__lt", "Harry")) - self.assertTrue(filterable_thing.satisfies("name__lte", "Michael")) - self.assertFalse(filterable_thing.satisfies("name__lte", "Harry")) - self.assertTrue(filterable_thing.satisfies("name__gt", "Clive")) - self.assertFalse(filterable_thing.satisfies("name__gt", "Noel")) - self.assertTrue(filterable_thing.satisfies("name__gte", "Michael")) - self.assertFalse(filterable_thing.satisfies("name__gte", "Noel")) - self.assertTrue(filterable_thing.satisfies("name__in_range", ("Amy", "Zoe"))) - self.assertFalse(filterable_thing.satisfies("name__in_range", ("Noel", "Peter"))) - self.assertTrue(filterable_thing.satisfies("name__not_in_range", ("Noel", "Peter"))) - self.assertFalse(filterable_thing.satisfies("name__not_in_range", ("Amy", "Zoe"))) + self.assertTrue(filterable_thing.satisfies(name__icontains="m")) + self.assertFalse(filterable_thing.satisfies(name__icontains="d")) + self.assertTrue(filterable_thing.satisfies(name__not_icontains="d")) + self.assertFalse(filterable_thing.satisfies(name__not_icontains="m")) + self.assertTrue(filterable_thing.satisfies(name__contains="M")) + self.assertFalse(filterable_thing.satisfies(name__contains="d")) + 
self.assertTrue(filterable_thing.satisfies(name__ends_with="l")) + self.assertFalse(filterable_thing.satisfies(name__ends_with="M")) + self.assertTrue(filterable_thing.satisfies(name__not_ends_with="M")) + self.assertFalse(filterable_thing.satisfies(name__not_ends_with="l")) + self.assertTrue(filterable_thing.satisfies(name__starts_with="M")) + self.assertFalse(filterable_thing.satisfies(name__starts_with="l")) + self.assertTrue(filterable_thing.satisfies(name__not_starts_with="l")) + self.assertFalse(filterable_thing.satisfies(name__not_starts_with="M")) + self.assertTrue(filterable_thing.satisfies(name__equals="Michael")) + self.assertFalse(filterable_thing.satisfies(name__equals="Clive")) + self.assertTrue(filterable_thing.satisfies(name__not_equals="Clive")) + self.assertFalse(filterable_thing.satisfies(name__not_equals="Michael")) + self.assertTrue(filterable_thing.satisfies(name__iequals="michael")) + self.assertFalse(filterable_thing.satisfies(name__iequals="James")) + self.assertTrue(filterable_thing.satisfies(name__not_iequals="James")) + self.assertFalse(filterable_thing.satisfies(name__not_iequals="michael")) + self.assertTrue(filterable_thing.satisfies(name__is="Michael")) + self.assertFalse(filterable_thing.satisfies(name__is="Clive")) + self.assertTrue(filterable_thing.satisfies(name__is_not="Clive")) + self.assertFalse(filterable_thing.satisfies(name__is_not="Michael")) + self.assertTrue(filterable_thing.satisfies(name__lt="Noel")) + self.assertFalse(filterable_thing.satisfies(name__lt="Harry")) + self.assertTrue(filterable_thing.satisfies(name__lte="Michael")) + self.assertFalse(filterable_thing.satisfies(name__lte="Harry")) + self.assertTrue(filterable_thing.satisfies(name__gt="Clive")) + self.assertFalse(filterable_thing.satisfies(name__gt="Noel")) + self.assertTrue(filterable_thing.satisfies(name__gte="Michael")) + self.assertFalse(filterable_thing.satisfies(name__gte="Noel")) + self.assertTrue(filterable_thing.satisfies(name__in_range=("Amy", 
"Zoe"))) + self.assertFalse(filterable_thing.satisfies(name__in_range=("Noel", "Peter"))) + self.assertTrue(filterable_thing.satisfies(name__not_in_range=("Noel", "Peter"))) + self.assertFalse(filterable_thing.satisfies(name__not_in_range=("Amy", "Zoe"))) def test_none_filters(self): """ Test that the None filters work as expected. """ filterable_thing = FilterableSubclass(owner=None) - self.assertTrue(filterable_thing.satisfies("owner__is", None)) - self.assertFalse(filterable_thing.satisfies("owner__is", True)) - self.assertTrue(filterable_thing.satisfies("owner__is_not", True)) - self.assertFalse(filterable_thing.satisfies("owner__is_not", None)) + self.assertTrue(filterable_thing.satisfies(owner__is=None)) + self.assertFalse(filterable_thing.satisfies(owner__is=True)) + self.assertTrue(filterable_thing.satisfies(owner__is_not=True)) + self.assertFalse(filterable_thing.satisfies(owner__is_not=None)) def test_number_filters_with_integers_and_floats(self): """ Test that the number filters work as expected for integers and floats. 
""" for age in (5, 5.2): filterable_thing = FilterableSubclass(age=age) - self.assertTrue(filterable_thing.satisfies("age__equals", age)) - self.assertFalse(filterable_thing.satisfies("age__equals", 63)) - self.assertTrue(filterable_thing.satisfies("age__not_equals", 63)) - self.assertFalse(filterable_thing.satisfies("age__not_equals", age)) - self.assertTrue(filterable_thing.satisfies("age__lt", 6)) - self.assertFalse(filterable_thing.satisfies("age__lt", 0)) - self.assertTrue(filterable_thing.satisfies("age__lte", age)) - self.assertFalse(filterable_thing.satisfies("age__lte", 0)) - self.assertTrue(filterable_thing.satisfies("age__gt", 4)) - self.assertFalse(filterable_thing.satisfies("age__gt", 63)) - self.assertTrue(filterable_thing.satisfies("age__gte", age)) - self.assertFalse(filterable_thing.satisfies("age__gte", 63)) - self.assertTrue(filterable_thing.satisfies("age__is", age)) - self.assertFalse(filterable_thing.satisfies("age__is", 63)) - self.assertTrue(filterable_thing.satisfies("age__is_not", 63)) - self.assertFalse(filterable_thing.satisfies("age__is_not", age)) - self.assertTrue(filterable_thing.satisfies("age__in_range", (0, 10))) - self.assertFalse(filterable_thing.satisfies("age__in_range", (0, 3))) - self.assertTrue(filterable_thing.satisfies("age__not_in_range", (0, 3))) - self.assertFalse(filterable_thing.satisfies("age__not_in_range", (0, 10))) + self.assertTrue(filterable_thing.satisfies(age__equals=age)) + self.assertFalse(filterable_thing.satisfies(age__equals=63)) + self.assertTrue(filterable_thing.satisfies(age__not_equals=63)) + self.assertFalse(filterable_thing.satisfies(age__not_equals=age)) + self.assertTrue(filterable_thing.satisfies(age__lt=6)) + self.assertFalse(filterable_thing.satisfies(age__lt=0)) + self.assertTrue(filterable_thing.satisfies(age__lte=age)) + self.assertFalse(filterable_thing.satisfies(age__lte=0)) + self.assertTrue(filterable_thing.satisfies(age__gt=4)) + self.assertFalse(filterable_thing.satisfies(age__gt=63)) 
+ self.assertTrue(filterable_thing.satisfies(age__gte=age)) + self.assertFalse(filterable_thing.satisfies(age__gte=63)) + self.assertTrue(filterable_thing.satisfies(age__is=age)) + self.assertFalse(filterable_thing.satisfies(age__is=63)) + self.assertTrue(filterable_thing.satisfies(age__is_not=63)) + self.assertFalse(filterable_thing.satisfies(age__is_not=age)) + self.assertTrue(filterable_thing.satisfies(age__in_range=(0, 10))) + self.assertFalse(filterable_thing.satisfies(age__in_range=(0, 3))) + self.assertTrue(filterable_thing.satisfies(age__not_in_range=(0, 3))) + self.assertFalse(filterable_thing.satisfies(age__not_in_range=(0, 10))) def test_iterable_filters(self): """ Test that the iterable filters work as expected with lists, sets, and tuples. """ for iterable in ([1, 2, 3], {1, 2, 3}, (1, 2, 3)): filterable_thing = FilterableSubclass(iterable=iterable) - self.assertTrue(filterable_thing.satisfies("iterable__contains", 1)) - self.assertFalse(filterable_thing.satisfies("iterable__contains", 5)) - self.assertTrue(filterable_thing.satisfies("iterable__not_contains", 5)) - self.assertFalse(filterable_thing.satisfies("iterable__not_contains", 1)) - self.assertTrue(filterable_thing.satisfies("iterable__is", iterable)) - self.assertFalse(filterable_thing.satisfies("iterable__is", None)) - self.assertTrue(filterable_thing.satisfies("iterable__is_not", None)) - self.assertFalse(filterable_thing.satisfies("iterable__is_not", iterable)) + self.assertTrue(filterable_thing.satisfies(iterable__contains=1)) + self.assertFalse(filterable_thing.satisfies(iterable__contains=5)) + self.assertTrue(filterable_thing.satisfies(iterable__not_contains=5)) + self.assertFalse(filterable_thing.satisfies(iterable__not_contains=1)) + self.assertTrue(filterable_thing.satisfies(iterable__is=iterable)) + self.assertFalse(filterable_thing.satisfies(iterable__is=None)) + self.assertTrue(filterable_thing.satisfies(iterable__is_not=None)) + 
self.assertFalse(filterable_thing.satisfies(iterable__is_not=iterable)) + + def test_label_set_filters(self): + """ Test the filters for Labelset. """ + filterable_thing = FilterableSubclass(iterable=LabelSet({"fred", "charlie"})) + self.assertTrue(filterable_thing.satisfies(iterable__any_label_contains="a")) + self.assertFalse(filterable_thing.satisfies(iterable__any_label_contains="z")) + self.assertTrue(filterable_thing.satisfies(iterable__not_any_label_contains="z")) + self.assertFalse(filterable_thing.satisfies(iterable__not_any_label_contains="a")) + self.assertTrue(filterable_thing.satisfies(iterable__any_label_starts_with="f")) + self.assertFalse(filterable_thing.satisfies(iterable__any_label_starts_with="e")) + self.assertTrue(filterable_thing.satisfies(iterable__any_label_ends_with="e")) + self.assertFalse(filterable_thing.satisfies(iterable__any_label_ends_with="i")) + self.assertTrue(filterable_thing.satisfies(iterable__not_any_label_starts_with="e")) + self.assertFalse(filterable_thing.satisfies(iterable__not_any_label_starts_with="f")) + self.assertTrue(filterable_thing.satisfies(iterable__not_any_label_ends_with="i")) + self.assertFalse(filterable_thing.satisfies(iterable__not_any_label_ends_with="e")) def test_datetime_filters(self): my_datetime = datetime(2000, 1, 1) filterable_thing = FilterableSubclass(timestamp=my_datetime) - self.assertTrue(filterable_thing.satisfies("timestamp__equals", my_datetime)) - self.assertFalse(filterable_thing.satisfies("timestamp__equals", datetime(2, 2, 2))) - self.assertTrue(filterable_thing.satisfies("timestamp__not_equals", datetime(2, 2, 2))) - self.assertFalse(filterable_thing.satisfies("timestamp__not_equals", my_datetime)) - self.assertTrue(filterable_thing.satisfies("timestamp__is", my_datetime)) - self.assertFalse(filterable_thing.satisfies("timestamp__is", datetime(2, 2, 2))) - self.assertTrue(filterable_thing.satisfies("timestamp__is_not", datetime(2, 2, 2))) - 
self.assertFalse(filterable_thing.satisfies("timestamp__is_not", my_datetime)) - self.assertTrue(filterable_thing.satisfies("timestamp__gt", datetime(1900, 1, 2))) - self.assertFalse(filterable_thing.satisfies("timestamp__gt", datetime(3000, 1, 2))) - self.assertTrue(filterable_thing.satisfies("timestamp__gte", my_datetime)) - self.assertFalse(filterable_thing.satisfies("timestamp__gte", datetime(3000, 1, 2))) - self.assertTrue(filterable_thing.satisfies("timestamp__lt", datetime(3000, 1, 2))) - self.assertFalse(filterable_thing.satisfies("timestamp__lt", datetime(1990, 1, 2))) - self.assertTrue(filterable_thing.satisfies("timestamp__lte", my_datetime)) - self.assertFalse(filterable_thing.satisfies("timestamp__lte", datetime(1900, 1, 2))) - self.assertTrue(filterable_thing.satisfies("timestamp__in_range", (datetime(1900, 1, 2), datetime(3000, 1, 2)))) - self.assertFalse( - filterable_thing.satisfies("timestamp__in_range", (datetime(2100, 1, 2), datetime(3000, 1, 2))) - ) + self.assertTrue(filterable_thing.satisfies(timestamp__equals=my_datetime)) + self.assertFalse(filterable_thing.satisfies(timestamp__equals=datetime(2, 2, 2))) + self.assertTrue(filterable_thing.satisfies(timestamp__not_equals=datetime(2, 2, 2))) + self.assertFalse(filterable_thing.satisfies(timestamp__not_equals=my_datetime)) + self.assertTrue(filterable_thing.satisfies(timestamp__is=my_datetime)) + self.assertFalse(filterable_thing.satisfies(timestamp__is=datetime(2, 2, 2))) + self.assertTrue(filterable_thing.satisfies(timestamp__is_not=datetime(2, 2, 2))) + self.assertFalse(filterable_thing.satisfies(timestamp__is_not=my_datetime)) + self.assertTrue(filterable_thing.satisfies(timestamp__gt=datetime(1900, 1, 2))) + self.assertFalse(filterable_thing.satisfies(timestamp__gt=datetime(3000, 1, 2))) + self.assertTrue(filterable_thing.satisfies(timestamp__gte=my_datetime)) + self.assertFalse(filterable_thing.satisfies(timestamp__gte=datetime(3000, 1, 2))) + 
self.assertTrue(filterable_thing.satisfies(timestamp__lt=datetime(3000, 1, 2))) + self.assertFalse(filterable_thing.satisfies(timestamp__lt=datetime(1990, 1, 2))) + self.assertTrue(filterable_thing.satisfies(timestamp__lte=my_datetime)) + self.assertFalse(filterable_thing.satisfies(timestamp__lte=datetime(1900, 1, 2))) + self.assertTrue(filterable_thing.satisfies(timestamp__in_range=(datetime(1900, 1, 2), datetime(3000, 1, 2)))) + self.assertFalse(filterable_thing.satisfies(timestamp__in_range=(datetime(2100, 1, 2), datetime(3000, 1, 2)))) self.assertTrue( - filterable_thing.satisfies("timestamp__not_in_range", (datetime(2100, 1, 2), datetime(3000, 1, 2))) + filterable_thing.satisfies(timestamp__not_in_range=(datetime(2100, 1, 2), datetime(3000, 1, 2))) ) self.assertFalse( - filterable_thing.satisfies("timestamp__not_in_range", (datetime(1900, 1, 2), datetime(3000, 1, 2))) + filterable_thing.satisfies(timestamp__not_in_range=(datetime(1900, 1, 2), datetime(3000, 1, 2))) ) - self.assertTrue(filterable_thing.satisfies("timestamp__year_equals", 2000)) - self.assertFalse(filterable_thing.satisfies("timestamp__year_equals", 3000)) - self.assertTrue(filterable_thing.satisfies("timestamp__year_in", {2000, 3000, 4000})) - self.assertFalse(filterable_thing.satisfies("timestamp__year_in", {3000, 4000})) - self.assertTrue(filterable_thing.satisfies("timestamp__month_equals", 1)) - self.assertFalse(filterable_thing.satisfies("timestamp__month_equals", 9)) - self.assertTrue(filterable_thing.satisfies("timestamp__month_in", {1, 2, 3})) - self.assertFalse(filterable_thing.satisfies("timestamp__month_in", {2, 3})) - self.assertTrue(filterable_thing.satisfies("timestamp__day_equals", 1)) - self.assertFalse(filterable_thing.satisfies("timestamp__day_equals", 2)) - self.assertTrue(filterable_thing.satisfies("timestamp__day_in", {1, 2, 3})) - self.assertFalse(filterable_thing.satisfies("timestamp__day_in", {2, 3})) - 
self.assertTrue(filterable_thing.satisfies("timestamp__weekday_equals", 5)) - self.assertFalse(filterable_thing.satisfies("timestamp__weekday_equals", 3)) - self.assertTrue(filterable_thing.satisfies("timestamp__weekday_in", {5, 6, 7})) - self.assertFalse(filterable_thing.satisfies("timestamp__weekday_in", {6, 7})) - self.assertTrue(filterable_thing.satisfies("timestamp__iso_weekday_equals", 6)) - self.assertFalse(filterable_thing.satisfies("timestamp__iso_weekday_equals", 4)) - self.assertTrue(filterable_thing.satisfies("timestamp__iso_weekday_in", {5, 6, 7})) - self.assertFalse(filterable_thing.satisfies("timestamp__iso_weekday_in", {7, 8})) - self.assertTrue(filterable_thing.satisfies("timestamp__time_equals", time(0, 0, 0))) - self.assertFalse(filterable_thing.satisfies("timestamp__time_equals", time(1, 2, 3))) - self.assertTrue(filterable_thing.satisfies("timestamp__hour_equals", 0)) - self.assertFalse(filterable_thing.satisfies("timestamp__hour_equals", 1)) - self.assertTrue(filterable_thing.satisfies("timestamp__hour_in", {0, 1, 2})) - self.assertFalse(filterable_thing.satisfies("timestamp__hour_in", {1, 2})) - self.assertTrue(filterable_thing.satisfies("timestamp__minute_equals", 0)) - self.assertFalse(filterable_thing.satisfies("timestamp__minute_equals", 1)) - self.assertTrue(filterable_thing.satisfies("timestamp__minute_in", {0, 1, 2})) - self.assertFalse(filterable_thing.satisfies("timestamp__minute_in", {1, 2})) - self.assertTrue(filterable_thing.satisfies("timestamp__second_equals", 0)) - self.assertFalse(filterable_thing.satisfies("timestamp__second_equals", 1)) - self.assertTrue(filterable_thing.satisfies("timestamp__second_in", {0, 1, 2})) - self.assertFalse(filterable_thing.satisfies("timestamp__second_in", {1, 2})) - self.assertTrue(filterable_thing.satisfies("timestamp__in_date_range", (date(1000, 1, 4), date(3000, 7, 10)))) - self.assertFalse(filterable_thing.satisfies("timestamp__in_date_range", (date(2000, 1, 4), date(3000, 7, 10)))) - 
self.assertTrue(filterable_thing.satisfies("timestamp__in_time_range", (time(0, 0, 0), time(13, 2, 22)))) - self.assertFalse(filterable_thing.satisfies("timestamp__in_time_range", (time(0, 0, 1), time(13, 2, 22)))) - - def test_tag_set_filters(self): - """ Test the filters for TagSet. """ - filterable_thing = FilterableSubclass(iterable=TagSet({"fred", "charlie"})) - self.assertTrue(filterable_thing.satisfies("iterable__any_tag_contains", "a")) - self.assertFalse(filterable_thing.satisfies("iterable__any_tag_contains", "z")) - self.assertTrue(filterable_thing.satisfies("iterable__not_any_tag_contains", "z")) - self.assertFalse(filterable_thing.satisfies("iterable__not_any_tag_contains", "a")) - self.assertTrue(filterable_thing.satisfies("iterable__any_tag_starts_with", "f")) - self.assertFalse(filterable_thing.satisfies("iterable__any_tag_starts_with", "e")) - self.assertTrue(filterable_thing.satisfies("iterable__any_tag_ends_with", "e")) - self.assertFalse(filterable_thing.satisfies("iterable__any_tag_ends_with", "i")) - self.assertTrue(filterable_thing.satisfies("iterable__not_any_tag_starts_with", "e")) - self.assertFalse(filterable_thing.satisfies("iterable__not_any_tag_starts_with", "f")) - self.assertTrue(filterable_thing.satisfies("iterable__not_any_tag_ends_with", "i")) - self.assertFalse(filterable_thing.satisfies("iterable__not_any_tag_ends_with", "e")) + self.assertTrue(filterable_thing.satisfies(timestamp__year_equals=2000)) + self.assertFalse(filterable_thing.satisfies(timestamp__year_equals=3000)) + self.assertTrue(filterable_thing.satisfies(timestamp__year_in={2000, 3000, 4000})) + self.assertFalse(filterable_thing.satisfies(timestamp__year_in={3000, 4000})) + self.assertTrue(filterable_thing.satisfies(timestamp__month_equals=1)) + self.assertFalse(filterable_thing.satisfies(timestamp__month_equals=9)) + self.assertTrue(filterable_thing.satisfies(timestamp__month_in={1, 2, 3})) + self.assertFalse(filterable_thing.satisfies(timestamp__month_in={2, 
3})) + self.assertTrue(filterable_thing.satisfies(timestamp__day_equals=1)) + self.assertFalse(filterable_thing.satisfies(timestamp__day_equals=2)) + self.assertTrue(filterable_thing.satisfies(timestamp__day_in={1, 2, 3})) + self.assertFalse(filterable_thing.satisfies(timestamp__day_in={2, 3})) + self.assertTrue(filterable_thing.satisfies(timestamp__weekday_equals=5)) + self.assertFalse(filterable_thing.satisfies(timestamp__weekday_equals=3)) + self.assertTrue(filterable_thing.satisfies(timestamp__weekday_in={5, 6, 7})) + self.assertFalse(filterable_thing.satisfies(timestamp__weekday_in={6, 7})) + self.assertTrue(filterable_thing.satisfies(timestamp__iso_weekday_equals=6)) + self.assertFalse(filterable_thing.satisfies(timestamp__iso_weekday_equals=4)) + self.assertTrue(filterable_thing.satisfies(timestamp__iso_weekday_in={5, 6, 7})) + self.assertFalse(filterable_thing.satisfies(timestamp__iso_weekday_in={7, 8})) + self.assertTrue(filterable_thing.satisfies(timestamp__time_equals=time(0, 0, 0))) + self.assertFalse(filterable_thing.satisfies(timestamp__time_equals=time(1, 2, 3))) + self.assertTrue(filterable_thing.satisfies(timestamp__hour_equals=0)) + self.assertFalse(filterable_thing.satisfies(timestamp__hour_equals=1)) + self.assertTrue(filterable_thing.satisfies(timestamp__hour_in={0, 1, 2})) + self.assertFalse(filterable_thing.satisfies(timestamp__hour_in={1, 2})) + self.assertTrue(filterable_thing.satisfies(timestamp__minute_equals=0)) + self.assertFalse(filterable_thing.satisfies(timestamp__minute_equals=1)) + self.assertTrue(filterable_thing.satisfies(timestamp__minute_in={0, 1, 2})) + self.assertFalse(filterable_thing.satisfies(timestamp__minute_in={1, 2})) + self.assertTrue(filterable_thing.satisfies(timestamp__second_equals=0)) + self.assertFalse(filterable_thing.satisfies(timestamp__second_equals=1)) + self.assertTrue(filterable_thing.satisfies(timestamp__second_in={0, 1, 2})) + self.assertFalse(filterable_thing.satisfies(timestamp__second_in={1, 2})) + 
self.assertTrue(filterable_thing.satisfies(timestamp__in_date_range=(date(1000, 1, 4), date(3000, 7, 10)))) + self.assertFalse(filterable_thing.satisfies(timestamp__in_date_range=(date(2000, 1, 4), date(3000, 7, 10)))) + self.assertTrue(filterable_thing.satisfies(timestamp__in_time_range=(time(0, 0, 0), time(13, 2, 22)))) + self.assertFalse(filterable_thing.satisfies(timestamp__in_time_range=(time(0, 0, 1), time(13, 2, 22)))) def test_filtering_different_attributes_on_same_instance(self): """ Ensure all filterable attributes on an instance can be checked for filter satisfaction. """ filterable_thing = FilterableSubclass(name="Fred", is_alive=True, iterable={1, 2, 3}, age=5.2, owner=None) - self.assertTrue(filterable_thing.satisfies("name__icontains", "f")) - self.assertTrue(filterable_thing.satisfies("name__not_icontains", "j")) - self.assertFalse(filterable_thing.satisfies("is_alive__is", False)) - self.assertTrue(filterable_thing.satisfies("iterable__contains", 3)) - self.assertTrue(filterable_thing.satisfies("age__equals", 5.2)) - self.assertTrue(filterable_thing.satisfies("age__not_equals", 5)) - self.assertTrue(filterable_thing.satisfies("owner__is", None)) + self.assertTrue(filterable_thing.satisfies(name__icontains="f")) + self.assertTrue(filterable_thing.satisfies(name__not_icontains="j")) + self.assertFalse(filterable_thing.satisfies(is_alive__is=False)) + self.assertTrue(filterable_thing.satisfies(iterable__contains=3)) + self.assertTrue(filterable_thing.satisfies(age__equals=5.2)) + self.assertTrue(filterable_thing.satisfies(age__not_equals=5)) + self.assertTrue(filterable_thing.satisfies(owner__is=None)) + + def test_filtering_with_nested_attributes(self): + """Test that Filterable subclasses can be checked for satisfaction of a filter of nested attributes.""" + inner_mock = Mock(b=3) + outer_mock = Mock(a=inner_mock) + filterable_thing = FilterableSubclass(name=outer_mock) + self.assertTrue(filterable_thing.satisfies(name__a__b__equals=3)) + + def 
test_filtering_with_nested_attributes_ending_in_dictionary_key(self): + """Test that Filterable subclasses can be checked for satisfaction of a filter of nested attributes that ends + with a dictionary key. + """ + filterable_thing = FilterableSubclass(name={"first": "Joe", "last": "Bloggs"}) + self.assertTrue(filterable_thing.satisfies(name__first__equals="Joe")) + self.assertTrue(filterable_thing.satisfies(name__last__equals="Bloggs")) + + def test_tag_dict_filters(self): + """Test some filters that apply to a TagDict. These should behave just the same as for a dictionary.""" + filterable_thing = FilterableSubclass(tags=TagDict({"first": "Joe", "middle": "Horatio", "last": "Bloggs"})) + self.assertTrue(filterable_thing.satisfies(tags__last__lt="Kevin")) + self.assertFalse(filterable_thing.satisfies(tags__middle__is="Boratio")) diff --git a/tests/mixins/test_labellable.py b/tests/mixins/test_labellable.py new file mode 100644 index 000000000..0a6260051 --- /dev/null +++ b/tests/mixins/test_labellable.py @@ -0,0 +1,81 @@ +from octue import exceptions +from octue.mixins import Labelable +from octue.resources.label import Label, LabelSet +from ..base import BaseTestCase + + +class MyLabelable(Labelable): + pass + + +class LabelableTestCase(BaseTestCase): + def test_instantiates(self): + """Ensures the class instantiates without arguments""" + Labelable() + + def test_instantiates_with_labels(self): + """Ensure a Labelable can be instantiated with labels given as a whitespace-delimited string or None.""" + labelable = MyLabelable(labels="") + self.assertEqual(len(labelable.labels), 0) + + labelable = MyLabelable(labels=None) + self.assertEqual(len(labelable.labels), 0) + + labelable = MyLabelable(labels="a b c") + self.assertEqual(labelable.labels, {Label("a"), Label("b"), Label("c")}) + + def test_instantiates_with_label_set(self): + """Ensure a Labelable can be instantiated with the LabelSet of another instance.""" + labelable_1 = MyLabelable(labels="") + 
self.assertIsInstance(labelable_1.labels, LabelSet) + labelable_2 = MyLabelable(labels=labelable_1.labels) + self.assertFalse(labelable_1 is labelable_2) + + def test_fails_to_instantiates_with_non_iterable(self): + """Ensure instantiating with a non-iterable raises InvalidLabelException.""" + + class NoIter: + pass + + with self.assertRaises(exceptions.InvalidLabelException) as error: + MyLabelable(labels=NoIter()) + + self.assertIn( + "Labels must be expressed as a whitespace-delimited string or an iterable of strings", + error.exception.args[0], + ) + + def test_reset_labels(self): + """Ensure labels can be overwritten with new ones.""" + labelable = MyLabelable(labels="a b") + labelable.labels = "b c" + self.assertEqual(labelable.labels, {Label("b"), Label("c")}) + + def test_valid_labels(self): + """Ensures valid labels do not raise an error""" + labelable = MyLabelable() + labelable.add_labels("a-valid-label") + labelable.add_labels("label") + labelable.add_labels("a1829tag") + labelable.add_labels("1829") + self.assertEqual( + labelable.labels, + { + Label("a-valid-label"), + Label("label"), + Label("a1829tag"), + Label("1829"), + }, + ) + + def test_mixture_valid_invalid(self): + """Ensures that adding a variety of labels, some of which are invalid, doesn't partially add them to the object""" + labelable = MyLabelable() + labelable.add_labels("first-valid-should-be-added") + try: + labelable.add_labels("second-valid-should-not-be-added-because", "-the-third-is-invalid:") + + except exceptions.InvalidLabelException: + pass + + self.assertEqual({Label("first-valid-should-be-added")}, labelable.labels) diff --git a/tests/mixins/test_serialisable.py b/tests/mixins/test_serialisable.py index a8ed723c9..96d29ec89 100644 --- a/tests/mixins/test_serialisable.py +++ b/tests/mixins/test_serialisable.py @@ -25,13 +25,15 @@ def test_instantiates_with_no_args(self): """Ensures the class instantiates without
arguments""" Serialisable() - def test_raises_attribute_error_with_missing_logger(self): - """Ensures class instantiates with a string uuid""" - resource = Serialisable() - with self.assertRaises(AttributeError) as error: - resource.serialise() + def test_logger_attribute_excluded_even_when_not_explicitly_excluded(self): + """Test that a Serialisable with "logger" not explicitly included in the `_EXCLUDE_SERIALISE_FIELDS` class + variable still excludes the `logger` attribute. + """ - self.assertIn("'Serialisable' object has no attribute 'logger'", error.exception.args[0]) + class SerialisableWithLoggerNotExplicitlyExcluded(Serialisable): + _EXCLUDE_SERIALISE_FIELDS = [] + + self.assertTrue("logger" in SerialisableWithLoggerNotExplicitlyExcluded()._EXCLUDE_SERIALISE_FIELDS) def test_returns_primitive_without_logger_or_protected_fields(self): """Ensures class instantiates with a UUID()""" diff --git a/tests/mixins/test_taggable.py b/tests/mixins/test_taggable.py index 597b17757..48b23f4a7 100644 --- a/tests/mixins/test_taggable.py +++ b/tests/mixins/test_taggable.py @@ -1,86 +1,78 @@ from octue import exceptions -from octue.mixins import MixinBase, Taggable -from octue.resources.tag import Tag, TagSet +from octue.mixins import Taggable +from octue.resources.tag import TagDict from ..base import BaseTestCase -class MyTaggable(Taggable, MixinBase): +class MyTaggable(Taggable): pass class TaggableTestCase(BaseTestCase): def test_instantiates(self): - """Ensures the class instantiates without arguments""" - Taggable() + """Ensures the class instantiates without arguments.""" + taggable = Taggable() + self.assertEqual(taggable.tags, {}) - def test_instantiates_with_tags(self): - """Ensures datafile inherits correctly from the Taggable class and passes arguments through""" - taggable = MyTaggable(tags="") - self.assertEqual(len(taggable.tags), 0) - - taggable = MyTaggable(tags=None) - self.assertEqual(len(taggable.tags), 0) - - taggable = MyTaggable(tags="a b c") - 
self.assertEqual(set(taggable.tags), {Tag("a"), Tag("b"), Tag("c")}) - - def test_instantiates_with_tag_set(self): - """Ensures datafile inherits correctly from the Taggable class and passes arguments through""" - taggable_1 = MyTaggable(tags="") - self.assertIsInstance(taggable_1.tags, TagSet) - taggable_2 = MyTaggable(tags=taggable_1.tags) - self.assertFalse(taggable_1 is taggable_2) + def test_instantiating_with_no_tags(self): + """Test that instantiating a Taggable with no tags results in an empty TagDict on the tags attribute.""" + self.assertEqual(MyTaggable().tags, TagDict()) def test_fails_to_instantiates_with_non_iterable(self): - """Ensures datafile inherits correctly from the Taggable class and passes arguments through""" + """Test that instantiation with a non-iterable fails.""" class NoIter: pass - with self.assertRaises(exceptions.InvalidTagException) as error: + with self.assertRaises(TypeError): MyTaggable(tags=NoIter()) - self.assertIn( - "Tags must be expressed as a whitespace-delimited string or an iterable of strings", error.exception.args[0] - ) + def test_instantiates_with_dict(self): + """Test instantiation with a dictionary works.""" + tags = {"height": 9, "width": 8.7, "depth": 100} + taggable = MyTaggable(tags=tags) + self.assertEqual(taggable.tags, tags) + + def test_instantiates_with_tag_dict(self): + """Test instantiation with a TagDict.""" + taggable_1 = MyTaggable(tags={"a": 2}) + self.assertIsInstance(taggable_1.tags, TagDict) + taggable_2 = MyTaggable(tags=taggable_1.tags) + self.assertFalse(taggable_1 is taggable_2) - def test_reset_tags(self): - """Ensures datafile inherits correctly from the Taggable class and passes arguments through""" - taggable = MyTaggable(tags="a b") - taggable.tags = "b c" - self.assertEqual(set(taggable.tags), {Tag("b"), Tag("c")}) + def test_setting_tags_overwrites_previous_tags(self): + """Ensure tags can be overwritten with new ones.""" + taggable = MyTaggable(tags={"a": 1, "b": 2}) + taggable.tags = 
{"c": 3, "d": 4} + self.assertEqual(taggable.tags, {"c": 3, "d": 4}) - def test_valid_tags(self): - """Ensures valid tags do not raise an error""" + def test_add_valid_tags(self): + """Ensures adding valid tags works.""" taggable = MyTaggable() - taggable.add_tags("a-valid-tag") - taggable.add_tags("a:tag") - taggable.add_tags("a:-tag") # <--- yes, this is valid deliberately as it allows people to do negation - taggable.add_tags("a1829tag") - taggable.add_tags("1829") - taggable.add_tags("number:1829") - taggable.add_tags("multiple:discriminators:used") + + taggable.add_tags({"a_valid_tag": "blah"}) + taggable.add_tags({"a1829tag": "blah"}) + taggable.add_tags({"1829": "blah", "number_1829": "blah"}) # Add multiple tags at once. + self.assertEqual( - set(taggable.tags), - { - Tag("a-valid-tag"), - Tag("a:tag"), - Tag("a:-tag"), - Tag("a1829tag"), - Tag("1829"), - Tag("number:1829"), - Tag("multiple:discriminators:used"), - }, + taggable.tags, + {"a_valid_tag": "blah", "a1829tag": "blah", "1829": "blah", "number_1829": "blah"}, ) - def test_mixture_valid_invalid(self): - """Ensures that adding a variety of tags, some of which are invalid, doesn't partially add them to the object""" + def test_add_tags_via_kwargs(self): + """Test tags can be added via kwargs.""" taggable = MyTaggable() - taggable.add_tags("first-valid-should-be-added") - try: - taggable.add_tags("second-valid-should-not-be-added-because", "-the-third-is-invalid:") + taggable.add_tags(hello="blib", hi="glib") + self.assertEqual(taggable.tags, {"hello": "blib", "hi": "glib"}) - except exceptions.InvalidTagException: - pass + def test_adding_mixture_of_valid_and_invalid_tags_fails_completely(self): + """Ensure that adding a variety of tags, some of which are invalid, doesn't partially add the set including the + invalid tags to the object. 
+ """ + taggable = MyTaggable() + taggable.add_tags({"first_valid_should_be_added": "hello"}) + + with self.assertRaises(exceptions.InvalidTagException): + taggable.add_tags({"second_valid_should_not_be_added_because": 1, "_the_third_is_invalid:": 2}) - self.assertEqual({Tag("first-valid-should-be-added")}, set(taggable.tags)) + self.assertEqual(taggable.tags, {"first_valid_should_be_added": "hello"}) diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index 8426f156f..8ea8d135a 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -10,7 +10,8 @@ from octue.cloud.storage import GoogleCloudStorageClient from octue.mixins import MixinBase, Pathable from octue.resources.datafile import TEMPORARY_LOCAL_FILE_CACHE, Datafile -from octue.resources.tag import TagSet +from octue.resources.label import LabelSet +from octue.resources.tag import TagDict from tests import TEST_BUCKET_NAME, TEST_PROJECT_NAME from ..base import BaseTestCase @@ -50,8 +51,7 @@ def create_datafile_in_cloud( with tempfile.NamedTemporaryFile("w", delete=False) as temporary_file: temporary_file.write(contents) - timestamp = kwargs.pop("timestamp", None) - datafile = Datafile(path=temporary_file.name, timestamp=timestamp, **kwargs) + datafile = Datafile(path=temporary_file.name, **kwargs) datafile.to_cloud(project_name=project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket) return datafile, project_name, bucket_name, path_in_bucket, contents @@ -151,6 +151,7 @@ def test_serialisable(self): "timestamp", "sequence", "tags", + "labels", "_cloud_metadata", } @@ -190,7 +191,8 @@ def test_from_cloud_with_bare_file(self): self.assertEqual(datafile.path, f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}") self.assertEqual(datafile.cluster, 0) self.assertEqual(datafile.sequence, None) - self.assertEqual(datafile.tags, TagSet()) + self.assertEqual(datafile.tags, TagDict()) + self.assertEqual(datafile.labels, LabelSet()) 
self.assertTrue(isinstance(datafile.size_bytes, int)) self.assertTrue(isinstance(datafile._last_modified, float)) self.assertTrue(isinstance(datafile.hash_value, str)) @@ -201,7 +203,8 @@ def test_from_cloud_with_datafile(self): timestamp=datetime.now(tz=timezone.utc), cluster=0, sequence=1, - tags={"blah:shah:nah", "blib", "glib"}, + labels={"blah-shah-nah", "blib", "glib"}, + tags={"good": True, "how_good": "very"}, ) gs_path = f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}" downloaded_datafile = Datafile.from_cloud(project_name, cloud_path=gs_path) @@ -213,6 +216,7 @@ def test_from_cloud_with_datafile(self): self.assertEqual(downloaded_datafile.cluster, datafile.cluster) self.assertEqual(downloaded_datafile.sequence, datafile.sequence) self.assertEqual(downloaded_datafile.tags, datafile.tags) + self.assertEqual(downloaded_datafile.labels, datafile.labels) self.assertEqual(downloaded_datafile.size_bytes, datafile.size_bytes) self.assertTrue(isinstance(downloaded_datafile._last_modified, float)) @@ -513,14 +517,14 @@ def test_from_datafile_as_context_manager(self): datafile, f, ): - datafile.add_tags("blue") + datafile.add_labels("blue") f.write(new_contents) # Check that the cloud metadata has been updated. re_downloaded_datafile = Datafile.from_cloud( project_name, bucket_name=bucket_name, datafile_path=path_in_bucket ) - self.assertTrue("blue" in re_downloaded_datafile.tags) + self.assertTrue("blue" in re_downloaded_datafile.labels) # The file cache must be cleared so the modified cloud file is downloaded. 
re_downloaded_datafile.clear_from_file_cache() @@ -535,9 +539,18 @@ def test_metadata(self): self.assertEqual( datafile.metadata().keys(), - {"octue__id", "octue__timestamp", "octue__cluster", "octue__sequence", "octue__tags"}, + { + "octue__id", + "octue__timestamp", + "octue__cluster", + "octue__sequence", + "octue__tags", + "octue__labels", + "octue__sdk_version", + }, ) self.assertEqual( - datafile.metadata(use_octue_namespace=False).keys(), {"id", "timestamp", "cluster", "sequence", "tags"} + datafile.metadata(use_octue_namespace=False).keys(), + {"id", "timestamp", "cluster", "sequence", "tags", "labels", "sdk_version"}, ) diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index c7fffdabf..6d153f53f 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -21,7 +21,7 @@ def test_instantiates_with_no_args(self): def test_instantiates_with_kwargs(self): """Ensures that keyword arguments can be used to construct the dataset initially""" files = [Datafile(path="path-within-dataset/a_test_file.csv")] - resource = Dataset(files=files, tags="one two") + resource = Dataset(files=files, labels="one two") self.assertEqual(len(resource.files), 1) def test_len(self): @@ -55,7 +55,7 @@ def test_add_single_file_to_empty_dataset(self): def test_add_single_file_to_existing_dataset(self): """Ensures that when a dataset is not empty, it can be added to""" files = [Datafile(path="path-within-dataset/a_test_file.csv")] - resource = Dataset(files=files, tags="one two") + resource = Dataset(files=files, labels="one two", tags={"a": "b"}) resource.add(Datafile(path="path-within-dataset/a_test_file.csv")) self.assertEqual(len(resource.files), 2) @@ -88,7 +88,7 @@ class NotADatafile: self.assertIn("must be of class Datafile to add it to a Dataset", e.exception.args[0]) def test_filter_catches_single_underscore_mistake(self): - """Ensures that if the field name is a single underscore, that gets caught as an error""" + """Ensure 
that if the filter name contains only single underscores, an error is raised.""" resource = Dataset( files=[ Datafile(path="path-within-dataset/A_Test_file.csv"), @@ -97,13 +97,9 @@ def test_filter_catches_single_underscore_mistake(self): ) with self.assertRaises(exceptions.InvalidInputException) as e: - resource.files.filter("name_icontains", filter_value="Test") + resource.files.filter(name_icontains="Test") - self.assertIn( - "Invalid filter name 'name_icontains'. Filter names should be in the form " - "'__'.", - e.exception.args[0], - ) + self.assertIn("Invalid filter name 'name_icontains'. Filter names should be in the form", e.exception.args[0]) def test_filter_name_contains(self): """Ensures that filter works with the name_contains and name_icontains lookups""" @@ -113,15 +109,15 @@ def test_filter_name_contains(self): Datafile(path="path-within-dataset/a_test_file.txt"), ] ) - files = resource.files.filter("name__icontains", filter_value="Test") + files = resource.files.filter(name__icontains="Test") self.assertEqual(2, len(files)) - files = resource.files.filter("name__icontains", filter_value="A") + files = resource.files.filter(name__icontains="A") self.assertEqual(2, len(files)) - files = resource.files.filter("name__contains", filter_value="Test") + files = resource.files.filter(name__contains="Test") self.assertEqual(1, len(files)) - files = resource.files.filter("name__icontains", filter_value="test") + files = resource.files.filter(name__icontains="test") self.assertEqual(2, len(files)) - files = resource.files.filter("name__icontains", filter_value="file") + files = resource.files.filter(name__icontains="file") self.assertEqual(2, len(files)) def test_filter_name_with(self): @@ -132,70 +128,70 @@ def test_filter_name_with(self): Datafile(path="path-within-dataset/a_your_file.csv"), ] ) - files = resource.files.filter("name__starts_with", filter_value="a_my") + files = resource.files.filter(name__starts_with="a_my") self.assertEqual(1, len(files)) - 
files = resource.files.filter("name__starts_with", filter_value="a_your") + files = resource.files.filter(name__starts_with="a_your") self.assertEqual(1, len(files)) - files = resource.files.filter("name__starts_with", filter_value="a_") + files = resource.files.filter(name__starts_with="a_") self.assertEqual(2, len(files)) - files = resource.files.filter("name__starts_with", filter_value="b") + files = resource.files.filter(name__starts_with="b") self.assertEqual(0, len(files)) - files = resource.files.filter("name__ends_with", filter_value="_file.csv") + files = resource.files.filter(name__ends_with="_file.csv") self.assertEqual(2, len(files)) - files = resource.files.filter("name__ends_with", filter_value="r_file.csv") + files = resource.files.filter(name__ends_with="r_file.csv") self.assertEqual(1, len(files)) - files = resource.files.filter("name__ends_with", filter_value="y_file.csv") + files = resource.files.filter(name__ends_with="y_file.csv") self.assertEqual(1, len(files)) - files = resource.files.filter("name__ends_with", filter_value="other.csv") + files = resource.files.filter(name__ends_with="other.csv") self.assertEqual(0, len(files)) - def test_filter_by_tag(self): - """Ensures that filter works with tag lookups""" + def test_filter_by_label(self): + """Ensures that filter works with label lookups""" resource = Dataset( files=[ - Datafile(path="path-within-dataset/a_my_file.csv", tags="one a:2 b:3 all"), - Datafile(path="path-within-dataset/a_your_file.csv", tags="two a:2 b:3 all"), - Datafile(path="path-within-dataset/a_your_file.csv", tags="three all"), + Datafile(path="path-within-dataset/a_my_file.csv", labels="one a2 b3 all"), + Datafile(path="path-within-dataset/a_your_file.csv", labels="two a2 b3 all"), + Datafile(path="path-within-dataset/a_your_file.csv", labels="three all"), ] ) - files = resource.files.filter("tags__contains", filter_value="a") + files = resource.files.filter(labels__contains="a") self.assertEqual(0, len(files)) - files = 
resource.files.filter("tags__contains", filter_value="one") + files = resource.files.filter(labels__contains="one") self.assertEqual(1, len(files)) - files = resource.files.filter("tags__contains", filter_value="all") + files = resource.files.filter(labels__contains="all") self.assertEqual(3, len(files)) - files = resource.files.filter("tags__any_tag_starts_with", filter_value="b") + files = resource.files.filter(labels__any_label_starts_with="b") self.assertEqual(2, len(files)) - files = resource.files.filter("tags__any_tag_ends_with", filter_value="3") + files = resource.files.filter(labels__any_label_ends_with="3") self.assertEqual(2, len(files)) - # files = resource.files.filter("tags__contains", filter_value="hre") + # files = resource.files.filter(labels__contains="hre") # self.assertEqual(1, len(files)) - def test_get_file_by_tag(self): - """Ensures that get_files works with tag lookups""" + def test_get_file_by_label(self): + """Ensure files can be accessed by label from the dataset.""" files = [ - Datafile(path="path-within-dataset/a_my_file.csv", tags="one a:2 b:3 all"), - Datafile(path="path-within-dataset/a_your_file.csv", tags="two a:2 b:3 all"), - Datafile(path="path-within-dataset/a_your_file.csv", tags="three all"), + Datafile(path="path-within-dataset/a_my_file.csv", labels="one a b3 all"), + Datafile(path="path-within-dataset/a_your_file.csv", labels="two a2 b3 all"), + Datafile(path="path-within-dataset/a_your_file.csv", labels="three all"), ] resource = Dataset(files=files) # Check working for single result - self.assertIs(resource.get_file_by_tag("three"), files[2]) + self.assertIs(resource.get_file_by_label("three"), files[2]) # Check raises for too many results with self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: - resource.get_file_by_tag("all") + resource.get_file_by_label("all") self.assertIn("More than one result found", e.exception.args[0]) # Check raises for no result with 
self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: - resource.get_file_by_tag("billyjeanisnotmylover") + resource.get_file_by_label("billyjeanisnotmylover") - self.assertIn("No files found with this tag", e.exception.args[0]) + self.assertIn("No results found for filters {'labels__contains': 'billyjeanisnotmylover'}", e.exception.args[0]) def test_filter_by_sequence_not_none(self): """Ensures that filter works with sequence lookups""" @@ -206,7 +202,7 @@ def test_filter_by_sequence_not_none(self): Datafile(path="path-within-dataset/a_your_file.csv", sequence=None), ] ) - files = resource.files.filter("sequence__is_not", None) + files = resource.files.filter(sequence__is_not=None) self.assertEqual(2, len(files)) def test_get_file_sequence(self): @@ -217,7 +213,7 @@ def test_get_file_sequence(self): Datafile(path="path-within-dataset/a_your_file.csv", sequence=None), ] - got_files = Dataset(files=files).get_file_sequence("name__ends_with", filter_value=".csv", strict=True) + got_files = Dataset(files=files).get_file_sequence(name__ends_with=".csv", strict=True) self.assertEqual(got_files, files[:2]) def test_get_broken_file_sequence(self): @@ -230,7 +226,7 @@ def test_get_broken_file_sequence(self): ] ) with self.assertRaises(exceptions.BrokenSequenceException): - resource.get_file_sequence("name__ends_with", filter_value=".csv", strict=True) + resource.get_file_sequence(name__ends_with=".csv", strict=True) def test_filter_name_filters_include_extension(self): """Ensures that filters applied to the name will catch terms in the extension""" @@ -239,9 +235,7 @@ def test_filter_name_filters_include_extension(self): Datafile(path="path-within-dataset/a_test_file.txt"), ] - self.assertEqual( - Dataset(files=files).files.filter("name__icontains", filter_value="txt"), FilterSet({files[1]}) - ) + self.assertEqual(Dataset(files=files).files.filter(name__icontains="txt"), FilterSet({files[1]})) def test_filter_name_filters_exclude_path(self): """Ensures 
that filters applied to the name will not catch terms in the extension""" @@ -251,7 +245,7 @@ def test_filter_name_filters_exclude_path(self): Datafile(path="second-path-within-dataset/a_test_file.txt"), ] ) - files = resource.files.filter("name__icontains", filter_value="second") + files = resource.files.filter(name__icontains="second") self.assertEqual(0, len(files)) def test_using_get_files_raises_deprecation_warning(self): @@ -264,7 +258,7 @@ def test_using_get_files_raises_deprecation_warning(self): ) with warnings.catch_warnings(record=True) as warning: - filtered_files = resource.get_files("name__icontains", filter_value="second") + filtered_files = resource.get_files(name__icontains="second") self.assertEqual(len(warning), 1) self.assertTrue(issubclass(warning[-1].category, DeprecationWarning)) self.assertIn("deprecated", str(warning[-1].message)) @@ -316,9 +310,10 @@ def test_from_cloud(self): dataset = Dataset( name="dataset_0", files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), + Datafile(path=file_0_path, sequence=0, labels={"hello"}, tags={"a": "b"}), + Datafile(path=file_1_path, sequence=1, labels={"goodbye"}, tags={"a": "b"}), }, + tags={"a": "b", "c": 1}, ) project_name = "test-project" @@ -347,6 +342,7 @@ def test_from_cloud(self): self.assertEqual(persisted_dataset.name, dataset.name) self.assertEqual(persisted_dataset.hash_value, dataset.hash_value) self.assertEqual(persisted_dataset.tags, dataset.tags) + self.assertEqual(persisted_dataset.labels, dataset.labels) self.assertEqual({file.name for file in persisted_dataset.files}, {file.name for file in dataset.files}) for file in persisted_dataset: @@ -368,9 +364,10 @@ def test_to_cloud(self): dataset = Dataset( files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), - } + Datafile(path=file_0_path, sequence=0, labels={"hello"}), + 
Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), + }, + tags={"a": "b", "c": 1}, ) project_name = "test-project" @@ -413,3 +410,5 @@ def test_to_cloud(self): "gs://octue-test-bucket/my_datasets/octue-sdk-python/file_1.txt", ], ) + + self.assertEqual(persisted_dataset["tags"], dataset.tags.serialise()) diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index a0059e8e7..6c8f2a3db 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -1,43 +1,259 @@ from octue import exceptions from octue.mixins import Filterable -from octue.resources.filter_containers import FilterList, FilterSet +from octue.resources.filter_containers import FilterDict, FilterList, FilterSet from tests.base import BaseTestCase -class Cat(Filterable): - def __init__(self, name=None, previous_names=None, age=None): - self.name = name - self.previous_names = previous_names - self.age = age +class FilterableThing(Filterable): + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + def __eq__(self, other): + return vars(self) == vars(other) + + def __hash__(self): + return hash(str(vars(self).items())) class TestFilterSet(BaseTestCase): + def test_error_raised_if_any_items_are_not_filterable(self): + """Test that an error is raised if any items in the FilterSet are not of type Filterable.""" + filter_set = FilterSet([1, 2, 3]) + + with self.assertRaises(TypeError): + filter_set.filter(a__equals=2) + + def test_filter_with_filterables_of_differing_attributes(self): + """Test filtering with filterables of differing attributes ignores the filterables lacking the filtered-for + attribute. 
+ """ + filterables = {FilterableThing(a=3), FilterableThing(b=90), FilterableThing(a=77)} + filter_set = FilterSet(filterables) + + self.assertEqual(filter_set.filter(a__gt=2), {FilterableThing(a=3), FilterableThing(a=77)}) + self.assertEqual(filter_set.filter(b__equals=90), {FilterableThing(b=90)}) + self.assertEqual(filter_set.filter(b__equals=0), set()) + + def test_filter_with_filterables_of_differing_attributes_fails_if_setting_enabled(self): + """Test filtering with filterables of differing attributes raises an error if any filterables lack the + filtered-for attribute when `ignore_items_without_attribute` is False. + """ + filter_set = FilterSet({FilterableThing(a=3), FilterableThing(b=90), FilterableThing(a=77)}) + + for kwarg in {"a__gt": 2}, {"b__equals": 90}, {"b__equals": 0}: + with self.assertRaises(AttributeError): + filter_set.filter(**kwarg, ignore_items_without_attribute=False) + + def test_filtering_with_multiple_filters(self): + """Test that multiple filters can be specified in FilterSet.filter at once.""" + filterables = {FilterableThing(a=3, b=2), FilterableThing(a=3, b=99), FilterableThing(a=77)} + self.assertEqual(FilterSet(filterables).filter(a__equals=3, b__gt=80), {FilterableThing(a=3, b=99)}) + + def test_one_fails_if_no_results(self): + """Test that the `one` method raises an error if there are no results.""" + filterables = FilterSet({FilterableThing(a=3, b=2), FilterableThing(a=3, b=99), FilterableThing(a=77)}) + + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException): + filterables.one(a__equals=10) + + def test_one_fails_if_more_than_one_result(self): + """Test that the `one` method raises an error if there is more than one result.""" + filterables = FilterSet({FilterableThing(a=3, b=2), FilterableThing(a=3, b=99), FilterableThing(a=77)}) + + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException): + filterables.one(a__equals=3) + + def test_one(self): + """Test that the `one` method works and returns 
one result.""" + filterables = FilterSet({FilterableThing(a=3, b=2), FilterableThing(a=3, b=99), FilterableThing(a=77)}) + self.assertEqual(filterables.one(a__equals=77), FilterableThing(a=77)) + def test_ordering_by_a_non_existent_attribute(self): """ Ensure an error is raised if ordering is attempted by a non-existent attribute. """ - filter_set = FilterSet([Cat(age=5), Cat(age=4), Cat(age=3)]) + filter_set = FilterSet([FilterableThing(age=5), FilterableThing(age=4), FilterableThing(age=3)]) with self.assertRaises(exceptions.InvalidInputException): filter_set.order_by("dog-likeness") def test_order_by_with_string_attribute(self): """ Test ordering a FilterSet by a string attribute returns an appropriately ordered FilterList. """ - cats = [Cat(name="Zorg"), Cat(name="James"), Cat(name="Princess Carolyn")] + cats = [FilterableThing(name="Zorg"), FilterableThing(name="James"), FilterableThing(name="Princess Carolyn")] sorted_filter_set = FilterSet(cats).order_by("name") self.assertEqual(sorted_filter_set, FilterList([cats[1], cats[2], cats[0]])) def test_order_by_with_int_attribute(self): """ Test ordering a FilterSet by an integer attribute returns an appropriately ordered FilterList. """ - cats = [Cat(age=5), Cat(age=4), Cat(age=3)] + cats = [FilterableThing(age=5), FilterableThing(age=4), FilterableThing(age=3)] sorted_filter_set = FilterSet(cats).order_by("age") self.assertEqual(sorted_filter_set, FilterList(reversed(cats))) def test_order_by_list_attribute(self): """ Test that ordering by list attributes orders by the size of the list. 
""" - cats = [Cat(previous_names=["Scatta", "Catta"]), Cat(previous_names=["Kitty"]), Cat(previous_names=[])] + cats = [ + FilterableThing(previous_names=["Scatta", "Catta"]), + FilterableThing(previous_names=["Kitty"]), + FilterableThing(previous_names=[]), + ] sorted_filter_set = FilterSet(cats).order_by("previous_names") self.assertEqual(sorted_filter_set, FilterList(reversed(cats))) def test_order_by_in_reverse(self): """ Test ordering in reverse works correctly. """ - cats = [Cat(age=5), Cat(age=3), Cat(age=4)] + cats = [FilterableThing(age=5), FilterableThing(age=3), FilterableThing(age=4)] sorted_filter_set = FilterSet(cats).order_by("age", reverse=True) self.assertEqual(sorted_filter_set, FilterList([cats[0], cats[2], cats[1]])) + + +class TestFilterDict(BaseTestCase): + ANIMALS = FilterDict( + { + "cat": FilterableThing(name="Princess Carolyn", age=3, size="small", previous_names=["scatta", "catta"]), + "dog": FilterableThing(name="Spot", age=90, size="big", previous_names=[]), + "another_dog": FilterableThing( + name="Ranger", age=91, size="small", previous_names=["doggo", "oggo", "loggo"] + ), + } + ) + + def test_instantiate(self): + """Test that a FilterDict can be instantiated like a dictionary.""" + filter_dict = FilterDict(a=1, b=3) + self.assertEqual(filter_dict, {"a": 1, "b": 3}) + + filter_dict = FilterDict({"a": 1, "b": 3}) + self.assertEqual(filter_dict, {"a": 1, "b": 3}) + + filter_dict = FilterDict(**{"a": 1, "b": 3}) + self.assertEqual(filter_dict, {"a": 1, "b": 3}) + + def test_error_raised_when_filtering_if_any_items_are_not_filterable(self): + """Test that an error is raised if any values in the FilterDict are not of type Filterable.""" + filter_dict = FilterDict({"a": 1, "b": 2, "c": 3}) + + with self.assertRaises(TypeError): + filter_dict.filter(my_attribute__equals=2) + + def test_filter(self): + """Test that a FilterDict can be filtered on its values when they are all Filterables.""" + filterables = { + "first-filterable": 
FilterableThing(my_value=3), + "second-filterable": FilterableThing(my_value=90), + } + + filter_dict = FilterDict(filterables) + self.assertEqual(filter_dict.filter(my_value__equals=90).keys(), {"second-filterable"}) + self.assertEqual(filter_dict.filter(my_value__gt=2), filterables) + + def test_filter_with_filterables_of_differing_attributes(self): + """Test filtering with filterables of differing attributes ignores the filterables lacking the filtered-for + attribute. + """ + filterables = { + "first-filterable": FilterableThing(a=3), + "second-filterable": FilterableThing(b=90), + "third-filterable": FilterableThing(a=77), + } + + filter_dict = FilterDict(filterables) + self.assertEqual(filter_dict.filter(a__gt=2).keys(), {"first-filterable", "third-filterable"}) + self.assertEqual(filter_dict.filter(b__equals=90).keys(), {"second-filterable"}) + self.assertEqual(filter_dict.filter(b__equals=0).keys(), set()) + + def test_filter_with_filterables_of_differing_attributes_fails_if_setting_enabled(self): + """Test filtering with filterables of differing attributes raises an error if any filterables lack the + filtered-for attribute when `ignore_items_without_attribute` is False. 
+ """ + filterables = { + "first-filterable": FilterableThing(a=3), + "second-filterable": FilterableThing(b=90), + "third-filterable": FilterableThing(a=77), + } + + filter_dict = FilterDict(filterables) + + for kwarg in {"a__gt": 2}, {"b__equals": 90}, {"b__equals": 0}: + with self.assertRaises(AttributeError): + filter_dict.filter(**kwarg, ignore_items_without_attribute=False) + + def test_filter_chaining(self): + """Test that filters can be chained to filter a FilterDict multiple times.""" + animals_with_age_of_at_least_90 = self.ANIMALS.filter(age__gte=90) + self.assertEqual({"dog", "another_dog"}, animals_with_age_of_at_least_90.keys()) + + animals_with_age_of_at_least_90_and_size_small = animals_with_age_of_at_least_90.filter(size__equals="small") + self.assertEqual(animals_with_age_of_at_least_90_and_size_small.keys(), {"another_dog"}) + + def test_filtering_with_multiple_filters(self): + """Test that multiple filters can be specified in FilterDict.filter at once.""" + self.assertEqual(self.ANIMALS.filter(size__equals="small", age__lt=5), {"cat": self.ANIMALS["cat"]}) + + def test_one_fails_if_no_results(self): + """Test that the `one` method raises an error if there are no results.""" + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException): + self.ANIMALS.one(age__equals=10) + + def test_one_fails_if_more_than_one_result(self): + """Test that the `one` method raises an error if there is more than one result.""" + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException): + self.ANIMALS.one(size__equals="small") + + def test_one(self): + """Test that the `one` method works and returns one result.""" + self.assertEqual(self.ANIMALS.one(age__equals=91), ("another_dog", self.ANIMALS["another_dog"])) + + def test_ordering_by_a_non_existent_attribute(self): + """Ensure an error is raised if ordering is attempted by a non-existent attribute.""" + with self.assertRaises(exceptions.InvalidInputException): + 
self.ANIMALS.order_by("dog-likeness") + + def test_order_by_with_string_attribute(self): + """Test that ordering a FilterDict by a string attribute returns an appropriately ordered FilterList.""" + self.assertEqual( + self.ANIMALS.order_by("name"), + FilterList( + ( + ("cat", self.ANIMALS["cat"]), + ("another_dog", self.ANIMALS["another_dog"]), + ("dog", self.ANIMALS["dog"]), + ) + ), + ) + + def test_order_by_with_int_attribute(self): + """ Test ordering a FilterDict by an integer attribute returns an appropriately ordered FilterList. """ + self.assertEqual( + self.ANIMALS.order_by("age"), + FilterList( + ( + ("cat", self.ANIMALS["cat"]), + ("dog", self.ANIMALS["dog"]), + ("another_dog", self.ANIMALS["another_dog"]), + ) + ), + ) + + def test_order_by_list_attribute(self): + """Test that ordering by list attributes orders members alphabetically by the first element of each list.""" + self.assertEqual( + self.ANIMALS.order_by("previous_names"), + FilterList( + ( + ("dog", self.ANIMALS["dog"]), + ("another_dog", self.ANIMALS["another_dog"]), + ("cat", self.ANIMALS["cat"]), + ) + ), + ) + + def test_order_by_in_reverse(self): + """ Test ordering in reverse works correctly. 
""" + self.assertEqual( + self.ANIMALS.order_by("age", reverse=True), + FilterList( + ( + ("another_dog", self.ANIMALS["another_dog"]), + ("dog", self.ANIMALS["dog"]), + ("cat", self.ANIMALS["cat"]), + ) + ), + ) diff --git a/tests/resources/test_label.py b/tests/resources/test_label.py new file mode 100644 index 000000000..5c32e14a1 --- /dev/null +++ b/tests/resources/test_label.py @@ -0,0 +1,199 @@ +import json + +from octue import exceptions +from octue.resources.filter_containers import FilterSet +from octue.resources.label import Label, LabelSet +from octue.utils.decoders import OctueJSONDecoder +from octue.utils.encoders import OctueJSONEncoder +from tests.base import BaseTestCase + + +class TestLabel(BaseTestCase): + def test_invalid_labels_cause_error(self): + """Test that invalid labels cause an error to be raised.""" + for label in ( + ":a", + "@", + "a_b", + "-bah", + "humbug:", + r"back\slashy", + {"not-a": "string"}, + "/a", + "a/", + "blah:3.5.", + "HELLO-WORLD", + "Asia/Pacific", + ): + with self.assertRaises(exceptions.InvalidLabelException): + Label(label) + + def test_valid_labels(self): + """Test that valid labels instantiate as expected.""" + for label in "hello", "hello-world", "hello-world-goodbye", "blah-35": + Label(label) + + def test_label_comparison(self): + """ Test that labels can be alphabetically compared. """ + self.assertTrue(Label("a") < Label("b")) + self.assertTrue(Label("b") > Label("a")) + self.assertTrue(Label("a") != Label("b")) + self.assertTrue(Label("a") == Label("a")) + + def test_label_comparison_with_strings(self): + """ Test that labels can be alphabetically compared with strings in both directions. 
""" + self.assertTrue(Label("a") < "b") + self.assertTrue(Label("b") > "a") + self.assertTrue(Label("a") != "b") + self.assertTrue(Label("a") == "a") + self.assertTrue("b" > Label("a")) + self.assertTrue("a" < Label("b")) + self.assertTrue("b" != Label("a")) + self.assertTrue("a" == Label("a")) + + def test_labels_compare_unequal_to_non_str_or_label_types(self): + """ Test that comparing for equality a Label with a non-string-or-Label type returns False. """ + self.assertFalse(Label("a") == 1) + self.assertTrue(Label("a") != 1) + + def test_contains(self): + """ Test that labels can be checked for containment. """ + self.assertIn("e", Label("hello")) + + def test_starts_with(self): + """ Test that the start of a label can be checked. """ + self.assertTrue(Label("hello").startswith("h")) + self.assertFalse(Label("hello").startswith("e")) + + def test_ends_with(self): + """ Test that the end of a label can be checked. """ + self.assertTrue(Label("hello").endswith("o")) + self.assertFalse(Label("hello").endswith("e")) + + +class TestLabelSet(BaseTestCase): + LABEL_SET = LabelSet(labels="a b-c d-e-f") + + def test_instantiation_from_space_delimited_string(self): + """ Test that a LabelSet can be instantiated from a space-delimited string of label names.""" + label_set = LabelSet(labels="a b-c d-e-f") + self.assertEqual(label_set, self.LABEL_SET) + + def test_instantiation_from_iterable_of_strings(self): + """ Test that a LabelSet can be instantiated from an iterable of strings.""" + label_set = LabelSet(labels=["a", "b-c", "d-e-f"]) + self.assertEqual(label_set, self.LABEL_SET) + + def test_instantiation_from_iterable_of_labels(self): + """ Test that a LabelSet can be instantiated from an iterable of labels.""" + label_set = LabelSet(labels=[Label("a"), Label("b-c"), Label("d-e-f")]) + self.assertEqual(label_set, self.LABEL_SET) + + def test_instantiation_from_filter_set_of_strings(self): + """ Test that a LabelSet can be instantiated from a FilterSet of strings.""" + 
label_set = LabelSet(labels=FilterSet({"a", "b-c", "d-e-f"})) + self.assertEqual(label_set, self.LABEL_SET) + + def test_instantiation_from_filter_set_of_labels(self): + """ Test that a LabelSet can be instantiated from a FilterSet of labels.""" + label_set = LabelSet(labels=FilterSet({Label("a"), Label("b-c"), Label("d-e-f")})) + self.assertEqual(label_set, self.LABEL_SET) + + def test_instantiation_from_label_set(self): + """ Test that a LabelSet can be instantiated from another LabelSet. """ + self.assertEqual(self.LABEL_SET, LabelSet(self.LABEL_SET)) + + def test_equality(self): + """ Ensure two LabelSets with the same labels compare equal. """ + self.assertTrue(self.LABEL_SET == LabelSet(labels="a b-c d-e-f")) + self.assertTrue(self.LABEL_SET == {"a", "b-c", "d-e-f"}) + + def test_inequality(self): + """ Ensure two LabelSets with different labels compare unequal. """ + self.assertTrue(self.LABEL_SET != LabelSet(labels="a")) + + def test_non_label_sets_compare_unequal_to_label_sets(self): + """ Ensure a LabelSet and a non-LabelSet compare unequal. """ + self.assertFalse(self.LABEL_SET == "a") + self.assertTrue(self.LABEL_SET != "a") + + def test_iterating_over(self): + """ Ensure a LabelSet can be iterated over. """ + self.assertEqual(set(self.LABEL_SET), {Label("a"), Label("b-c"), Label("d-e-f")}) + + def test_contains_with_string(self): + """ Ensure we can check that a LabelSet has a certain label using a string form. """ + self.assertTrue("d-e-f" in self.LABEL_SET) + self.assertFalse("hello" in self.LABEL_SET) + + def test_contains_with_label(self): + """ Ensure we can check that a LabelSet has a certain label. """ + self.assertTrue(Label("d-e-f") in self.LABEL_SET) + self.assertFalse(Label("hello") in self.LABEL_SET) + + def test_contains_only_matches_full_labels(self): + """ Test that the has_label method only matches full labels (i.e. 
that it doesn't match sublabels or parts of labels.""" + for label in "a", "b-c", "d-e-f": + self.assertTrue(label in self.LABEL_SET) + + for label in "b", "c", "d", "e", "f": + self.assertFalse(label in self.LABEL_SET) + + def test_add(self): + """Test that the add method adds a valid label but raises an error for an invalid label.""" + label_set = LabelSet({"a", "b"}) + label_set.add("c") + self.assertEqual(label_set, {"a", "b", "c"}) + + with self.assertRaises(exceptions.InvalidLabelException): + label_set.add("d_") + + def test_update(self): + """Test that the update method adds valid labels but raises an error for invalid labels.""" + label_set = LabelSet({"a", "b"}) + label_set.update("c", "d") + self.assertEqual(label_set, {"a", "b", "c", "d"}) + + with self.assertRaises(exceptions.InvalidLabelException): + label_set.update("e", "f_") + + def test_any_label_starts_with(self): + """ Ensure starts_with only checks the starts of labels, and doesn't check the starts of sublabels. """ + for label in "a", "b", "d": + self.assertTrue(self.LABEL_SET.any_label_starts_with(label)) + + for label in "c", "e", "f": + self.assertFalse(self.LABEL_SET.any_label_starts_with(label)) + + def test_any_label_ends_swith(self): + """ Ensure ends_with doesn't check ends of sublabels. 
""" + for label in "a", "c", "f": + self.assertTrue(self.LABEL_SET.any_label_ends_with(label)) + + for label in "b", "d", "e": + self.assertFalse(self.LABEL_SET.any_label_ends_with(label)) + + def test_serialise(self): + """Ensure that LabelSets serialise to a list.""" + self.assertEqual( + json.dumps(self.LABEL_SET, cls=OctueJSONEncoder), + json.dumps({"_type": "set", "items": ["a", "b-c", "d-e-f"]}), + ) + + def test_serialise_orders_labels(self): + """Ensure that serialising a LabelSet results in a sorted list.""" + label_set = LabelSet("z hello a c-no") + self.assertEqual( + json.dumps(label_set, cls=OctueJSONEncoder), + json.dumps({"_type": "set", "items": ["a", "c-no", "hello", "z"]}), + ) + + def test_deserialise(self): + """Test that serialisation is reversible.""" + serialised_label_set = json.dumps(self.LABEL_SET, cls=OctueJSONEncoder) + deserialised_label_set = LabelSet(json.loads(serialised_label_set, cls=OctueJSONDecoder)) + self.assertEqual(deserialised_label_set, self.LABEL_SET) + + def test_repr(self): + """Test the representation of a LabelSet appears as expected.""" + self.assertEqual(repr(self.LABEL_SET), f"LabelSet({set(self.LABEL_SET)})") diff --git a/tests/resources/test_manifest.py b/tests/resources/test_manifest.py index b423ac5f6..0b81fdb45 100644 --- a/tests/resources/test_manifest.py +++ b/tests/resources/test_manifest.py @@ -72,8 +72,8 @@ def test_to_cloud(self): dataset = Dataset( name="my-dataset", files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), + Datafile(path=file_0_path, sequence=0, tags={"a": 1, "b": 2}, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, tags={"a": 1, "b": 2}, labels={"goodbye"}), }, ) @@ -115,8 +115,8 @@ def test_to_cloud_without_storing_datasets(self): name="my-dataset", path=temporary_directory, files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), + 
Datafile(path=file_0_path, sequence=0, tags={"a": 1, "b": 2}, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, tags={"a": 1, "b": 2}, labels={"goodbye"}), }, ) @@ -156,8 +156,8 @@ def test_from_cloud(self): dataset = Dataset( name="my-dataset", files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), + Datafile(path=file_0_path, sequence=0, tags={"a": 1, "b": 2}, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, tags={"a": 1, "b": 2}, labels={"goodbye"}), }, ) diff --git a/tests/resources/test_tag.py b/tests/resources/test_tag.py index 84578e099..7d6fd2442 100644 --- a/tests/resources/test_tag.py +++ b/tests/resources/test_tag.py @@ -1,204 +1,54 @@ -from octue import exceptions -from octue.resources.filter_containers import FilterList, FilterSet -from octue.resources.tag import Tag, TagSet -from tests.base import BaseTestCase - - -class TestTag(BaseTestCase): - def test_invalid_tags_cause_error(self): - """Test that invalid tags cause an error to be raised.""" - for tag in ":a", "@", "a_b", "-bah", "humbug:", r"back\slashy", {"not-a": "string"}, "/a", "a/", "blah:3.5.": - with self.assertRaises(exceptions.InvalidTagException): - Tag(tag) - - def test_valid_tags(self): - """Test that valid tags instantiate as expected.""" - for tag in "hello", "hello:world", "hello-world:goodbye", "HELLO-WORLD", "Asia/Pacific", "blah:3.5": - Tag(tag) - - def test_subtags(self): - """ Test that subtags are correctly parsed from tags. """ - self.assertEqual(Tag("a:b:c").subtags, FilterList([Tag("a"), Tag("b"), Tag("c")])) - - def test_tag_comparison(self): - """ Test that tags can be alphabetically compared. 
""" - self.assertTrue(Tag("a") < Tag("b")) - self.assertTrue(Tag("b") > Tag("a")) - self.assertTrue(Tag("a") != Tag("b")) - self.assertTrue(Tag("a") == Tag("a")) - - def test_tag_comparison_with_strings(self): - """ Test that tags can be alphabetically compared with strings in both directions. """ - self.assertTrue(Tag("a") < "b") - self.assertTrue(Tag("b") > "a") - self.assertTrue(Tag("a") != "b") - self.assertTrue(Tag("a") == "a") - self.assertTrue("b" > Tag("a")) - self.assertTrue("a" < Tag("b")) - self.assertTrue("b" != Tag("a")) - self.assertTrue("a" == Tag("a")) - - def test_tags_compare_unequal_to_non_str_or_tag_types(self): - """ Test that comparing for equality a Tag with a non-string-or-Tag type returns False. """ - self.assertFalse(Tag("a") == 1) - self.assertTrue(Tag("a") != 1) - - def test_contains(self): - """ Test that tags can be checked for containment. """ - self.assertIn("e", Tag("hello")) - - def test_starts_with(self): - """ Test that the start of a tag can be checked. """ - self.assertTrue(Tag("hello").starts_with("h")) - self.assertFalse(Tag("hello").starts_with("e")) - - def test_subtags_starts_with(self): - """ Test that the start of subtags can be checked. """ - self.assertTrue(TagSet(Tag("hello:world").subtags).any_tag_starts_with("w")) - self.assertFalse(TagSet(Tag("hello:world").subtags).any_tag_starts_with("e")) - - def test_ends_with(self): - """ Test that the end of a tag can be checked. """ - self.assertTrue(Tag("hello").ends_with("o")) - self.assertFalse(Tag("hello").ends_with("e")) - - def test_subtags_ends_with(self): - """ Test that the end of subtags can be checked. 
""" - self.assertTrue(TagSet(Tag("hello:world").subtags).any_tag_ends_with("o")) - self.assertFalse(TagSet(Tag("hello:world").subtags).any_tag_ends_with("e")) - - -class TestTagSet(BaseTestCase): - TAG_SET = TagSet(tags="a b:c d:e:f") - - def test_instantiation_from_space_delimited_string(self): - """ Test that a TagSet can be instantiated from a space-delimited string of tag names.""" - tag_set = TagSet(tags="a b:c d:e:f") - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - - def test_instantiation_from_iterable_of_strings(self): - """ Test that a TagSet can be instantiated from an iterable of strings.""" - tag_set = TagSet(tags=["a", "b:c", "d:e:f"]) - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) +from unittest import TestCase - def test_instantiation_from_iterable_of_tags(self): - """ Test that a TagSet can be instantiated from an iterable of Tags.""" - tag_set = TagSet(tags=[Tag("a"), Tag("b:c"), Tag("d:e:f")]) - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - - def test_instantiation_from_filter_set_of_strings(self): - """ Test that a TagSet can be instantiated from a FilterSet of strings.""" - tag_set = TagSet(tags=FilterSet({"a", "b:c", "d:e:f"})) - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - - def test_instantiation_from_filter_set_of_tags(self): - """ Test that a TagSet can be instantiated from a FilterSet of Tags.""" - tag_set = TagSet(tags=FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - - def test_instantiation_from_tag_set(self): - """ Test that a TagSet can be instantiated from another TagSet. """ - self.assertEqual(self.TAG_SET, TagSet(self.TAG_SET)) - - def test_equality(self): - """ Ensure two TagSets with the same tags compare equal. 
""" - self.assertTrue(self.TAG_SET == TagSet(tags="a b:c d:e:f")) - - def test_inequality(self): - """ Ensure two TagSets with different tags compare unequal. """ - self.assertTrue(self.TAG_SET != TagSet(tags="a")) - - def test_non_tag_sets_compare_unequal_to_tag_sets(self): - """ Ensure a TagSet and a non-TagSet compare unequal. """ - self.assertFalse(self.TAG_SET == "a") - self.assertTrue(self.TAG_SET != "a") - - def test_iterating_over(self): - """ Ensure a TagSet can be iterated over. """ - self.assertEqual(set(self.TAG_SET), {Tag("a"), Tag("b:c"), Tag("d:e:f")}) - - def test_contains_with_string(self): - """ Ensure we can check that a TagSet has a certain tag using a string form. """ - self.assertTrue("d:e:f" in self.TAG_SET) - self.assertFalse("hello" in self.TAG_SET) - - def test_contains_with_tag(self): - """ Ensure we can check that a TagSet has a certain tag. """ - self.assertTrue(Tag("d:e:f") in self.TAG_SET) - self.assertFalse(Tag("hello") in self.TAG_SET) - - def test_contains_only_matches_full_tags(self): - """ Test that the has_tag method only matches full tags (i.e. that it doesn't match subtags or parts of tags.""" - for tag in "a", "b:c", "d:e:f": - self.assertTrue(tag in self.TAG_SET) - - for tag in "b", "c", "d", "e", "f": - self.assertFalse(tag in self.TAG_SET) - - def test_get_subtags(self): - """ Test subtags can be accessed as a new TagSet. """ - self.assertEqual(TagSet("meta:sys2:3456 blah").get_subtags(), TagSet("meta sys2 3456 blah")) - - def test_any_tag_starts_with(self): - """ Ensure starts_with only checks the starts of tags, and doesn't check the starts of subtags. """ - for tag in "a", "b", "d": - self.assertTrue(self.TAG_SET.any_tag_starts_with(tag)) - - for tag in "c", "e", "f": - self.assertFalse(self.TAG_SET.any_tag_starts_with(tag)) - - def test_any_tag_ends_swith(self): - """ Ensure ends_with doesn't check ends of subtags. 
""" - for tag in "a", "c", "f": - self.assertTrue(self.TAG_SET.any_tag_ends_with(tag)) +from octue import exceptions +from octue.resources.tag import TagDict - for tag in "b", "d", "e": - self.assertFalse(self.TAG_SET.any_tag_ends_with(tag)) - def test_any_tag_contains_searches_for_tags_and_subtags(self): - """ Ensure tags and subtags can be searched for. """ - for tag in "a", "b", "d": - self.assertTrue(self.TAG_SET.any_tag_contains(tag)) +class TestTagDict(TestCase): + def test_instantiate_from_dict(self): + """Test that a TagDict can be instantiated from a dictionary.""" + tag_dict = TagDict({"a": 1, "b": 2}) + self.assertEqual(tag_dict, {"a": 1, "b": 2}) - for subtag in "c", "e", "f": - self.assertTrue(self.TAG_SET.any_tag_contains(subtag)) + def test_instantiate_from_kwargs(self): + """Test that a TagDict can be instantiated from kwargs.""" + tag_dict = TagDict(**{"a": 1, "b": 2}) + self.assertEqual(tag_dict, {"a": 1, "b": 2}) - def test_filter(self): - """ Test that tag sets can be filtered. """ - tag_set = TagSet(tags="tag1 tag2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") - self.assertEqual( - tag_set.tags.filter("name__starts_with", "meta"), - FilterSet({Tag("meta:sys1:1234"), Tag("meta:sys2:3456"), Tag("meta:sys2:55")}), - ) + def test_instantiation_fails_if_tag_name_fails_validation(self): + """Test that TagDict instantiation fails if any keys don't conform to the tag name pattern.""" + with self.assertRaises(exceptions.InvalidTagException): + TagDict({".blah.": "blue"}) - def test_filter_chaining(self): - """ Test that filters can be chained. 
""" - tag_set = TagSet(tags="tag1 tag2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") + def test_update_fails_if_tag_name_fails_validation(self): + """Test that updating fails if any keys don't conform to the tag name pattern.""" + tag_dict = TagDict({"a": 1, "b": 2}) - filtered_tags_1 = tag_set.tags.filter("name__starts_with", "meta") - self.assertEqual(filtered_tags_1, TagSet("meta:sys1:1234 meta:sys2:3456 meta:sys2:55").tags) + with self.assertRaises(exceptions.InvalidTagException): + tag_dict.update({"@": 3, "d": 4}) - filtered_tags_2 = filtered_tags_1.filter("name__contains", "sys2") - self.assertEqual(filtered_tags_2, TagSet("meta:sys2:3456 meta:sys2:55").tags) + self.assertEqual(tag_dict, {"a": 1, "b": 2}) - filtered_tags_3 = filtered_tags_1.filter("name__equals", "meta:sys2:55") - self.assertEqual(filtered_tags_3, TagSet("meta:sys2:55").tags) + def test_update(self): + """Test that TagDicts can be updated with tags with valid names.""" + tag_dict = TagDict({"a": 1, "b": 2}) + tag_dict.update({"c": 3, "d": 4}) + self.assertEqual(tag_dict, {"a": 1, "b": 2, "c": 3, "d": 4}) - def test_serialise(self): - """ Ensure that TagSets are serialised to the string form of a list. 
""" - self.assertEqual(self.TAG_SET.serialise(), ["a", "b:c", "d:e:f"]) + def test_setitem_fails_if_tag_name_fails_validation(self): + """Test that setting an item on a TagDict fails if the name fails validation.""" + tag_dict = TagDict() - def test_serialise_orders_tags(self): - """Ensure that TagSets serialise to a list.""" - tag_set = TagSet("z hello a c:no") - self.assertEqual(tag_set.serialise(), ["a", "c:no", "hello", "z"]) + with self.assertRaises(exceptions.InvalidTagException): + tag_dict["@@@"] = 9 - def test_deserialise(self): - """Test that serialisation is reversible.""" - serialised_tag_set = self.TAG_SET.serialise() - deserialised_tag_set = TagSet.deserialise(serialised_tag_set) - self.assertEqual(deserialised_tag_set, self.TAG_SET) + def test_setitem(self): + """Test setting an item on a TagDict.""" + tag_dict = TagDict() + tag_dict["hello"] = 9 + self.assertEqual(tag_dict, {"hello": 9}) - def test_repr(self): - """Test the representation of a TagSet appears as expected.""" - self.assertEqual(repr(self.TAG_SET), f"") + def test_equality_to_dict(self): + """Test that TagDicts compare equal to dictionaries with the same contents.""" + tag_dict = TagDict({"a": 1, "b": 2}) + self.assertEqual(tag_dict, {"a": 1, "b": 2}) diff --git a/tests/test_runner.py b/tests/test_runner.py index a7ee96b17..f74fd1e18 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -127,16 +127,18 @@ def test_output_manifest_is_not_none(self): app_src=mock_app, twine=""" { - "output_manifest": [ - { - "key": "open_foam_result", - "purpose": "A dataset containing solution fields of an openfoam case." - }, - { - "key": "airfoil_cp_values", - "purpose": "A file containing cp values" - } - ] + "output_manifest": { + "datasets": [ + { + "key": "open_foam_result", + "purpose": "A dataset containing solution fields of an openfoam case." 
+ }, + { + "key": "airfoil_cp_values", + "purpose": "A file containing cp values" + } + ] + } } """, ) diff --git a/tests/utils/test_objects.py b/tests/utils/test_objects.py new file mode 100644 index 000000000..ab077db25 --- /dev/null +++ b/tests/utils/test_objects.py @@ -0,0 +1,26 @@ +from unittest import TestCase +from unittest.mock import Mock + +from octue.utils.objects import get_nested_attribute, getattr_or_subscribe + + +class TestObjects(TestCase): + def test_getattr_or_subscribe_with_dictionary(self): + """Test that the getattr_or_subscribe function can get values from a dictionary.""" + self.assertEqual(getattr_or_subscribe(instance={"hello": "world"}, name="hello"), "world") + + def test_getattr_or_subscribe_with_object(self): + """Test that the getattr_or_subscribe function can get attribute values from a class instance.""" + self.assertEqual(getattr_or_subscribe(instance=Mock(hello="world"), name="hello"), "world") + + def test_get_nested_attribute(self): + """Test that nested attributes can be accessed.""" + inner_mock = Mock(b=3) + outer_mock = Mock(a=inner_mock) + self.assertEqual(get_nested_attribute(instance=outer_mock, nested_attribute_name="a.b"), 3) + + def test_get_nested_dictionary_attribute(self): + """Test that nested attributes ending in a dictionary key can be accessed.""" + inner_mock = Mock(b={"hello": "world"}) + outer_mock = Mock(a=inner_mock) + self.assertEqual(get_nested_attribute(instance=outer_mock, nested_attribute_name="a.b.hello"), "world")