From cd32c92adc8635d9b47a5ab927b0fcffafe858be Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 10 May 2021 16:00:17 +0100 Subject: [PATCH 001/103] CLN: Remove filters field from manifest strand in twines --- octue/templates/template-python-fractal/twine.json | 3 +-- octue/templates/template-using-manifests/twine.json | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/octue/templates/template-python-fractal/twine.json b/octue/templates/template-python-fractal/twine.json index b85156705..33a3b2856 100644 --- a/octue/templates/template-python-fractal/twine.json +++ b/octue/templates/template-python-fractal/twine.json @@ -63,8 +63,7 @@ "output_manifest": [ { "key": "fractal_figure_files", - "purpose": "A dataset containing .json files containing the output figures", - "filters": "tags:(fractal AND figure) files:(extension:json)" + "purpose": "A dataset containing .json files containing the output figures" } ] } diff --git a/octue/templates/template-using-manifests/twine.json b/octue/templates/template-using-manifests/twine.json index 7bf9e2324..5b0ec4e17 100644 --- a/octue/templates/template-using-manifests/twine.json +++ b/octue/templates/template-using-manifests/twine.json @@ -15,15 +15,13 @@ "input_manifest": [ { "key": "raw_met_mast_data", - "purpose": "A dataset containing .csv files of raw meteorological mast data which we need to clean up", - "filters": "tags:(met AND mast) files:(extension:csv)" + "purpose": "A dataset containing .csv files of raw meteorological mast data which we need to clean up" } ], "output_manifest": [ { "key": "cleaned_met_mast_data", - "purpose": "A dataset containing .csv files of cleaned meteorological mast data", - "filters": "tags:(met AND mast AND cleaned) files:(extension:csv)" + "purpose": "A dataset containing .csv files of cleaned meteorological mast data" } ] } From 0c579aecea81097352e5ba7df8f24cc3e9b220bb Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 12 May 2021 12:31:38 +0100 Subject: [PATCH 002/103] IMP: Disallow more than one colon in tags --- octue/resources/tag.py | 2 +- .../fractal/fractal.py | 2 +- tests/mixins/test_taggable.py | 2 - tests/resources/test_datafile.py | 2 +- tests/resources/test_tag.py | 62 +++++++++---------- 5 files changed, 34 insertions(+), 36 deletions(-) diff --git a/octue/resources/tag.py b/octue/resources/tag.py index e2d807b93..529c1da09 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -8,7 +8,7 @@ from octue.utils.encoders import OctueJSONEncoder -TAG_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9:\-/]*(? Date: Wed, 12 May 2021 15:36:34 +0100 Subject: [PATCH 003/103] REV: Revert "IMP: Disallow more than one colon in tags" This reverts commit 0c579aecea81097352e5ba7df8f24cc3e9b220bb. --- octue/resources/tag.py | 2 +- .../fractal/fractal.py | 2 +- tests/mixins/test_taggable.py | 2 + tests/resources/test_datafile.py | 2 +- tests/resources/test_tag.py | 62 +++++++++---------- 5 files changed, 36 insertions(+), 34 deletions(-) diff --git a/octue/resources/tag.py b/octue/resources/tag.py index 529c1da09..e2d807b93 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -8,7 +8,7 @@ from octue.utils.encoders import OctueJSONEncoder -TAG_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9\-/]*:?[A-Za-z0-9\-/]*(? Date: Wed, 12 May 2021 20:33:59 +0100 Subject: [PATCH 004/103] DEP: Use new version of twined --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5388c710e..1b60fc2ca 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ "google-cloud-storage>=1.35.1", "google-crc32c>=1.1.2", "gunicorn", - "twined>=0.0.19", + "twined @ https://github.com/octue/twined/archive/feature/tag-templates.zip", ], url="https://www.github.com/octue/octue-sdk-python", license="MIT", From d15fe51a96fa37c5e43a7e9bfea29d7a2c679fc4 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 12 May 2021 21:06:06 +0100 Subject: [PATCH 005/103] TST: Only run deployment test if RUN_DEPLOYMENT_TESTS envvar is true --- tests/cloud/deployment/google/test_cloud_run.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/cloud/deployment/google/test_cloud_run.py b/tests/cloud/deployment/google/test_cloud_run.py index f276b79d5..8d7356636 100644 --- a/tests/cloud/deployment/google/test_cloud_run.py +++ b/tests/cloud/deployment/google/test_cloud_run.py @@ -1,6 +1,7 @@ import base64 import json import os +import unittest import uuid from unittest import TestCase, mock @@ -64,9 +65,13 @@ def test_error_is_raised_if_service_id_environment_variable_is_missing_or_empty( project_name="a-project-name", data={}, question_uuid="8c859f87-b594-4297-883f-cd1c7718ef29" ) - def test_cloud_run_integration(self): - """Test that the Google Cloud Run integration works, providing a service that can be asked questions and send - responses. + @unittest.skipUnless( + condition=os.getenv("RUN_DEPLOYMENT_TESTS", "").lower() == "true", + reason="'RUN_DEPLOYMENT_TESTS' environment variable is False or not present.", + ) + def test_cloud_run_deployment(self): + """Test that the Google Cloud Run example deployment works, providing a service that can be asked questions and + send responses. """ service_to_ask = "octue.services.009ea106-dc37-4521-a8cc-3e0836064334" asker = Service(backend=GCPPubSubBackend(project_name=TEST_PROJECT_NAME)) From fa853cf8a091343da6612f519a8abbe68b24b6a9 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Thu, 13 May 2021 20:37:38 +0100 Subject: [PATCH 006/103] IMP: Convert tags to labels --- docs/source/analysis_objects.rst | 4 +- docs/source/child_services.rst | 4 +- docs/source/cloud_storage.rst | 2 +- docs/source/cloud_storage_advanced_usage.rst | 6 +- docs/source/conf.py | 2 +- docs/source/datafile.rst | 14 +- docs/source/dataset.rst | 12 +- docs/source/filter_containers.rst | 16 +- octue/exceptions.py | 4 + octue/mixins/__init__.py | 5 +- octue/mixins/filterable.py | 14 +- octue/mixins/labelable.py | 22 ++ octue/resources/analysis.py | 4 +- octue/resources/datafile.py | 21 +- octue/resources/dataset.py | 28 +-- octue/resources/filter_containers.py | 10 +- octue/resources/label.py | 195 +++++++++++++++++ octue/resources/manifest.py | 2 +- .../parent_service/twine.json | 4 +- .../fractal/fractal.py | 10 +- .../templates/template-using-manifests/app.py | 18 +- .../data/input/manifest.json | 12 +- tests/mixins/test_filterable.py | 32 +-- tests/mixins/test_labellable.py | 87 ++++++++ tests/mixins/test_taggable.py | 86 -------- tests/resources/test_datafile.py | 14 +- tests/resources/test_dataset.py | 54 ++--- tests/resources/test_label.py | 204 ++++++++++++++++++ tests/resources/test_manifest.py | 12 +- tests/resources/test_tag.py | 204 ------------------ 30 files changed, 665 insertions(+), 437 deletions(-) create mode 100644 octue/mixins/labelable.py create mode 100644 octue/resources/label.py create mode 100644 tests/mixins/test_labellable.py delete mode 100644 tests/mixins/test_taggable.py create mode 100644 tests/resources/test_label.py delete mode 100644 tests/resources/test_tag.py diff --git a/docs/source/analysis_objects.rst b/docs/source/analysis_objects.rst index bb2adcfc5..60e8105f5 100644 --- a/docs/source/analysis_objects.rst +++ b/docs/source/analysis_objects.rst @@ -37,8 +37,8 @@ the hash: - ``cluster`` - ``sequence`` - ``timestamp`` - - ``tags`` + - ``labels`` -- For a ``Dataset``, the hashes of its ``Datafiles`` are included, along with its ``tags``. +- For a ``Dataset``, the hashes of its ``Datafiles`` are included, along with its ``labels``. - For a ``Manifest``, the hashes of its ``Datasets`` are included, along with its ``keys``. diff --git a/docs/source/child_services.rst b/docs/source/child_services.rst index 665a105de..4cf8bb3eb 100644 --- a/docs/source/child_services.rst +++ b/docs/source/child_services.rst @@ -104,13 +104,13 @@ The children field must also be present in the ``twine.json`` file: "key": "wind_speed", "purpose": "A service that returns the average wind speed for a given latitude and longitude.", "notes": "Some notes.", - "filters": "tags:wind_speed" + "filters": "labels:wind_speed" }, { "key": "elevation", "purpose": "A service that returns the elevation for a given latitude and longitude.", "notes": "Some notes.", - "filters": "tags:elevation" + "filters": "labels:elevation" } ], ... diff --git a/docs/source/cloud_storage.rst b/docs/source/cloud_storage.rst index 6f68bac75..367957c6d 100644 --- a/docs/source/cloud_storage.rst +++ b/docs/source/cloud_storage.rst @@ -12,7 +12,7 @@ in Octue SDK, please join the discussion `in this issue. , bucket_name=, path_in_bucket=, - metadata={"tags": ["blah", "glah", "jah"], "cleaned": True, "id": 3} + metadata={"labels": ["blah", "glah", "jah"], "cleaned": True, "id": 3} ) storage_client.upload_from_string( string='[{"height": 99, "width": 72}, {"height": 12, "width": 103}]', bucket_name=, path_in_bucket=, - metadata={"tags": ["dimensions"], "cleaned": True, "id": 96} + metadata={"labels": ["dimensions"], "cleaned": True, "id": 96} ) **Downloading** @@ -61,7 +61,7 @@ to any of these methods. bucket_name=, path_in_bucket=, ) - >>> {"tags": ["dimensions"], "cleaned": True, "id": 96} + >>> {"labels": ["dimensions"], "cleaned": True, "id": 96} **Deleting** diff --git a/docs/source/conf.py b/docs/source/conf.py index e3c7f6af4..0c2d54be4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -50,7 +50,7 @@ # # The short X.Y version. version = "1.0" -# The full version, including alpha/beta/rc tags. +# The full version, including alpha/beta/rc labels. release = "1.0" # The language for content autogenerated by Sphinx. Refer to documentation diff --git a/docs/source/datafile.rst b/docs/source/datafile.rst index 6971341ab..712df7a53 100644 --- a/docs/source/datafile.rst +++ b/docs/source/datafile.rst @@ -10,7 +10,7 @@ the following main attributes: - ``path`` - the path of this file, which may include folders or subfolders, within the dataset. - ``cluster`` - the integer cluster of files, within a dataset, to which this belongs (default 0) - ``sequence`` - a sequence number of this file within its cluster (if sequences are appropriate) -- ``tags`` - a space-separated string or iterable of tags relevant to this file +- ``labels`` - a space-separated string or iterable of labels relevant to this file - ``timestamp`` - a posix timestamp associated with the file, in seconds since epoch, typically when it was created but could relate to a relevant time point for the data @@ -50,7 +50,7 @@ Example A datafile.timestamp = new_metadata["timestamp"] datafile.cluster = new_metadata["cluster"] datafile.sequence = new_metadata["sequence"] - datafile.tags = new_metadata["tags"] + datafile.labels = new_metadata["labels"] Example B @@ -76,7 +76,7 @@ Example B datafile.timestamp = datetime.now() datafile.cluster = 0 datafile.sequence = 3 - datafile.tags = {"manufacturer:Vestas", "output:1MW"} + datafile.labels = {"manufacturer:Vestas", "output:1MW"} datafile.to_cloud() # Or, datafile.update_cloud_metadata() @@ -122,10 +122,10 @@ For creating new data in a new local file: sequence = 2 - tags = {"cleaned:True", "type:linear"} + labels = {"cleaned:True", "type:linear"} - with Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags, mode="w") as datafile, f: + with Datafile(path="path/to/local/file.dat", sequence=sequence, labels=labels, mode="w") as datafile, f: f.write("This is some cleaned data.") datafile.to_cloud(project_name="my-project", bucket_name="my-bucket", path_in_bucket="path/to/data.dat") @@ -139,7 +139,7 @@ For existing data in an existing local file: sequence = 2 - tags = {"cleaned:True", "type:linear"} + labels = {"cleaned:True", "type:linear"} - datafile = Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags) + datafile = Datafile(path="path/to/local/file.dat", sequence=sequence, labels=labels) datafile.to_cloud(project_name="my-project", bucket_name="my-bucket", path_in_bucket="path/to/data.dat") diff --git a/docs/source/dataset.rst b/docs/source/dataset.rst index 79390a941..a1bf3a660 100644 --- a/docs/source/dataset.rst +++ b/docs/source/dataset.rst @@ -7,7 +7,7 @@ Dataset A ``Dataset`` contains any number of ``Datafiles`` along with the following metadata: - ``name`` -- ``tags`` +- ``labels`` The files are stored in a ``FilterSet``, meaning they can be easily filtered according to any attribute of the :doc:`Datafile ` instances it contains. @@ -23,23 +23,23 @@ You can filter a ``Dataset``'s files as follows: dataset = Dataset( files=[ - Datafile(timestamp=time.time(), path="path-within-dataset/my_file.csv", tags="one a:2 b:3 all"), - Datafile(timestamp=time.time(), path="path-within-dataset/your_file.txt", tags="two a:2 b:3 all"), - Datafile(timestamp=time.time(), path="path-within-dataset/another_file.csv", tags="three all"), + Datafile(timestamp=time.time(), path="path-within-dataset/my_file.csv", labels="one a:2 b:3 all"), + Datafile(timestamp=time.time(), path="path-within-dataset/your_file.txt", labels="two a:2 b:3 all"), + Datafile(timestamp=time.time(), path="path-within-dataset/another_file.csv", labels="three all"), ] ) dataset.files.filter(filter_name="name__ends_with", filter_value=".csv") >>> , })> - dataset.files.filter("tags__contains", filter_value="a:2") + dataset.files.filter("labels__contains", filter_value="a:2") >>> , })> You can also chain filters indefinitely: .. code-block:: python - dataset.files.filter(filter_name="name__ends_with", filter_value=".csv").filter("tags__contains", filter_value="a:2") + dataset.files.filter(filter_name="name__ends_with", filter_value=".csv").filter("labels__contains", filter_value="a:2") >>> })> Find out more about ``FilterSets`` :doc:`here `, including all the possible filters available for each type of object stored on diff --git a/docs/source/filter_containers.rst b/docs/source/filter_containers.rst index 3bed20e3d..a4087fd34 100644 --- a/docs/source/filter_containers.rst +++ b/docs/source/filter_containers.rst @@ -19,7 +19,7 @@ There are two types of filter containers currently implemented: ``FilterSets`` are currently used in: - ``Dataset.files`` to store ``Datafiles`` -- ``TagSet.tags`` to store ``Tags`` +- ``Labelset.labels`` to store ``Labels`` You can see filtering in action on the files of a ``Dataset`` :doc:`here `. @@ -73,18 +73,18 @@ The following filters are implemented for the following types: * ``is`` * ``is_not`` -- ``TagSet``: +- ``Labelset``: * ``is`` * ``is_not`` * ``equals`` * ``not_equals`` - * ``any_tag_contains`` - * ``not_any_tag_contains`` - * ``any_tag_starts_with`` - * ``not_any_tag_starts_with`` - * ``any_tag_ends_with`` - * ``not_any_tag_ends_with`` + * ``any_label_contains`` + * ``not_any_label_contains`` + * ``any_label_starts_with`` + * ``not_any_label_starts_with`` + * ``any_label_ends_with`` + * ``not_any_label_ends_with`` diff --git a/octue/exceptions.py b/octue/exceptions.py index 09afba66c..95ceeac71 100644 --- a/octue/exceptions.py +++ b/octue/exceptions.py @@ -62,6 +62,10 @@ class InvalidTagException(OctueSDKException, ValueError): """Raise when a tag applied to a data file or dataset""" +class InvalidLabelException(OctueSDKException, ValueError): + """Raise when a label applied to a data file or dataset""" + + class ServiceNotFound(OctueSDKException): """Raise when a Service of the given ID has not been found on the Google Pub/Sub server (i.e. if there is no topic associated with the Service ID). diff --git a/octue/mixins/__init__.py b/octue/mixins/__init__.py index 50da35a2b..2f96d472e 100644 --- a/octue/mixins/__init__.py +++ b/octue/mixins/__init__.py @@ -3,10 +3,10 @@ from .filterable import Filterable from .hashable import Hashable from .identifiable import Identifiable +from .labelable import Labelable from .loggable import Loggable from .pathable import Pathable from .serialisable import Serialisable -from .taggable import Taggable __all__ = ( @@ -14,9 +14,10 @@ "Filterable", "Hashable", "Identifiable", + "Labelable", "Loggable", "MixinBase", "Pathable", "Serialisable", - "Taggable", + "Labelable", ) diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index a6b66d468..b3a039f9c 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -49,13 +49,13 @@ **ICONTAINS_FILTER_ACTIONS, }, "NoneType": IS_FILTER_ACTIONS, - "TagSet": { - "any_tag_contains": lambda item, filter_value: item.any_tag_contains(filter_value), - "not_any_tag_contains": lambda item, filter_value: not item.any_tag_contains(filter_value), - "any_tag_starts_with": lambda item, filter_value: item.any_tag_starts_with(filter_value), - "not_any_tag_starts_with": lambda item, filter_value: not item.any_tag_starts_with(filter_value), - "any_tag_ends_with": lambda item, filter_value: item.any_tag_ends_with(filter_value), - "not_any_tag_ends_with": lambda item, filter_value: not item.any_tag_ends_with(filter_value), + "LabelSet": { + "any_label_contains": lambda item, filter_value: item.any_label_contains(filter_value), + "not_any_label_contains": lambda item, filter_value: not item.any_label_contains(filter_value), + "any_label_starts_with": lambda item, filter_value: item.any_label_starts_with(filter_value), + "not_any_label_starts_with": lambda item, filter_value: not item.any_label_starts_with(filter_value), + "any_label_ends_with": lambda item, filter_value: item.any_label_ends_with(filter_value), + "not_any_label_ends_with": lambda item, filter_value: not item.any_label_ends_with(filter_value), **EQUALS_FILTER_ACTIONS, **CONTAINS_FILTER_ACTIONS, **IS_FILTER_ACTIONS, diff --git a/octue/mixins/labelable.py b/octue/mixins/labelable.py new file mode 100644 index 000000000..8c5f0c48d --- /dev/null +++ b/octue/mixins/labelable.py @@ -0,0 +1,22 @@ +from octue.resources.label import LabelSet + + +class Labelable: + """ A mixin class allowing objects to be labelled. """ + + def __init__(self, *args, labels=None, **kwargs): + super().__init__(*args, **kwargs) + self._labels = LabelSet(labels) + + def add_labels(self, *args): + """ Adds one or more new label strings to the object labels. New labels will be cleaned and validated. """ + self._labels.add_labels(*args) + + @property + def labels(self): + return self._labels + + @labels.setter + def labels(self, labels): + """ Overwrite any existing label set and assign new label. """ + self._labels = LabelSet(labels) diff --git a/octue/resources/analysis.py b/octue/resources/analysis.py index 22ee079d2..c08104610 100644 --- a/octue/resources/analysis.py +++ b/octue/resources/analysis.py @@ -2,7 +2,7 @@ import logging from octue.definitions import OUTPUT_STRANDS -from octue.mixins import Hashable, Identifiable, Loggable, Serialisable, Taggable +from octue.mixins import Hashable, Identifiable, Labelable, Loggable, Serialisable from octue.resources.manifest import Manifest from octue.utils.encoders import OctueJSONEncoder from octue.utils.folders import get_file_name_from_strand @@ -23,7 +23,7 @@ CLASS_MAP = {"configuration_manifest": Manifest, "input_manifest": Manifest, "output_manifest": Manifest} -class Analysis(Identifiable, Loggable, Serialisable, Taggable): +class Analysis(Identifiable, Loggable, Serialisable, Labelable): """Analysis class, holding references to all input and output data ## The Analysis Instance diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index e839eb5b1..9136f382d 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -9,7 +9,7 @@ from octue.cloud.storage import GoogleCloudStorageClient from octue.cloud.storage.path import CLOUD_STORAGE_PROTOCOL from octue.exceptions import AttributeConflict, CloudLocationNotSpecified, FileNotFoundException, InvalidInputException -from octue.mixins import Filterable, Hashable, Identifiable, Loggable, Pathable, Serialisable, Taggable +from octue.mixins import Filterable, Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable from octue.mixins.hashable import EMPTY_STRING_HASH_VALUE from octue.utils import isfile from octue.utils.time import convert_from_posix_time, convert_to_posix_time @@ -24,10 +24,10 @@ ID_DEFAULT = None CLUSTER_DEFAULT = 0 SEQUENCE_DEFAULT = None -TAGS_DEFAULT = None +LABELS_DEFAULT = None -class Datafile(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable, Filterable): +class Datafile(Labelable, Serialisable, Pathable, Loggable, Identifiable, Hashable, Filterable): """Class for representing data files on the Octue system. Files in a manifest look like this: @@ -37,7 +37,8 @@ class Datafile(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashabl "cluster": 0, "sequence": 0, "extension": "csv", - "tags": "", + "tags": {}, + "labels": [], "timestamp": datetime.datetime(2021, 5, 3, 18, 15, 58, 298086), "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86", "size_bytes": 59684813, @@ -55,7 +56,7 @@ class Datafile(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashabl :param Pathable path_from: The root Pathable object (typically a Dataset) that this Datafile's path is relative to. :param int cluster: The cluster of files, within a dataset, to which this belongs (default 0) :param int sequence: A sequence number of this file within its cluster (if sequences are appropriate) - :param str tags: Space-separated string of tags relevant to this file + :param str labels: Space-separated string of labels relevant to this file :param bool skip_checks: :param str mode: if using as a context manager, open the datafile for reading/editing in this mode (the mode options are the same as for the builtin open function) @@ -70,7 +71,7 @@ class Datafile(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashabl "name", "path", "sequence", - "tags", + "labels", "timestamp", "_cloud_metadata", ) @@ -84,7 +85,7 @@ def __init__( path_from=None, cluster=CLUSTER_DEFAULT, sequence=SEQUENCE_DEFAULT, - tags=TAGS_DEFAULT, + labels=LABELS_DEFAULT, skip_checks=True, mode="r", update_cloud_metadata=True, @@ -95,7 +96,7 @@ def __init__( name=kwargs.pop("name", None), immutable_hash_value=kwargs.pop("immutable_hash_value", None), logger=logger, - tags=tags, + labels=labels, path=path, path_from=path_from, ) @@ -202,7 +203,7 @@ def from_cloud( datafile.immutable_hash_value = datafile._cloud_metadata.get("crc32c", EMPTY_STRING_HASH_VALUE) datafile.cluster = kwargs.pop("cluster", custom_metadata.get("cluster", CLUSTER_DEFAULT)) datafile.sequence = kwargs.pop("sequence", custom_metadata.get("sequence", SEQUENCE_DEFAULT)) - datafile.tags = kwargs.pop("tags", custom_metadata.get("tags", TAGS_DEFAULT)) + datafile.labels = kwargs.pop("labels", custom_metadata.get("labels", LABELS_DEFAULT)) datafile._open_attributes = {"mode": mode, "update_cloud_metadata": update_cloud_metadata, **kwargs} return datafile @@ -488,7 +489,7 @@ def metadata(self): "timestamp": self.timestamp, "cluster": self.cluster, "sequence": self.sequence, - "tags": self.tags.serialise(to_string=True), + "labels": self.labels.serialise(to_string=True), } diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index 406a4b87d..07fa2c6e5 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -7,10 +7,10 @@ from octue.cloud import storage from octue.cloud.storage import GoogleCloudStorageClient from octue.exceptions import BrokenSequenceException, InvalidInputException, UnexpectedNumberOfResultsException -from octue.mixins import Hashable, Identifiable, Loggable, Pathable, Serialisable, Taggable +from octue.mixins import Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable from octue.resources.datafile import Datafile from octue.resources.filter_containers import FilterSet -from octue.resources.tag import TagSet +from octue.resources.label import LabelSet module_logger = logging.getLogger(__name__) @@ -19,8 +19,8 @@ DATAFILES_DIRECTORY = "datafiles" -class Dataset(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable): - """A representation of a dataset, containing files, tags, etc +class Dataset(Labelable, Serialisable, Pathable, Loggable, Identifiable, Hashable): + """A representation of a dataset, containing files, labels, etc This is used to read a list of files (and their associated properties) into octue analysis, or to compile a list of output files (results) and their properties that will be sent back to the octue system. @@ -28,11 +28,11 @@ class Dataset(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable _FILTERSET_ATTRIBUTE = "files" _ATTRIBUTES_TO_HASH = ("files",) - _SERIALISE_FIELDS = "files", "name", "tags", "id", "path" + _SERIALISE_FIELDS = "files", "name", "labels", "id", "path" - def __init__(self, name=None, id=None, logger=None, path=None, path_from=None, tags=None, **kwargs): + def __init__(self, name=None, id=None, logger=None, path=None, path_from=None, labels=None, **kwargs): """Construct a Dataset""" - super().__init__(name=name, id=id, logger=logger, tags=tags, path=path, path_from=path_from) + super().__init__(name=name, id=id, logger=logger, labels=labels, path=path, path_from=path_from) # TODO The decoders aren't being used; utils.decoders.OctueJSONDecoder should be used in twined # so that resources get automatically instantiated. @@ -85,7 +85,7 @@ def from_cloud(cls, project_name, bucket_name, path_to_dataset_directory): id=serialised_dataset["id"], name=serialised_dataset["name"], path=storage.path.generate_gs_path(bucket_name, path_to_dataset_directory), - tags=TagSet(serialised_dataset["tags"]), + labels=LabelSet(serialised_dataset["labels"]), files=datafiles, ) @@ -213,18 +213,18 @@ def get_sequence_number(file): return results - def get_file_by_tag(self, tag_string): - """Gets a data file from a manifest by searching for files with the provided tag(s) + def get_file_by_label(self, tag_string): + """Gets a data file from a manifest by searching for files with the provided label(s) Gets exclusively one file; if no file or more than one file is found this results in an error. - :param tag_string: if this string appears as an exact match in the tags + :param tag_string: if this string appears as an exact match in the labels :return: DataFile object """ - results = self.files.filter(filter_name="tags__contains", filter_value=tag_string) + results = self.files.filter(filter_name="labels__contains", filter_value=tag_string) if len(results) > 1: - raise UnexpectedNumberOfResultsException("More than one result found when searching for a file by tag") + raise UnexpectedNumberOfResultsException("More than one result found when searching for a file by label") elif len(results) == 0: - raise UnexpectedNumberOfResultsException("No files found with this tag") + raise UnexpectedNumberOfResultsException("No files found with this label") return results.pop() diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 62d3bed6e..1c4244e62 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -1,25 +1,25 @@ from octue import exceptions -def _filter(instance, filter_name=None, filter_value=None): +def _filter(self, filter_name=None, filter_value=None): """Returns a new instance containing only the Filterables to which the given filter criteria apply. :param str filter_name: :param any filter_value: :return octue.resources.filter_containers.FilterSet: """ - return instance.__class__((item for item in instance if item.satisfies(filter_name, filter_value))) + return self.__class__((item for item in self if item.satisfies(filter_name, filter_value))) -def _order_by(instance, attribute_name, reverse=False): +def _order_by(self, attribute_name, reverse=False): """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList (not a FilterSet. """ try: - return FilterList(sorted(instance, key=lambda item: getattr(item, attribute_name), reverse=reverse)) + return FilterList(sorted(self, key=lambda item: getattr(item, attribute_name), reverse=reverse)) except AttributeError: raise exceptions.InvalidInputException( - f"An attribute named {attribute_name!r} does not exist on one or more members of {instance!r}." + f"An attribute named {attribute_name!r} does not exist on one or more members of {self!r}." ) diff --git a/octue/resources/label.py b/octue/resources/label.py new file mode 100644 index 000000000..c0781b75c --- /dev/null +++ b/octue/resources/label.py @@ -0,0 +1,195 @@ +import json +import re +from functools import lru_cache + +from octue.exceptions import InvalidLabelException +from octue.mixins import Filterable, Serialisable +from octue.resources.filter_containers import FilterList, FilterSet +from octue.utils.encoders import OctueJSONEncoder + + +LABEL_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9:.\-/]*(? other + elif isinstance(other, Label): + return self.name > other.name + + def __hash__(self): + """ Allow Labels to be contained in a set. """ + return hash(f"{type(self).__name__}{self.name}") + + def __contains__(self, item): + return item in self.name + + def __repr__(self): + return repr(self.name) + + def starts_with(self, value): + """ Does the label start with the given value? """ + return self.name.startswith(value) + + def ends_with(self, value): + """ Does the label end with the given value? """ + return self.name.endswith(value) + + @staticmethod + def _clean(name): + """ Ensure the label name is a string and conforms to the label regex pattern. """ + if not isinstance(name, str): + raise InvalidLabelException("Labels must be expressed as a string.") + + cleaned_name = name.strip() + + if not re.match(LABEL_PATTERN, cleaned_name): + raise InvalidLabelException( + f"Invalid label '{cleaned_name}'. Labels must contain only characters 'a-z', 'A-Z', '0-9', ':', '.', '/' " + f"and '-'. They must not start with '-', ':', '/' or '.'" + ) + + return cleaned_name + + +class LabelSet(Serialisable): + """ Class to handle a set of labels as a string. """ + + _FILTERSET_ATTRIBUTE = "labels" + + def __init__(self, labels=None, *args, **kwargs): + """ Construct a LabelSet. """ + # TODO Call the superclass with *args and **kwargs, then update everything to using ResourceBase + labels = labels or FilterSet() + + # JSON-encoded list of label names, or space-delimited string of label names. + if isinstance(labels, str): + try: + self.labels = FilterSet(Label(label) for label in json.loads(labels)) + except json.decoder.JSONDecodeError: + self.labels = FilterSet(Label(label) for label in labels.strip().split()) + + elif isinstance(labels, LabelSet): + self.labels = FilterSet(labels.labels) + + # Labels can be some other iterable than a list, but each label must be a Label or string. + elif hasattr(labels, "__iter__"): + self.labels = FilterSet(label if isinstance(label, Label) else Label(label) for label in labels) + + else: + raise InvalidLabelException( + "Labels must be expressed as a whitespace-delimited string or an iterable of strings or Label instances." + ) + + def __eq__(self, other): + """ Does this LabelSet have the same labels as another LabelSet? """ + if not isinstance(other, LabelSet): + return False + return self.labels == other.labels + + def __iter__(self): + """ Iterate over the labels in the LabelSet. """ + yield from self.labels + + def __len__(self): + return len(self.labels) + + def __contains__(self, label): + """ Return True if any of the labels exactly matches value, allowing test like `if 'a' in LabelSet('a b')`. """ + if isinstance(label, str): + return Label(label) in self.labels + if isinstance(label, Label): + return label in self.labels + + def __repr__(self): + return f"<{type(self).__name__}({self.labels})>" + + def add_labels(self, *args): + """Adds one or more new label strings to the object labels. New labels will be cleaned and validated.""" + self.labels |= {Label(arg) for arg in args} + + def get_sublabels(self): + """ Return a new LabelSet instance with all the sublabels. """ + return LabelSet(sublabel for label in self for sublabel in label.sublabels) + + def any_label_starts_with(self, value): + """ Implement a startswith method that returns true if any of the labels starts with value """ + return any(label.starts_with(value) for label in self) + + def any_label_ends_with(self, value): + """ Implement an endswith method that returns true if any of the labels endswith value. """ + return any(label.ends_with(value) for label in self) + + def any_label_contains(self, value): + """ Return True if any of the labels contains value. """ + return any(value in label for label in self) + + def filter(self, filter_name=None, filter_value=None): + """Filter the labels with the given filter for the given value. + + :param str filter_name: + :param any filter_value: + :return octue.resources.filter_containers.FilterSet: + """ + return self.labels.filter(filter_name=filter_name, filter_value=filter_value) + + def serialise(self, to_string=False, **kwargs): + """Serialise to a sorted list of label names. + + :param bool to_string: + :return list|str: + """ + string = json.dumps( + sorted(label.name for label in self.labels), cls=OctueJSONEncoder, sort_keys=True, indent=4, **kwargs + ) + + if to_string: + return string + + return json.loads(string) + + @classmethod + def deserialise(cls, serialised_labelset): + """Deserialise from a sorted list of label names. + + :param list serialised_labelset: + :return LabelSet: + """ + return cls(labels=serialised_labelset) diff --git a/octue/resources/manifest.py b/octue/resources/manifest.py index d5224fa8e..c1735e0ab 100644 --- a/octue/resources/manifest.py +++ b/octue/resources/manifest.py @@ -141,7 +141,7 @@ def prepare(self, data): for idx, dataset_spec in enumerate(data): self.keys[dataset_spec["key"]] = idx - # TODO generate a unique name based on the filter key, tag datasets so that the tag filters in the spec + # TODO generate a unique name based on the filter key, label datasets so that the label filters in the spec # apply automatically and generate a description of the dataset self.datasets.append(Dataset(logger=self.logger, path_from=self, path=dataset_spec["key"])) diff --git a/octue/templates/template-child-services/parent_service/twine.json b/octue/templates/template-child-services/parent_service/twine.json index ef23b594d..1a6468dfd 100644 --- a/octue/templates/template-child-services/parent_service/twine.json +++ b/octue/templates/template-child-services/parent_service/twine.json @@ -4,13 +4,13 @@ "key": "wind_speed", "purpose": "A service that returns the average wind speed for a given latitude and longitude.", "notes": "Some notes.", - "filters": "tags:wind_speed" + "filters": "labels:wind_speed" }, { "key": "elevation", "purpose": "A service that returns the elevation for a given latitude and longitude.", "notes": "Some notes.", - "filters": "tags:elevation" + "filters": "labels:elevation" } ], "input_values_schema": { diff --git a/octue/templates/template-python-fractal/fractal/fractal.py b/octue/templates/template-python-fractal/fractal/fractal.py index 1b1b2e50b..fcd84e5f0 100644 --- a/octue/templates/template-python-fractal/fractal/fractal.py +++ b/octue/templates/template-python-fractal/fractal/fractal.py @@ -41,16 +41,16 @@ def fractal(analysis): "height": analysis.configuration_values["height"], } - # We'll add some tags, which will help to improve searchability and allow + # We'll add some labels, which will help to improve searchability and allow # other apps, reports, users and analyses to automatically find figures and # use them. # - # Get descriptive with tags... they are whitespace-delimited and colons can be - # used to provide subtags. Tags are case insensitive, and accept a-z, 0-9, + # Get descriptive with labels... they are whitespace-delimited and colons can be + # used to provide sublabels. Labels are case insensitive, and accept a-z, 0-9, # hyphens and underscores (which can be used literally in search and are also # used to separate words in natural language search). Other special characters # will be stripped. - tags = "contents:fractal:mandelbrot type:figure:surface" + labels = "contents:fractal:mandelbrot type:figure:surface" # Get the output dataset which will be used for storing the figure file(s) output_dataset = analysis.output_manifest.get_dataset("fractal_figure_files") @@ -64,7 +64,7 @@ def fractal(analysis): path="my_mandelbrot_file.json", # File name including extension (and can include subfolders within the dataset) local_path_prefix=output_dataset.path, # TODO set up for the right paths Destination (root of the output dataset folder on the present machine) skip_checks=True, # We haven't created the actual file yet, so it'll definitely fail checks! - tags=tags, + labels=labels, ) # Actually write the contents to the file specified by the Datafile diff --git a/octue/templates/template-using-manifests/app.py b/octue/templates/template-using-manifests/app.py index a9a2796fc..41d5a1e16 100644 --- a/octue/templates/template-using-manifests/app.py +++ b/octue/templates/template-using-manifests/app.py @@ -35,8 +35,8 @@ def run(analysis, *args, **kwargs): # capabilities. Let's get the metadata and the timeseries files, whilst showing off a couple of the filters. # # See the Dataset class help for more. - metadata_file = input_dataset.get_file_by_tag("meta") - timeseries_files = input_dataset.get_file_sequence("tags__contains", filter_value="timeseries") + metadata_file = input_dataset.get_file_by_label("meta") + timeseries_files = input_dataset.get_file_sequence("labels__contains", filter_value="timeseries") # # We used these because they're special helpers - in this case ensuring that there's only one metadata file and # ensuring that the timeseries files come in a strictly ordered sequence. @@ -45,7 +45,7 @@ def run(analysis, *args, **kwargs): # metadata_files = input_dataset.get_files("name__icontains", filter_value="meta") # # There's generally a few ways to do it. Choose one which is likely to be most consistent - for example if your - # filenames might be subject to change, but you have better control over the tags, rely on those. + # filenames might be subject to change, but you have better control over the labels, rely on those. # At this point it's over to you, to do whatever you want with the contents of these files. # For this example app, we will: @@ -63,16 +63,16 @@ def run(analysis, *args, **kwargs): # course, because we haven't done the processing yet)... output_dataset = analysis.output_manifest.get_dataset("cleaned_met_mast_data") - # We'll add tags to the output dataset, which will help to improve searchability and allow + # We'll add labels to the output dataset, which will help to improve searchability and allow # other apps, reports, users and analyses to automatically find figures and # use them. # - # Get descriptive with tags... they are whitespace-delimited and colons can be - # used to provide subtags. Tags are case insensitive, and accept a-z, 0-9, + # Get descriptive with labels... they are whitespace-delimited and colons can be + # used to provide sublabels. Labels are case insensitive, and accept a-z, 0-9, # hyphens and underscores (which can be used literally in search and are also # used to separate words in natural language search). Other special characters # will be stripped. - output_dataset.tags = "met mast cleaned" + output_dataset.labels = "met mast cleaned" # Create a Datafile to hold the concatenated, cleaned output data. We could put it in the current directory # (by leaving local_path_prefix unspecified) but it makes sense to put it in a folder specific to this output @@ -83,7 +83,7 @@ def run(analysis, *args, **kwargs): path="cleaned.csv", path_from=output_dataset, # Tells it where it should be stored, in this case the output dataset folder skip_checks=True, # We haven't created the actual file yet, so checks would definitely fail! - tags="timeseries", + labels="timeseries", ) # Write the file (now we know where to write it) @@ -97,5 +97,5 @@ def run(analysis, *args, **kwargs): # all :) # # If you're running this on your local machine, that's it - but when this code runs as an analysis in the cloud, - # The files in the output manifest are copied into the cloud store. Their names and tags are registered in a search + # The files in the output manifest are copied into the cloud store. Their names and labels are registered in a search # index so your colleagues can find the dataset you've produced. diff --git a/octue/templates/template-using-manifests/data/input/manifest.json b/octue/templates/template-using-manifests/data/input/manifest.json index 7eb991fc4..b22466621 100644 --- a/octue/templates/template-using-manifests/data/input/manifest.json +++ b/octue/templates/template-using-manifests/data/input/manifest.json @@ -7,14 +7,16 @@ { "id": "7ead4669-8162-4f64-8cd5-4abe92509e17", "name": "meteorological mast dataset", - "tags": ["met", "mast", "wind", "location:108346"], + "tags": {}, + "labels": ["met", "mast", "wind", "location:108346"], "files": [ { "path": "08DEC/High Res Meteorological Mast Data - 8 Dec_1.csv", "cluster": 0, "sequence": 0, "extension": "csv", - "tags": ["timeseries"], + "tags": {}, + "labels": ["timeseries"], "timestamp": 1605783547.0, "id": "acff07bc-7c19-4ed5-be6d-a6546eae8e86", "name": "High Res Meteorological Mast Data - 8 Dec_1.csv", @@ -26,7 +28,8 @@ "cluster": 0, "sequence": 1, "extension": "csv", - "tags": ["timeseries"], + "tags": {}, + "labels": ["timeseries"], "timestamp": 1605783547.0, "id": "bdff07bc-7c19-4ed5-be6d-a6546eae8e45", "name": "High Res Meteorological Mast Data - 8 Dec_2.csv", @@ -38,7 +41,8 @@ "cluster": 1, "sequence": 0, "extension": "dat", - "tags": ["meta"], + "tags": {}, + "labels": ["meta"], "timestamp": 1605783547.0, "id": "ceff07bc-7c19-4ed5-be6d-a6546eae8e86", "name": "meta - 8 Dec_1.da", diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index 14242c99c..8c4e7a6fa 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -1,6 +1,6 @@ from octue import exceptions from octue.mixins.filterable import Filterable -from octue.resources.tag import TagSet +from octue.resources.label import LabelSet from tests.base import BaseTestCase @@ -124,21 +124,21 @@ def test_iterable_filters(self): self.assertTrue(filterable_thing.satisfies("iterable__is_not", None)) self.assertFalse(filterable_thing.satisfies("iterable__is_not", iterable)) - def test_tag_set_filters(self): - """ Test the filters for TagSet. """ - filterable_thing = FilterableSubclass(iterable=TagSet({"fred", "charlie"})) - self.assertTrue(filterable_thing.satisfies("iterable__any_tag_contains", "a")) - self.assertFalse(filterable_thing.satisfies("iterable__any_tag_contains", "z")) - self.assertTrue(filterable_thing.satisfies("iterable__not_any_tag_contains", "z")) - self.assertFalse(filterable_thing.satisfies("iterable__not_any_tag_contains", "a")) - self.assertTrue(filterable_thing.satisfies("iterable__any_tag_starts_with", "f")) - self.assertFalse(filterable_thing.satisfies("iterable__any_tag_starts_with", "e")) - self.assertTrue(filterable_thing.satisfies("iterable__any_tag_ends_with", "e")) - self.assertFalse(filterable_thing.satisfies("iterable__any_tag_ends_with", "i")) - self.assertTrue(filterable_thing.satisfies("iterable__not_any_tag_starts_with", "e")) - self.assertFalse(filterable_thing.satisfies("iterable__not_any_tag_starts_with", "f")) - self.assertTrue(filterable_thing.satisfies("iterable__not_any_tag_ends_with", "i")) - self.assertFalse(filterable_thing.satisfies("iterable__not_any_tag_ends_with", "e")) + def test_label_set_filters(self): + """ Test the filters for Labelset. """ + filterable_thing = FilterableSubclass(iterable=LabelSet({"fred", "charlie"})) + self.assertTrue(filterable_thing.satisfies("iterable__any_label_contains", "a")) + self.assertFalse(filterable_thing.satisfies("iterable__any_label_contains", "z")) + self.assertTrue(filterable_thing.satisfies("iterable__not_any_label_contains", "z")) + self.assertFalse(filterable_thing.satisfies("iterable__not_any_label_contains", "a")) + self.assertTrue(filterable_thing.satisfies("iterable__any_label_starts_with", "f")) + self.assertFalse(filterable_thing.satisfies("iterable__any_label_starts_with", "e")) + self.assertTrue(filterable_thing.satisfies("iterable__any_label_ends_with", "e")) + self.assertFalse(filterable_thing.satisfies("iterable__any_label_ends_with", "i")) + self.assertTrue(filterable_thing.satisfies("iterable__not_any_label_starts_with", "e")) + self.assertFalse(filterable_thing.satisfies("iterable__not_any_label_starts_with", "f")) + self.assertTrue(filterable_thing.satisfies("iterable__not_any_label_ends_with", "i")) + self.assertFalse(filterable_thing.satisfies("iterable__not_any_label_ends_with", "e")) def test_filtering_different_attributes_on_same_instance(self): """ Ensure all filterable attributes on an instance can be checked for filter satisfaction. """ diff --git a/tests/mixins/test_labellable.py b/tests/mixins/test_labellable.py new file mode 100644 index 000000000..f2974efe3 --- /dev/null +++ b/tests/mixins/test_labellable.py @@ -0,0 +1,87 @@ +from octue import exceptions +from octue.mixins import Labelable, MixinBase +from octue.resources.label import Label, LabelSet +from ..base import BaseTestCase + + +class MyLabelable(Labelable, MixinBase): + pass + + +class LabelableTestCase(BaseTestCase): + def test_instantiates(self): + """Ensures the class instantiates without arguments""" + Labelable() + + def test_instantiates_with_labels(self): + """Ensures datafile inherits correctly from the Labelable class and passes arguments through""" + labelable = MyLabelable(labels="") + self.assertEqual(len(labelable.labels), 0) + + labelable = MyLabelable(labels=None) + self.assertEqual(len(labelable.labels), 0) + + labelable = MyLabelable(labels="a b c") + self.assertEqual(set(labelable.labels), {Label("a"), Label("b"), Label("c")}) + + def test_instantiates_with_label_set(self): + """Ensures datafile inherits correctly from the Labelable class and passes arguments through""" + labelable_1 = MyLabelable(labels="") + self.assertIsInstance(labelable_1.labels, LabelSet) + labelable_2 = MyLabelable(labels=labelable_1.labels) + self.assertFalse(labelable_1 is labelable_2) + + def test_fails_to_instantiates_with_non_iterable(self): + """Ensures datafile inherits correctly from the Labelable class and passes arguments through""" + + class NoIter: + pass + + with self.assertRaises(exceptions.InvalidLabelException) as error: + MyLabelable(labels=NoIter()) + + self.assertIn( + "Labels must be expressed as a whitespace-delimited string or an iterable of strings", + error.exception.args[0], + ) + + def test_reset_labels(self): + """Ensures datafile inherits correctly from the Labelable class and passes arguments through""" + labelable = MyLabelable(labels="a b") + labelable.labels = "b c" + self.assertEqual(set(labelable.labels), {Label("b"), Label("c")}) + + def test_valid_labels(self): + """Ensures valid labels do not raise an error""" + labelable = MyLabelable() + labelable.add_labels("a-valid-label") + labelable.add_labels("a:label") + labelable.add_labels("a:-label") # <--- yes, this is valid deliberately as it allows people to do negation + labelable.add_labels("a1829tag") + labelable.add_labels("1829") + labelable.add_labels("number:1829") + labelable.add_labels("multiple:discriminators:used") + self.assertEqual( + set(labelable.labels), + { + Label("a-valid-label"), + Label("a:label"), + Label("a:-label"), + Label("a1829tag"), + Label("1829"), + Label("number:1829"), + Label("multiple:discriminators:used"), + }, + ) + + def test_mixture_valid_invalid(self): + """Ensures that adding a variety of labels, some of which are invalid, doesn't partially add them to the object""" + labelable = MyLabelable() + labelable.add_labels("first-valid-should-be-added") + try: + labelable.add_labels("second-valid-should-not-be-added-because", "-the-third-is-invalid:") + + except exceptions.InvalidLabelException: + pass + + self.assertEqual({Label("first-valid-should-be-added")}, set(labelable.labels)) diff --git a/tests/mixins/test_taggable.py b/tests/mixins/test_taggable.py deleted file mode 100644 index 597b17757..000000000 --- a/tests/mixins/test_taggable.py +++ /dev/null @@ -1,86 +0,0 @@ -from octue import exceptions -from octue.mixins import MixinBase, Taggable -from octue.resources.tag import Tag, TagSet -from ..base import BaseTestCase - - -class MyTaggable(Taggable, MixinBase): - pass - - -class TaggableTestCase(BaseTestCase): - def test_instantiates(self): - """Ensures the class instantiates without arguments""" - Taggable() - - def test_instantiates_with_tags(self): - """Ensures datafile inherits correctly from the Taggable class and passes arguments through""" - taggable = MyTaggable(tags="") - self.assertEqual(len(taggable.tags), 0) - - taggable = MyTaggable(tags=None) - self.assertEqual(len(taggable.tags), 0) - - taggable = MyTaggable(tags="a b c") - self.assertEqual(set(taggable.tags), {Tag("a"), Tag("b"), Tag("c")}) - - def test_instantiates_with_tag_set(self): - """Ensures datafile inherits correctly from the Taggable class and passes arguments through""" - taggable_1 = MyTaggable(tags="") - self.assertIsInstance(taggable_1.tags, TagSet) - taggable_2 = MyTaggable(tags=taggable_1.tags) - self.assertFalse(taggable_1 is taggable_2) - - def test_fails_to_instantiates_with_non_iterable(self): - """Ensures datafile inherits correctly from the Taggable class and passes arguments through""" - - class NoIter: - pass - - with self.assertRaises(exceptions.InvalidTagException) as error: - MyTaggable(tags=NoIter()) - - self.assertIn( - "Tags must be expressed as a whitespace-delimited string or an iterable of strings", error.exception.args[0] - ) - - def test_reset_tags(self): - """Ensures datafile inherits correctly from the Taggable class and passes arguments through""" - taggable = MyTaggable(tags="a b") - taggable.tags = "b c" - self.assertEqual(set(taggable.tags), {Tag("b"), Tag("c")}) - - def test_valid_tags(self): - """Ensures valid tags do not raise an error""" - taggable = MyTaggable() - taggable.add_tags("a-valid-tag") - taggable.add_tags("a:tag") - taggable.add_tags("a:-tag") # <--- yes, this is valid deliberately as it allows people to do negation - taggable.add_tags("a1829tag") - taggable.add_tags("1829") - taggable.add_tags("number:1829") - taggable.add_tags("multiple:discriminators:used") - self.assertEqual( - set(taggable.tags), - { - Tag("a-valid-tag"), - Tag("a:tag"), - Tag("a:-tag"), - Tag("a1829tag"), - Tag("1829"), - Tag("number:1829"), - Tag("multiple:discriminators:used"), - }, - ) - - def test_mixture_valid_invalid(self): - """Ensures that adding a variety of tags, some of which are invalid, doesn't partially add them to the object""" - taggable = MyTaggable() - taggable.add_tags("first-valid-should-be-added") - try: - taggable.add_tags("second-valid-should-not-be-added-because", "-the-third-is-invalid:") - - except exceptions.InvalidTagException: - pass - - self.assertEqual({Tag("first-valid-should-be-added")}, set(taggable.tags)) diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index c08012cec..b34cb5d27 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -10,7 +10,7 @@ from octue.cloud.storage import GoogleCloudStorageClient from octue.mixins import MixinBase, Pathable from octue.resources.datafile import TEMPORARY_LOCAL_FILE_CACHE, Datafile -from octue.resources.tag import TagSet +from octue.resources.label import LabelSet from tests import TEST_BUCKET_NAME, TEST_PROJECT_NAME from ..base import BaseTestCase @@ -150,7 +150,7 @@ def test_serialisable(self): "path", "timestamp", "sequence", - "tags", + "labels", "_cloud_metadata", } @@ -190,7 +190,7 @@ def test_from_cloud_with_bare_file(self): self.assertEqual(datafile.path, f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}") self.assertEqual(datafile.cluster, 0) self.assertEqual(datafile.sequence, None) - self.assertEqual(datafile.tags, TagSet()) + self.assertEqual(datafile.labels, LabelSet()) self.assertTrue(isinstance(datafile.size_bytes, int)) self.assertTrue(isinstance(datafile._last_modified, float)) self.assertTrue(isinstance(datafile.hash_value, str)) @@ -201,7 +201,7 @@ def test_from_cloud_with_datafile(self): timestamp=datetime.now(tz=timezone.utc), cluster=0, sequence=1, - tags={"blah:shah:nah", "blib", "glib"}, + labels={"blah:shah:nah", "blib", "glib"}, ) downloaded_datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) @@ -212,7 +212,7 @@ def test_from_cloud_with_datafile(self): self.assertEqual(downloaded_datafile.hash_value, datafile.hash_value) self.assertEqual(downloaded_datafile.cluster, datafile.cluster) self.assertEqual(downloaded_datafile.sequence, datafile.sequence) - self.assertEqual(downloaded_datafile.tags, datafile.tags) + self.assertEqual(downloaded_datafile.labels, datafile.labels) self.assertEqual(downloaded_datafile.size_bytes, datafile.size_bytes) self.assertTrue(isinstance(downloaded_datafile._last_modified, float)) @@ -500,12 +500,12 @@ def test_from_datafile_as_context_manager(self): self.assertNotEqual(original_content, new_contents) with Datafile.from_cloud(project_name, bucket_name, path_in_bucket, mode="w") as (datafile, f): - datafile.add_tags("blue") + datafile.add_labels("blue") f.write(new_contents) # Check that the cloud metadata has been updated. re_downloaded_datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) - self.assertTrue("blue" in re_downloaded_datafile.tags) + self.assertTrue("blue" in re_downloaded_datafile.labels) # The file cache must be cleared so the modified cloud file is downloaded. re_downloaded_datafile.clear_from_file_cache() diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index 96660f1df..028a1dbaa 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -21,7 +21,7 @@ def test_instantiates_with_no_args(self): def test_instantiates_with_kwargs(self): """Ensures that keyword arguments can be used to construct the dataset initially""" files = [Datafile(path="path-within-dataset/a_test_file.csv")] - resource = Dataset(files=files, tags="one two") + resource = Dataset(files=files, labels="one two") self.assertEqual(len(resource.files), 1) def test_len(self): @@ -55,7 +55,7 @@ def test_add_single_file_to_empty_dataset(self): def test_add_single_file_to_existing_dataset(self): """Ensures that when a dataset is not empty, it can be added to""" files = [Datafile(path="path-within-dataset/a_test_file.csv")] - resource = Dataset(files=files, tags="one two") + resource = Dataset(files=files, labels="one two") resource.add(Datafile(path="path-within-dataset/a_test_file.csv")) self.assertEqual(len(resource.files), 2) @@ -149,53 +149,53 @@ def test_filter_name_with(self): files = resource.files.filter("name__ends_with", filter_value="other.csv") self.assertEqual(0, len(files)) - def test_filter_by_tag(self): - """Ensures that filter works with tag lookups""" + def test_filter_by_label(self): + """Ensures that filter works with label lookups""" resource = Dataset( files=[ - Datafile(path="path-within-dataset/a_my_file.csv", tags="one a:2 b:3 all"), - Datafile(path="path-within-dataset/a_your_file.csv", tags="two a:2 b:3 all"), - Datafile(path="path-within-dataset/a_your_file.csv", tags="three all"), + Datafile(path="path-within-dataset/a_my_file.csv", labels="one a:2 b:3 all"), + Datafile(path="path-within-dataset/a_your_file.csv", labels="two a:2 b:3 all"), + Datafile(path="path-within-dataset/a_your_file.csv", labels="three all"), ] ) - files = resource.files.filter("tags__contains", filter_value="a") + files = resource.files.filter("labels__contains", filter_value="a") self.assertEqual(0, len(files)) - files = resource.files.filter("tags__contains", filter_value="one") + files = resource.files.filter("labels__contains", filter_value="one") self.assertEqual(1, len(files)) - files = resource.files.filter("tags__contains", filter_value="all") + files = resource.files.filter("labels__contains", filter_value="all") self.assertEqual(3, len(files)) - files = resource.files.filter("tags__any_tag_starts_with", filter_value="b") + files = resource.files.filter("labels__any_label_starts_with", filter_value="b") self.assertEqual(2, len(files)) - files = resource.files.filter("tags__any_tag_ends_with", filter_value="3") + files = resource.files.filter("labels__any_label_ends_with", filter_value="3") self.assertEqual(2, len(files)) - # files = resource.files.filter("tags__contains", filter_value="hre") + # files = resource.files.filter("labels__contains", filter_value="hre") # self.assertEqual(1, len(files)) - def test_get_file_by_tag(self): - """Ensures that get_files works with tag lookups""" + def test_get_file_by_label(self): + """Ensures that get_files works with label lookups""" files = [ - Datafile(path="path-within-dataset/a_my_file.csv", tags="one a:2 b:3 all"), - Datafile(path="path-within-dataset/a_your_file.csv", tags="two a:2 b:3 all"), - Datafile(path="path-within-dataset/a_your_file.csv", tags="three all"), + Datafile(path="path-within-dataset/a_my_file.csv", labels="one a:2 b:3 all"), + Datafile(path="path-within-dataset/a_your_file.csv", labels="two a:2 b:3 all"), + Datafile(path="path-within-dataset/a_your_file.csv", labels="three all"), ] resource = Dataset(files=files) # Check working for single result - self.assertIs(resource.get_file_by_tag("three"), files[2]) + self.assertIs(resource.get_file_by_label("three"), files[2]) # Check raises for too many results with self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: - resource.get_file_by_tag("all") + resource.get_file_by_label("all") self.assertIn("More than one result found", e.exception.args[0]) # Check raises for no result with self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: - resource.get_file_by_tag("billyjeanisnotmylover") + resource.get_file_by_label("billyjeanisnotmylover") - self.assertIn("No files found with this tag", e.exception.args[0]) + self.assertIn("No files found with this label", e.exception.args[0]) def test_filter_by_sequence_not_none(self): """Ensures that filter works with sequence lookups""" @@ -316,8 +316,8 @@ def test_from_cloud(self): dataset = Dataset( name="dataset_0", files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), + Datafile(path=file_0_path, sequence=0, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), }, ) @@ -333,7 +333,7 @@ def test_from_cloud(self): self.assertEqual(persisted_dataset.id, dataset.id) self.assertEqual(persisted_dataset.name, dataset.name) self.assertEqual(persisted_dataset.hash_value, dataset.hash_value) - self.assertEqual(persisted_dataset.tags, dataset.tags) + self.assertEqual(persisted_dataset.labels, dataset.labels) self.assertEqual({file.name for file in persisted_dataset.files}, {file.name for file in dataset.files}) for file in persisted_dataset: @@ -358,8 +358,8 @@ def test_to_cloud(self): dataset = Dataset( files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), + Datafile(path=file_0_path, sequence=0, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), } ) diff --git a/tests/resources/test_label.py b/tests/resources/test_label.py new file mode 100644 index 000000000..d512f5cec --- /dev/null +++ b/tests/resources/test_label.py @@ -0,0 +1,204 @@ +from octue import exceptions +from octue.resources.filter_containers import FilterList, FilterSet +from octue.resources.label import Label, LabelSet +from tests.base import BaseTestCase + + +class TestLabel(BaseTestCase): + def test_invalid_labels_cause_error(self): + """Test that invalid labels cause an error to be raised.""" + for label in ":a", "@", "a_b", "-bah", "humbug:", r"back\slashy", {"not-a": "string"}, "/a", "a/", "blah:3.5.": + with self.assertRaises(exceptions.InvalidLabelException): + Label(label) + + def test_valid_labels(self): + """Test that valid labels instantiate as expected.""" + for label in "hello", "hello:world", "hello-world:goodbye", "HELLO-WORLD", "Asia/Pacific", "blah:3.5": + Label(label) + + def test_sublabels(self): + """ Test that sublabels are correctly parsed from labels. """ + self.assertEqual(Label("a:b:c").sublabels, FilterList([Label("a"), Label("b"), Label("c")])) + + def test_label_comparison(self): + """ Test that labels can be alphabetically compared. """ + self.assertTrue(Label("a") < Label("b")) + self.assertTrue(Label("b") > Label("a")) + self.assertTrue(Label("a") != Label("b")) + self.assertTrue(Label("a") == Label("a")) + + def test_label_comparison_with_strings(self): + """ Test that labels can be alphabetically compared with strings in both directions. """ + self.assertTrue(Label("a") < "b") + self.assertTrue(Label("b") > "a") + self.assertTrue(Label("a") != "b") + self.assertTrue(Label("a") == "a") + self.assertTrue("b" > Label("a")) + self.assertTrue("a" < Label("b")) + self.assertTrue("b" != Label("a")) + self.assertTrue("a" == Label("a")) + + def test_labels_compare_unequal_to_non_str_or_label_types(self): + """ Test that comparing for equality a Label with a non-string-or-Label type returns False. """ + self.assertFalse(Label("a") == 1) + self.assertTrue(Label("a") != 1) + + def test_contains(self): + """ Test that labels can be checked for containment. """ + self.assertIn("e", Label("hello")) + + def test_starts_with(self): + """ Test that the start of a label can be checked. """ + self.assertTrue(Label("hello").starts_with("h")) + self.assertFalse(Label("hello").starts_with("e")) + + def test_sublabels_starts_with(self): + """ Test that the start of sublabels can be checked. """ + self.assertTrue(LabelSet(Label("hello:world").sublabels).any_label_starts_with("w")) + self.assertFalse(LabelSet(Label("hello:world").sublabels).any_label_starts_with("e")) + + def test_ends_with(self): + """ Test that the end of a label can be checked. """ + self.assertTrue(Label("hello").ends_with("o")) + self.assertFalse(Label("hello").ends_with("e")) + + def test_sublabels_ends_with(self): + """ Test that the end of sublabels can be checked. """ + self.assertTrue(LabelSet(Label("hello:world").sublabels).any_label_ends_with("o")) + self.assertFalse(LabelSet(Label("hello:world").sublabels).any_label_ends_with("e")) + + +class TestLabelSet(BaseTestCase): + LABEL_SET = LabelSet(labels="a b:c d:e:f") + + def test_instantiation_from_space_delimited_string(self): + """ Test that a LabelSet can be instantiated from a space-delimited string of label names.""" + label_set = LabelSet(labels="a b:c d:e:f") + self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + + def test_instantiation_from_iterable_of_strings(self): + """ Test that a LabelSet can be instantiated from an iterable of strings.""" + label_set = LabelSet(labels=["a", "b:c", "d:e:f"]) + self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + + def test_instantiation_from_iterable_of_labels(self): + """ Test that a LabelSet can be instantiated from an iterable of labels.""" + label_set = LabelSet(labels=[Label("a"), Label("b:c"), Label("d:e:f")]) + self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + + def test_instantiation_from_filter_set_of_strings(self): + """ Test that a LabelSet can be instantiated from a FilterSet of strings.""" + label_set = LabelSet(labels=FilterSet({"a", "b:c", "d:e:f"})) + self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + + def test_instantiation_from_filter_set_of_labels(self): + """ Test that a LabelSet can be instantiated from a FilterSet of labels.""" + label_set = LabelSet(labels=FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + + def test_instantiation_from_label_set(self): + """ Test that a LabelSet can be instantiated from another LabelSet. """ + self.assertEqual(self.LABEL_SET, LabelSet(self.LABEL_SET)) + + def test_equality(self): + """ Ensure two LabelSets with the same labels compare equal. """ + self.assertTrue(self.LABEL_SET == LabelSet(labels="a b:c d:e:f")) + + def test_inequality(self): + """ Ensure two LabelSets with different labels compare unequal. """ + self.assertTrue(self.LABEL_SET != LabelSet(labels="a")) + + def test_non_label_sets_compare_unequal_to_label_sets(self): + """ Ensure a LabelSet and a non-LabelSet compare unequal. """ + self.assertFalse(self.LABEL_SET == "a") + self.assertTrue(self.LABEL_SET != "a") + + def test_iterating_over(self): + """ Ensure a LabelSet can be iterated over. """ + self.assertEqual(set(self.LABEL_SET), {Label("a"), Label("b:c"), Label("d:e:f")}) + + def test_contains_with_string(self): + """ Ensure we can check that a LabelSet has a certain label using a string form. """ + self.assertTrue("d:e:f" in self.LABEL_SET) + self.assertFalse("hello" in self.LABEL_SET) + + def test_contains_with_label(self): + """ Ensure we can check that a LabelSet has a certain label. """ + self.assertTrue(Label("d:e:f") in self.LABEL_SET) + self.assertFalse(Label("hello") in self.LABEL_SET) + + def test_contains_only_matches_full_labels(self): + """ Test that the has_label method only matches full labels (i.e. that it doesn't match sublabels or parts of labels.""" + for label in "a", "b:c", "d:e:f": + self.assertTrue(label in self.LABEL_SET) + + for label in "b", "c", "d", "e", "f": + self.assertFalse(label in self.LABEL_SET) + + def test_get_sublabels(self): + """ Test sublabels can be accessed as a new LabelSet. """ + self.assertEqual(LabelSet("meta:sys2:3456 blah").get_sublabels(), LabelSet("meta sys2 3456 blah")) + + def test_any_label_starts_with(self): + """ Ensure starts_with only checks the starts of labels, and doesn't check the starts of sublabels. """ + for label in "a", "b", "d": + self.assertTrue(self.LABEL_SET.any_label_starts_with(label)) + + for label in "c", "e", "f": + self.assertFalse(self.LABEL_SET.any_label_starts_with(label)) + + def test_any_label_ends_swith(self): + """ Ensure ends_with doesn't check ends of sublabels. """ + for label in "a", "c", "f": + self.assertTrue(self.LABEL_SET.any_label_ends_with(label)) + + for label in "b", "d", "e": + self.assertFalse(self.LABEL_SET.any_label_ends_with(label)) + + def test_any_label_contains_searches_for_labels_and_sublabels(self): + """ Ensure labels and sublabels can be searched for. """ + for label in "a", "b", "d": + self.assertTrue(self.LABEL_SET.any_label_contains(label)) + + for sublabel in "c", "e", "f": + self.assertTrue(self.LABEL_SET.any_label_contains(sublabel)) + + def test_filter(self): + """ Test that label sets can be filtered. """ + label_set = LabelSet(labels="label1 label2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") + self.assertEqual( + label_set.labels.filter("name__starts_with", "meta"), + FilterSet({Label("meta:sys1:1234"), Label("meta:sys2:3456"), Label("meta:sys2:55")}), + ) + + def test_filter_chaining(self): + """ Test that filters can be chained. """ + label_set = LabelSet(labels="label1 label2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") + + filtered_labels_1 = label_set.labels.filter("name__starts_with", "meta") + self.assertEqual(filtered_labels_1, LabelSet("meta:sys1:1234 meta:sys2:3456 meta:sys2:55").labels) + + filtered_labels_2 = filtered_labels_1.filter("name__contains", "sys2") + self.assertEqual(filtered_labels_2, LabelSet("meta:sys2:3456 meta:sys2:55").labels) + + filtered_labels_3 = filtered_labels_1.filter("name__equals", "meta:sys2:55") + self.assertEqual(filtered_labels_3, LabelSet("meta:sys2:55").labels) + + def test_serialise(self): + """ Ensure that LabelSets are serialised to the string form of a list. """ + self.assertEqual(self.LABEL_SET.serialise(), ["a", "b:c", "d:e:f"]) + + def test_serialise_orders_labels(self): + """Ensure that LabelSets serialise to a list.""" + label_set = LabelSet("z hello a c:no") + self.assertEqual(label_set.serialise(), ["a", "c:no", "hello", "z"]) + + def test_deserialise(self): + """Test that serialisation is reversible.""" + serialised_label_set = self.LABEL_SET.serialise() + deserialised_label_set = LabelSet.deserialise(serialised_label_set) + self.assertEqual(deserialised_label_set, self.LABEL_SET) + + def test_repr(self): + """Test the representation of a LabelSet appears as expected.""" + self.assertEqual(repr(self.LABEL_SET), f"") diff --git a/tests/resources/test_manifest.py b/tests/resources/test_manifest.py index 3ae3c6acd..e6b394da0 100644 --- a/tests/resources/test_manifest.py +++ b/tests/resources/test_manifest.py @@ -70,8 +70,8 @@ def test_to_cloud(self): dataset = Dataset( name="my-dataset", files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), + Datafile(path=file_0_path, sequence=0, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), }, ) @@ -109,8 +109,8 @@ def test_to_cloud_without_storing_datasets(self): name="my-dataset", path=temporary_directory, files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), + Datafile(path=file_0_path, sequence=0, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), }, ) @@ -148,8 +148,8 @@ def test_from_cloud(self): dataset = Dataset( name="my-dataset", files={ - Datafile(path=file_0_path, sequence=0, tags={"hello"}), - Datafile(path=file_1_path, sequence=1, tags={"goodbye"}), + Datafile(path=file_0_path, sequence=0, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), }, ) diff --git a/tests/resources/test_tag.py b/tests/resources/test_tag.py deleted file mode 100644 index 84578e099..000000000 --- a/tests/resources/test_tag.py +++ /dev/null @@ -1,204 +0,0 @@ -from octue import exceptions -from octue.resources.filter_containers import FilterList, FilterSet -from octue.resources.tag import Tag, TagSet -from tests.base import BaseTestCase - - -class TestTag(BaseTestCase): - def test_invalid_tags_cause_error(self): - """Test that invalid tags cause an error to be raised.""" - for tag in ":a", "@", "a_b", "-bah", "humbug:", r"back\slashy", {"not-a": "string"}, "/a", "a/", "blah:3.5.": - with self.assertRaises(exceptions.InvalidTagException): - Tag(tag) - - def test_valid_tags(self): - """Test that valid tags instantiate as expected.""" - for tag in "hello", "hello:world", "hello-world:goodbye", "HELLO-WORLD", "Asia/Pacific", "blah:3.5": - Tag(tag) - - def test_subtags(self): - """ Test that subtags are correctly parsed from tags. """ - self.assertEqual(Tag("a:b:c").subtags, FilterList([Tag("a"), Tag("b"), Tag("c")])) - - def test_tag_comparison(self): - """ Test that tags can be alphabetically compared. """ - self.assertTrue(Tag("a") < Tag("b")) - self.assertTrue(Tag("b") > Tag("a")) - self.assertTrue(Tag("a") != Tag("b")) - self.assertTrue(Tag("a") == Tag("a")) - - def test_tag_comparison_with_strings(self): - """ Test that tags can be alphabetically compared with strings in both directions. """ - self.assertTrue(Tag("a") < "b") - self.assertTrue(Tag("b") > "a") - self.assertTrue(Tag("a") != "b") - self.assertTrue(Tag("a") == "a") - self.assertTrue("b" > Tag("a")) - self.assertTrue("a" < Tag("b")) - self.assertTrue("b" != Tag("a")) - self.assertTrue("a" == Tag("a")) - - def test_tags_compare_unequal_to_non_str_or_tag_types(self): - """ Test that comparing for equality a Tag with a non-string-or-Tag type returns False. """ - self.assertFalse(Tag("a") == 1) - self.assertTrue(Tag("a") != 1) - - def test_contains(self): - """ Test that tags can be checked for containment. """ - self.assertIn("e", Tag("hello")) - - def test_starts_with(self): - """ Test that the start of a tag can be checked. """ - self.assertTrue(Tag("hello").starts_with("h")) - self.assertFalse(Tag("hello").starts_with("e")) - - def test_subtags_starts_with(self): - """ Test that the start of subtags can be checked. """ - self.assertTrue(TagSet(Tag("hello:world").subtags).any_tag_starts_with("w")) - self.assertFalse(TagSet(Tag("hello:world").subtags).any_tag_starts_with("e")) - - def test_ends_with(self): - """ Test that the end of a tag can be checked. """ - self.assertTrue(Tag("hello").ends_with("o")) - self.assertFalse(Tag("hello").ends_with("e")) - - def test_subtags_ends_with(self): - """ Test that the end of subtags can be checked. """ - self.assertTrue(TagSet(Tag("hello:world").subtags).any_tag_ends_with("o")) - self.assertFalse(TagSet(Tag("hello:world").subtags).any_tag_ends_with("e")) - - -class TestTagSet(BaseTestCase): - TAG_SET = TagSet(tags="a b:c d:e:f") - - def test_instantiation_from_space_delimited_string(self): - """ Test that a TagSet can be instantiated from a space-delimited string of tag names.""" - tag_set = TagSet(tags="a b:c d:e:f") - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - - def test_instantiation_from_iterable_of_strings(self): - """ Test that a TagSet can be instantiated from an iterable of strings.""" - tag_set = TagSet(tags=["a", "b:c", "d:e:f"]) - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - - def test_instantiation_from_iterable_of_tags(self): - """ Test that a TagSet can be instantiated from an iterable of Tags.""" - tag_set = TagSet(tags=[Tag("a"), Tag("b:c"), Tag("d:e:f")]) - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - - def test_instantiation_from_filter_set_of_strings(self): - """ Test that a TagSet can be instantiated from a FilterSet of strings.""" - tag_set = TagSet(tags=FilterSet({"a", "b:c", "d:e:f"})) - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - - def test_instantiation_from_filter_set_of_tags(self): - """ Test that a TagSet can be instantiated from a FilterSet of Tags.""" - tag_set = TagSet(tags=FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - self.assertEqual(tag_set.tags, FilterSet({Tag("a"), Tag("b:c"), Tag("d:e:f")})) - - def test_instantiation_from_tag_set(self): - """ Test that a TagSet can be instantiated from another TagSet. """ - self.assertEqual(self.TAG_SET, TagSet(self.TAG_SET)) - - def test_equality(self): - """ Ensure two TagSets with the same tags compare equal. """ - self.assertTrue(self.TAG_SET == TagSet(tags="a b:c d:e:f")) - - def test_inequality(self): - """ Ensure two TagSets with different tags compare unequal. """ - self.assertTrue(self.TAG_SET != TagSet(tags="a")) - - def test_non_tag_sets_compare_unequal_to_tag_sets(self): - """ Ensure a TagSet and a non-TagSet compare unequal. """ - self.assertFalse(self.TAG_SET == "a") - self.assertTrue(self.TAG_SET != "a") - - def test_iterating_over(self): - """ Ensure a TagSet can be iterated over. """ - self.assertEqual(set(self.TAG_SET), {Tag("a"), Tag("b:c"), Tag("d:e:f")}) - - def test_contains_with_string(self): - """ Ensure we can check that a TagSet has a certain tag using a string form. """ - self.assertTrue("d:e:f" in self.TAG_SET) - self.assertFalse("hello" in self.TAG_SET) - - def test_contains_with_tag(self): - """ Ensure we can check that a TagSet has a certain tag. """ - self.assertTrue(Tag("d:e:f") in self.TAG_SET) - self.assertFalse(Tag("hello") in self.TAG_SET) - - def test_contains_only_matches_full_tags(self): - """ Test that the has_tag method only matches full tags (i.e. that it doesn't match subtags or parts of tags.""" - for tag in "a", "b:c", "d:e:f": - self.assertTrue(tag in self.TAG_SET) - - for tag in "b", "c", "d", "e", "f": - self.assertFalse(tag in self.TAG_SET) - - def test_get_subtags(self): - """ Test subtags can be accessed as a new TagSet. """ - self.assertEqual(TagSet("meta:sys2:3456 blah").get_subtags(), TagSet("meta sys2 3456 blah")) - - def test_any_tag_starts_with(self): - """ Ensure starts_with only checks the starts of tags, and doesn't check the starts of subtags. """ - for tag in "a", "b", "d": - self.assertTrue(self.TAG_SET.any_tag_starts_with(tag)) - - for tag in "c", "e", "f": - self.assertFalse(self.TAG_SET.any_tag_starts_with(tag)) - - def test_any_tag_ends_swith(self): - """ Ensure ends_with doesn't check ends of subtags. """ - for tag in "a", "c", "f": - self.assertTrue(self.TAG_SET.any_tag_ends_with(tag)) - - for tag in "b", "d", "e": - self.assertFalse(self.TAG_SET.any_tag_ends_with(tag)) - - def test_any_tag_contains_searches_for_tags_and_subtags(self): - """ Ensure tags and subtags can be searched for. """ - for tag in "a", "b", "d": - self.assertTrue(self.TAG_SET.any_tag_contains(tag)) - - for subtag in "c", "e", "f": - self.assertTrue(self.TAG_SET.any_tag_contains(subtag)) - - def test_filter(self): - """ Test that tag sets can be filtered. """ - tag_set = TagSet(tags="tag1 tag2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") - self.assertEqual( - tag_set.tags.filter("name__starts_with", "meta"), - FilterSet({Tag("meta:sys1:1234"), Tag("meta:sys2:3456"), Tag("meta:sys2:55")}), - ) - - def test_filter_chaining(self): - """ Test that filters can be chained. """ - tag_set = TagSet(tags="tag1 tag2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") - - filtered_tags_1 = tag_set.tags.filter("name__starts_with", "meta") - self.assertEqual(filtered_tags_1, TagSet("meta:sys1:1234 meta:sys2:3456 meta:sys2:55").tags) - - filtered_tags_2 = filtered_tags_1.filter("name__contains", "sys2") - self.assertEqual(filtered_tags_2, TagSet("meta:sys2:3456 meta:sys2:55").tags) - - filtered_tags_3 = filtered_tags_1.filter("name__equals", "meta:sys2:55") - self.assertEqual(filtered_tags_3, TagSet("meta:sys2:55").tags) - - def test_serialise(self): - """ Ensure that TagSets are serialised to the string form of a list. """ - self.assertEqual(self.TAG_SET.serialise(), ["a", "b:c", "d:e:f"]) - - def test_serialise_orders_tags(self): - """Ensure that TagSets serialise to a list.""" - tag_set = TagSet("z hello a c:no") - self.assertEqual(tag_set.serialise(), ["a", "c:no", "hello", "z"]) - - def test_deserialise(self): - """Test that serialisation is reversible.""" - serialised_tag_set = self.TAG_SET.serialise() - deserialised_tag_set = TagSet.deserialise(serialised_tag_set) - self.assertEqual(deserialised_tag_set, self.TAG_SET) - - def test_repr(self): - """Test the representation of a TagSet appears as expected.""" - self.assertEqual(repr(self.TAG_SET), f"") From ae8f0ef7226bde1f3f23271ef4e843ca107adde9 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Thu, 13 May 2021 21:13:09 +0100 Subject: [PATCH 007/103] REF: Change filtering syntax to filter_name=value --- octue/mixins/filterable.py | 14 +- octue/resources/dataset.py | 14 +- octue/resources/filter_containers.py | 8 +- .../templates/template-using-manifests/app.py | 2 +- tests/mixins/test_filterable.py | 178 +++++++++--------- tests/resources/test_dataset.py | 70 +++---- tests/resources/test_label.py | 8 +- 7 files changed, 142 insertions(+), 152 deletions(-) diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index b3a039f9c..b4ec103d6 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -75,8 +75,18 @@ class Filterable: - def satisfies(self, filter_name, filter_value): - """ Check that the instance satisfies the given filter for the given filter value. """ + def satisfies(self, **kwargs): + """Check that the instance satisfies the given filter for the given filter value. + + :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whos value is the + value to filter for + :return mixed: + """ + if len(kwargs) != 1: + raise ValueError(f"The satisfies method only takes one keyword argument; received {kwargs!r}.") + + filter_name, filter_value = list(kwargs.items())[0] + attribute_name, filter_action = self._split_filter_name(filter_name) try: diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index 07fa2c6e5..3b4f37ab2 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -168,7 +168,7 @@ def append(self, *args, **kwargs): ) self.files.add(*args, **kwargs) - def get_files(self, field_lookup, filter_value=None): + def get_files(self, **kwargs): warnings.warn( "The `Dataset.get_files` method has been deprecated and replaced with `Dataset.files.filter`, which has " "the same interface but with the `field_lookup` argument renamed to `filter_name`. Calls to " @@ -176,9 +176,9 @@ def get_files(self, field_lookup, filter_value=None): "in future.", DeprecationWarning, ) - return self.files.filter(filter_name=field_lookup, filter_value=filter_value) + return self.files.filter(**kwargs) - def get_file_sequence(self, filter_name=None, filter_value=None, strict=True): + def get_file_sequence(self, strict=True, **kwargs): """Get an ordered sequence of files matching a criterion Accepts the same search arguments as `get_files`. @@ -192,10 +192,10 @@ def get_file_sequence(self, filter_name=None, filter_value=None, strict=True): """ results = self.files - if filter_name is not None: - results = results.filter(filter_name=filter_name, filter_value=filter_value) + if kwargs: + results = results.filter(**kwargs) - results = results.filter("sequence__is_not", None) + results = results.filter(sequence__is_not=None) def get_sequence_number(file): return file.sequence @@ -221,7 +221,7 @@ def get_file_by_label(self, tag_string): :param tag_string: if this string appears as an exact match in the labels :return: DataFile object """ - results = self.files.filter(filter_name="labels__contains", filter_value=tag_string) + results = self.files.filter(labels__contains=tag_string) if len(results) > 1: raise UnexpectedNumberOfResultsException("More than one result found when searching for a file by label") elif len(results) == 0: diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 1c4244e62..d7328d85f 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -1,14 +1,14 @@ from octue import exceptions -def _filter(self, filter_name=None, filter_value=None): +def _filter(self, **kwargs): """Returns a new instance containing only the Filterables to which the given filter criteria apply. - :param str filter_name: - :param any filter_value: + :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whos value is the value + to filter for :return octue.resources.filter_containers.FilterSet: """ - return self.__class__((item for item in self if item.satisfies(filter_name, filter_value))) + return self.__class__((item for item in self if item.satisfies(**kwargs))) def _order_by(self, attribute_name, reverse=False): diff --git a/octue/templates/template-using-manifests/app.py b/octue/templates/template-using-manifests/app.py index 41d5a1e16..91255af44 100644 --- a/octue/templates/template-using-manifests/app.py +++ b/octue/templates/template-using-manifests/app.py @@ -36,7 +36,7 @@ def run(analysis, *args, **kwargs): # # See the Dataset class help for more. metadata_file = input_dataset.get_file_by_label("meta") - timeseries_files = input_dataset.get_file_sequence("labels__contains", filter_value="timeseries") + timeseries_files = input_dataset.get_file_sequence(labels__contains="timeseries") # # We used these because they're special helpers - in this case ensuring that there's only one metadata file and # ensuring that the timeseries files come in a strictly ordered sequence. diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index 8c4e7a6fa..0f1e53c33 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -17,136 +17,136 @@ class TestFilterable(BaseTestCase): def test_error_raised_when_invalid_filter_name_received(self): """ Ensure an error is raised when an invalid filter name is provided. """ with self.assertRaises(exceptions.InvalidInputException): - FilterableSubclass().satisfies(filter_name="invalid_filter_name", filter_value=None) + FilterableSubclass().satisfies(invalid_filter_name=None) def test_error_raised_when_non_existent_attribute_name_received(self): """ Ensure an error is raised when a non-existent attribute name is used in the filter name. """ with self.assertRaises(AttributeError): - FilterableSubclass().satisfies(filter_name="boogaloo__is_a_dance", filter_value=True) + FilterableSubclass().satisfies(boogaloo__is_a_dance=True) def test_error_raised_when_valid_but_non_existent_filter_name_received(self): """ Ensure an error is raised when a valid but non-existent filter name is received. """ with self.assertRaises(exceptions.InvalidInputException): - FilterableSubclass().satisfies(filter_name="age__is_secret", filter_value=True) + FilterableSubclass().satisfies(age__is_secret=True) def test_error_raised_when_attribute_type_has_no_filters_defined(self): """Ensure an error is raised when a filter for an attribute whose type doesn't have any filters defined is received. """ with self.assertRaises(exceptions.InvalidInputException): - FilterableSubclass(age=lambda: None).satisfies(filter_name="age__equals", filter_value=True) + FilterableSubclass(age=lambda: None).satisfies(age__equals=True) def test_bool_filters(self): """ Test that the boolean filters work as expected. """ filterable_thing = FilterableSubclass(is_alive=True) - self.assertTrue(filterable_thing.satisfies("is_alive__is", True)) - self.assertFalse(filterable_thing.satisfies("is_alive__is", False)) - self.assertTrue(filterable_thing.satisfies("is_alive__is_not", False)) - self.assertFalse(filterable_thing.satisfies("is_alive__is_not", True)) + self.assertTrue(filterable_thing.satisfies(is_alive__is=True)) + self.assertFalse(filterable_thing.satisfies(is_alive__is=False)) + self.assertTrue(filterable_thing.satisfies(is_alive__is_not=False)) + self.assertFalse(filterable_thing.satisfies(is_alive__is_not=True)) def test_str_filters(self): """ Test that the string filters work as expected. """ filterable_thing = FilterableSubclass(name="Michael") - self.assertTrue(filterable_thing.satisfies("name__icontains", "m")) - self.assertFalse(filterable_thing.satisfies("name__icontains", "d")) - self.assertTrue(filterable_thing.satisfies("name__not_icontains", "d")) - self.assertFalse(filterable_thing.satisfies("name__not_icontains", "m")) - self.assertTrue(filterable_thing.satisfies("name__contains", "M")) - self.assertFalse(filterable_thing.satisfies("name__contains", "d")) - self.assertTrue(filterable_thing.satisfies("name__ends_with", "l")) - self.assertFalse(filterable_thing.satisfies("name__ends_with", "M")) - self.assertTrue(filterable_thing.satisfies("name__not_ends_with", "M")) - self.assertFalse(filterable_thing.satisfies("name__not_ends_with", "l")) - self.assertTrue(filterable_thing.satisfies("name__starts_with", "M")) - self.assertFalse(filterable_thing.satisfies("name__starts_with", "l")) - self.assertTrue(filterable_thing.satisfies("name__not_starts_with", "l")) - self.assertFalse(filterable_thing.satisfies("name__not_starts_with", "M")) - self.assertTrue(filterable_thing.satisfies("name__equals", "Michael")) - self.assertFalse(filterable_thing.satisfies("name__equals", "Clive")) - self.assertTrue(filterable_thing.satisfies("name__not_equals", "Clive")) - self.assertFalse(filterable_thing.satisfies("name__not_equals", "Michael")) - self.assertTrue(filterable_thing.satisfies("name__iequals", "michael")) - self.assertFalse(filterable_thing.satisfies("name__iequals", "James")) - self.assertTrue(filterable_thing.satisfies("name__not_iequals", "James")) - self.assertFalse(filterable_thing.satisfies("name__not_iequals", "michael")) - self.assertTrue(filterable_thing.satisfies("name__is", "Michael")) - self.assertFalse(filterable_thing.satisfies("name__is", "Clive")) - self.assertTrue(filterable_thing.satisfies("name__is_not", "Clive")) - self.assertFalse(filterable_thing.satisfies("name__is_not", "Michael")) - self.assertTrue(filterable_thing.satisfies("name__lt", "Noel")) - self.assertFalse(filterable_thing.satisfies("name__lt", "Harry")) - self.assertTrue(filterable_thing.satisfies("name__lte", "Michael")) - self.assertFalse(filterable_thing.satisfies("name__lte", "Harry")) - self.assertTrue(filterable_thing.satisfies("name__gt", "Clive")) - self.assertFalse(filterable_thing.satisfies("name__gt", "Noel")) - self.assertTrue(filterable_thing.satisfies("name__gte", "Michael")) - self.assertFalse(filterable_thing.satisfies("name__gte", "Noel")) + self.assertTrue(filterable_thing.satisfies(name__icontains="m")) + self.assertFalse(filterable_thing.satisfies(name__icontains="d")) + self.assertTrue(filterable_thing.satisfies(name__not_icontains="d")) + self.assertFalse(filterable_thing.satisfies(name__not_icontains="m")) + self.assertTrue(filterable_thing.satisfies(name__contains="M")) + self.assertFalse(filterable_thing.satisfies(name__contains="d")) + self.assertTrue(filterable_thing.satisfies(name__ends_with="l")) + self.assertFalse(filterable_thing.satisfies(name__ends_with="M")) + self.assertTrue(filterable_thing.satisfies(name__not_ends_with="M")) + self.assertFalse(filterable_thing.satisfies(name__not_ends_with="l")) + self.assertTrue(filterable_thing.satisfies(name__starts_with="M")) + self.assertFalse(filterable_thing.satisfies(name__starts_with="l")) + self.assertTrue(filterable_thing.satisfies(name__not_starts_with="l")) + self.assertFalse(filterable_thing.satisfies(name__not_starts_with="M")) + self.assertTrue(filterable_thing.satisfies(name__equals="Michael")) + self.assertFalse(filterable_thing.satisfies(name__equals="Clive")) + self.assertTrue(filterable_thing.satisfies(name__not_equals="Clive")) + self.assertFalse(filterable_thing.satisfies(name__not_equals="Michael")) + self.assertTrue(filterable_thing.satisfies(name__iequals="michael")) + self.assertFalse(filterable_thing.satisfies(name__iequals="James")) + self.assertTrue(filterable_thing.satisfies(name__not_iequals="James")) + self.assertFalse(filterable_thing.satisfies(name__not_iequals="michael")) + self.assertTrue(filterable_thing.satisfies(name__is="Michael")) + self.assertFalse(filterable_thing.satisfies(name__is="Clive")) + self.assertTrue(filterable_thing.satisfies(name__is_not="Clive")) + self.assertFalse(filterable_thing.satisfies(name__is_not="Michael")) + self.assertTrue(filterable_thing.satisfies(name__lt="Noel")) + self.assertFalse(filterable_thing.satisfies(name__lt="Harry")) + self.assertTrue(filterable_thing.satisfies(name__lte="Michael")) + self.assertFalse(filterable_thing.satisfies(name__lte="Harry")) + self.assertTrue(filterable_thing.satisfies(name__gt="Clive")) + self.assertFalse(filterable_thing.satisfies(name__gt="Noel")) + self.assertTrue(filterable_thing.satisfies(name__gte="Michael")) + self.assertFalse(filterable_thing.satisfies(name__gte="Noel")) def test_none_filters(self): """ Test that the None filters work as expected. """ filterable_thing = FilterableSubclass(owner=None) - self.assertTrue(filterable_thing.satisfies("owner__is", None)) - self.assertFalse(filterable_thing.satisfies("owner__is", True)) - self.assertTrue(filterable_thing.satisfies("owner__is_not", True)) - self.assertFalse(filterable_thing.satisfies("owner__is_not", None)) + self.assertTrue(filterable_thing.satisfies(owner__is=None)) + self.assertFalse(filterable_thing.satisfies(owner__is=True)) + self.assertTrue(filterable_thing.satisfies(owner__is_not=True)) + self.assertFalse(filterable_thing.satisfies(owner__is_not=None)) def test_number_filters_with_integers_and_floats(self): """ Test that the number filters work as expected for integers and floats. """ for age in (5, 5.2): filterable_thing = FilterableSubclass(age=age) - self.assertTrue(filterable_thing.satisfies("age__equals", age)) - self.assertFalse(filterable_thing.satisfies("age__equals", 63)) - self.assertTrue(filterable_thing.satisfies("age__not_equals", 63)) - self.assertFalse(filterable_thing.satisfies("age__not_equals", age)) - self.assertTrue(filterable_thing.satisfies("age__lt", 6)) - self.assertFalse(filterable_thing.satisfies("age__lt", 0)) - self.assertTrue(filterable_thing.satisfies("age__lte", age)) - self.assertFalse(filterable_thing.satisfies("age__lte", 0)) - self.assertTrue(filterable_thing.satisfies("age__gt", 4)) - self.assertFalse(filterable_thing.satisfies("age__gt", 63)) - self.assertTrue(filterable_thing.satisfies("age__gte", age)) - self.assertFalse(filterable_thing.satisfies("age__gte", 63)) - self.assertTrue(filterable_thing.satisfies("age__is", age)) - self.assertFalse(filterable_thing.satisfies("age__is", 63)) - self.assertTrue(filterable_thing.satisfies("age__is_not", 63)) - self.assertFalse(filterable_thing.satisfies("age__is_not", age)) + self.assertTrue(filterable_thing.satisfies(age__equals=age)) + self.assertFalse(filterable_thing.satisfies(age__equals=63)) + self.assertTrue(filterable_thing.satisfies(age__not_equals=63)) + self.assertFalse(filterable_thing.satisfies(age__not_equals=age)) + self.assertTrue(filterable_thing.satisfies(age__lt=6)) + self.assertFalse(filterable_thing.satisfies(age__lt=0)) + self.assertTrue(filterable_thing.satisfies(age__lte=age)) + self.assertFalse(filterable_thing.satisfies(age__lte=0)) + self.assertTrue(filterable_thing.satisfies(age__gt=4)) + self.assertFalse(filterable_thing.satisfies(age__gt=63)) + self.assertTrue(filterable_thing.satisfies(age__gte=age)) + self.assertFalse(filterable_thing.satisfies(age__gte=63)) + self.assertTrue(filterable_thing.satisfies(age__is=age)) + self.assertFalse(filterable_thing.satisfies(age__is=63)) + self.assertTrue(filterable_thing.satisfies(age__is_not=63)) + self.assertFalse(filterable_thing.satisfies(age__is_not=age)) def test_iterable_filters(self): """ Test that the iterable filters work as expected with lists, sets, and tuples. """ for iterable in ([1, 2, 3], {1, 2, 3}, (1, 2, 3)): filterable_thing = FilterableSubclass(iterable=iterable) - self.assertTrue(filterable_thing.satisfies("iterable__contains", 1)) - self.assertFalse(filterable_thing.satisfies("iterable__contains", 5)) - self.assertTrue(filterable_thing.satisfies("iterable__not_contains", 5)) - self.assertFalse(filterable_thing.satisfies("iterable__not_contains", 1)) - self.assertTrue(filterable_thing.satisfies("iterable__is", iterable)) - self.assertFalse(filterable_thing.satisfies("iterable__is", None)) - self.assertTrue(filterable_thing.satisfies("iterable__is_not", None)) - self.assertFalse(filterable_thing.satisfies("iterable__is_not", iterable)) + self.assertTrue(filterable_thing.satisfies(iterable__contains=1)) + self.assertFalse(filterable_thing.satisfies(iterable__contains=5)) + self.assertTrue(filterable_thing.satisfies(iterable__not_contains=5)) + self.assertFalse(filterable_thing.satisfies(iterable__not_contains=1)) + self.assertTrue(filterable_thing.satisfies(iterable__is=iterable)) + self.assertFalse(filterable_thing.satisfies(iterable__is=None)) + self.assertTrue(filterable_thing.satisfies(iterable__is_not=None)) + self.assertFalse(filterable_thing.satisfies(iterable__is_not=iterable)) def test_label_set_filters(self): """ Test the filters for Labelset. """ filterable_thing = FilterableSubclass(iterable=LabelSet({"fred", "charlie"})) - self.assertTrue(filterable_thing.satisfies("iterable__any_label_contains", "a")) - self.assertFalse(filterable_thing.satisfies("iterable__any_label_contains", "z")) - self.assertTrue(filterable_thing.satisfies("iterable__not_any_label_contains", "z")) - self.assertFalse(filterable_thing.satisfies("iterable__not_any_label_contains", "a")) - self.assertTrue(filterable_thing.satisfies("iterable__any_label_starts_with", "f")) - self.assertFalse(filterable_thing.satisfies("iterable__any_label_starts_with", "e")) - self.assertTrue(filterable_thing.satisfies("iterable__any_label_ends_with", "e")) - self.assertFalse(filterable_thing.satisfies("iterable__any_label_ends_with", "i")) - self.assertTrue(filterable_thing.satisfies("iterable__not_any_label_starts_with", "e")) - self.assertFalse(filterable_thing.satisfies("iterable__not_any_label_starts_with", "f")) - self.assertTrue(filterable_thing.satisfies("iterable__not_any_label_ends_with", "i")) - self.assertFalse(filterable_thing.satisfies("iterable__not_any_label_ends_with", "e")) + self.assertTrue(filterable_thing.satisfies(iterable__any_label_contains="a")) + self.assertFalse(filterable_thing.satisfies(iterable__any_label_contains="z")) + self.assertTrue(filterable_thing.satisfies(iterable__not_any_label_contains="z")) + self.assertFalse(filterable_thing.satisfies(iterable__not_any_label_contains="a")) + self.assertTrue(filterable_thing.satisfies(iterable__any_label_starts_with="f")) + self.assertFalse(filterable_thing.satisfies(iterable__any_label_starts_with="e")) + self.assertTrue(filterable_thing.satisfies(iterable__any_label_ends_with="e")) + self.assertFalse(filterable_thing.satisfies(iterable__any_label_ends_with="i")) + self.assertTrue(filterable_thing.satisfies(iterable__not_any_label_starts_with="e")) + self.assertFalse(filterable_thing.satisfies(iterable__not_any_label_starts_with="f")) + self.assertTrue(filterable_thing.satisfies(iterable__not_any_label_ends_with="i")) + self.assertFalse(filterable_thing.satisfies(iterable__not_any_label_ends_with="e")) def test_filtering_different_attributes_on_same_instance(self): """ Ensure all filterable attributes on an instance can be checked for filter satisfaction. """ filterable_thing = FilterableSubclass(name="Fred", is_alive=True, iterable={1, 2, 3}, age=5.2, owner=None) - self.assertTrue(filterable_thing.satisfies("name__icontains", "f")) - self.assertTrue(filterable_thing.satisfies("name__not_icontains", "j")) - self.assertFalse(filterable_thing.satisfies("is_alive__is", False)) - self.assertTrue(filterable_thing.satisfies("iterable__contains", 3)) - self.assertTrue(filterable_thing.satisfies("age__equals", 5.2)) - self.assertTrue(filterable_thing.satisfies("age__not_equals", 5)) - self.assertTrue(filterable_thing.satisfies("owner__is", None)) + self.assertTrue(filterable_thing.satisfies(name__icontains="f")) + self.assertTrue(filterable_thing.satisfies(name__not_icontains="j")) + self.assertFalse(filterable_thing.satisfies(is_alive__is=False)) + self.assertTrue(filterable_thing.satisfies(iterable__contains=3)) + self.assertTrue(filterable_thing.satisfies(age__equals=5.2)) + self.assertTrue(filterable_thing.satisfies(age__not_equals=5)) + self.assertTrue(filterable_thing.satisfies(owner__is=None)) diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index 028a1dbaa..de05233d2 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -87,24 +87,6 @@ class NotADatafile: self.assertIn("must be of class Datafile to add it to a Dataset", e.exception.args[0]) - def test_filter_catches_single_underscore_mistake(self): - """Ensures that if the field name is a single underscore, that gets caught as an error""" - resource = Dataset( - files=[ - Datafile(path="path-within-dataset/A_Test_file.csv"), - Datafile(path="path-within-dataset/a_test_file.txt"), - ] - ) - - with self.assertRaises(exceptions.InvalidInputException) as e: - resource.files.filter("name_icontains", filter_value="Test") - - self.assertIn( - "Invalid filter name 'name_icontains'. Filter names should be in the form " - "'__'.", - e.exception.args[0], - ) - def test_filter_name_contains(self): """Ensures that filter works with the name_contains and name_icontains lookups""" resource = Dataset( @@ -113,15 +95,15 @@ def test_filter_name_contains(self): Datafile(path="path-within-dataset/a_test_file.txt"), ] ) - files = resource.files.filter("name__icontains", filter_value="Test") + files = resource.files.filter(name__icontains="Test") self.assertEqual(2, len(files)) - files = resource.files.filter("name__icontains", filter_value="A") + files = resource.files.filter(name__icontains="A") self.assertEqual(2, len(files)) - files = resource.files.filter("name__contains", filter_value="Test") + files = resource.files.filter(name__contains="Test") self.assertEqual(1, len(files)) - files = resource.files.filter("name__icontains", filter_value="test") + files = resource.files.filter(name__icontains="test") self.assertEqual(2, len(files)) - files = resource.files.filter("name__icontains", filter_value="file") + files = resource.files.filter(name__icontains="file") self.assertEqual(2, len(files)) def test_filter_name_with(self): @@ -132,21 +114,21 @@ def test_filter_name_with(self): Datafile(path="path-within-dataset/a_your_file.csv"), ] ) - files = resource.files.filter("name__starts_with", filter_value="a_my") + files = resource.files.filter(name__starts_with="a_my") self.assertEqual(1, len(files)) - files = resource.files.filter("name__starts_with", filter_value="a_your") + files = resource.files.filter(name__starts_with="a_your") self.assertEqual(1, len(files)) - files = resource.files.filter("name__starts_with", filter_value="a_") + files = resource.files.filter(name__starts_with="a_") self.assertEqual(2, len(files)) - files = resource.files.filter("name__starts_with", filter_value="b") + files = resource.files.filter(name__starts_with="b") self.assertEqual(0, len(files)) - files = resource.files.filter("name__ends_with", filter_value="_file.csv") + files = resource.files.filter(name__ends_with="_file.csv") self.assertEqual(2, len(files)) - files = resource.files.filter("name__ends_with", filter_value="r_file.csv") + files = resource.files.filter(name__ends_with="r_file.csv") self.assertEqual(1, len(files)) - files = resource.files.filter("name__ends_with", filter_value="y_file.csv") + files = resource.files.filter(name__ends_with="y_file.csv") self.assertEqual(1, len(files)) - files = resource.files.filter("name__ends_with", filter_value="other.csv") + files = resource.files.filter(name__ends_with="other.csv") self.assertEqual(0, len(files)) def test_filter_by_label(self): @@ -159,17 +141,17 @@ def test_filter_by_label(self): ] ) - files = resource.files.filter("labels__contains", filter_value="a") + files = resource.files.filter(labels__contains="a") self.assertEqual(0, len(files)) - files = resource.files.filter("labels__contains", filter_value="one") + files = resource.files.filter(labels__contains="one") self.assertEqual(1, len(files)) - files = resource.files.filter("labels__contains", filter_value="all") + files = resource.files.filter(labels__contains="all") self.assertEqual(3, len(files)) - files = resource.files.filter("labels__any_label_starts_with", filter_value="b") + files = resource.files.filter(labels__any_label_starts_with="b") self.assertEqual(2, len(files)) - files = resource.files.filter("labels__any_label_ends_with", filter_value="3") + files = resource.files.filter(labels__any_label_ends_with="3") self.assertEqual(2, len(files)) - # files = resource.files.filter("labels__contains", filter_value="hre") + # files = resource.files.filter(labels__contains="hre") # self.assertEqual(1, len(files)) def test_get_file_by_label(self): @@ -206,7 +188,7 @@ def test_filter_by_sequence_not_none(self): Datafile(path="path-within-dataset/a_your_file.csv", sequence=None), ] ) - files = resource.files.filter("sequence__is_not", None) + files = resource.files.filter(sequence__is_not=None) self.assertEqual(2, len(files)) def test_get_file_sequence(self): @@ -217,7 +199,7 @@ def test_get_file_sequence(self): Datafile(path="path-within-dataset/a_your_file.csv", sequence=None), ] - got_files = Dataset(files=files).get_file_sequence("name__ends_with", filter_value=".csv", strict=True) + got_files = Dataset(files=files).get_file_sequence(name__ends_with=".csv", strict=True) self.assertEqual(got_files, files[:2]) def test_get_broken_file_sequence(self): @@ -230,7 +212,7 @@ def test_get_broken_file_sequence(self): ] ) with self.assertRaises(exceptions.BrokenSequenceException): - resource.get_file_sequence("name__ends_with", filter_value=".csv", strict=True) + resource.get_file_sequence(name__ends_with=".csv", strict=True) def test_filter_name_filters_include_extension(self): """Ensures that filters applied to the name will catch terms in the extension""" @@ -239,9 +221,7 @@ def test_filter_name_filters_include_extension(self): Datafile(path="path-within-dataset/a_test_file.txt"), ] - self.assertEqual( - Dataset(files=files).files.filter("name__icontains", filter_value="txt"), FilterSet({files[1]}) - ) + self.assertEqual(Dataset(files=files).files.filter(name__icontains="txt"), FilterSet({files[1]})) def test_filter_name_filters_exclude_path(self): """Ensures that filters applied to the name will not catch terms in the extension""" @@ -251,7 +231,7 @@ def test_filter_name_filters_exclude_path(self): Datafile(path="second-path-within-dataset/a_test_file.txt"), ] ) - files = resource.files.filter("name__icontains", filter_value="second") + files = resource.files.filter(name__icontains="second") self.assertEqual(0, len(files)) def test_using_get_files_raises_deprecation_warning(self): @@ -264,7 +244,7 @@ def test_using_get_files_raises_deprecation_warning(self): ) with warnings.catch_warnings(record=True) as warning: - filtered_files = resource.get_files("name__icontains", filter_value="second") + filtered_files = resource.get_files(name__icontains="second") self.assertEqual(len(warning), 1) self.assertTrue(issubclass(warning[-1].category, DeprecationWarning)) self.assertIn("deprecated", str(warning[-1].message)) diff --git a/tests/resources/test_label.py b/tests/resources/test_label.py index d512f5cec..9322fbaa8 100644 --- a/tests/resources/test_label.py +++ b/tests/resources/test_label.py @@ -167,7 +167,7 @@ def test_filter(self): """ Test that label sets can be filtered. """ label_set = LabelSet(labels="label1 label2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") self.assertEqual( - label_set.labels.filter("name__starts_with", "meta"), + label_set.labels.filter(name__starts_with="meta"), FilterSet({Label("meta:sys1:1234"), Label("meta:sys2:3456"), Label("meta:sys2:55")}), ) @@ -175,13 +175,13 @@ def test_filter_chaining(self): """ Test that filters can be chained. """ label_set = LabelSet(labels="label1 label2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") - filtered_labels_1 = label_set.labels.filter("name__starts_with", "meta") + filtered_labels_1 = label_set.labels.filter(name__starts_with="meta") self.assertEqual(filtered_labels_1, LabelSet("meta:sys1:1234 meta:sys2:3456 meta:sys2:55").labels) - filtered_labels_2 = filtered_labels_1.filter("name__contains", "sys2") + filtered_labels_2 = filtered_labels_1.filter(name__contains="sys2") self.assertEqual(filtered_labels_2, LabelSet("meta:sys2:3456 meta:sys2:55").labels) - filtered_labels_3 = filtered_labels_1.filter("name__equals", "meta:sys2:55") + filtered_labels_3 = filtered_labels_1.filter(name__equals="meta:sys2:55") self.assertEqual(filtered_labels_3, LabelSet("meta:sys2:55").labels) def test_serialise(self): From dbc94608855575bf38c218d1af63a6460f2d28c3 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 12:52:29 +0100 Subject: [PATCH 008/103] IMP: Add ability to filter by nested attributes/dicts --- octue/mixins/filterable.py | 52 ++++++++++++++++++++++++--------- tests/mixins/test_filterable.py | 40 +++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 14 deletions(-) diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index b4ec103d6..9c9d89304 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -1,4 +1,5 @@ import collections.abc +import functools import numbers from octue import exceptions @@ -76,9 +77,10 @@ class Filterable: def satisfies(self, **kwargs): - """Check that the instance satisfies the given filter for the given filter value. + """Check that the instance satisfies the given filter for the given filter value. The filter should be provided + as a single keyword argument such as `name__first__equals="Joe"` - :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whos value is the + :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whose value is the value to filter for :return mixed: """ @@ -88,11 +90,7 @@ def satisfies(self, **kwargs): filter_name, filter_value = list(kwargs.items())[0] attribute_name, filter_action = self._split_filter_name(filter_name) - - try: - attribute = getattr(self, attribute_name) - except AttributeError: - raise AttributeError(f"An attribute named {attribute_name!r} does not exist on {self!r}.") + attribute = self._get_nested_attribute(self, attribute_name) filter_ = self._get_filter(attribute, filter_action) return filter_(attribute, filter_value) @@ -101,15 +99,42 @@ def _split_filter_name(self, filter_name): """Split the filter name into the attribute name and filter action, raising an error if it the attribute name and filter action aren't delimited by a double underscore i.e. "__". """ - try: - attribute_name, filter_action = filter_name.split("__", 1) - except ValueError: + *attribute_names, filter_action = filter_name.split("__") + + if not attribute_names: raise exceptions.InvalidInputException( f"Invalid filter name {filter_name!r}. Filter names should be in the form " - f"'__'." + f"'____<...>__' with at least one attribute name" + f"included." ) - return attribute_name, filter_action + return ".".join(attribute_names), filter_action + + def _get_nested_attribute(self, instance, nested_attribute_name): + """Get the value of a nested attribute from a class instance or dictionary, with each level of nesting being + another dictionary or class instance. + + :param dict|object instance: + :param str nested_attribute_names: dot-separated nested attribute name e.g. "a.b.c", "a.b", or "a" + :return any: + """ + nested_attribute_names = nested_attribute_name.split(".") + return functools.reduce(self._getattr_or_subscribe, nested_attribute_names, instance) + + def _getattr_or_subscribe(self, instance, name): + """Get an attribute from a class instance or a value from a dictionary. + + :param dict|object instance: + :param str name: name of attribute or dictionary key + :return any: + """ + try: + return getattr(instance, name) + except AttributeError: + try: + return instance[name] + except TypeError: + raise AttributeError(f"{instance!r} does not have an attribute or key named {name!r}.") def _get_filter(self, attribute, filter_action): """Get the filter for the attribute and filter action, raising an error if there is no filter action of that @@ -119,9 +144,8 @@ def _get_filter(self, attribute, filter_action): return self._get_filter_actions_for_attribute(attribute)[filter_action] except KeyError as error: - attribute_type = type(attribute) raise exceptions.InvalidInputException( - f"There is no filter called {error.args[0]!r} for attributes of type {attribute_type}. The options " + f"There is no filter called {error.args[0]!r} for attributes of type {type(attribute)}. The options " f"are {self._get_filter_actions_for_attribute(attribute).keys()!r}" ) diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index 0f1e53c33..ddac4d805 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -1,3 +1,5 @@ +from unittest.mock import Mock + from octue import exceptions from octue.mixins.filterable import Filterable from octue.resources.label import LabelSet @@ -150,3 +152,41 @@ def test_filtering_different_attributes_on_same_instance(self): self.assertTrue(filterable_thing.satisfies(age__equals=5.2)) self.assertTrue(filterable_thing.satisfies(age__not_equals=5)) self.assertTrue(filterable_thing.satisfies(owner__is=None)) + + def test_getattr_or_subscribe_with_dictionary(self): + """Test that the Filterable._getattr_or_subscribe method can get values from a dictionary.""" + filterable = Filterable() + self.assertEqual(filterable._getattr_or_subscribe(instance={"hello": "world"}, name="hello"), "world") + + def test_getattr_or_subscribe_with_object(self): + """Test that the Filterable._getattr_or_subscribe method can get attribute values from a class instance.""" + self.assertEqual(Filterable()._getattr_or_subscribe(instance=Mock(a=3), name="a"), 3) + + def test_get_nested_attribute(self): + """Test that nested attributes can be accessed.""" + inner_mock = Mock(b=3) + outer_mock = Mock(a=inner_mock) + self.assertEqual(Filterable()._get_nested_attribute(instance=outer_mock, nested_attribute_name="a.b"), 3) + + def test_get_nested_dictionary_attribute(self): + """Test that nested attributes ending in a dictionary key can be accessed.""" + inner_mock = Mock(b={"hello": "world"}) + outer_mock = Mock(a=inner_mock) + self.assertEqual( + Filterable()._get_nested_attribute(instance=outer_mock, nested_attribute_name="a.b.hello"), "world" + ) + + def test_filtering_with_nested_attributes(self): + """Test that Filterable subclasses can be checked for satisfaction of a filter of nested attributes.""" + inner_mock = Mock(b=3) + outer_mock = Mock(a=inner_mock) + filterable_thing = FilterableSubclass(name=outer_mock) + self.assertTrue(filterable_thing.satisfies(name__a__b__equals=3)) + + def test_filtering_with_nested_attributes_ending_in_dictionary_key(self): + """Test that Filterable subclasses can be checked for satisfaction of a filter of nested attributes that ends + with a dictionary key. + """ + filterable_thing = FilterableSubclass(name={"first": "Joe", "last": "Bloggs"}) + self.assertTrue(filterable_thing.satisfies(name__first__equals="Joe")) + self.assertTrue(filterable_thing.satisfies(name__last__equals="Bloggs")) From 4cc19ebc79022666a86d7fcee31789ac26d6acc3 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 13:53:37 +0100 Subject: [PATCH 009/103] IMP: Add FilterDict --- octue/resources/filter_containers.py | 9 ++++++++ tests/resources/test_filter_containers.py | 28 ++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index d7328d85f..f716423bf 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -1,3 +1,5 @@ +from collections import UserDict + from octue import exceptions @@ -31,3 +33,10 @@ class FilterSet(set): class FilterList(list): filter = _filter order_by = _order_by + + +class FilterDict(UserDict): + order_by = _order_by + + def filter(self, **kwargs): + return self.__class__({key: value for key, value in self.items() if value.satisfies(**kwargs)}) diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index a0059e8e7..38d65c4c5 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -1,6 +1,6 @@ from octue import exceptions from octue.mixins import Filterable -from octue.resources.filter_containers import FilterList, FilterSet +from octue.resources.filter_containers import FilterDict, FilterList, FilterSet from tests.base import BaseTestCase @@ -41,3 +41,29 @@ def test_order_by_in_reverse(self): cats = [Cat(age=5), Cat(age=3), Cat(age=4)] sorted_filter_set = FilterSet(cats).order_by("age", reverse=True) self.assertEqual(sorted_filter_set, FilterList([cats[0], cats[2], cats[1]])) + + +class TestFilterDict(BaseTestCase): + def test_instantiate(self): + """Test that a FilterDict can be instantiated like a dictionary.""" + filter_dict = FilterDict(a=1, b=3) + self.assertEqual(filter_dict["a"], 1) + self.assertEqual(filter_dict["b"], 3) + + filter_dict = FilterDict({"a": 1, "b": 3}) + self.assertEqual(filter_dict["a"], 1) + self.assertEqual(filter_dict["b"], 3) + + filter_dict = FilterDict(**{"a": 1, "b": 3}) + self.assertEqual(filter_dict["a"], 1) + self.assertEqual(filter_dict["b"], 3) + + def test_filter(self): + """Test that a FilterDict can be filtered on its values when they are all filterables.""" + filterables = {"first-filterable": Filterable(), "second-filterable": Filterable()} + filterables["first-filterable"].value = 3 + filterables["second-filterable"].value = 90 + + filter_dict = FilterDict(filterables) + self.assertEqual(filter_dict.filter(value__equals=90).keys(), {"second-filterable"}) + self.assertEqual(filter_dict.filter(value__gt=2), filterables) From f7d8feab6a742030dfac5c085ec8a3de8ca8dcd1 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 14:01:21 +0100 Subject: [PATCH 010/103] IMP: Add TagDict and use in Taggable --- octue/mixins/__init__.py | 3 +- octue/mixins/taggable.py | 15 +- octue/resources/tag.py | 293 +++++++++++++----------------------- tests/resources/test_tag.py | 54 +++++++ 4 files changed, 164 insertions(+), 201 deletions(-) create mode 100644 tests/resources/test_tag.py diff --git a/octue/mixins/__init__.py b/octue/mixins/__init__.py index 2f96d472e..79166c404 100644 --- a/octue/mixins/__init__.py +++ b/octue/mixins/__init__.py @@ -7,6 +7,7 @@ from .loggable import Loggable from .pathable import Pathable from .serialisable import Serialisable +from .taggable import Taggable __all__ = ( @@ -19,5 +20,5 @@ "MixinBase", "Pathable", "Serialisable", - "Labelable", + "Taggable", ) diff --git a/octue/mixins/taggable.py b/octue/mixins/taggable.py index 0e4c0fc85..18731fed3 100644 --- a/octue/mixins/taggable.py +++ b/octue/mixins/taggable.py @@ -1,17 +1,16 @@ -from octue.resources.tag import TagSet +from octue.resources.tag import TagDict class Taggable: - """ A mixin class allowing objects to be tagged. """ + """A mixin class allowing objects to be tagged.""" def __init__(self, *args, tags=None, **kwargs): - """Constructor for Taggable mixins""" super().__init__(*args, **kwargs) - self._tags = TagSet(tags) + self.tags = tags - def add_tags(self, *args): + def add_tags(self, tags): """ Adds one or more new tag strings to the object tags. New tags will be cleaned and validated. """ - self._tags.add_tags(*args) + self.tags.update(tags) @property def tags(self): @@ -19,5 +18,5 @@ def tags(self): @tags.setter def tags(self, tags): - """ Overwrite any existing tag set and assign new tags. """ - self._tags = TagSet(tags) + """ Overwrite any existing tag set and assign new tag. """ + self._tags = TagDict(tags) diff --git a/octue/resources/tag.py b/octue/resources/tag.py index 375d2779e..b49e6149c 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -1,195 +1,104 @@ -import json +# import json import re -from functools import lru_cache from octue.exceptions import InvalidTagException -from octue.mixins import Filterable, Serialisable -from octue.resources.filter_containers import FilterList, FilterSet -from octue.utils.encoders import OctueJSONEncoder - - -TAG_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9:.\-/]*(? other - elif isinstance(other, Tag): - return self.name > other.name - - def __hash__(self): - """ Allow Tags to be contained in a set. """ - return hash(f"{type(self).__name__}{self.name}") - - def __contains__(self, item): - return item in self.name - - def __repr__(self): - return repr(self.name) - - def starts_with(self, value): - """ Does the tag start with the given value? """ - return self.name.startswith(value) - - def ends_with(self, value): - """ Does the tag end with the given value? """ - return self.name.endswith(value) - - @staticmethod - def _clean(name): - """ Ensure the tag name is a string and conforms to the tag regex pattern. """ - if not isinstance(name, str): - raise InvalidTagException("Tags must be expressed as a string.") - - cleaned_name = name.strip() - - if not re.match(TAG_PATTERN, cleaned_name): - raise InvalidTagException( - f"Invalid tag '{cleaned_name}'. Tags must contain only characters 'a-z', 'A-Z', '0-9', ':', '.', '/' " - f"and '-'. They must not start with '-', ':', '/' or '.'" - ) - - return cleaned_name - - -class TagSet(Serialisable): - """ Class to handle a set of tags as a string. """ - - _FILTERSET_ATTRIBUTE = "tags" - - def __init__(self, tags=None, *args, **kwargs): - """ Construct a TagSet. """ - # TODO Call the superclass with *args and **kwargs, then update everything to using ResourceBase - tags = tags or FilterSet() - - # JSON-encoded list of tag names, or space-delimited string of tag names. - if isinstance(tags, str): - try: - self.tags = FilterSet(Tag(tag) for tag in json.loads(tags)) - except json.decoder.JSONDecodeError: - self.tags = FilterSet(Tag(tag) for tag in tags.strip().split()) - - elif isinstance(tags, TagSet): - self.tags = FilterSet(tags.tags) - - # Tags can be some other iterable than a list, but each tag must be a Tag or string. - elif hasattr(tags, "__iter__"): - self.tags = FilterSet(tag if isinstance(tag, Tag) else Tag(tag) for tag in tags) - - else: - raise InvalidTagException( - "Tags must be expressed as a whitespace-delimited string or an iterable of strings or Tag instances." - ) - - def __eq__(self, other): - """ Does this TagSet have the same tags as another TagSet? """ - if not isinstance(other, TagSet): - return False - return self.tags == other.tags - - def __iter__(self): - """ Iterate over the tags in the TagSet. """ - yield from self.tags - - def __len__(self): - return len(self.tags) - - def __contains__(self, tag): - """ Return True if any of the tags exactly matches value, allowing test like `if 'a' in TagSet('a b')`. """ - if isinstance(tag, str): - return Tag(tag) in self.tags - if isinstance(tag, Tag): - return tag in self.tags - - def __repr__(self): - return f"" - - def add_tags(self, *args): - """Adds one or more new tag strings to the object tags. New tags will be cleaned and validated.""" - self.tags |= {Tag(arg) for arg in args} - - def get_subtags(self): - """ Return a new TagSet instance with all the subtags. """ - return TagSet(subtag for tag in self for subtag in tag.subtags) - - def any_tag_starts_with(self, value): - """ Implement a startswith method that returns true if any of the tags starts with value """ - return any(tag.starts_with(value) for tag in self) - - def any_tag_ends_with(self, value): - """ Implement an endswith method that returns true if any of the tags endswith value. """ - return any(tag.ends_with(value) for tag in self) - - def any_tag_contains(self, value): - """ Return True if any of the tags contains value. """ - return any(value in tag for tag in self) - - def filter(self, filter_name=None, filter_value=None): - """Filter the tags with the given filter for the given value. - - :param str filter_name: - :param any filter_value: - :return octue.resources.filter_containers.FilterSet: - """ - return self.tags.filter(filter_name=filter_name, filter_value=filter_value) - - def serialise(self, to_string=False, **kwargs): - """Serialise to a sorted list of tag names. - - :param bool to_string: - :return list|str: - """ - string = json.dumps( - sorted(tag.name for tag in self.tags), cls=OctueJSONEncoder, sort_keys=True, indent=4, **kwargs - ) - - if to_string: - return string - - return json.loads(string) - - @classmethod - def deserialise(cls, serialised_tagset): - """Deserialise from a sorted list of tag names. - - :param list serialised_tagset: - :return TagSet: - """ - return cls(tags=serialised_tagset) +from octue.mixins import Serialisable +from octue.resources.filter_containers import FilterDict + + +# from collections import UserDict + +# from octue.utils.encoders import OctueJSONEncoder + + +TAG_NAME_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9:.\-/]*(? other +# elif isinstance(other, Tag): +# return self.name > other.name +# +# def __hash__(self): +# """ Allow Tags to be contained in a set. """ +# return hash(f"{type(self).__name__}{self.name}") +# +# def __contains__(self, item): +# return item in self.name +# +# def __repr__(self): +# return repr(self.name) +# +# def starts_with(self, value): +# """ Does the tag start with the given value? """ +# return self.name.startswith(value) +# +# def ends_with(self, value): +# """ Does the tag end with the given value? """ +# return self.name.endswith(value) +# +# @staticmethod +# def _clean(name): +# """ Ensure the tag name is a string and conforms to the tag regex pattern. """ +# if not isinstance(name, str): +# raise InvalidTagException("Tags must be expressed as a string.") +# +# cleaned_name = name.strip() +# +# if not re.match(TAG_NAME_PATTERN, cleaned_name): +# raise InvalidTagException( +# f"Invalid tag '{cleaned_name}'. Tags must contain only characters 'a-z', 'A-Z', '0-9', ':', '.', '/' " +# f"and '-'. They must not start with '-', ':', '/' or '.'" +# ) +# +# return cleaned_name + + +class TagDict(Serialisable, FilterDict): + + _FILTERABLE_ATTRIBUTE = "data" + + def __setitem__(self, tag, value): + self._check_tag_format(tag) + self.data[tag] = value + + def update(self, tags, **kwargs): + self._check_tag_format(*tags) + super().update(tags, **kwargs) + + def _check_tag_format(self, *tags): + for tag in tags: + if not re.match(TAG_NAME_PATTERN, tag): + raise InvalidTagException( + f"Invalid tag '{tag}'. Tags must contain only characters 'a-z', 'A-Z', '0-9', ':', '.', '/' " + f"and '-'. They must not start with '-', ':', '/' or '.'" + ) diff --git a/tests/resources/test_tag.py b/tests/resources/test_tag.py new file mode 100644 index 000000000..63da04035 --- /dev/null +++ b/tests/resources/test_tag.py @@ -0,0 +1,54 @@ +from unittest import TestCase + +from octue import exceptions +from octue.mixins import Filterable +from octue.resources.tag import TagDict + + +class TestTagDict(TestCase): + def test_instantiate_from_dict(self): + """Test that a TagDict can be instantiated from a dictionary.""" + tag_dict = TagDict({"a": 1, "b": 2}) + self.assertEqual(tag_dict["a"], 1) + self.assertEqual(tag_dict["b"], 2) + + def test_instantiate_from_kwargs(self): + """Test that a TagDict can be instantiated from kwargs.""" + tag_dict = TagDict(**{"a": 1, "b": 2}) + self.assertEqual(tag_dict["a"], 1) + self.assertEqual(tag_dict["b"], 2) + + def test_instantiation_fails_if_tag_name_fails_validation(self): + """Test that TagDict instantiation fails if any keys don't conform to the tag name pattern.""" + with self.assertRaises(exceptions.InvalidTagException): + TagDict({".blah.": "blue"}) + + def test_update(self): + """Test that TagDicts can be updated with tags with valid names.""" + tag_dict = TagDict({"a": 1, "b": 2}) + tag_dict.update({"c": 3, "d": 4}) + self.assertEqual(tag_dict["c"], 3) + self.assertEqual(tag_dict["d"], 4) + + def test_update_fails_if_tag_name_fails_validation(self): + """Test that updating fails if any keys don't conform to the tag name pattern.""" + tag_dict = TagDict({"a": 1, "b": 2}) + + with self.assertRaises(exceptions.InvalidTagException): + tag_dict.update({"@": 3, "d": 4}) + + self.assertEqual(tag_dict, {"a": 1, "b": 2}) + + def test_equality_to_dict(self): + """Test that TagDicts compare equal to dictionaries with the same contents.""" + tag_dict = TagDict({"a": 1, "b": 2}) + self.assertEqual(tag_dict, {"a": 1, "b": 2}) + + def test_filter(self): + """Test that TagDicts can be filtered for their values.""" + filterables = {"first-filterable": Filterable(), "second-filterable": Filterable()} + filterables["first-filterable"].my_data = 3 + filterables["second-filterable"].my_data = 90 + + tag_dict = TagDict(filterables) + self.assertEqual(tag_dict.filter(my_data__equals=3).keys(), {"first-filterable"}) From 5aad8398f6d42e223f5168f6dafc8dd06ab74ec7 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 14:17:47 +0100 Subject: [PATCH 011/103] IMP: Make Datafiles and Datasets taggable again --- octue/resources/datafile.py | 5 +++-- octue/resources/dataset.py | 6 +++--- tests/resources/test_datafile.py | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 9136f382d..109a7411f 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -9,7 +9,7 @@ from octue.cloud.storage import GoogleCloudStorageClient from octue.cloud.storage.path import CLOUD_STORAGE_PROTOCOL from octue.exceptions import AttributeConflict, CloudLocationNotSpecified, FileNotFoundException, InvalidInputException -from octue.mixins import Filterable, Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable +from octue.mixins import Filterable, Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable, Taggable from octue.mixins.hashable import EMPTY_STRING_HASH_VALUE from octue.utils import isfile from octue.utils.time import convert_from_posix_time, convert_to_posix_time @@ -27,7 +27,7 @@ LABELS_DEFAULT = None -class Datafile(Labelable, Serialisable, Pathable, Loggable, Identifiable, Hashable, Filterable): +class Datafile(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable, Filterable): """Class for representing data files on the Octue system. Files in a manifest look like this: @@ -72,6 +72,7 @@ class Datafile(Labelable, Serialisable, Pathable, Loggable, Identifiable, Hashab "path", "sequence", "labels", + "tags", "timestamp", "_cloud_metadata", ) diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index 3b4f37ab2..3cb92767e 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -7,7 +7,7 @@ from octue.cloud import storage from octue.cloud.storage import GoogleCloudStorageClient from octue.exceptions import BrokenSequenceException, InvalidInputException, UnexpectedNumberOfResultsException -from octue.mixins import Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable +from octue.mixins import Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable, Taggable from octue.resources.datafile import Datafile from octue.resources.filter_containers import FilterSet from octue.resources.label import LabelSet @@ -19,7 +19,7 @@ DATAFILES_DIRECTORY = "datafiles" -class Dataset(Labelable, Serialisable, Pathable, Loggable, Identifiable, Hashable): +class Dataset(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable): """A representation of a dataset, containing files, labels, etc This is used to read a list of files (and their associated properties) into octue analysis, or to compile a @@ -28,7 +28,7 @@ class Dataset(Labelable, Serialisable, Pathable, Loggable, Identifiable, Hashabl _FILTERSET_ATTRIBUTE = "files" _ATTRIBUTES_TO_HASH = ("files",) - _SERIALISE_FIELDS = "files", "name", "labels", "id", "path" + _SERIALISE_FIELDS = "files", "name", "labels", "tags", "id", "path" def __init__(self, name=None, id=None, logger=None, path=None, path_from=None, labels=None, **kwargs): """Construct a Dataset""" diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index b34cb5d27..c004b559c 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -150,6 +150,7 @@ def test_serialisable(self): "path", "timestamp", "sequence", + "tags", "labels", "_cloud_metadata", } From 3e9823263dd7a8ac3c0b7cf2e37af7a7330bb6fe Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 14:18:33 +0100 Subject: [PATCH 012/103] IMP: Stop logging in Serialisable; always exclude logger field in Serialisable --- octue/mixins/serialisable.py | 9 ++++----- tests/mixins/test_serialisable.py | 8 -------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/octue/mixins/serialisable.py b/octue/mixins/serialisable.py index 304138ab0..a55fa8e1e 100644 --- a/octue/mixins/serialisable.py +++ b/octue/mixins/serialisable.py @@ -6,16 +6,18 @@ class Serialisable: """Mixin class to make resources serialisable to JSON. - Objects must have a `.logger` and a `.id` property + The `logger` field is always excluded from serialisation if it is present. """ _SERIALISE_FIELDS = None _EXCLUDE_SERIALISE_FIELDS = ("logger",) def __init__(self, *args, **kwargs): - """Constructor for serialisable mixin""" super().__init__(*args, **kwargs) + if "logger" not in self._EXCLUDE_SERIALISE_FIELDS: + self._EXCLUDE_SERIALISE_FIELDS = (*self._EXCLUDE_SERIALISE_FIELDS, "logger") + @classmethod def deserialise(cls, serialised_object, from_string=False): """Deserialise the given JSON-serialised object. @@ -35,7 +37,6 @@ def to_file(self, file_name, **kwargs): :parameter str file_name: file to write to, including relative or absolute path and .json extension :return None: """ - self.logger.debug("Writing %s %s to file %s", self.__class__.__name__, self.id, file_name) with open(file_name, "w") as fp: fp.write(self.serialise(**kwargs, to_string=True)) @@ -62,8 +63,6 @@ def __init__(self): :return: json string or dict containing a serialised/primitive version of the resource. :rtype: str, dict """ - self.logger.debug("Serialising %s %s", self.__class__.__name__, self.id) - # Get all non-private and non-protected attributes except those excluded specifically names_of_attributes_to_serialise = self._SERIALISE_FIELDS or ( field_name diff --git a/tests/mixins/test_serialisable.py b/tests/mixins/test_serialisable.py index a8ed723c9..7d844a362 100644 --- a/tests/mixins/test_serialisable.py +++ b/tests/mixins/test_serialisable.py @@ -25,14 +25,6 @@ def test_instantiates_with_no_args(self): """Ensures the class instantiates without arguments""" Serialisable() - def test_raises_attribute_error_with_missing_logger(self): - """Ensures class instantiates with a string uuid""" - resource = Serialisable() - with self.assertRaises(AttributeError) as error: - resource.serialise() - - self.assertIn("'Serialisable' object has no attribute 'logger'", error.exception.args[0]) - def test_returns_primitive_without_logger_or_protected_fields(self): """Ensures class instantiates with a UUID()""" resource = Inherit() From 31f6d22ef6e076037786ab8e3b253abcefdff521 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 14:19:06 +0100 Subject: [PATCH 013/103] FIX: Remove Serialisable mixin from LabelSet --- octue/resources/label.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/octue/resources/label.py b/octue/resources/label.py index c0781b75c..359d23cc1 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -3,7 +3,7 @@ from functools import lru_cache from octue.exceptions import InvalidLabelException -from octue.mixins import Filterable, Serialisable +from octue.mixins import Filterable from octue.resources.filter_containers import FilterList, FilterSet from octue.utils.encoders import OctueJSONEncoder @@ -89,7 +89,7 @@ def _clean(name): return cleaned_name -class LabelSet(Serialisable): +class LabelSet: """ Class to handle a set of labels as a string. """ _FILTERSET_ATTRIBUTE = "labels" From 26d40d779246f16058d58ca2f0019660cf6a1548 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 15:19:04 +0100 Subject: [PATCH 014/103] DEP: Use correct twined branch --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1b60fc2ca..bfbe3343c 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ "google-cloud-storage>=1.35.1", "google-crc32c>=1.1.2", "gunicorn", - "twined @ https://github.com/octue/twined/archive/feature/tag-templates.zip", + "twined @ https://github.com/octue/twined/archive/feature/tag-templates-2.zip", ], url="https://www.github.com/octue/octue-sdk-python", license="MIT", From 82995632c403c6ccc916372fb5742ad2f74fd8fc Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 15:50:44 +0100 Subject: [PATCH 015/103] IMP: Allow tags to be added to Taggable as kwargs --- octue/mixins/taggable.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/octue/mixins/taggable.py b/octue/mixins/taggable.py index 18731fed3..f304eca7d 100644 --- a/octue/mixins/taggable.py +++ b/octue/mixins/taggable.py @@ -4,13 +4,12 @@ class Taggable: """A mixin class allowing objects to be tagged.""" - def __init__(self, *args, tags=None, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, tags=None): self.tags = tags - def add_tags(self, tags): + def add_tags(self, tags=None, **kwargs): """ Adds one or more new tag strings to the object tags. New tags will be cleaned and validated. """ - self.tags.update(tags) + self.tags.update({**(tags or {}), **kwargs}) @property def tags(self): From 5556be98eee7f07289cbab6a5badfcc46145cfe3 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 15:51:09 +0100 Subject: [PATCH 016/103] TST: Test Taggable --- tests/mixins/test_taggable.py | 88 +++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tests/mixins/test_taggable.py diff --git a/tests/mixins/test_taggable.py b/tests/mixins/test_taggable.py new file mode 100644 index 000000000..01953706f --- /dev/null +++ b/tests/mixins/test_taggable.py @@ -0,0 +1,88 @@ +from octue import exceptions +from octue.mixins import Taggable +from octue.resources.tag import TagDict +from ..base import BaseTestCase + + +class MyTaggable(Taggable): + pass + + +class TaggableTestCase(BaseTestCase): + def test_instantiates(self): + """Ensures the class instantiates without arguments.""" + taggable = Taggable() + self.assertEqual(taggable.tags, {}) + + def test_instantiating_with_no_tags(self): + self.assertEqual(MyTaggable().tags, TagDict()) + + def test_fails_to_instantiates_with_non_iterable(self): + """Test that instantiation with a non-iterable fails.""" + + class NoIter: + pass + + with self.assertRaises(TypeError): + MyTaggable(tags=NoIter()) + + def test_instantiates_with_dict(self): + """Test instantiation with a dictionary.""" + tags = {"height": 9, "width": 8.7, "depth": 100} + taggable = MyTaggable(tags) + self.assertEqual(taggable.tags, tags) + + def test_instantiates_with_tag_dict(self): + """Test instantiation with a TagDict.""" + taggable_1 = MyTaggable(tags={"a": 2}) + self.assertIsInstance(taggable_1.tags, TagDict) + taggable_2 = MyTaggable(tags=taggable_1.tags) + self.assertFalse(taggable_1 is taggable_2) + + def test_setting_tags_overwrites_previous_tags(self): + """Ensure tags can be overwritten with new ones.""" + taggable = MyTaggable(tags={"a": 1, "b": 2}) + taggable.tags = {"c": 3, "d": 4} + self.assertEqual(taggable.tags, {"c": 3, "d": 4}) + + def test_add_valid_tags(self): + """Ensures adding valid tags works.""" + taggable = MyTaggable() + + taggable.add_tags({"a-valid-tag": "blah"}) + taggable.add_tags({"a:tag": "blah"}) + taggable.add_tags({"a:-tag": "blah"}) # <--- yes, this is valid deliberately as it allows people to do negation + taggable.add_tags({"a1829tag": "blah"}) + taggable.add_tags({"multiple:discriminators:used": "blah"}) + taggable.add_tags({"1829": "blah", "number:1829": "blah"}) # Add multiple tags at once. + + self.assertEqual( + taggable.tags, + { + "a-valid-tag": "blah", + "a:tag": "blah", + "a:-tag": "blah", + "a1829tag": "blah", + "1829": "blah", + "number:1829": "blah", + "multiple:discriminators:used": "blah", + }, + ) + + def test_add_tags_via_kwargs(self): + """Test tags can be added via kwargs.""" + taggable = MyTaggable() + taggable.add_tags(hello="blib", hi="glib") + self.assertEqual(taggable.tags, {"hello": "blib", "hi": "glib"}) + + def test_adding_mixture_of_valid_and_invalid_tags_fails_completely(self): + """Ensure that adding a variety of tags, some of which are invalid, doesn't partially add the set including the + invalid tags to the object. + """ + taggable = MyTaggable() + taggable.add_tags({"first-valid-should-be-added": "hello"}) + + with self.assertRaises(exceptions.InvalidTagException): + taggable.add_tags({"second-valid-should-not-be-added-because": 1, "-the-third-is-invalid:": 2}) + + self.assertEqual(taggable.tags, {"first-valid-should-be-added": "hello"}) From b1ca0c9cd06964effea149561b96fce3fb479318 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 16:11:10 +0100 Subject: [PATCH 017/103] TST: Test setting items on TagDict --- tests/resources/test_tag.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/resources/test_tag.py b/tests/resources/test_tag.py index 63da04035..b481dc594 100644 --- a/tests/resources/test_tag.py +++ b/tests/resources/test_tag.py @@ -23,6 +23,15 @@ def test_instantiation_fails_if_tag_name_fails_validation(self): with self.assertRaises(exceptions.InvalidTagException): TagDict({".blah.": "blue"}) + def test_update_fails_if_tag_name_fails_validation(self): + """Test that updating fails if any keys don't conform to the tag name pattern.""" + tag_dict = TagDict({"a": 1, "b": 2}) + + with self.assertRaises(exceptions.InvalidTagException): + tag_dict.update({"@": 3, "d": 4}) + + self.assertEqual(tag_dict, {"a": 1, "b": 2}) + def test_update(self): """Test that TagDicts can be updated with tags with valid names.""" tag_dict = TagDict({"a": 1, "b": 2}) @@ -30,14 +39,18 @@ def test_update(self): self.assertEqual(tag_dict["c"], 3) self.assertEqual(tag_dict["d"], 4) - def test_update_fails_if_tag_name_fails_validation(self): - """Test that updating fails if any keys don't conform to the tag name pattern.""" - tag_dict = TagDict({"a": 1, "b": 2}) + def test_setitem_fails_if_tag_name_fails_validation(self): + """Test that setting an item on a TagDict fails if the name fails validation.""" + tag_dict = TagDict() with self.assertRaises(exceptions.InvalidTagException): - tag_dict.update({"@": 3, "d": 4}) + tag_dict["@@@"] = 9 - self.assertEqual(tag_dict, {"a": 1, "b": 2}) + def test_setitem(self): + """Test setting an item on a TagDict.""" + tag_dict = TagDict() + tag_dict["hello"] = 9 + self.assertEqual(tag_dict["hello"], 9) def test_equality_to_dict(self): """Test that TagDicts compare equal to dictionaries with the same contents.""" From 0f34a006a290a5491358867129d0df16982dd5ae Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 16:20:10 +0100 Subject: [PATCH 018/103] TST: Test chaining filters on FilterDict --- tests/resources/test_filter_containers.py | 50 ++++++++++++++++------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index 38d65c4c5..dc76e7271 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -4,41 +4,44 @@ from tests.base import BaseTestCase -class Cat(Filterable): - def __init__(self, name=None, previous_names=None, age=None): - self.name = name - self.previous_names = previous_names - self.age = age +class FilterableThing(Filterable): + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) class TestFilterSet(BaseTestCase): def test_ordering_by_a_non_existent_attribute(self): """ Ensure an error is raised if ordering is attempted by a non-existent attribute. """ - filter_set = FilterSet([Cat(age=5), Cat(age=4), Cat(age=3)]) + filter_set = FilterSet([FilterableThing(age=5), FilterableThing(age=4), FilterableThing(age=3)]) with self.assertRaises(exceptions.InvalidInputException): filter_set.order_by("dog-likeness") def test_order_by_with_string_attribute(self): """ Test ordering a FilterSet by a string attribute returns an appropriately ordered FilterList. """ - cats = [Cat(name="Zorg"), Cat(name="James"), Cat(name="Princess Carolyn")] + cats = [FilterableThing(name="Zorg"), FilterableThing(name="James"), FilterableThing(name="Princess Carolyn")] sorted_filter_set = FilterSet(cats).order_by("name") self.assertEqual(sorted_filter_set, FilterList([cats[1], cats[2], cats[0]])) def test_order_by_with_int_attribute(self): """ Test ordering a FilterSet by an integer attribute returns an appropriately ordered FilterList. """ - cats = [Cat(age=5), Cat(age=4), Cat(age=3)] + cats = [FilterableThing(age=5), FilterableThing(age=4), FilterableThing(age=3)] sorted_filter_set = FilterSet(cats).order_by("age") self.assertEqual(sorted_filter_set, FilterList(reversed(cats))) def test_order_by_list_attribute(self): """ Test that ordering by list attributes orders by the size of the list. """ - cats = [Cat(previous_names=["Scatta", "Catta"]), Cat(previous_names=["Kitty"]), Cat(previous_names=[])] + cats = [ + FilterableThing(previous_names=["Scatta", "Catta"]), + FilterableThing(previous_names=["Kitty"]), + FilterableThing(previous_names=[]), + ] sorted_filter_set = FilterSet(cats).order_by("previous_names") self.assertEqual(sorted_filter_set, FilterList(reversed(cats))) def test_order_by_in_reverse(self): """ Test ordering in reverse works correctly. """ - cats = [Cat(age=5), Cat(age=3), Cat(age=4)] + cats = [FilterableThing(age=5), FilterableThing(age=3), FilterableThing(age=4)] sorted_filter_set = FilterSet(cats).order_by("age", reverse=True) self.assertEqual(sorted_filter_set, FilterList([cats[0], cats[2], cats[1]])) @@ -60,10 +63,27 @@ def test_instantiate(self): def test_filter(self): """Test that a FilterDict can be filtered on its values when they are all filterables.""" - filterables = {"first-filterable": Filterable(), "second-filterable": Filterable()} - filterables["first-filterable"].value = 3 - filterables["second-filterable"].value = 90 + filterables = { + "first-filterable": FilterableThing(my_value=3), + "second-filterable": FilterableThing(my_value=90), + } filter_dict = FilterDict(filterables) - self.assertEqual(filter_dict.filter(value__equals=90).keys(), {"second-filterable"}) - self.assertEqual(filter_dict.filter(value__gt=2), filterables) + self.assertEqual(filter_dict.filter(my_value__equals=90).keys(), {"second-filterable"}) + self.assertEqual(filter_dict.filter(my_value__gt=2), filterables) + + def test_filter_chaining(self): + """Test that filters can be chained to filter a FilterDict multiple times.""" + animals = FilterDict( + { + "cat": FilterableThing(age=3, size="small"), + "dog": FilterableThing(age=90, size="big"), + "another_dog": FilterableThing(age=90, size="small"), + } + ) + + animals_with_age_90 = animals.filter(age__equals=90) + self.assertEqual({"dog", "another_dog"}, animals_with_age_90.keys()) + + animals_with_age_90_and_size_small = animals_with_age_90.filter(size__equals="small") + self.assertEqual(animals_with_age_90_and_size_small.keys(), {"another_dog"}) From 1dd629e8e864dbaf49744e97bc2c20ea213de0fd Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 16:22:42 +0100 Subject: [PATCH 019/103] TST: Test filters for a TagDict --- tests/mixins/test_filterable.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index ddac4d805..75648413f 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -3,16 +3,14 @@ from octue import exceptions from octue.mixins.filterable import Filterable from octue.resources.label import LabelSet +from octue.resources.tag import TagDict from tests.base import BaseTestCase class FilterableSubclass(Filterable): - def __init__(self, name=None, is_alive=None, iterable=None, age=None, owner=None): - self.name = name - self.is_alive = is_alive - self.iterable = iterable - self.age = age - self.owner = owner + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) class TestFilterable(BaseTestCase): @@ -29,7 +27,7 @@ def test_error_raised_when_non_existent_attribute_name_received(self): def test_error_raised_when_valid_but_non_existent_filter_name_received(self): """ Ensure an error is raised when a valid but non-existent filter name is received. """ with self.assertRaises(exceptions.InvalidInputException): - FilterableSubclass().satisfies(age__is_secret=True) + FilterableSubclass(age=23).satisfies(age__is_secret=True) def test_error_raised_when_attribute_type_has_no_filters_defined(self): """Ensure an error is raised when a filter for an attribute whose type doesn't have any filters defined is @@ -190,3 +188,9 @@ def test_filtering_with_nested_attributes_ending_in_dictionary_key(self): filterable_thing = FilterableSubclass(name={"first": "Joe", "last": "Bloggs"}) self.assertTrue(filterable_thing.satisfies(name__first__equals="Joe")) self.assertTrue(filterable_thing.satisfies(name__last__equals="Bloggs")) + + def test_tag_dict_filters(self): + """Test some filters that apply to a TagDict. These should behave just the same as for a dictionary.""" + filterable_thing = FilterableSubclass(tags=TagDict({"first": "Joe", "middle": "Horatio", "last": "Bloggs"})) + self.assertTrue(filterable_thing.satisfies(tags__last__lt="Kevin")) + self.assertFalse(filterable_thing.satisfies(tags__middle__is="Boratio")) From b7f79bc12bfc27dd7f1d6b08f82f62b88fed5184 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 16:29:39 +0100 Subject: [PATCH 020/103] FIX: Add tags parameter back to Datafile and Dataset constructors --- octue/resources/datafile.py | 8 +++++++- octue/resources/dataset.py | 6 ++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 109a7411f..a76bceb24 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -24,6 +24,7 @@ ID_DEFAULT = None CLUSTER_DEFAULT = 0 SEQUENCE_DEFAULT = None +TAGS_DEFAULT = None LABELS_DEFAULT = None @@ -56,6 +57,7 @@ class Datafile(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiab :param Pathable path_from: The root Pathable object (typically a Dataset) that this Datafile's path is relative to. :param int cluster: The cluster of files, within a dataset, to which this belongs (default 0) :param int sequence: A sequence number of this file within its cluster (if sequences are appropriate) + :param dict|TagDict tags: key-value pairs with string keys conforming to the Octue tag format (see TagDict) :param str labels: Space-separated string of labels relevant to this file :param bool skip_checks: :param str mode: if using as a context manager, open the datafile for reading/editing in this mode (the mode @@ -71,8 +73,8 @@ class Datafile(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiab "name", "path", "sequence", - "labels", "tags", + "labels", "timestamp", "_cloud_metadata", ) @@ -86,6 +88,7 @@ def __init__( path_from=None, cluster=CLUSTER_DEFAULT, sequence=SEQUENCE_DEFAULT, + tags=TAGS_DEFAULT, labels=LABELS_DEFAULT, skip_checks=True, mode="r", @@ -97,6 +100,7 @@ def __init__( name=kwargs.pop("name", None), immutable_hash_value=kwargs.pop("immutable_hash_value", None), logger=logger, + tags=tags, labels=labels, path=path, path_from=path_from, @@ -204,6 +208,7 @@ def from_cloud( datafile.immutable_hash_value = datafile._cloud_metadata.get("crc32c", EMPTY_STRING_HASH_VALUE) datafile.cluster = kwargs.pop("cluster", custom_metadata.get("cluster", CLUSTER_DEFAULT)) datafile.sequence = kwargs.pop("sequence", custom_metadata.get("sequence", SEQUENCE_DEFAULT)) + datafile.tags = kwargs.pop("tags", custom_metadata.get("tags", TAGS_DEFAULT)) datafile.labels = kwargs.pop("labels", custom_metadata.get("labels", LABELS_DEFAULT)) datafile._open_attributes = {"mode": mode, "update_cloud_metadata": update_cloud_metadata, **kwargs} return datafile @@ -490,6 +495,7 @@ def metadata(self): "timestamp": self.timestamp, "cluster": self.cluster, "sequence": self.sequence, + "tags": self.tags.serialise(to_string=True), "labels": self.labels.serialise(to_string=True), } diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index 3cb92767e..ee2c3cd7b 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -11,6 +11,7 @@ from octue.resources.datafile import Datafile from octue.resources.filter_containers import FilterSet from octue.resources.label import LabelSet +from octue.resources.tag import TagDict module_logger = logging.getLogger(__name__) @@ -30,9 +31,9 @@ class Dataset(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiabl _ATTRIBUTES_TO_HASH = ("files",) _SERIALISE_FIELDS = "files", "name", "labels", "tags", "id", "path" - def __init__(self, name=None, id=None, logger=None, path=None, path_from=None, labels=None, **kwargs): + def __init__(self, name=None, id=None, logger=None, path=None, path_from=None, tags=None, labels=None, **kwargs): """Construct a Dataset""" - super().__init__(name=name, id=id, logger=logger, labels=labels, path=path, path_from=path_from) + super().__init__(name=name, id=id, logger=logger, tags=None, labels=labels, path=path, path_from=path_from) # TODO The decoders aren't being used; utils.decoders.OctueJSONDecoder should be used in twined # so that resources get automatically instantiated. @@ -85,6 +86,7 @@ def from_cloud(cls, project_name, bucket_name, path_to_dataset_directory): id=serialised_dataset["id"], name=serialised_dataset["name"], path=storage.path.generate_gs_path(bucket_name, path_to_dataset_directory), + tags=TagDict(serialised_dataset["tags"]), labels=LabelSet(serialised_dataset["labels"]), files=datafiles, ) From e43ea636b34233707a0ed023c1372d27760aa1ec Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 16:57:14 +0100 Subject: [PATCH 021/103] FIX: Initialise superclass in Taggable mixin --- octue/mixins/taggable.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/octue/mixins/taggable.py b/octue/mixins/taggable.py index f304eca7d..2d43ff956 100644 --- a/octue/mixins/taggable.py +++ b/octue/mixins/taggable.py @@ -4,8 +4,9 @@ class Taggable: """A mixin class allowing objects to be tagged.""" - def __init__(self, tags=None): + def __init__(self, *args, tags=None, **kwargs): self.tags = tags + super().__init__(*args, **kwargs) def add_tags(self, tags=None, **kwargs): """ Adds one or more new tag strings to the object tags. New tags will be cleaned and validated. """ From ea66c28c6597266d9bf3c3c201e9b93ea968d423 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 16:58:07 +0100 Subject: [PATCH 022/103] CLN: Remove extra parameters from Label --- octue/mixins/labelable.py | 2 +- octue/resources/label.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/octue/mixins/labelable.py b/octue/mixins/labelable.py index 8c5f0c48d..8679f56a7 100644 --- a/octue/mixins/labelable.py +++ b/octue/mixins/labelable.py @@ -5,8 +5,8 @@ class Labelable: """ A mixin class allowing objects to be labelled. """ def __init__(self, *args, labels=None, **kwargs): - super().__init__(*args, **kwargs) self._labels = LabelSet(labels) + super().__init__(*args, **kwargs) def add_labels(self, *args): """ Adds one or more new label strings to the object labels. New labels will be cleaned and validated. """ diff --git a/octue/resources/label.py b/octue/resources/label.py index 359d23cc1..542b576a6 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -94,7 +94,7 @@ class LabelSet: _FILTERSET_ATTRIBUTE = "labels" - def __init__(self, labels=None, *args, **kwargs): + def __init__(self, labels=None): """ Construct a LabelSet. """ # TODO Call the superclass with *args and **kwargs, then update everything to using ResourceBase labels = labels or FilterSet() From 515fe70fc886e0433664e1ceac35209b0ef5f34f Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 17:00:00 +0100 Subject: [PATCH 023/103] FIX: Add serialise method to TagDict --- octue/resources/tag.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/octue/resources/tag.py b/octue/resources/tag.py index b49e6149c..a0881b0bf 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -1,14 +1,10 @@ -# import json +import json import re from octue.exceptions import InvalidTagException from octue.mixins import Serialisable from octue.resources.filter_containers import FilterDict - - -# from collections import UserDict - -# from octue.utils.encoders import OctueJSONEncoder +from octue.utils.encoders import OctueJSONEncoder TAG_NAME_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9:.\-/]*(? Date: Mon, 17 May 2021 17:01:13 +0100 Subject: [PATCH 024/103] FIX: Deserialise TagDicts properly in Datafile.from_cloud --- octue/resources/datafile.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index a76bceb24..2f294809a 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -11,6 +11,7 @@ from octue.exceptions import AttributeConflict, CloudLocationNotSpecified, FileNotFoundException, InvalidInputException from octue.mixins import Filterable, Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable, Taggable from octue.mixins.hashable import EMPTY_STRING_HASH_VALUE +from octue.resources.tag import TagDict from octue.utils import isfile from octue.utils.time import convert_from_posix_time, convert_to_posix_time @@ -202,15 +203,21 @@ def from_cloud( if isinstance(timestamp, str): timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f%z") + tags = kwargs.pop("tags", custom_metadata.get("tags", TAGS_DEFAULT)) + + if isinstance(tags, str): + tags = TagDict.deserialise(tags, from_string=True) + datafile._set_id(kwargs.pop("id", custom_metadata.get("id", ID_DEFAULT))) datafile.path = storage.path.generate_gs_path(bucket_name, datafile_path) datafile.timestamp = timestamp datafile.immutable_hash_value = datafile._cloud_metadata.get("crc32c", EMPTY_STRING_HASH_VALUE) datafile.cluster = kwargs.pop("cluster", custom_metadata.get("cluster", CLUSTER_DEFAULT)) datafile.sequence = kwargs.pop("sequence", custom_metadata.get("sequence", SEQUENCE_DEFAULT)) - datafile.tags = kwargs.pop("tags", custom_metadata.get("tags", TAGS_DEFAULT)) + datafile.tags = tags datafile.labels = kwargs.pop("labels", custom_metadata.get("labels", LABELS_DEFAULT)) datafile._open_attributes = {"mode": mode, "update_cloud_metadata": update_cloud_metadata, **kwargs} + return datafile def to_cloud(self, project_name=None, bucket_name=None, path_in_bucket=None, update_cloud_metadata=True): From a2c5d00a75289d982ceeff371790f8fdd2d482d9 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 17:01:58 +0100 Subject: [PATCH 025/103] TST: Fix Taggable test --- tests/mixins/test_taggable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mixins/test_taggable.py b/tests/mixins/test_taggable.py index 01953706f..949a3e3ac 100644 --- a/tests/mixins/test_taggable.py +++ b/tests/mixins/test_taggable.py @@ -29,7 +29,7 @@ class NoIter: def test_instantiates_with_dict(self): """Test instantiation with a dictionary.""" tags = {"height": 9, "width": 8.7, "depth": 100} - taggable = MyTaggable(tags) + taggable = MyTaggable(tags=tags) self.assertEqual(taggable.tags, tags) def test_instantiates_with_tag_dict(self): From 1d2dcffb5dc1c27dd7a60f66585b77db34ca7032 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 17:02:24 +0100 Subject: [PATCH 026/103] CLN: Simplify test method --- tests/resources/test_datafile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index c004b559c..e9c2f9ce0 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -50,8 +50,7 @@ def create_datafile_in_cloud( with tempfile.NamedTemporaryFile("w", delete=False) as temporary_file: temporary_file.write(contents) - timestamp = kwargs.pop("timestamp", None) - datafile = Datafile(path=temporary_file.name, timestamp=timestamp, **kwargs) + datafile = Datafile(path=temporary_file.name, **kwargs) datafile.to_cloud(project_name=project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket) return datafile, project_name, bucket_name, path_in_bucket, contents From 580d432c1c7db35de8d6010ee2daf2f753c1aa41 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 17:16:09 +0100 Subject: [PATCH 027/103] FIX: Fix Dataset tags parameter --- octue/resources/datafile.py | 2 +- octue/resources/dataset.py | 3 +-- tests/resources/test_dataset.py | 13 +++++++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 2f294809a..018e577bd 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -59,7 +59,7 @@ class Datafile(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiab :param int cluster: The cluster of files, within a dataset, to which this belongs (default 0) :param int sequence: A sequence number of this file within its cluster (if sequences are appropriate) :param dict|TagDict tags: key-value pairs with string keys conforming to the Octue tag format (see TagDict) - :param str labels: Space-separated string of labels relevant to this file + :param iter(str) labels: Space-separated string of labels relevant to this file :param bool skip_checks: :param str mode: if using as a context manager, open the datafile for reading/editing in this mode (the mode options are the same as for the builtin open function) diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index ee2c3cd7b..a5e58a64d 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -32,8 +32,7 @@ class Dataset(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiabl _SERIALISE_FIELDS = "files", "name", "labels", "tags", "id", "path" def __init__(self, name=None, id=None, logger=None, path=None, path_from=None, tags=None, labels=None, **kwargs): - """Construct a Dataset""" - super().__init__(name=name, id=id, logger=logger, tags=None, labels=labels, path=path, path_from=path_from) + super().__init__(name=name, id=id, logger=logger, tags=tags, labels=labels, path=path, path_from=path_from) # TODO The decoders aren't being used; utils.decoders.OctueJSONDecoder should be used in twined # so that resources get automatically instantiated. diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index de05233d2..d4a5f366c 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -55,7 +55,7 @@ def test_add_single_file_to_empty_dataset(self): def test_add_single_file_to_existing_dataset(self): """Ensures that when a dataset is not empty, it can be added to""" files = [Datafile(path="path-within-dataset/a_test_file.csv")] - resource = Dataset(files=files, labels="one two") + resource = Dataset(files=files, labels="one two", tags={"a": "b"}) resource.add(Datafile(path="path-within-dataset/a_test_file.csv")) self.assertEqual(len(resource.files), 2) @@ -296,9 +296,10 @@ def test_from_cloud(self): dataset = Dataset( name="dataset_0", files={ - Datafile(path=file_0_path, sequence=0, labels={"hello"}), - Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), + Datafile(path=file_0_path, sequence=0, labels={"hello"}, tags={"a": "b"}), + Datafile(path=file_1_path, sequence=1, labels={"goodbye"}, tags={"a": "b"}), }, + tags={"a": "b", "c": 1}, ) dataset.to_cloud(project_name=project_name, bucket_name=TEST_BUCKET_NAME, output_directory="a_directory") @@ -313,6 +314,7 @@ def test_from_cloud(self): self.assertEqual(persisted_dataset.id, dataset.id) self.assertEqual(persisted_dataset.name, dataset.name) self.assertEqual(persisted_dataset.hash_value, dataset.hash_value) + self.assertEqual(persisted_dataset.tags, dataset.tags) self.assertEqual(persisted_dataset.labels, dataset.labels) self.assertEqual({file.name for file in persisted_dataset.files}, {file.name for file in dataset.files}) @@ -340,7 +342,8 @@ def test_to_cloud(self): files={ Datafile(path=file_0_path, sequence=0, labels={"hello"}), Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), - } + }, + tags={"a": "b", "c": 1}, ) dataset.to_cloud(project_name, TEST_BUCKET_NAME, output_directory) @@ -374,3 +377,5 @@ def test_to_cloud(self): "gs://octue-test-bucket/my_datasets/octue-sdk-python/file_1.txt", ], ) + + self.assertEqual(persisted_dataset["tags"], dataset.tags.serialise()) From d92de3dbb246a88053efc8f6159d0690d06791c0 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 17:18:54 +0100 Subject: [PATCH 028/103] CLN: Remove unused _FILTERSET_ATTRIBUTE class variables --- octue/resources/dataset.py | 1 - octue/resources/label.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index a5e58a64d..28462ec36 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -27,7 +27,6 @@ class Dataset(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiabl list of output files (results) and their properties that will be sent back to the octue system. """ - _FILTERSET_ATTRIBUTE = "files" _ATTRIBUTES_TO_HASH = ("files",) _SERIALISE_FIELDS = "files", "name", "labels", "tags", "id", "path" diff --git a/octue/resources/label.py b/octue/resources/label.py index 542b576a6..399319b6d 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -92,8 +92,6 @@ def _clean(name): class LabelSet: """ Class to handle a set of labels as a string. """ - _FILTERSET_ATTRIBUTE = "labels" - def __init__(self, labels=None): """ Construct a LabelSet. """ # TODO Call the superclass with *args and **kwargs, then update everything to using ResourceBase From 29e92fef7df7aafdc8e0caa81bf0562e0b8cabf2 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 17:47:03 +0100 Subject: [PATCH 029/103] REF: Base LabelSet on FilterSet --- octue/resources/label.py | 49 ++++++++++++++--------------------- tests/resources/test_label.py | 22 ++++++++-------- 2 files changed, 31 insertions(+), 40 deletions(-) diff --git a/octue/resources/label.py b/octue/resources/label.py index 399319b6d..a00184bd8 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -1,5 +1,6 @@ import json import re +from collections.abc import Iterable from functools import lru_cache from octue.exceptions import InvalidLabelException @@ -72,6 +73,9 @@ def ends_with(self, value): """ Does the label end with the given value? """ return self.name.endswith(value) + def serialise(self): + return self.name + @staticmethod def _clean(name): """ Ensure the label name is a string and conforms to the label regex pattern. """ @@ -89,7 +93,7 @@ def _clean(name): return cleaned_name -class LabelSet: +class LabelSet(FilterSet): """ Class to handle a set of labels as a string. """ def __init__(self, labels=None): @@ -100,48 +104,44 @@ def __init__(self, labels=None): # JSON-encoded list of label names, or space-delimited string of label names. if isinstance(labels, str): try: - self.labels = FilterSet(Label(label) for label in json.loads(labels)) + labels = FilterSet(Label(label) for label in json.loads(labels)) except json.decoder.JSONDecodeError: - self.labels = FilterSet(Label(label) for label in labels.strip().split()) + labels = FilterSet(Label(label) for label in labels.strip().split()) elif isinstance(labels, LabelSet): - self.labels = FilterSet(labels.labels) + labels = FilterSet(labels) # Labels can be some other iterable than a list, but each label must be a Label or string. elif hasattr(labels, "__iter__"): - self.labels = FilterSet(label if isinstance(label, Label) else Label(label) for label in labels) + labels = FilterSet(label if isinstance(label, Label) else Label(label) for label in labels) else: raise InvalidLabelException( "Labels must be expressed as a whitespace-delimited string or an iterable of strings or Label instances." ) + super().__init__(labels) + def __eq__(self, other): - """ Does this LabelSet have the same labels as another LabelSet? """ - if not isinstance(other, LabelSet): + """Does this LabelSet have the same labels as another LabelSet?""" + if not isinstance(other, Iterable): return False - return self.labels == other.labels - def __iter__(self): - """ Iterate over the labels in the LabelSet. """ - yield from self.labels + if not all(isinstance(item, Label) for item in other): + other = {Label(item) for item in other} - def __len__(self): - return len(self.labels) + return set(self) == set(other) def __contains__(self, label): """ Return True if any of the labels exactly matches value, allowing test like `if 'a' in LabelSet('a b')`. """ if isinstance(label, str): - return Label(label) in self.labels + return Label(label) in set(self) if isinstance(label, Label): - return label in self.labels - - def __repr__(self): - return f"<{type(self).__name__}({self.labels})>" + return label in set(self) def add_labels(self, *args): """Adds one or more new label strings to the object labels. New labels will be cleaned and validated.""" - self.labels |= {Label(arg) for arg in args} + self.update({Label(arg) for arg in args}) def get_sublabels(self): """ Return a new LabelSet instance with all the sublabels. """ @@ -159,15 +159,6 @@ def any_label_contains(self, value): """ Return True if any of the labels contains value. """ return any(value in label for label in self) - def filter(self, filter_name=None, filter_value=None): - """Filter the labels with the given filter for the given value. - - :param str filter_name: - :param any filter_value: - :return octue.resources.filter_containers.FilterSet: - """ - return self.labels.filter(filter_name=filter_name, filter_value=filter_value) - def serialise(self, to_string=False, **kwargs): """Serialise to a sorted list of label names. @@ -175,7 +166,7 @@ def serialise(self, to_string=False, **kwargs): :return list|str: """ string = json.dumps( - sorted(label.name for label in self.labels), cls=OctueJSONEncoder, sort_keys=True, indent=4, **kwargs + sorted(label.name for label in self), cls=OctueJSONEncoder, sort_keys=True, indent=4, **kwargs ) if to_string: diff --git a/tests/resources/test_label.py b/tests/resources/test_label.py index 9322fbaa8..fbbf75e21 100644 --- a/tests/resources/test_label.py +++ b/tests/resources/test_label.py @@ -74,27 +74,27 @@ class TestLabelSet(BaseTestCase): def test_instantiation_from_space_delimited_string(self): """ Test that a LabelSet can be instantiated from a space-delimited string of label names.""" label_set = LabelSet(labels="a b:c d:e:f") - self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + self.assertEqual(label_set, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) def test_instantiation_from_iterable_of_strings(self): """ Test that a LabelSet can be instantiated from an iterable of strings.""" label_set = LabelSet(labels=["a", "b:c", "d:e:f"]) - self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + self.assertEqual(label_set, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) def test_instantiation_from_iterable_of_labels(self): """ Test that a LabelSet can be instantiated from an iterable of labels.""" label_set = LabelSet(labels=[Label("a"), Label("b:c"), Label("d:e:f")]) - self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + self.assertEqual(label_set, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) def test_instantiation_from_filter_set_of_strings(self): """ Test that a LabelSet can be instantiated from a FilterSet of strings.""" label_set = LabelSet(labels=FilterSet({"a", "b:c", "d:e:f"})) - self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + self.assertEqual(label_set, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) def test_instantiation_from_filter_set_of_labels(self): """ Test that a LabelSet can be instantiated from a FilterSet of labels.""" label_set = LabelSet(labels=FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) - self.assertEqual(label_set.labels, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) + self.assertEqual(label_set, FilterSet({Label("a"), Label("b:c"), Label("d:e:f")})) def test_instantiation_from_label_set(self): """ Test that a LabelSet can be instantiated from another LabelSet. """ @@ -167,7 +167,7 @@ def test_filter(self): """ Test that label sets can be filtered. """ label_set = LabelSet(labels="label1 label2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") self.assertEqual( - label_set.labels.filter(name__starts_with="meta"), + label_set.filter(name__starts_with="meta"), FilterSet({Label("meta:sys1:1234"), Label("meta:sys2:3456"), Label("meta:sys2:55")}), ) @@ -175,14 +175,14 @@ def test_filter_chaining(self): """ Test that filters can be chained. """ label_set = LabelSet(labels="label1 label2 meta:sys1:1234 meta:sys2:3456 meta:sys2:55") - filtered_labels_1 = label_set.labels.filter(name__starts_with="meta") - self.assertEqual(filtered_labels_1, LabelSet("meta:sys1:1234 meta:sys2:3456 meta:sys2:55").labels) + filtered_labels_1 = label_set.filter(name__starts_with="meta") + self.assertEqual(filtered_labels_1, LabelSet("meta:sys1:1234 meta:sys2:3456 meta:sys2:55")) filtered_labels_2 = filtered_labels_1.filter(name__contains="sys2") - self.assertEqual(filtered_labels_2, LabelSet("meta:sys2:3456 meta:sys2:55").labels) + self.assertEqual(filtered_labels_2, LabelSet("meta:sys2:3456 meta:sys2:55")) filtered_labels_3 = filtered_labels_1.filter(name__equals="meta:sys2:55") - self.assertEqual(filtered_labels_3, LabelSet("meta:sys2:55").labels) + self.assertEqual(filtered_labels_3, LabelSet("meta:sys2:55")) def test_serialise(self): """ Ensure that LabelSets are serialised to the string form of a list. """ @@ -201,4 +201,4 @@ def test_deserialise(self): def test_repr(self): """Test the representation of a LabelSet appears as expected.""" - self.assertEqual(repr(self.LABEL_SET), f"") + self.assertEqual(repr(self.LABEL_SET), f"LabelSet({set(self.LABEL_SET)})") From 73b32b0fef13df585b051abceebad77af53d49ed Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 18:01:47 +0100 Subject: [PATCH 030/103] REF: Base Label on str --- octue/resources/label.py | 57 +++++------------------------------ tests/resources/test_label.py | 8 ++--- 2 files changed, 12 insertions(+), 53 deletions(-) diff --git a/octue/resources/label.py b/octue/resources/label.py index a00184bd8..50d8279dd 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -1,5 +1,6 @@ import json import re +from collections import UserString from collections.abc import Iterable from functools import lru_cache @@ -12,7 +13,7 @@ LABEL_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9:.\-/]*(? other - elif isinstance(other, Label): - return self.name > other.name - - def __hash__(self): - """ Allow Labels to be contained in a set. """ - return hash(f"{type(self).__name__}{self.name}") - - def __contains__(self, item): - return item in self.name - - def __repr__(self): - return repr(self.name) - - def starts_with(self, value): - """ Does the label start with the given value? """ - return self.name.startswith(value) - - def ends_with(self, value): - """ Does the label end with the given value? """ - return self.name.endswith(value) + return FilterList(Label(sublabel_name) for sublabel_name in self.split(":")) def serialise(self): return self.name @@ -149,11 +110,11 @@ def get_sublabels(self): def any_label_starts_with(self, value): """ Implement a startswith method that returns true if any of the labels starts with value """ - return any(label.starts_with(value) for label in self) + return any(label.startswith(value) for label in self) def any_label_ends_with(self, value): """ Implement an endswith method that returns true if any of the labels endswith value. """ - return any(label.ends_with(value) for label in self) + return any(label.endswith(value) for label in self) def any_label_contains(self, value): """ Return True if any of the labels contains value. """ @@ -165,9 +126,7 @@ def serialise(self, to_string=False, **kwargs): :param bool to_string: :return list|str: """ - string = json.dumps( - sorted(label.name for label in self), cls=OctueJSONEncoder, sort_keys=True, indent=4, **kwargs - ) + string = json.dumps(sorted(self), cls=OctueJSONEncoder, sort_keys=True, indent=4, **kwargs) if to_string: return string diff --git a/tests/resources/test_label.py b/tests/resources/test_label.py index fbbf75e21..874097cf2 100644 --- a/tests/resources/test_label.py +++ b/tests/resources/test_label.py @@ -49,8 +49,8 @@ def test_contains(self): def test_starts_with(self): """ Test that the start of a label can be checked. """ - self.assertTrue(Label("hello").starts_with("h")) - self.assertFalse(Label("hello").starts_with("e")) + self.assertTrue(Label("hello").startswith("h")) + self.assertFalse(Label("hello").startswith("e")) def test_sublabels_starts_with(self): """ Test that the start of sublabels can be checked. """ @@ -59,8 +59,8 @@ def test_sublabels_starts_with(self): def test_ends_with(self): """ Test that the end of a label can be checked. """ - self.assertTrue(Label("hello").ends_with("o")) - self.assertFalse(Label("hello").ends_with("e")) + self.assertTrue(Label("hello").endswith("o")) + self.assertFalse(Label("hello").endswith("e")) def test_sublabels_ends_with(self): """ Test that the end of sublabels can be checked. """ From 4a16676fe27edabaa13c9ad91b6010c34cd7ef29 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 18:34:44 +0100 Subject: [PATCH 031/103] IMP: Allow FilterDicts to be ordered by their values --- octue/mixins/filterable.py | 30 +------------ octue/resources/filter_containers.py | 32 +++++++++++-- octue/utils/objects.py | 29 ++++++++++++ tests/mixins/test_filterable.py | 23 ---------- tests/resources/test_filter_containers.py | 55 ++++++++++++++++++----- tests/utils/test_objects.py | 26 +++++++++++ 6 files changed, 131 insertions(+), 64 deletions(-) create mode 100644 octue/utils/objects.py create mode 100644 tests/utils/test_objects.py diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index 9c9d89304..5fb344591 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -1,8 +1,8 @@ import collections.abc -import functools import numbers from octue import exceptions +from octue.utils.objects import get_nested_attribute IS_FILTER_ACTIONS = { @@ -90,7 +90,7 @@ def satisfies(self, **kwargs): filter_name, filter_value = list(kwargs.items())[0] attribute_name, filter_action = self._split_filter_name(filter_name) - attribute = self._get_nested_attribute(self, attribute_name) + attribute = get_nested_attribute(self, attribute_name) filter_ = self._get_filter(attribute, filter_action) return filter_(attribute, filter_value) @@ -110,32 +110,6 @@ def _split_filter_name(self, filter_name): return ".".join(attribute_names), filter_action - def _get_nested_attribute(self, instance, nested_attribute_name): - """Get the value of a nested attribute from a class instance or dictionary, with each level of nesting being - another dictionary or class instance. - - :param dict|object instance: - :param str nested_attribute_names: dot-separated nested attribute name e.g. "a.b.c", "a.b", or "a" - :return any: - """ - nested_attribute_names = nested_attribute_name.split(".") - return functools.reduce(self._getattr_or_subscribe, nested_attribute_names, instance) - - def _getattr_or_subscribe(self, instance, name): - """Get an attribute from a class instance or a value from a dictionary. - - :param dict|object instance: - :param str name: name of attribute or dictionary key - :return any: - """ - try: - return getattr(instance, name) - except AttributeError: - try: - return instance[name] - except TypeError: - raise AttributeError(f"{instance!r} does not have an attribute or key named {name!r}.") - def _get_filter(self, attribute, filter_action): """Get the filter for the attribute and filter action, raising an error if there is no filter action of that name. diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index f716423bf..3361c3191 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -1,10 +1,11 @@ from collections import UserDict from octue import exceptions +from octue.utils.objects import get_nested_attribute def _filter(self, **kwargs): - """Returns a new instance containing only the Filterables to which the given filter criteria apply. + """Return a new instance containing only the Filterables to which the given filter criteria apply. :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whos value is the value to filter for @@ -36,7 +37,32 @@ class FilterList(list): class FilterDict(UserDict): - order_by = _order_by - def filter(self, **kwargs): + """Return a new instance containing only the Filterables for which the given filter criteria apply are + satisfied. + + :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whose value is the + value to filter for + :return FilterDict: + """ return self.__class__({key: value for key, value in self.items() if value.satisfies(**kwargs)}) + + def order_by(self, attribute_name, reverse=False): + """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList (not a + FilterSet. + + :param str attribute_name: a dot-separated (optionally nested) attribute name e.g. "a", "a.b", "a.b.c" + :param bool reverse: + :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the + FilterDict's values + :return FilterList: + """ + try: + return FilterList( + sorted(self.values(), key=lambda item: get_nested_attribute(item, attribute_name), reverse=reverse) + ) + + except AttributeError: + raise exceptions.InvalidInputException( + f"An attribute named {attribute_name!r} does not exist on one or more members of {self!r}." + ) diff --git a/octue/utils/objects.py b/octue/utils/objects.py new file mode 100644 index 000000000..b0b1d4be9 --- /dev/null +++ b/octue/utils/objects.py @@ -0,0 +1,29 @@ +import functools + + +def get_nested_attribute(instance, nested_attribute_name): + """Get the value of a nested attribute from a class instance or dictionary, with each level of nesting being + another dictionary or class instance. + + :param dict|object instance: + :param str nested_attribute_names: dot-separated nested attribute name e.g. "a.b.c", "a.b", or "a" + :return any: + """ + nested_attribute_names = nested_attribute_name.split(".") + return functools.reduce(getattr_or_subscribe, nested_attribute_names, instance) + + +def getattr_or_subscribe(instance, name): + """Get an attribute from a class instance or a value from a dictionary. + + :param dict|object instance: + :param str name: name of attribute or dictionary key + :return any: + """ + try: + return getattr(instance, name) + except AttributeError: + try: + return instance[name] + except TypeError: + raise AttributeError(f"{instance!r} does not have an attribute or key named {name!r}.") diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index 75648413f..5c1b60140 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -151,29 +151,6 @@ def test_filtering_different_attributes_on_same_instance(self): self.assertTrue(filterable_thing.satisfies(age__not_equals=5)) self.assertTrue(filterable_thing.satisfies(owner__is=None)) - def test_getattr_or_subscribe_with_dictionary(self): - """Test that the Filterable._getattr_or_subscribe method can get values from a dictionary.""" - filterable = Filterable() - self.assertEqual(filterable._getattr_or_subscribe(instance={"hello": "world"}, name="hello"), "world") - - def test_getattr_or_subscribe_with_object(self): - """Test that the Filterable._getattr_or_subscribe method can get attribute values from a class instance.""" - self.assertEqual(Filterable()._getattr_or_subscribe(instance=Mock(a=3), name="a"), 3) - - def test_get_nested_attribute(self): - """Test that nested attributes can be accessed.""" - inner_mock = Mock(b=3) - outer_mock = Mock(a=inner_mock) - self.assertEqual(Filterable()._get_nested_attribute(instance=outer_mock, nested_attribute_name="a.b"), 3) - - def test_get_nested_dictionary_attribute(self): - """Test that nested attributes ending in a dictionary key can be accessed.""" - inner_mock = Mock(b={"hello": "world"}) - outer_mock = Mock(a=inner_mock) - self.assertEqual( - Filterable()._get_nested_attribute(instance=outer_mock, nested_attribute_name="a.b.hello"), "world" - ) - def test_filtering_with_nested_attributes(self): """Test that Filterable subclasses can be checked for satisfaction of a filter of nested attributes.""" inner_mock = Mock(b=3) diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index dc76e7271..e74c3136b 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -47,6 +47,16 @@ def test_order_by_in_reverse(self): class TestFilterDict(BaseTestCase): + ANIMALS = FilterDict( + { + "cat": FilterableThing(name="Princess Carolyn", age=3, size="small", previous_names=["scatta", "catta"]), + "dog": FilterableThing(name="Spot", age=90, size="big", previous_names=[]), + "another_dog": FilterableThing( + name="Ranger", age=91, size="small", previous_names=["doggo", "oggo", "loggo"] + ), + } + ) + def test_instantiate(self): """Test that a FilterDict can be instantiated like a dictionary.""" filter_dict = FilterDict(a=1, b=3) @@ -74,16 +84,41 @@ def test_filter(self): def test_filter_chaining(self): """Test that filters can be chained to filter a FilterDict multiple times.""" - animals = FilterDict( - { - "cat": FilterableThing(age=3, size="small"), - "dog": FilterableThing(age=90, size="big"), - "another_dog": FilterableThing(age=90, size="small"), - } + animals_with_age_of_at_least_90 = self.ANIMALS.filter(age__gte=90) + self.assertEqual({"dog", "another_dog"}, animals_with_age_of_at_least_90.keys()) + + animals_with_age_of_at_least_90_and_size_small = animals_with_age_of_at_least_90.filter(size__equals="small") + self.assertEqual(animals_with_age_of_at_least_90_and_size_small.keys(), {"another_dog"}) + + def test_ordering_by_a_non_existent_attribute(self): + """ Ensure an error is raised if ordering is attempted by a non-existent attribute. """ + with self.assertRaises(exceptions.InvalidInputException): + self.ANIMALS.order_by("dog-likeness") + + def test_order_by_with_string_attribute(self): + """ Test ordering a FilterSet by a string attribute returns an appropriately ordered FilterList. """ + self.assertEqual( + self.ANIMALS.order_by("name"), + FilterList((self.ANIMALS["cat"], self.ANIMALS["another_dog"], self.ANIMALS["dog"])), ) - animals_with_age_90 = animals.filter(age__equals=90) - self.assertEqual({"dog", "another_dog"}, animals_with_age_90.keys()) + def test_order_by_with_int_attribute(self): + """ Test ordering a FilterSet by an integer attribute returns an appropriately ordered FilterList. """ + self.assertEqual( + self.ANIMALS.order_by("age"), + FilterList((self.ANIMALS["cat"], self.ANIMALS["dog"], self.ANIMALS["another_dog"])), + ) - animals_with_age_90_and_size_small = animals_with_age_90.filter(size__equals="small") - self.assertEqual(animals_with_age_90_and_size_small.keys(), {"another_dog"}) + def test_order_by_list_attribute(self): + """ Test that ordering by list attributes orders alphabetically by the first element of each list. """ + self.assertEqual( + self.ANIMALS.order_by("previous_names"), + FilterList((self.ANIMALS["dog"], self.ANIMALS["another_dog"], self.ANIMALS["cat"])), + ) + + def test_order_by_in_reverse(self): + """ Test ordering in reverse works correctly. """ + self.assertEqual( + self.ANIMALS.order_by("age", reverse=True), + FilterList((self.ANIMALS["another_dog"], self.ANIMALS["dog"], self.ANIMALS["cat"])), + ) diff --git a/tests/utils/test_objects.py b/tests/utils/test_objects.py new file mode 100644 index 000000000..6c7c48db4 --- /dev/null +++ b/tests/utils/test_objects.py @@ -0,0 +1,26 @@ +from unittest import TestCase +from unittest.mock import Mock + +from octue.utils.objects import get_nested_attribute, getattr_or_subscribe + + +class TestObjects(TestCase): + def test_getattr_or_subscribe_with_dictionary(self): + """Test that the getattr_or_subscribe function can get values from a dictionary.""" + self.assertEqual(getattr_or_subscribe(instance={"hello": "world"}, name="hello"), "world") + + def test_getattr_or_subscribe_with_object(self): + """Test that the getattr_or_subscribe function can get attribute values from a class instance.""" + self.assertEqual(getattr_or_subscribe(instance=Mock(a=3), name="a"), 3) + + def test_get_nested_attribute(self): + """Test that nested attributes can be accessed.""" + inner_mock = Mock(b=3) + outer_mock = Mock(a=inner_mock) + self.assertEqual(get_nested_attribute(instance=outer_mock, nested_attribute_name="a.b"), 3) + + def test_get_nested_dictionary_attribute(self): + """Test that nested attributes ending in a dictionary key can be accessed.""" + inner_mock = Mock(b={"hello": "world"}) + outer_mock = Mock(a=inner_mock) + self.assertEqual(get_nested_attribute(instance=outer_mock, nested_attribute_name="a.b.hello"), "world") From b6742be76bb9eb75644190ca612d2ef63595ffe3 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 19:28:36 +0100 Subject: [PATCH 032/103] IMP: Allow ignoring of filterables without filtered-for attribute --- octue/mixins/filterable.py | 13 +++++++-- octue/resources/filter_containers.py | 22 +++++++++++++--- tests/resources/test_filter_containers.py | 32 +++++++++++++++++++++++ 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index 5fb344591..e392ba8be 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -76,10 +76,11 @@ class Filterable: - def satisfies(self, **kwargs): + def satisfies(self, raise_error_if_filter_is_invalid=True, **kwargs): """Check that the instance satisfies the given filter for the given filter value. The filter should be provided as a single keyword argument such as `name__first__equals="Joe"` + :param bool raise_error_if_filter_is_invalid: :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whose value is the value to filter for :return mixed: @@ -90,9 +91,17 @@ def satisfies(self, **kwargs): filter_name, filter_value = list(kwargs.items())[0] attribute_name, filter_action = self._split_filter_name(filter_name) - attribute = get_nested_attribute(self, attribute_name) + + try: + attribute = get_nested_attribute(self, attribute_name) + + except AttributeError as error: + if raise_error_if_filter_is_invalid: + raise error + return False filter_ = self._get_filter(attribute, filter_action) + return filter_(attribute, filter_value) def _split_filter_name(self, filter_name): diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 3361c3191..1f7f408f0 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -4,14 +4,21 @@ from octue.utils.objects import get_nested_attribute -def _filter(self, **kwargs): +def _filter(self, ignore_items_without_attribute=True, **kwargs): """Return a new instance containing only the Filterables to which the given filter criteria apply. + :param bool ignore_items_without_attribute: :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whos value is the value to filter for :return octue.resources.filter_containers.FilterSet: """ - return self.__class__((item for item in self if item.satisfies(**kwargs))) + return self.__class__( + ( + item + for item in self + if item.satisfies(raise_error_if_filter_is_invalid=not ignore_items_without_attribute, **kwargs) + ) + ) def _order_by(self, attribute_name, reverse=False): @@ -37,15 +44,22 @@ class FilterList(list): class FilterDict(UserDict): - def filter(self, **kwargs): + def filter(self, ignore_items_without_attribute=True, **kwargs): """Return a new instance containing only the Filterables for which the given filter criteria apply are satisfied. + :param bool ignore_items_without_attribute: :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whose value is the value to filter for :return FilterDict: """ - return self.__class__({key: value for key, value in self.items() if value.satisfies(**kwargs)}) + return self.__class__( + { + key: value + for key, value in self.items() + if value.satisfies(raise_error_if_filter_is_invalid=not ignore_items_without_attribute, **kwargs) + } + ) def order_by(self, attribute_name, reverse=False): """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList (not a diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index e74c3136b..5dccc4ca8 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -9,8 +9,25 @@ def __init__(self, **kwargs): for key, value in kwargs.items(): setattr(self, key, value) + def __eq__(self, other): + return vars(self) == vars(other) + + def __hash__(self): + return hash(str(vars(self).items())) + class TestFilterSet(BaseTestCase): + def test_filter_with_filterables_of_differing_attributes(self): + """Test filtering with filterables of differing attributes ignores the filterables lacking the filtered-for + attribute. + """ + filterables = {FilterableThing(a=3), FilterableThing(b=90), FilterableThing(a=77)} + + filter_dict = FilterSet(filterables) + self.assertEqual(set(filter_dict.filter(a__gt=2)), {FilterableThing(a=3), FilterableThing(a=77)}) + self.assertEqual(set(filter_dict.filter(b__equals=90)), {FilterableThing(b=90)}) + self.assertEqual(set(filter_dict.filter(b__equals=0)), set()) + def test_ordering_by_a_non_existent_attribute(self): """ Ensure an error is raised if ordering is attempted by a non-existent attribute. """ filter_set = FilterSet([FilterableThing(age=5), FilterableThing(age=4), FilterableThing(age=3)]) @@ -82,6 +99,21 @@ def test_filter(self): self.assertEqual(filter_dict.filter(my_value__equals=90).keys(), {"second-filterable"}) self.assertEqual(filter_dict.filter(my_value__gt=2), filterables) + def test_filter_with_filterables_of_differing_attributes(self): + """Test filtering with filterables of differing attributes ignores the filterables lacking the filtered-for + attribute. + """ + filterables = { + "first-filterable": FilterableThing(a=3), + "second-filterable": FilterableThing(b=90), + "third-filterable": FilterableThing(a=77), + } + + filter_dict = FilterDict(filterables) + self.assertEqual(set(filter_dict.filter(a__gt=2).keys()), {"first-filterable", "third-filterable"}) + self.assertEqual(set(filter_dict.filter(b__equals=90).keys()), {"second-filterable"}) + self.assertEqual(set(filter_dict.filter(b__equals=0).keys()), set()) + def test_filter_chaining(self): """Test that filters can be chained to filter a FilterDict multiple times.""" animals_with_age_of_at_least_90 = self.ANIMALS.filter(age__gte=90) From 187ac5ed49416fc4fbe202c70ecd9e45c3a72adc Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 20:05:16 +0100 Subject: [PATCH 033/103] IMP: Allow multiple filters in filter containers' filter methods --- octue/resources/filter_containers.py | 44 ++++++++++++++++------- tests/resources/test_filter_containers.py | 17 ++++++--- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 1f7f408f0..85a984a55 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -12,13 +12,22 @@ def _filter(self, ignore_items_without_attribute=True, **kwargs): to filter for :return octue.resources.filter_containers.FilterSet: """ - return self.__class__( - ( - item - for item in self - if item.satisfies(raise_error_if_filter_is_invalid=not ignore_items_without_attribute, **kwargs) + raise_error_if_filter_is_invalid = not ignore_items_without_attribute + + if len(kwargs) == 1: + return type(self)( + ( + item + for item in self + if item.satisfies(raise_error_if_filter_is_invalid=raise_error_if_filter_is_invalid, **kwargs) + ) ) - ) + + filter_names = list(kwargs) + + for filter_name in filter_names: + filter_value = kwargs.pop(filter_name) + return _filter(self, raise_error_if_filter_is_invalid, **{filter_name: filter_value}).filter(**kwargs) def _order_by(self, attribute_name, reverse=False): @@ -53,13 +62,22 @@ def filter(self, ignore_items_without_attribute=True, **kwargs): value to filter for :return FilterDict: """ - return self.__class__( - { - key: value - for key, value in self.items() - if value.satisfies(raise_error_if_filter_is_invalid=not ignore_items_without_attribute, **kwargs) - } - ) + raise_error_if_filter_is_invalid = not ignore_items_without_attribute + + if len(kwargs) == 1: + return type(self)( + { + key: value + for key, value in self.items() + if value.satisfies(raise_error_if_filter_is_invalid=raise_error_if_filter_is_invalid, **kwargs) + } + ) + + filter_names = list(kwargs) + + for filter_name in filter_names: + filter_value = kwargs.pop(filter_name) + return self.filter(raise_error_if_filter_is_invalid, **{filter_name: filter_value}).filter(**kwargs) def order_by(self, attribute_name, reverse=False): """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList (not a diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index 5dccc4ca8..7d0668627 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -23,10 +23,15 @@ def test_filter_with_filterables_of_differing_attributes(self): """ filterables = {FilterableThing(a=3), FilterableThing(b=90), FilterableThing(a=77)} - filter_dict = FilterSet(filterables) - self.assertEqual(set(filter_dict.filter(a__gt=2)), {FilterableThing(a=3), FilterableThing(a=77)}) - self.assertEqual(set(filter_dict.filter(b__equals=90)), {FilterableThing(b=90)}) - self.assertEqual(set(filter_dict.filter(b__equals=0)), set()) + filter_set = FilterSet(filterables) + self.assertEqual(set(filter_set.filter(a__gt=2)), {FilterableThing(a=3), FilterableThing(a=77)}) + self.assertEqual(set(filter_set.filter(b__equals=90)), {FilterableThing(b=90)}) + self.assertEqual(set(filter_set.filter(b__equals=0)), set()) + + def test_filtering_with_multiple_filters(self): + """Test that multiple filters can be specified in FilterSet.filter at once.""" + filterables = {FilterableThing(a=3, b=2), FilterableThing(a=3, b=99), FilterableThing(a=77)} + self.assertEqual(FilterSet(filterables).filter(a__equals=3, b__gt=80), {FilterableThing(a=3, b=99)}) def test_ordering_by_a_non_existent_attribute(self): """ Ensure an error is raised if ordering is attempted by a non-existent attribute. """ @@ -122,6 +127,10 @@ def test_filter_chaining(self): animals_with_age_of_at_least_90_and_size_small = animals_with_age_of_at_least_90.filter(size__equals="small") self.assertEqual(animals_with_age_of_at_least_90_and_size_small.keys(), {"another_dog"}) + def test_filtering_with_multiple_filters(self): + """Test that multiple filters can be specified in FilterDict.filter at once.""" + self.assertEqual(self.ANIMALS.filter(size__equals="small", age__lt=5), {"cat": self.ANIMALS["cat"]}) + def test_ordering_by_a_non_existent_attribute(self): """ Ensure an error is raised if ordering is attempted by a non-existent attribute. """ with self.assertRaises(exceptions.InvalidInputException): From 8f3546898b98704f6134bd3beb05a703673bdef9 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 21:17:19 +0100 Subject: [PATCH 034/103] REF: Use lambda for filter instead of def function --- octue/resources/dataset.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index 28462ec36..ae879c44c 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -197,11 +197,8 @@ def get_file_sequence(self, strict=True, **kwargs): results = results.filter(sequence__is_not=None) - def get_sequence_number(file): - return file.sequence - # Sort the results on ascending sequence number - results = sorted(results, key=get_sequence_number) + results = sorted(results, key=lambda file: file.sequence) # Check sequence is unique and sequential if strict: From 4c75cb0795a59acfdc95f3c80016f5f3596bb49b Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 21:52:42 +0100 Subject: [PATCH 035/103] IMP: Add Dataset.get_file_by_tag --- octue/resources/dataset.py | 36 ++++++++++++++++++++++++++------- tests/resources/test_dataset.py | 29 ++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index ae879c44c..a8a7e0545 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -210,18 +210,40 @@ def get_file_sequence(self, strict=True, **kwargs): return results - def get_file_by_label(self, tag_string): - """Gets a data file from a manifest by searching for files with the provided label(s) + def get_file_by_label(self, label_string): + """Get a single datafile from a dataset by searching for files with the provided label(s). Gets exclusively one file; if no file or more than one file is found this results in an error. - :param tag_string: if this string appears as an exact match in the labels - :return: DataFile object + :param str label_string: + :return octue.resources.datafile.DataFile: """ - results = self.files.filter(labels__contains=tag_string) + results = self.files.filter(labels__contains=label_string) + + if len(results) > 1: + raise UnexpectedNumberOfResultsException( + f"More than one result found when searching for a file by label {label_string!r}." + ) + elif len(results) == 0: + raise UnexpectedNumberOfResultsException(f"No files found with label {label_string!r}.") + + return results.pop() + + def get_file_by_tag(self, tag_name): + """Get a single datafile from a dataset by searching for files with the provided tag name. + + Gets exclusively one file; if no file or more than one file is found, an error is raised. + + :param str tag_name: + :return octue.resources.datafile.DataFile: + """ + results = self.files.filter(tags__contains=tag_name) + if len(results) > 1: - raise UnexpectedNumberOfResultsException("More than one result found when searching for a file by label") + raise UnexpectedNumberOfResultsException( + f"More than one result found when searching for a file by tag {tag_name!r}." + ) elif len(results) == 0: - raise UnexpectedNumberOfResultsException("No files found with this label") + raise UnexpectedNumberOfResultsException(f"No files found with tag {tag_name!r}.") return results.pop() diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index d4a5f366c..39a645aa5 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -155,7 +155,7 @@ def test_filter_by_label(self): # self.assertEqual(1, len(files)) def test_get_file_by_label(self): - """Ensures that get_files works with label lookups""" + """Ensure files can be accessed by label from the dataset.""" files = [ Datafile(path="path-within-dataset/a_my_file.csv", labels="one a:2 b:3 all"), Datafile(path="path-within-dataset/a_your_file.csv", labels="two a:2 b:3 all"), @@ -177,7 +177,32 @@ def test_get_file_by_label(self): with self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: resource.get_file_by_label("billyjeanisnotmylover") - self.assertIn("No files found with this label", e.exception.args[0]) + self.assertIn("No files found with label", e.exception.args[0]) + + def test_get_file_by_tag(self): + """Ensure files can be accessed by tag from the dataset.""" + files = [ + Datafile(path="path-within-dataset/my_file.csv", tags={"one": False, "a": 2, "b": 3, "all": True}), + Datafile(path="path-within-dataset/your_file.csv", tags={"two": True, "a": 2, "b": 3, "all": True}), + Datafile(path="path-within-dataset/our_file.csv", tags={"three": False, "all": True}), + ] + + dataset = Dataset(files=files) + + # Check working for single result. + self.assertIs(dataset.get_file_by_tag("three"), files[2]) + + # Check raises for too many results. + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: + dataset.get_file_by_tag("all") + + self.assertIn("More than one result found", e.exception.args[0]) + + # Check raises for no result. + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: + dataset.get_file_by_tag("billyjeanisnotmylover") + + self.assertIn("No files found with tag", e.exception.args[0]) def test_filter_by_sequence_not_none(self): """Ensures that filter works with sequence lookups""" From 65be6850fa174d50d61e22d3372154cac4fb30ae Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 21:59:48 +0100 Subject: [PATCH 036/103] CLN: Remvove unnecessary class variable; use more pythonic method override --- octue/resources/tag.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/octue/resources/tag.py b/octue/resources/tag.py index a0881b0bf..3b3358a77 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -80,12 +80,9 @@ class TagDict(Serialisable, FilterDict): - - _FILTERABLE_ATTRIBUTE = "data" - def __setitem__(self, tag, value): self._check_tag_format(tag) - self.data[tag] = value + super().__setitem__(tag, value) def update(self, tags, **kwargs): self._check_tag_format(*tags) From 7da5b6439589a405458f1216149f9792686c6f3e Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 17 May 2021 22:01:25 +0100 Subject: [PATCH 037/103] CLN: Remove commented-out code --- octue/resources/tag.py | 69 ------------------------------------------ 1 file changed, 69 deletions(-) diff --git a/octue/resources/tag.py b/octue/resources/tag.py index 3b3358a77..ba472ef35 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -9,75 +9,6 @@ TAG_NAME_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9:.\-/]*(? other -# elif isinstance(other, Tag): -# return self.name > other.name -# -# def __hash__(self): -# """ Allow Tags to be contained in a set. """ -# return hash(f"{type(self).__name__}{self.name}") -# -# def __contains__(self, item): -# return item in self.name -# -# def __repr__(self): -# return repr(self.name) -# -# def starts_with(self, value): -# """ Does the tag start with the given value? """ -# return self.name.startswith(value) -# -# def ends_with(self, value): -# """ Does the tag end with the given value? """ -# return self.name.endswith(value) -# -# @staticmethod -# def _clean(name): -# """ Ensure the tag name is a string and conforms to the tag regex pattern. """ -# if not isinstance(name, str): -# raise InvalidTagException("Tags must be expressed as a string.") -# -# cleaned_name = name.strip() -# -# if not re.match(TAG_NAME_PATTERN, cleaned_name): -# raise InvalidTagException( -# f"Invalid tag '{cleaned_name}'. Tags must contain only characters 'a-z', 'A-Z', '0-9', ':', '.', '/' " -# f"and '-'. They must not start with '-', ':', '/' or '.'" -# ) -# -# return cleaned_name - class TagDict(Serialisable, FilterDict): def __setitem__(self, tag, value): From 51b5be4541d019bae7bebce6de082798aaa8e011 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 18 May 2021 17:32:58 +0100 Subject: [PATCH 038/103] DEP: Use latest GCS emulator --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 57d3e2115..734379fc3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,7 @@ # Testing # ------------------------------------------------------------------------------ pluggy -gcp-storage-emulator>=2021.3.28 +gcp-storage-emulator>=2021.5.5 tox>=3.23.0 # Code quality From 9ed62c75d749e9a765b7edb1fcfce827b64b5cec Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 18 May 2021 17:41:57 +0100 Subject: [PATCH 039/103] FIX: Handle timestamps from cloud with/without timezone information --- octue/resources/datafile.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 018e577bd..e02208a40 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -201,7 +201,10 @@ def from_cloud( timestamp = kwargs.get("timestamp", custom_metadata.get("timestamp")) if isinstance(timestamp, str): - timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f%z") + try: + timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f") + except ValueError: + timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f%z") tags = kwargs.pop("tags", custom_metadata.get("tags", TAGS_DEFAULT)) From d53f518ff6f508c263859256978b915de75246da Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 18 May 2021 19:18:10 +0100 Subject: [PATCH 040/103] IMP: Limit allowed tag name and label patterns --- octue/resources/label.py | 18 +--- octue/resources/tag.py | 2 +- .../fractal/fractal.py | 4 +- .../data/input/manifest.json | 4 +- tests/mixins/test_labellable.py | 10 +-- tests/mixins/test_taggable.py | 23 ++--- tests/resources/test_datafile.py | 2 +- tests/resources/test_dataset.py | 8 +- tests/resources/test_label.py | 89 +++++++++---------- tests/resources/test_tag.py | 8 +- 10 files changed, 66 insertions(+), 102 deletions(-) diff --git a/octue/resources/label.py b/octue/resources/label.py index 50d8279dd..78c7fd22e 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -2,15 +2,14 @@ import re from collections import UserString from collections.abc import Iterable -from functools import lru_cache from octue.exceptions import InvalidLabelException from octue.mixins import Filterable -from octue.resources.filter_containers import FilterList, FilterSet +from octue.resources.filter_containers import FilterSet from octue.utils.encoders import OctueJSONEncoder -LABEL_PATTERN = re.compile(r"^$|^[A-Za-z0-9][A-Za-z0-9:.\-/]*(? Date: Tue, 18 May 2021 19:34:28 +0100 Subject: [PATCH 041/103] IMP: Raise error if non-Filterables are put into filter containers --- octue/resources/filter_containers.py | 7 +++++++ tests/resources/test_filter_containers.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 85a984a55..c43ce11ca 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -1,6 +1,7 @@ from collections import UserDict from octue import exceptions +from octue.mixins import Filterable from octue.utils.objects import get_nested_attribute @@ -12,6 +13,9 @@ def _filter(self, ignore_items_without_attribute=True, **kwargs): to filter for :return octue.resources.filter_containers.FilterSet: """ + if any(not isinstance(item, Filterable) for item in self): + raise TypeError(f"All items in a {type(self).__name__} must be of type {Filterable.__name__}.") + raise_error_if_filter_is_invalid = not ignore_items_without_attribute if len(kwargs) == 1: @@ -62,6 +66,9 @@ def filter(self, ignore_items_without_attribute=True, **kwargs): value to filter for :return FilterDict: """ + if any(not isinstance(item, Filterable) for item in self.values()): + raise TypeError(f"All values in a {type(self).__name__} must be of type {Filterable.__name__}.") + raise_error_if_filter_is_invalid = not ignore_items_without_attribute if len(kwargs) == 1: diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index 7d0668627..d4733a2a2 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -17,6 +17,13 @@ def __hash__(self): class TestFilterSet(BaseTestCase): + def test_error_raised_if_any_items_are_not_filterable(self): + """Test that an error is raised if any items in the FilterSet are not of type Filterable.""" + filter_set = FilterSet([1, 2, 3]) + + with self.assertRaises(TypeError): + filter_set.filter(a__equals=2) + def test_filter_with_filterables_of_differing_attributes(self): """Test filtering with filterables of differing attributes ignores the filterables lacking the filtered-for attribute. @@ -93,6 +100,13 @@ def test_instantiate(self): self.assertEqual(filter_dict["a"], 1) self.assertEqual(filter_dict["b"], 3) + def test_error_raised_if_any_items_are_not_filterable(self): + """Test that an error is raised if any values in the FilterDict are not of type Filterable.""" + filter_dict = FilterDict({"a": 1, "b": 2, "c": 3}) + + with self.assertRaises(TypeError): + filter_dict.filter(a__equals=2) + def test_filter(self): """Test that a FilterDict can be filtered on its values when they are all filterables.""" filterables = { From 0f826b4a72d54025d9ff23a99e5e86da9e017dab Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 18 May 2021 19:58:39 +0100 Subject: [PATCH 042/103] IMP: Store tags in separate custom metadata fields on GCS --- octue/resources/datafile.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index e02208a40..2b4824cbf 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -21,13 +21,14 @@ TEMPORARY_LOCAL_FILE_CACHE = {} - ID_DEFAULT = None CLUSTER_DEFAULT = 0 SEQUENCE_DEFAULT = None TAGS_DEFAULT = None LABELS_DEFAULT = None +NON_TAG_METADATA = {"id", "timestamp", "cluster", "sequence", "labels"} + class Datafile(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable, Filterable): """Class for representing data files on the Octue system. @@ -206,10 +207,11 @@ def from_cloud( except ValueError: timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f%z") - tags = kwargs.pop("tags", custom_metadata.get("tags", TAGS_DEFAULT)) - - if isinstance(tags, str): - tags = TagDict.deserialise(tags, from_string=True) + tags = ( + kwargs.pop("tags", None) + or TagDict({tag_name: custom_metadata[tag_name] for tag_name in custom_metadata.keys() - NON_TAG_METADATA}) + or TAGS_DEFAULT + ) datafile._set_id(kwargs.pop("id", custom_metadata.get("id", ID_DEFAULT))) datafile.path = storage.path.generate_gs_path(bucket_name, datafile_path) @@ -505,8 +507,8 @@ def metadata(self): "timestamp": self.timestamp, "cluster": self.cluster, "sequence": self.sequence, - "tags": self.tags.serialise(to_string=True), "labels": self.labels.serialise(to_string=True), + **{tag_name: tag_value for tag_name, tag_value in self.tags.items()}, } From b0b879d65434c19c0d650a99e78f137f828b5489 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 18 May 2021 20:09:20 +0100 Subject: [PATCH 043/103] DOC: Fix incorrect/outdated information in docs skip_ci_tests --- docs/source/analysis_objects.rst | 17 ++--------------- docs/source/cloud_storage_advanced_usage.rst | 6 +++--- docs/source/conf.py | 2 +- docs/source/datafile.rst | 15 ++++++++++----- docs/source/dataset.rst | 7 ++++--- 5 files changed, 20 insertions(+), 27 deletions(-) diff --git a/docs/source/analysis_objects.rst b/docs/source/analysis_objects.rst index 60e8105f5..52a9d33f3 100644 --- a/docs/source/analysis_objects.rst +++ b/docs/source/analysis_objects.rst @@ -27,18 +27,5 @@ your app can always be verified. These hashes exist on the following attributes: - ``configuration_values_hash`` - ``configuration_manifest_hash`` -If an input or configuration attribute is ``None``, so will its hash attribute be. For ``Manifests``, some metadata -about the ``Datafiles`` and ``Datasets`` within them, and about the ``Manifest`` itself, is included when calculating -the hash: - -- For a ``Datafile``, the content of its on-disk file is hashed, along with the following metadata: - - - ``name`` - - ``cluster`` - - ``sequence`` - - ``timestamp`` - - ``labels`` - -- For a ``Dataset``, the hashes of its ``Datafiles`` are included, along with its ``labels``. - -- For a ``Manifest``, the hashes of its ``Datasets`` are included, along with its ``keys``. +If a strand is ``None``, so will its corresponding hash attribute be. The hash of a datafile is the hash of +its file, while the hash of a manifest or dataset is the cumulative hash of the files it refers to. diff --git a/docs/source/cloud_storage_advanced_usage.rst b/docs/source/cloud_storage_advanced_usage.rst index ec25e2fb3..a1033df55 100644 --- a/docs/source/cloud_storage_advanced_usage.rst +++ b/docs/source/cloud_storage_advanced_usage.rst @@ -26,14 +26,14 @@ to any of these methods. local_path=, bucket_name=, path_in_bucket=, - metadata={"labels": ["blah", "glah", "jah"], "cleaned": True, "id": 3} + metadata={"id": 3, "labels": ["blah", "glah", "jah"], "cleaned": True, "colour": "blue"} ) storage_client.upload_from_string( string='[{"height": 99, "width": 72}, {"height": 12, "width": 103}]', bucket_name=, path_in_bucket=, - metadata={"labels": ["dimensions"], "cleaned": True, "id": 96} + metadata={"id": 96, "labels": ["dimensions"], "cleaned": True, "colour": "red", "size": "small"} ) **Downloading** @@ -61,7 +61,7 @@ to any of these methods. bucket_name=, path_in_bucket=, ) - >>> {"labels": ["dimensions"], "cleaned": True, "id": 96} + >>> {"id": 96, "labels": ["dimensions"], "cleaned": True, "colour": "red", "size": "small"} **Deleting** diff --git a/docs/source/conf.py b/docs/source/conf.py index 0c2d54be4..e3c7f6af4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -50,7 +50,7 @@ # # The short X.Y version. version = "1.0" -# The full version, including alpha/beta/rc labels. +# The full version, including alpha/beta/rc tags. release = "1.0" # The language for content autogenerated by Sphinx. Refer to documentation diff --git a/docs/source/datafile.rst b/docs/source/datafile.rst index 712df7a53..32703245a 100644 --- a/docs/source/datafile.rst +++ b/docs/source/datafile.rst @@ -10,6 +10,7 @@ the following main attributes: - ``path`` - the path of this file, which may include folders or subfolders, within the dataset. - ``cluster`` - the integer cluster of files, within a dataset, to which this belongs (default 0) - ``sequence`` - a sequence number of this file within its cluster (if sequences are appropriate) +- ``tags`` - key-value pairs of metadata relevant to this file - ``labels`` - a space-separated string or iterable of labels relevant to this file - ``timestamp`` - a posix timestamp associated with the file, in seconds since epoch, typically when it was created but could relate to a relevant time point for the data @@ -50,6 +51,7 @@ Example A datafile.timestamp = new_metadata["timestamp"] datafile.cluster = new_metadata["cluster"] datafile.sequence = new_metadata["sequence"] + datafile.tags = new_metadata["tags"] datafile.labels = new_metadata["labels"] @@ -76,7 +78,8 @@ Example B datafile.timestamp = datetime.now() datafile.cluster = 0 datafile.sequence = 3 - datafile.labels = {"manufacturer:Vestas", "output:1MW"} + datafile.tags = {"manufacturer": "Vestas", "output": "1MW"} + datafile.labels = {"new"} datafile.to_cloud() # Or, datafile.update_cloud_metadata() @@ -122,10 +125,11 @@ For creating new data in a new local file: sequence = 2 - labels = {"cleaned:True", "type:linear"} + tags = {"cleaned": True, "type": "linear"} + labels = {"Vestas"} - with Datafile(path="path/to/local/file.dat", sequence=sequence, labels=labels, mode="w") as datafile, f: + with Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags, labels=labels, mode="w") as datafile, f: f.write("This is some cleaned data.") datafile.to_cloud(project_name="my-project", bucket_name="my-bucket", path_in_bucket="path/to/data.dat") @@ -139,7 +143,8 @@ For existing data in an existing local file: sequence = 2 - labels = {"cleaned:True", "type:linear"} + tags = {"cleaned": True, "type": "linear"} + labels = {"Vestas"} - datafile = Datafile(path="path/to/local/file.dat", sequence=sequence, labels=labels) + datafile = Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags, labels=labels) datafile.to_cloud(project_name="my-project", bucket_name="my-bucket", path_in_bucket="path/to/data.dat") diff --git a/docs/source/dataset.rst b/docs/source/dataset.rst index a1bf3a660..bdc770168 100644 --- a/docs/source/dataset.rst +++ b/docs/source/dataset.rst @@ -7,6 +7,7 @@ Dataset A ``Dataset`` contains any number of ``Datafiles`` along with the following metadata: - ``name`` +- ``tags`` - ``labels`` The files are stored in a ``FilterSet``, meaning they can be easily filtered according to any attribute of the @@ -23,8 +24,8 @@ You can filter a ``Dataset``'s files as follows: dataset = Dataset( files=[ - Datafile(timestamp=time.time(), path="path-within-dataset/my_file.csv", labels="one a:2 b:3 all"), - Datafile(timestamp=time.time(), path="path-within-dataset/your_file.txt", labels="two a:2 b:3 all"), + Datafile(timestamp=time.time(), path="path-within-dataset/my_file.csv", labels="one a2 b3 all"), + Datafile(timestamp=time.time(), path="path-within-dataset/your_file.txt", labels="two a2 b3 all"), Datafile(timestamp=time.time(), path="path-within-dataset/another_file.csv", labels="three all"), ] ) @@ -39,7 +40,7 @@ You can also chain filters indefinitely: .. code-block:: python - dataset.files.filter(filter_name="name__ends_with", filter_value=".csv").filter("labels__contains", filter_value="a:2") + dataset.files.filter(filter_name="name__ends_with", filter_value=".csv").filter("labels__contains", filter_value="a2") >>> })> Find out more about ``FilterSets`` :doc:`here `, including all the possible filters available for each type of object stored on From a2750a8b53ebd1a0710593ea971076f694546d5a Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 18 May 2021 20:19:42 +0100 Subject: [PATCH 044/103] REF: Slightly simplify Taggable and Labelable --- octue/mixins/labelable.py | 10 +++++----- octue/mixins/taggable.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/octue/mixins/labelable.py b/octue/mixins/labelable.py index 8679f56a7..b50500dbd 100644 --- a/octue/mixins/labelable.py +++ b/octue/mixins/labelable.py @@ -2,15 +2,15 @@ class Labelable: - """ A mixin class allowing objects to be labelled. """ + """A mixin class allowing objects to be labelled.""" def __init__(self, *args, labels=None, **kwargs): - self._labels = LabelSet(labels) + self.labels = labels super().__init__(*args, **kwargs) def add_labels(self, *args): - """ Adds one or more new label strings to the object labels. New labels will be cleaned and validated. """ - self._labels.add_labels(*args) + """Add one or more new labels to the object. New labels will be cleaned and validated.""" + self.labels.add_labels(*args) @property def labels(self): @@ -18,5 +18,5 @@ def labels(self): @labels.setter def labels(self, labels): - """ Overwrite any existing label set and assign new label. """ + """Overwrite any existing label set and assign new labels.""" self._labels = LabelSet(labels) diff --git a/octue/mixins/taggable.py b/octue/mixins/taggable.py index 2d43ff956..0ca2d6999 100644 --- a/octue/mixins/taggable.py +++ b/octue/mixins/taggable.py @@ -9,7 +9,7 @@ def __init__(self, *args, tags=None, **kwargs): super().__init__(*args, **kwargs) def add_tags(self, tags=None, **kwargs): - """ Adds one or more new tag strings to the object tags. New tags will be cleaned and validated. """ + """Add one or more new tags to the object. New tags will be cleaned and validated.""" self.tags.update({**(tags or {}), **kwargs}) @property @@ -18,5 +18,5 @@ def tags(self): @tags.setter def tags(self, tags): - """ Overwrite any existing tag set and assign new tag. """ + """Overwrite any existing tags and assign the new ones.""" self._tags = TagDict(tags) From 84f0a038c73944051636a2df38f4769b4670a4e9 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 18 May 2021 20:24:43 +0100 Subject: [PATCH 045/103] FIX: Make Analysis taggable again --- octue/resources/analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/octue/resources/analysis.py b/octue/resources/analysis.py index c08104610..20c1da3b7 100644 --- a/octue/resources/analysis.py +++ b/octue/resources/analysis.py @@ -2,7 +2,7 @@ import logging from octue.definitions import OUTPUT_STRANDS -from octue.mixins import Hashable, Identifiable, Labelable, Loggable, Serialisable +from octue.mixins import Hashable, Identifiable, Labelable, Loggable, Serialisable, Taggable from octue.resources.manifest import Manifest from octue.utils.encoders import OctueJSONEncoder from octue.utils.folders import get_file_name_from_strand @@ -23,7 +23,7 @@ CLASS_MAP = {"configuration_manifest": Manifest, "input_manifest": Manifest, "output_manifest": Manifest} -class Analysis(Identifiable, Loggable, Serialisable, Labelable): +class Analysis(Identifiable, Loggable, Serialisable, Labelable, Taggable): """Analysis class, holding references to all input and output data ## The Analysis Instance From 0a570b3a8a9523e459970db53f5fc476ecdfb6dd Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 18 May 2021 20:29:51 +0100 Subject: [PATCH 046/103] DOC: Update templates with labels/tags --- .../template-python-fractal/fractal/fractal.py | 18 +++++++++--------- .../templates/template-using-manifests/app.py | 13 +++++-------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/octue/templates/template-python-fractal/fractal/fractal.py b/octue/templates/template-python-fractal/fractal/fractal.py index 37c992379..328099df8 100644 --- a/octue/templates/template-python-fractal/fractal/fractal.py +++ b/octue/templates/template-python-fractal/fractal/fractal.py @@ -41,16 +41,15 @@ def fractal(analysis): "height": analysis.configuration_values["height"], } - # We'll add some labels, which will help to improve searchability and allow - # other apps, reports, users and analyses to automatically find figures and - # use them. + # We'll add some labels and tags, which will help to improve searchability and allow other apps, reports, users and + # analyses to automatically find figures and use them. # - # Get descriptive with labels... they are whitespace-delimited and colons can be - # used to provide sublabels. Labels are case insensitive, and accept a-z, 0-9, - # hyphens and underscores (which can be used literally in search and are also - # used to separate words in natural language search). Other special characters - # will be stripped. - tags = {"contents": "fractal:mandelbrot", "type": "figure:surface"} + # Get descriptive with labels... they are whitespace-delimited. Labels are case insensitive, and accept a-z, 0-9, + # and hyphens which can be used literally in search and are also used to separate words in natural language search). + # Other special characters will be stripped. Tags are key value pairs where the values can be anything but the keys + # only accept a-z, 0-9, and underscores. + labels = {"complex-figure"} + tags = {"contents": "fractal:mandelbrot"} # Get the output dataset which will be used for storing the figure file(s) output_dataset = analysis.output_manifest.get_dataset("fractal_figure_files") @@ -65,6 +64,7 @@ def fractal(analysis): local_path_prefix=output_dataset.path, # TODO set up for the right paths Destination (root of the output dataset folder on the present machine) skip_checks=True, # We haven't created the actual file yet, so it'll definitely fail checks! tags=tags, + labels=labels, ) # Actually write the contents to the file specified by the Datafile diff --git a/octue/templates/template-using-manifests/app.py b/octue/templates/template-using-manifests/app.py index 91255af44..1bb035395 100644 --- a/octue/templates/template-using-manifests/app.py +++ b/octue/templates/template-using-manifests/app.py @@ -63,15 +63,12 @@ def run(analysis, *args, **kwargs): # course, because we haven't done the processing yet)... output_dataset = analysis.output_manifest.get_dataset("cleaned_met_mast_data") - # We'll add labels to the output dataset, which will help to improve searchability and allow - # other apps, reports, users and analyses to automatically find figures and - # use them. + # We'll add some labels, which will help to improve searchability and allow other apps, reports, users and + # analyses to automatically find figures and use them. # - # Get descriptive with labels... they are whitespace-delimited and colons can be - # used to provide sublabels. Labels are case insensitive, and accept a-z, 0-9, - # hyphens and underscores (which can be used literally in search and are also - # used to separate words in natural language search). Other special characters - # will be stripped. + # Get descriptive with labels... they are whitespace-delimited. Labels are case insensitive, and accept a-z, 0-9, + # and hyphens which can be used literally in search and are also used to separate words in natural language search). + # Other special characters will be stripped. output_dataset.labels = "met mast cleaned" # Create a Datafile to hold the concatenated, cleaned output data. We could put it in the current directory From 2aa0ac31775386dbbbe7c1b617fae5a13d3c0fb1 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 13:21:18 +0100 Subject: [PATCH 047/103] REF: Simplify Datafile.metadata method --- octue/resources/datafile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 2b4824cbf..74e0835c9 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -508,7 +508,7 @@ def metadata(self): "cluster": self.cluster, "sequence": self.sequence, "labels": self.labels.serialise(to_string=True), - **{tag_name: tag_value for tag_name, tag_value in self.tags.items()}, + **self.tags, } From ebc11b54e509e2c49311badbbb43795573b54c5a Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 13:25:25 +0100 Subject: [PATCH 048/103] TST: Test that datafile tags are stored as separate pieces of custom metadata --- tests/resources/test_datafile.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index 406e0cf5b..bacdc1553 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -202,6 +202,7 @@ def test_from_cloud_with_datafile(self): cluster=0, sequence=1, labels={"blah-shah-nah", "blib", "glib"}, + tags={"good": True, "how_good": "very"}, ) downloaded_datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) @@ -212,6 +213,7 @@ def test_from_cloud_with_datafile(self): self.assertEqual(downloaded_datafile.hash_value, datafile.hash_value) self.assertEqual(downloaded_datafile.cluster, datafile.cluster) self.assertEqual(downloaded_datafile.sequence, datafile.sequence) + self.assertEqual(downloaded_datafile.tags, {"good": "True", "how_good": "very"}) self.assertEqual(downloaded_datafile.labels, datafile.labels) self.assertEqual(downloaded_datafile.size_bytes, datafile.size_bytes) self.assertTrue(isinstance(downloaded_datafile._last_modified, float)) @@ -235,6 +237,17 @@ def test_from_cloud_with_overwrite(self): self.assertEqual(downloaded_datafile.id, new_id) self.assertNotEqual(datafile.id, downloaded_datafile.id) + def test_each_tag_is_stored_as_custom_metadata_entry_in_cloud(self): + """Test that each tag on a datafile is stored as a separate piece of custom metadata on the Google Cloud + Storage file.""" + datafile, project_name, bucket_name, path_in_bucket, _ = self.create_datafile_in_cloud( + tags={"good": True, "how_good": "very"}, + ) + + datafile.get_cloud_metadata() + self.assertEqual(datafile._cloud_metadata["custom_metadata"]["good"], "True") + self.assertEqual(datafile._cloud_metadata["custom_metadata"]["how_good"], "very") + def test_from_cloud_with_overwrite_when_disallowed_results_in_error(self): """Test that attempting to overwrite the attributes of a datafile instantiated from the cloud when not allowed results in an error. From 19e0864d22446bcf589ecc3cc2a09d0319b3de81 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 13:34:02 +0100 Subject: [PATCH 049/103] REV: Remove Dataset.get_file_by_tag method --- octue/resources/dataset.py | 19 ------------------- tests/resources/test_dataset.py | 25 ------------------------- 2 files changed, 44 deletions(-) diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index a8a7e0545..dd19fa2de 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -228,22 +228,3 @@ def get_file_by_label(self, label_string): raise UnexpectedNumberOfResultsException(f"No files found with label {label_string!r}.") return results.pop() - - def get_file_by_tag(self, tag_name): - """Get a single datafile from a dataset by searching for files with the provided tag name. - - Gets exclusively one file; if no file or more than one file is found, an error is raised. - - :param str tag_name: - :return octue.resources.datafile.DataFile: - """ - results = self.files.filter(tags__contains=tag_name) - - if len(results) > 1: - raise UnexpectedNumberOfResultsException( - f"More than one result found when searching for a file by tag {tag_name!r}." - ) - elif len(results) == 0: - raise UnexpectedNumberOfResultsException(f"No files found with tag {tag_name!r}.") - - return results.pop() diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index f2f274f1f..087c256b8 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -179,31 +179,6 @@ def test_get_file_by_label(self): self.assertIn("No files found with label", e.exception.args[0]) - def test_get_file_by_tag(self): - """Ensure files can be accessed by tag from the dataset.""" - files = [ - Datafile(path="path-within-dataset/my_file.csv", tags={"one": False, "a": 2, "b": 3, "all": True}), - Datafile(path="path-within-dataset/your_file.csv", tags={"two": True, "a": 2, "b": 3, "all": True}), - Datafile(path="path-within-dataset/our_file.csv", tags={"three": False, "all": True}), - ] - - dataset = Dataset(files=files) - - # Check working for single result. - self.assertIs(dataset.get_file_by_tag("three"), files[2]) - - # Check raises for too many results. - with self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: - dataset.get_file_by_tag("all") - - self.assertIn("More than one result found", e.exception.args[0]) - - # Check raises for no result. - with self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: - dataset.get_file_by_tag("billyjeanisnotmylover") - - self.assertIn("No files found with tag", e.exception.args[0]) - def test_filter_by_sequence_not_none(self): """Ensures that filter works with sequence lookups""" resource = Dataset( From de967dd028e8150d420257473220e9207bdb743e Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 14:31:56 +0100 Subject: [PATCH 050/103] DOC: Update docstrings and error messages --- octue/resources/filter_containers.py | 2 +- octue/resources/label.py | 20 +++++++++----------- octue/resources/tag.py | 23 ++++++++++++++++++++--- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index c43ce11ca..28c2148b4 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -9,7 +9,7 @@ def _filter(self, ignore_items_without_attribute=True, **kwargs): """Return a new instance containing only the Filterables to which the given filter criteria apply. :param bool ignore_items_without_attribute: - :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whos value is the value + :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whose value is the value to filter for :return octue.resources.filter_containers.FilterSet: """ diff --git a/octue/resources/label.py b/octue/resources/label.py index 78c7fd22e..0c6911973 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -13,11 +13,10 @@ class Label(Filterable, UserString): - """A label starts and ends with a character in [A-Za-z0-9]. It can contain the colon discriminator, forward slashes - or hyphens. Empty strings are also valid. More valid examples: - system:32 - angry-marmaduke - mega-man:torso:component:12 + """A label starts and ends with a character in [A-Za-z0-9] and can contain hyphens e.g. angry-marmaduke + + :param str name: + :return None: """ def __init__(self, name): @@ -29,7 +28,7 @@ def serialise(self): @staticmethod def _clean(name): - """ Ensure the label name is a string and conforms to the label regex pattern. """ + """Ensure the label name is a string and conforms to the label regex pattern.""" if not isinstance(name, str): raise InvalidLabelException("Labels must be expressed as a string.") @@ -37,18 +36,17 @@ def _clean(name): if not re.match(LABEL_PATTERN, cleaned_name): raise InvalidLabelException( - f"Invalid label '{cleaned_name}'. Labels must contain only characters 'a-z', 'A-Z', '0-9', ':', '.', '/' " - f"and '-'. They must not start with '-', ':', '/' or '.'" + f"Invalid label '{cleaned_name}'. Labels must contain only characters 'a-z', 'A-Z', '0-9', and '-'. " + f"They must not start with '-'." ) return cleaned_name class LabelSet(FilterSet): - """ Class to handle a set of labels as a string. """ + """Class to handle a set of labels as a string.""" def __init__(self, labels=None): - """ Construct a LabelSet. """ # TODO Call the superclass with *args and **kwargs, then update everything to using ResourceBase labels = labels or FilterSet() @@ -112,7 +110,7 @@ def serialise(self, to_string=False, **kwargs): :param bool to_string: :return list|str: """ - string = json.dumps(sorted(self), cls=OctueJSONEncoder, sort_keys=True, indent=4, **kwargs) + string = json.dumps(sorted(self), cls=OctueJSONEncoder, indent=4, **kwargs) if to_string: return string diff --git a/octue/resources/tag.py b/octue/resources/tag.py index a8bde73db..9be2684d2 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -12,23 +12,40 @@ class TagDict(Serialisable, FilterDict): def __setitem__(self, tag, value): + """Add a tag to the TagDict via subscription. + + :param str tag: + :param any value: + :return None: + """ self._check_tag_format(tag) super().__setitem__(tag, value) def update(self, tags, **kwargs): + """Add multiple tags to the TagDict from another dictionary or as keyword arguments. + + :param dict|TagDict tags: + :param **kwargs: {str: any} pairs + :return None: + """ self._check_tag_format(*tags) super().update(tags, **kwargs) def _check_tag_format(self, *tags): + """Check if each tag conforms to the tag name pattern. + + :param tags: any number of str items + :return: + """ for tag in tags: if not re.match(TAG_NAME_PATTERN, tag): raise InvalidTagException( - f"Invalid tag '{tag}'. Tags must contain only characters 'a-z', 'A-Z', '0-9', ':', '.', '/' " - f"and '-'. They must not start with '-', ':', '/' or '.'" + f"Invalid tag '{tag}'. Tags must contain only characters 'a-z', 'A-Z', '0-9', and '_'. They must " + f"not start with '_'." ) def serialise(self, to_string=False, **kwargs): - """Serialise a TagDict to a JSON dictionary or a string version of one. + """Serialise a TagDict to a JSON dictionary or string. :param bool to_string: :return str|dict: From d04f57981fd7d3016db362d9a573f7eba902668c Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 14:35:58 +0100 Subject: [PATCH 051/103] REV: Unbase TagDict from FilterDict --- octue/resources/tag.py | 4 ++-- tests/resources/test_tag.py | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/octue/resources/tag.py b/octue/resources/tag.py index 9be2684d2..f342f4e4f 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -1,16 +1,16 @@ import json import re +from collections import UserDict from octue.exceptions import InvalidTagException from octue.mixins import Serialisable -from octue.resources.filter_containers import FilterDict from octue.utils.encoders import OctueJSONEncoder TAG_NAME_PATTERN = re.compile(r"^[a-z0-9][a-z0-9_]*(? Date: Fri, 21 May 2021 14:42:34 +0100 Subject: [PATCH 052/103] REV: Unbase Label from Filterable and LabelSet from FilterSet The LabelSet doesn't need to be filtered itself. --- octue/resources/label.py | 5 ++--- tests/resources/test_label.py | 21 --------------------- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/octue/resources/label.py b/octue/resources/label.py index 0c6911973..782d240b7 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -4,7 +4,6 @@ from collections.abc import Iterable from octue.exceptions import InvalidLabelException -from octue.mixins import Filterable from octue.resources.filter_containers import FilterSet from octue.utils.encoders import OctueJSONEncoder @@ -12,7 +11,7 @@ LABEL_PATTERN = re.compile(r"^[a-z0-9][a-z0-9-]*(? Date: Fri, 21 May 2021 14:47:00 +0100 Subject: [PATCH 053/103] TST: Remove unneeded base class from test class --- tests/mixins/test_labellable.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/mixins/test_labellable.py b/tests/mixins/test_labellable.py index 624219f03..a616101d6 100644 --- a/tests/mixins/test_labellable.py +++ b/tests/mixins/test_labellable.py @@ -1,10 +1,10 @@ from octue import exceptions -from octue.mixins import Labelable, MixinBase +from octue.mixins import Labelable from octue.resources.label import Label, LabelSet from ..base import BaseTestCase -class MyLabelable(Labelable, MixinBase): +class MyLabelable(Labelable): pass From fbc058aab9bfe62187b181a71c85bb6abfc60302 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 14:49:30 +0100 Subject: [PATCH 054/103] TST: Remove unnecessary casting to set --- tests/mixins/test_labellable.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/mixins/test_labellable.py b/tests/mixins/test_labellable.py index a616101d6..0a6260051 100644 --- a/tests/mixins/test_labellable.py +++ b/tests/mixins/test_labellable.py @@ -22,7 +22,7 @@ def test_instantiates_with_labels(self): self.assertEqual(len(labelable.labels), 0) labelable = MyLabelable(labels="a b c") - self.assertEqual(set(labelable.labels), {Label("a"), Label("b"), Label("c")}) + self.assertEqual(labelable.labels, {Label("a"), Label("b"), Label("c")}) def test_instantiates_with_label_set(self): """Ensures datafile inherits correctly from the Labelable class and passes arguments through""" @@ -49,7 +49,7 @@ def test_reset_labels(self): """Ensures datafile inherits correctly from the Labelable class and passes arguments through""" labelable = MyLabelable(labels="a b") labelable.labels = "b c" - self.assertEqual(set(labelable.labels), {Label("b"), Label("c")}) + self.assertEqual(labelable.labels, {Label("b"), Label("c")}) def test_valid_labels(self): """Ensures valid labels do not raise an error""" @@ -59,7 +59,7 @@ def test_valid_labels(self): labelable.add_labels("a1829tag") labelable.add_labels("1829") self.assertEqual( - set(labelable.labels), + labelable.labels, { Label("a-valid-label"), Label("label"), @@ -78,4 +78,4 @@ def test_mixture_valid_invalid(self): except exceptions.InvalidLabelException: pass - self.assertEqual({Label("first-valid-should-be-added")}, set(labelable.labels)) + self.assertEqual({Label("first-valid-should-be-added")}, labelable.labels) From ab893a23090e9ee3b8673743bd06335d866657b9 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 14:58:58 +0100 Subject: [PATCH 055/103] TST: Add wrongly-removed test back in --- tests/resources/test_dataset.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index 087c256b8..9b4c8ffa8 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -87,6 +87,20 @@ class NotADatafile: self.assertIn("must be of class Datafile to add it to a Dataset", e.exception.args[0]) + def test_filter_catches_single_underscore_mistake(self): + """Ensure that if the filter name contains only single underscores, an error is raised.""" + resource = Dataset( + files=[ + Datafile(path="path-within-dataset/A_Test_file.csv"), + Datafile(path="path-within-dataset/a_test_file.txt"), + ] + ) + + with self.assertRaises(exceptions.InvalidInputException) as e: + resource.files.filter(name_icontains="Test") + + self.assertIn("Invalid filter name 'name_icontains'. Filter names should be in the form", e.exception.args[0]) + def test_filter_name_contains(self): """Ensures that filter works with the name_contains and name_icontains lookups""" resource = Dataset( From 1cd5d779813c2b21995fba790a8d4d826ed932b4 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 14:59:24 +0100 Subject: [PATCH 056/103] DOC: Fix error string --- octue/mixins/filterable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index e392ba8be..c4c1a28b9 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -113,7 +113,7 @@ def _split_filter_name(self, filter_name): if not attribute_names: raise exceptions.InvalidInputException( f"Invalid filter name {filter_name!r}. Filter names should be in the form " - f"'____<...>__' with at least one attribute name" + f"'____<...>__' with at least one attribute name " f"included." ) From bf6dec7ab6963b9824329c1543fd2be7d7d2f63b Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 15:06:39 +0100 Subject: [PATCH 057/103] TST: Test failing of filtering Filterables with differing attributes; simplify tests --- tests/resources/test_filter_containers.py | 63 ++++++++++++++++------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index d4733a2a2..9c859ae9e 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -29,11 +29,21 @@ def test_filter_with_filterables_of_differing_attributes(self): attribute. """ filterables = {FilterableThing(a=3), FilterableThing(b=90), FilterableThing(a=77)} - filter_set = FilterSet(filterables) - self.assertEqual(set(filter_set.filter(a__gt=2)), {FilterableThing(a=3), FilterableThing(a=77)}) - self.assertEqual(set(filter_set.filter(b__equals=90)), {FilterableThing(b=90)}) - self.assertEqual(set(filter_set.filter(b__equals=0)), set()) + + self.assertEqual(filter_set.filter(a__gt=2), {FilterableThing(a=3), FilterableThing(a=77)}) + self.assertEqual(filter_set.filter(b__equals=90), {FilterableThing(b=90)}) + self.assertEqual(filter_set.filter(b__equals=0), set()) + + def test_filter_with_filterables_of_differing_attributes_fails_if_setting_enabled(self): + """Test filtering with filterables of differing attributes raises an error if any filterables lack the + filtered-for attribute when `ignore_items_without_attribute` is False. + """ + filter_set = FilterSet({FilterableThing(a=3), FilterableThing(b=90), FilterableThing(a=77)}) + + for kwarg in {"a__gt": 2}, {"b__equals": 90}, {"b__equals": 0}: + with self.assertRaises(AttributeError): + filter_set.filter(**kwarg, ignore_items_without_attribute=False) def test_filtering_with_multiple_filters(self): """Test that multiple filters can be specified in FilterSet.filter at once.""" @@ -89,26 +99,23 @@ class TestFilterDict(BaseTestCase): def test_instantiate(self): """Test that a FilterDict can be instantiated like a dictionary.""" filter_dict = FilterDict(a=1, b=3) - self.assertEqual(filter_dict["a"], 1) - self.assertEqual(filter_dict["b"], 3) + self.assertEqual(filter_dict, {"a": 1, "b": 3}) filter_dict = FilterDict({"a": 1, "b": 3}) - self.assertEqual(filter_dict["a"], 1) - self.assertEqual(filter_dict["b"], 3) + self.assertEqual(filter_dict, {"a": 1, "b": 3}) filter_dict = FilterDict(**{"a": 1, "b": 3}) - self.assertEqual(filter_dict["a"], 1) - self.assertEqual(filter_dict["b"], 3) + self.assertEqual(filter_dict, {"a": 1, "b": 3}) - def test_error_raised_if_any_items_are_not_filterable(self): + def test_error_raised_when_filtering_if_any_items_are_not_filterable(self): """Test that an error is raised if any values in the FilterDict are not of type Filterable.""" filter_dict = FilterDict({"a": 1, "b": 2, "c": 3}) with self.assertRaises(TypeError): - filter_dict.filter(a__equals=2) + filter_dict.filter(my_attribute__equals=2) def test_filter(self): - """Test that a FilterDict can be filtered on its values when they are all filterables.""" + """Test that a FilterDict can be filtered on its values when they are all Filterables.""" filterables = { "first-filterable": FilterableThing(my_value=3), "second-filterable": FilterableThing(my_value=90), @@ -129,9 +136,25 @@ def test_filter_with_filterables_of_differing_attributes(self): } filter_dict = FilterDict(filterables) - self.assertEqual(set(filter_dict.filter(a__gt=2).keys()), {"first-filterable", "third-filterable"}) - self.assertEqual(set(filter_dict.filter(b__equals=90).keys()), {"second-filterable"}) - self.assertEqual(set(filter_dict.filter(b__equals=0).keys()), set()) + self.assertEqual(filter_dict.filter(a__gt=2).keys(), {"first-filterable", "third-filterable"}) + self.assertEqual(filter_dict.filter(b__equals=90).keys(), {"second-filterable"}) + self.assertEqual(filter_dict.filter(b__equals=0).keys(), set()) + + def test_filter_with_filterables_of_differing_attributes_fails_if_setting_enabled(self): + """Test filtering with filterables of differing attributes raises an error if any filterables lack the + filtered-for attribute when `ignore_items_without_attribute` is False. + """ + filterables = { + "first-filterable": FilterableThing(a=3), + "second-filterable": FilterableThing(b=90), + "third-filterable": FilterableThing(a=77), + } + + filter_dict = FilterDict(filterables) + + for kwarg in {"a__gt": 2}, {"b__equals": 90}, {"b__equals": 0}: + with self.assertRaises(AttributeError): + filter_dict.filter(**kwarg, ignore_items_without_attribute=False) def test_filter_chaining(self): """Test that filters can be chained to filter a FilterDict multiple times.""" @@ -146,26 +169,26 @@ def test_filtering_with_multiple_filters(self): self.assertEqual(self.ANIMALS.filter(size__equals="small", age__lt=5), {"cat": self.ANIMALS["cat"]}) def test_ordering_by_a_non_existent_attribute(self): - """ Ensure an error is raised if ordering is attempted by a non-existent attribute. """ + """Ensure an error is raised if ordering is attempted by a non-existent attribute.""" with self.assertRaises(exceptions.InvalidInputException): self.ANIMALS.order_by("dog-likeness") def test_order_by_with_string_attribute(self): - """ Test ordering a FilterSet by a string attribute returns an appropriately ordered FilterList. """ + """Test that ordering a FilterDict by a string attribute returns an appropriately ordered FilterList.""" self.assertEqual( self.ANIMALS.order_by("name"), FilterList((self.ANIMALS["cat"], self.ANIMALS["another_dog"], self.ANIMALS["dog"])), ) def test_order_by_with_int_attribute(self): - """ Test ordering a FilterSet by an integer attribute returns an appropriately ordered FilterList. """ + """ Test ordering a FilterDict by an integer attribute returns an appropriately ordered FilterList. """ self.assertEqual( self.ANIMALS.order_by("age"), FilterList((self.ANIMALS["cat"], self.ANIMALS["dog"], self.ANIMALS["another_dog"])), ) def test_order_by_list_attribute(self): - """ Test that ordering by list attributes orders alphabetically by the first element of each list. """ + """Test that ordering by list attributes orders members alphabetically by the first element of each list.""" self.assertEqual( self.ANIMALS.order_by("previous_names"), FilterList((self.ANIMALS["dog"], self.ANIMALS["another_dog"], self.ANIMALS["cat"])), From ad7afa53288fc26b294ea7640275814777d7649a Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 15:24:02 +0100 Subject: [PATCH 058/103] TST: Simplify label tests --- tests/resources/test_label.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/tests/resources/test_label.py b/tests/resources/test_label.py index 378081f46..ab03bed7e 100644 --- a/tests/resources/test_label.py +++ b/tests/resources/test_label.py @@ -73,27 +73,27 @@ class TestLabelSet(BaseTestCase): def test_instantiation_from_space_delimited_string(self): """ Test that a LabelSet can be instantiated from a space-delimited string of label names.""" label_set = LabelSet(labels="a b-c d-e-f") - self.assertEqual(label_set, FilterSet({Label("a"), Label("b-c"), Label("d-e-f")})) + self.assertEqual(label_set, self.LABEL_SET) def test_instantiation_from_iterable_of_strings(self): """ Test that a LabelSet can be instantiated from an iterable of strings.""" label_set = LabelSet(labels=["a", "b-c", "d-e-f"]) - self.assertEqual(label_set, FilterSet({Label("a"), Label("b-c"), Label("d-e-f")})) + self.assertEqual(label_set, self.LABEL_SET) def test_instantiation_from_iterable_of_labels(self): """ Test that a LabelSet can be instantiated from an iterable of labels.""" label_set = LabelSet(labels=[Label("a"), Label("b-c"), Label("d-e-f")]) - self.assertEqual(label_set, FilterSet({Label("a"), Label("b-c"), Label("d-e-f")})) + self.assertEqual(label_set, self.LABEL_SET) def test_instantiation_from_filter_set_of_strings(self): """ Test that a LabelSet can be instantiated from a FilterSet of strings.""" label_set = LabelSet(labels=FilterSet({"a", "b-c", "d-e-f"})) - self.assertEqual(label_set, FilterSet({Label("a"), Label("b-c"), Label("d-e-f")})) + self.assertEqual(label_set, self.LABEL_SET) def test_instantiation_from_filter_set_of_labels(self): """ Test that a LabelSet can be instantiated from a FilterSet of labels.""" label_set = LabelSet(labels=FilterSet({Label("a"), Label("b-c"), Label("d-e-f")})) - self.assertEqual(label_set, FilterSet({Label("a"), Label("b-c"), Label("d-e-f")})) + self.assertEqual(label_set, self.LABEL_SET) def test_instantiation_from_label_set(self): """ Test that a LabelSet can be instantiated from another LabelSet. """ @@ -150,20 +150,12 @@ def test_any_label_ends_swith(self): for label in "b", "d", "e": self.assertFalse(self.LABEL_SET.any_label_ends_with(label)) - def test_any_label_contains_searches_for_labels_and_sublabels(self): - """ Ensure labels and sublabels can be searched for. """ - for label in "a", "b", "d": - self.assertTrue(self.LABEL_SET.any_label_contains(label)) - - for sublabel in "c", "e", "f": - self.assertTrue(self.LABEL_SET.any_label_contains(sublabel)) - def test_serialise(self): - """ Ensure that LabelSets are serialised to the string form of a list. """ + """Ensure that LabelSets serialise to a list.""" self.assertEqual(self.LABEL_SET.serialise(), ["a", "b-c", "d-e-f"]) def test_serialise_orders_labels(self): - """Ensure that LabelSets serialise to a list.""" + """Ensure that serialising a LabelSet results in a sorted list.""" label_set = LabelSet("z hello a c-no") self.assertEqual(label_set.serialise(), ["a", "c-no", "hello", "z"]) From 9b0725a4c0d1235ee1f78a877b3a3192a78b9416 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 15:29:17 +0100 Subject: [PATCH 059/103] TST: Add tags to datasets in manifest tests --- tests/resources/test_manifest.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/resources/test_manifest.py b/tests/resources/test_manifest.py index e6b394da0..dc36e0352 100644 --- a/tests/resources/test_manifest.py +++ b/tests/resources/test_manifest.py @@ -70,8 +70,8 @@ def test_to_cloud(self): dataset = Dataset( name="my-dataset", files={ - Datafile(path=file_0_path, sequence=0, labels={"hello"}), - Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), + Datafile(path=file_0_path, sequence=0, tags={"a": 1, "b": 2}, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, tags={"a": 1, "b": 2}, labels={"goodbye"}), }, ) @@ -109,8 +109,8 @@ def test_to_cloud_without_storing_datasets(self): name="my-dataset", path=temporary_directory, files={ - Datafile(path=file_0_path, sequence=0, labels={"hello"}), - Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), + Datafile(path=file_0_path, sequence=0, tags={"a": 1, "b": 2}, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, tags={"a": 1, "b": 2}, labels={"goodbye"}), }, ) @@ -148,8 +148,8 @@ def test_from_cloud(self): dataset = Dataset( name="my-dataset", files={ - Datafile(path=file_0_path, sequence=0, labels={"hello"}), - Datafile(path=file_1_path, sequence=1, labels={"goodbye"}), + Datafile(path=file_0_path, sequence=0, tags={"a": 1, "b": 2}, labels={"hello"}), + Datafile(path=file_1_path, sequence=1, tags={"a": 1, "b": 2}, labels={"goodbye"}), }, ) From 7e16fe8e79f175ddb5d9c7c3e956af5933eeca9c Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 15:33:55 +0100 Subject: [PATCH 060/103] TST: Improve and simplify some more tests --- tests/mixins/test_taggable.py | 3 ++- tests/resources/test_datafile.py | 2 ++ tests/resources/test_tag.py | 11 ++++------- tests/utils/test_objects.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/mixins/test_taggable.py b/tests/mixins/test_taggable.py index 9d2efc585..48b23f4a7 100644 --- a/tests/mixins/test_taggable.py +++ b/tests/mixins/test_taggable.py @@ -15,6 +15,7 @@ def test_instantiates(self): self.assertEqual(taggable.tags, {}) def test_instantiating_with_no_tags(self): + """Test that instantiating a Taggable with no tags results in an empty TagDict on the tags attribute.""" self.assertEqual(MyTaggable().tags, TagDict()) def test_fails_to_instantiates_with_non_iterable(self): @@ -27,7 +28,7 @@ class NoIter: MyTaggable(tags=NoIter()) def test_instantiates_with_dict(self): - """Test instantiation with a dictionary.""" + """Test instantiation with a dictionary works.""" tags = {"height": 9, "width": 8.7, "depth": 100} taggable = MyTaggable(tags=tags) self.assertEqual(taggable.tags, tags) diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index bacdc1553..90b42f358 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -11,6 +11,7 @@ from octue.mixins import MixinBase, Pathable from octue.resources.datafile import TEMPORARY_LOCAL_FILE_CACHE, Datafile from octue.resources.label import LabelSet +from octue.resources.tag import TagDict from tests import TEST_BUCKET_NAME, TEST_PROJECT_NAME from ..base import BaseTestCase @@ -190,6 +191,7 @@ def test_from_cloud_with_bare_file(self): self.assertEqual(datafile.path, f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}") self.assertEqual(datafile.cluster, 0) self.assertEqual(datafile.sequence, None) + self.assertEqual(datafile.tags, TagDict()) self.assertEqual(datafile.labels, LabelSet()) self.assertTrue(isinstance(datafile.size_bytes, int)) self.assertTrue(isinstance(datafile._last_modified, float)) diff --git a/tests/resources/test_tag.py b/tests/resources/test_tag.py index 496ab8dbd..7d6fd2442 100644 --- a/tests/resources/test_tag.py +++ b/tests/resources/test_tag.py @@ -8,14 +8,12 @@ class TestTagDict(TestCase): def test_instantiate_from_dict(self): """Test that a TagDict can be instantiated from a dictionary.""" tag_dict = TagDict({"a": 1, "b": 2}) - self.assertEqual(tag_dict["a"], 1) - self.assertEqual(tag_dict["b"], 2) + self.assertEqual(tag_dict, {"a": 1, "b": 2}) def test_instantiate_from_kwargs(self): """Test that a TagDict can be instantiated from kwargs.""" tag_dict = TagDict(**{"a": 1, "b": 2}) - self.assertEqual(tag_dict["a"], 1) - self.assertEqual(tag_dict["b"], 2) + self.assertEqual(tag_dict, {"a": 1, "b": 2}) def test_instantiation_fails_if_tag_name_fails_validation(self): """Test that TagDict instantiation fails if any keys don't conform to the tag name pattern.""" @@ -35,8 +33,7 @@ def test_update(self): """Test that TagDicts can be updated with tags with valid names.""" tag_dict = TagDict({"a": 1, "b": 2}) tag_dict.update({"c": 3, "d": 4}) - self.assertEqual(tag_dict["c"], 3) - self.assertEqual(tag_dict["d"], 4) + self.assertEqual(tag_dict, {"a": 1, "b": 2, "c": 3, "d": 4}) def test_setitem_fails_if_tag_name_fails_validation(self): """Test that setting an item on a TagDict fails if the name fails validation.""" @@ -49,7 +46,7 @@ def test_setitem(self): """Test setting an item on a TagDict.""" tag_dict = TagDict() tag_dict["hello"] = 9 - self.assertEqual(tag_dict["hello"], 9) + self.assertEqual(tag_dict, {"hello": 9}) def test_equality_to_dict(self): """Test that TagDicts compare equal to dictionaries with the same contents.""" diff --git a/tests/utils/test_objects.py b/tests/utils/test_objects.py index 6c7c48db4..ab077db25 100644 --- a/tests/utils/test_objects.py +++ b/tests/utils/test_objects.py @@ -11,7 +11,7 @@ def test_getattr_or_subscribe_with_dictionary(self): def test_getattr_or_subscribe_with_object(self): """Test that the getattr_or_subscribe function can get attribute values from a class instance.""" - self.assertEqual(getattr_or_subscribe(instance=Mock(a=3), name="a"), 3) + self.assertEqual(getattr_or_subscribe(instance=Mock(hello="world"), name="hello"), "world") def test_get_nested_attribute(self): """Test that nested attributes can be accessed.""" From 19c1891facaf07131675705632e3cd66e0b32532 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 15:43:51 +0100 Subject: [PATCH 061/103] TST: Test uncovered areas --- tests/mixins/test_filterable.py | 5 +++++ tests/mixins/test_serialisable.py | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index 5c1b60140..27809ce76 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -36,6 +36,11 @@ def test_error_raised_when_attribute_type_has_no_filters_defined(self): with self.assertRaises(exceptions.InvalidInputException): FilterableSubclass(age=lambda: None).satisfies(age__equals=True) + def test_error_raised_if_more_than_one_filter_is_provided(self): + """Test that an error is raised if more than one filter is provided to the satisfies method.""" + with self.assertRaises(ValueError): + FilterableSubclass(age=23).satisfies(age__equals=23, age__equals__gt=20) + def test_bool_filters(self): """ Test that the boolean filters work as expected. """ filterable_thing = FilterableSubclass(is_alive=True) diff --git a/tests/mixins/test_serialisable.py b/tests/mixins/test_serialisable.py index 7d844a362..96d29ec89 100644 --- a/tests/mixins/test_serialisable.py +++ b/tests/mixins/test_serialisable.py @@ -25,6 +25,16 @@ def test_instantiates_with_no_args(self): """Ensures the class instantiates without arguments""" Serialisable() + def test_logger_attribute_excluded_even_when_not_explicitly_excluded(self): + """Test that a Serialisable with "logger" not explicitly included in the `_EXCLUDE_SERIALISE_FIELDS` class + variable still excludes the `logger` attribute. + """ + + class SerialisableWithLoggerNotExplicitlyExcluded(Serialisable): + _EXCLUDE_SERIALISE_FIELDS = [] + + self.assertTrue("logger" in SerialisableWithLoggerNotExplicitlyExcluded()._EXCLUDE_SERIALISE_FIELDS) + def test_returns_primitive_without_logger_or_protected_fields(self): """Ensures class instantiates with a UUID()""" resource = Inherit() From 00912a1181e93a9aa81c0f96107908b3d1980d22 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Fri, 21 May 2021 17:00:47 +0100 Subject: [PATCH 062/103] IMP: Use new format for manifests' datasets in twine.json files --- octue/resources/manifest.py | 6 ++-- .../template-python-fractal/twine.json | 14 ++++++---- .../template-using-manifests/twine.json | 28 +++++++++++-------- tests/test_runner.py | 22 ++++++++------- 4 files changed, 39 insertions(+), 31 deletions(-) diff --git a/octue/resources/manifest.py b/octue/resources/manifest.py index c1735e0ab..bfe39dec6 100644 --- a/octue/resources/manifest.py +++ b/octue/resources/manifest.py @@ -138,12 +138,12 @@ def prepare(self, data): if len(self.datasets) > 0: raise InvalidInputException("You cannot `prepare()` a manifest already instantiated with datasets") - for idx, dataset_spec in enumerate(data): + for index, dataset_specification in enumerate(data["datasets"]): - self.keys[dataset_spec["key"]] = idx + self.keys[dataset_specification["key"]] = index # TODO generate a unique name based on the filter key, label datasets so that the label filters in the spec # apply automatically and generate a description of the dataset - self.datasets.append(Dataset(logger=self.logger, path_from=self, path=dataset_spec["key"])) + self.datasets.append(Dataset(logger=self.logger, path_from=self, path=dataset_specification["key"])) return self diff --git a/octue/templates/template-python-fractal/twine.json b/octue/templates/template-python-fractal/twine.json index 33a3b2856..03c53ae42 100644 --- a/octue/templates/template-python-fractal/twine.json +++ b/octue/templates/template-python-fractal/twine.json @@ -60,10 +60,12 @@ } } }, - "output_manifest": [ - { - "key": "fractal_figure_files", - "purpose": "A dataset containing .json files containing the output figures" - } - ] + "output_manifest": { + "datasets": [ + { + "key": "fractal_figure_files", + "purpose": "A dataset containing .json files containing the output figures" + } + ] + } } diff --git a/octue/templates/template-using-manifests/twine.json b/octue/templates/template-using-manifests/twine.json index 5b0ec4e17..3ce7b182c 100644 --- a/octue/templates/template-using-manifests/twine.json +++ b/octue/templates/template-using-manifests/twine.json @@ -12,16 +12,20 @@ } } }, - "input_manifest": [ - { - "key": "raw_met_mast_data", - "purpose": "A dataset containing .csv files of raw meteorological mast data which we need to clean up" - } - ], - "output_manifest": [ - { - "key": "cleaned_met_mast_data", - "purpose": "A dataset containing .csv files of cleaned meteorological mast data" - } - ] + "input_manifest": { + "datasets": [ + { + "key": "raw_met_mast_data", + "purpose": "A dataset containing .csv files of raw meteorological mast data which we need to clean up" + } + ] + }, + "output_manifest": { + "datasets": [ + { + "key": "cleaned_met_mast_data", + "purpose": "A dataset containing .csv files of cleaned meteorological mast data" + } + ] + } } diff --git a/tests/test_runner.py b/tests/test_runner.py index a7ee96b17..f74fd1e18 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -127,16 +127,18 @@ def test_output_manifest_is_not_none(self): app_src=mock_app, twine=""" { - "output_manifest": [ - { - "key": "open_foam_result", - "purpose": "A dataset containing solution fields of an openfoam case." - }, - { - "key": "airfoil_cp_values", - "purpose": "A file containing cp values" - } - ] + "output_manifest": { + "datasets": [ + { + "key": "open_foam_result", + "purpose": "A dataset containing solution fields of an openfoam case." + }, + { + "key": "airfoil_cp_values", + "purpose": "A file containing cp values" + } + ] + } } """, ) From e17be0c680c588edbd2cdd8bb494a0d891ee09ce Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 12:47:39 +0100 Subject: [PATCH 063/103] IMP: Prefix GCS custom metadata fields with "octue__" --- octue/resources/datafile.py | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index e839eb5b1..7d438a60f 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -19,6 +19,7 @@ TEMPORARY_LOCAL_FILE_CACHE = {} +OCTUE_METADATA_NAMESPACE = "octue" ID_DEFAULT = None @@ -191,18 +192,22 @@ def from_cloud( if not allow_overwrite: cls._check_for_attribute_conflict(custom_metadata, **kwargs) - timestamp = kwargs.get("timestamp", custom_metadata.get("timestamp")) + timestamp = kwargs.get("timestamp", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__timestamp")) if isinstance(timestamp, str): timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f%z") - datafile._set_id(kwargs.pop("id", custom_metadata.get("id", ID_DEFAULT))) + datafile._set_id(kwargs.pop("id", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__id", ID_DEFAULT))) datafile.path = storage.path.generate_gs_path(bucket_name, datafile_path) datafile.timestamp = timestamp datafile.immutable_hash_value = datafile._cloud_metadata.get("crc32c", EMPTY_STRING_HASH_VALUE) - datafile.cluster = kwargs.pop("cluster", custom_metadata.get("cluster", CLUSTER_DEFAULT)) - datafile.sequence = kwargs.pop("sequence", custom_metadata.get("sequence", SEQUENCE_DEFAULT)) - datafile.tags = kwargs.pop("tags", custom_metadata.get("tags", TAGS_DEFAULT)) + datafile.cluster = kwargs.pop( + "cluster", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__cluster", CLUSTER_DEFAULT) + ) + datafile.sequence = kwargs.pop( + "sequence", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__sequence", SEQUENCE_DEFAULT) + ) + datafile.tags = kwargs.pop("tags", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__tags", TAGS_DEFAULT)) datafile._open_attributes = {"mode": mode, "update_cloud_metadata": update_cloud_metadata, **kwargs} return datafile @@ -254,11 +259,15 @@ def get_cloud_metadata(self, project_name=None, bucket_name=None, path_in_bucket custom_metadata = cloud_metadata["custom_metadata"] - if custom_metadata.get("cluster") is not None: - custom_metadata["cluster"] = int(custom_metadata["cluster"]) + if custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__cluster") is not None: + custom_metadata[f"{OCTUE_METADATA_NAMESPACE}__cluster"] = int( + custom_metadata[f"{OCTUE_METADATA_NAMESPACE}__cluster"] + ) - if custom_metadata.get("sequence") is not None: - custom_metadata["sequence"] = int(custom_metadata["sequence"]) + if custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__sequence") is not None: + custom_metadata[f"{OCTUE_METADATA_NAMESPACE}__sequence"] = int( + custom_metadata[f"{OCTUE_METADATA_NAMESPACE}__sequence"] + ) self._cloud_metadata = cloud_metadata @@ -478,12 +487,13 @@ def open(self): """ return functools.partial(_DatafileContextManager, self) - def metadata(self): + def metadata(self, use_octue_namespace=True): """Get the datafile's metadata in a serialised form. + :param bool use_octue_namespace: if True, prefix metadata names with "octue__" :return dict: """ - return { + metadata = { "id": self.id, "timestamp": self.timestamp, "cluster": self.cluster, @@ -491,6 +501,11 @@ def metadata(self): "tags": self.tags.serialise(to_string=True), } + if not use_octue_namespace: + return metadata + + return {f"{OCTUE_METADATA_NAMESPACE}__{key}": value for key, value in metadata.items()} + class _DatafileContextManager: """A context manager for opening datafiles for reading and writing locally or from the cloud. Its usage is analogous From 02e1a4773fa69e5f150139cfbdcec0bc604222ba Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 12:52:49 +0100 Subject: [PATCH 064/103] TST: Test Datafile.metadata --- tests/resources/test_datafile.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index c08012cec..7a62a88c2 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -513,3 +513,16 @@ def test_from_datafile_as_context_manager(self): # Check that the cloud file has been updated. with re_downloaded_datafile.open() as f: self.assertEqual(f.read(), new_contents) + + def test_metadata(self): + """Test that the metadata method namespaces the metadata names when required.""" + datafile = self.create_valid_datafile() + + self.assertEqual( + datafile.metadata().keys(), + {"octue__id", "octue__timestamp", "octue__cluster", "octue__sequence", "octue__tags"}, + ) + + self.assertEqual( + datafile.metadata(use_octue_namespace=False).keys(), {"id", "timestamp", "cluster", "sequence", "tags"} + ) From fe3a8a0e22db8925864b16864715fd880d9d1fe6 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 12:57:43 +0100 Subject: [PATCH 065/103] OPS: Increase version number --- .../template-child-services/elevation_service/requirements.txt | 2 +- .../template-child-services/parent_service/requirements.txt | 2 +- .../template-child-services/wind_speed_service/requirements.txt | 2 +- octue/templates/template-python-fractal/requirements.txt | 2 +- octue/templates/template-using-manifests/requirements.txt | 2 +- setup.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/octue/templates/template-child-services/elevation_service/requirements.txt b/octue/templates/template-child-services/elevation_service/requirements.txt index 792c83cda..c6f8356cc 100644 --- a/octue/templates/template-child-services/elevation_service/requirements.txt +++ b/octue/templates/template-child-services/elevation_service/requirements.txt @@ -1 +1 @@ -octue==0.1.18 +octue==0.1.19 diff --git a/octue/templates/template-child-services/parent_service/requirements.txt b/octue/templates/template-child-services/parent_service/requirements.txt index 792c83cda..c6f8356cc 100644 --- a/octue/templates/template-child-services/parent_service/requirements.txt +++ b/octue/templates/template-child-services/parent_service/requirements.txt @@ -1 +1 @@ -octue==0.1.18 +octue==0.1.19 diff --git a/octue/templates/template-child-services/wind_speed_service/requirements.txt b/octue/templates/template-child-services/wind_speed_service/requirements.txt index 792c83cda..c6f8356cc 100644 --- a/octue/templates/template-child-services/wind_speed_service/requirements.txt +++ b/octue/templates/template-child-services/wind_speed_service/requirements.txt @@ -1 +1 @@ -octue==0.1.18 +octue==0.1.19 diff --git a/octue/templates/template-python-fractal/requirements.txt b/octue/templates/template-python-fractal/requirements.txt index 70df230eb..504ad0afa 100644 --- a/octue/templates/template-python-fractal/requirements.txt +++ b/octue/templates/template-python-fractal/requirements.txt @@ -1,4 +1,4 @@ -octue==0.1.18 +octue==0.1.19 # ----------- Some common libraries ----------------------------------------------------------------------------------- diff --git a/octue/templates/template-using-manifests/requirements.txt b/octue/templates/template-using-manifests/requirements.txt index 866486a84..2c599166a 100644 --- a/octue/templates/template-using-manifests/requirements.txt +++ b/octue/templates/template-using-manifests/requirements.txt @@ -1,4 +1,4 @@ -octue==0.1.18 +octue==0.1.19 # ----------- Some common libraries ----------------------------------------------------------------------------------- diff --git a/setup.py b/setup.py index 5388c710e..1cac8df8b 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ setup( name="octue", - version="0.1.18", # Ensure all requirements files containing octue are updated, too (e.g. docs build). + version="0.1.19", # Ensure all requirements files containing octue are updated, too (e.g. docs build). py_modules=["cli"], install_requires=[ "click>=7.1.2", From 7ec75c2485cc05449a539133cf4e60f3a867594b Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 15:34:18 +0100 Subject: [PATCH 066/103] IMP: Allow gs:// paths to be used directly with resources --- octue/cloud/storage/client.py | 91 ++++++++++++++++++++------------ octue/resources/datafile.py | 65 +++++++++++++++-------- octue/resources/dataset.py | 28 ++++++---- octue/resources/manifest.py | 32 +++++++---- tests/resources/test_datafile.py | 53 ++++++++++++------- tests/resources/test_dataset.py | 2 +- tests/resources/test_manifest.py | 6 +-- 7 files changed, 179 insertions(+), 98 deletions(-) diff --git a/octue/cloud/storage/client.py b/octue/cloud/storage/client.py index d4ba9f087..32df9500b 100644 --- a/octue/cloud/storage/client.py +++ b/octue/cloud/storage/client.py @@ -5,6 +5,7 @@ from google_crc32c import Checksum from octue.cloud.credentials import GCPCredentialsManager +from octue.cloud.storage.path import split_bucket_name_from_gs_path logger = logging.getLogger(__name__) @@ -46,17 +47,20 @@ def create_bucket(self, name, location=None, allow_existing=False, timeout=_DEFA self.client.create_bucket(bucket_or_name=name, location=location, timeout=timeout) - def upload_file(self, local_path, bucket_name, path_in_bucket, metadata=None, timeout=_DEFAULT_TIMEOUT): + def upload_file( + self, local_path, gs_path=None, bucket_name=None, path_in_bucket=None, metadata=None, timeout=_DEFAULT_TIMEOUT + ): """Upload a local file to a Google Cloud bucket at gs:///. :param str local_path: - :param str bucket_name: - :param str path_in_bucket: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_in_bucket: :param dict metadata: :param float timeout: :return None: """ - blob = self._blob(bucket_name, path_in_bucket) + blob = self._blob(gs_path, bucket_name, path_in_bucket) with open(local_path) as f: blob.crc32c = self._compute_crc32c_checksum(f.read()) @@ -65,69 +69,81 @@ def upload_file(self, local_path, bucket_name, path_in_bucket, metadata=None, ti self._update_metadata(blob, metadata) logger.info("Uploaded %r to Google Cloud at %r.", local_path, blob.public_url) - def upload_from_string(self, string, bucket_name, path_in_bucket, metadata=None, timeout=_DEFAULT_TIMEOUT): + def upload_from_string( + self, string, gs_path=None, bucket_name=None, path_in_bucket=None, metadata=None, timeout=_DEFAULT_TIMEOUT + ): """Upload serialised data in string form to a file in a Google Cloud bucket at gs:///. :param str string: - :param str bucket_name: - :param str path_in_bucket: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_in_bucket: :param dict metadata: :param float timeout: :return None: """ - blob = self._blob(bucket_name, path_in_bucket) + blob = self._blob(gs_path, bucket_name, path_in_bucket) blob.crc32c = self._compute_crc32c_checksum(string) blob.upload_from_string(data=string, timeout=timeout) self._update_metadata(blob, metadata) logger.info("Uploaded data to Google Cloud at %r.", blob.public_url) - def update_metadata(self, bucket_name, path_in_bucket, metadata): + def update_metadata(self, metadata, gs_path=None, bucket_name=None, path_in_bucket=None): """Update the metadata for the given cloud file. - :param str bucket_name: - :param str path_in_bucket: :param dict metadata: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_in_bucket: :return None: """ - blob = self._blob(bucket_name, path_in_bucket) + blob = self._blob(gs_path, bucket_name, path_in_bucket) self._update_metadata(blob, metadata) - def download_to_file(self, bucket_name, path_in_bucket, local_path, timeout=_DEFAULT_TIMEOUT): + def download_to_file( + self, local_path, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT + ): """Download a file to a file from a Google Cloud bucket at gs:///. - :param str bucket_name: - :param str path_in_bucket: :param str local_path: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_in_bucket: :param float timeout: :return None: """ - blob = self._blob(bucket_name, path_in_bucket) + blob = self._blob(gs_path, bucket_name, path_in_bucket) blob.download_to_filename(local_path, timeout=timeout) logger.info("Downloaded %r from Google Cloud to %r.", blob.public_url, local_path) - def download_as_string(self, bucket_name, path_in_bucket, timeout=_DEFAULT_TIMEOUT): + def download_as_string(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): """Download a file to a string from a Google Cloud bucket at gs:///. - :param str bucket_name: - :param str path_in_bucket: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_in_bucket: :param float timeout: :return str: """ - blob = self._blob(bucket_name, path_in_bucket) + blob = self._blob(gs_path, bucket_name, path_in_bucket) data = blob.download_as_bytes(timeout=timeout) logger.info("Downloaded %r from Google Cloud to as string.", blob.public_url) return data.decode() - def get_metadata(self, bucket_name, path_in_bucket, timeout=_DEFAULT_TIMEOUT): + def get_metadata(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): """Get the metadata of the given file in the given bucket. - :param str bucket_name: - :param str path_in_bucket: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_in_bucket: :param float timeout: :return dict: """ + if gs_path: + bucket_name, path_in_bucket = split_bucket_name_from_gs_path(gs_path) + bucket = self.client.get_bucket(bucket_or_name=bucket_name) blob = bucket.get_blob(blob_name=self._strip_leading_slash(path_in_bucket), timeout=timeout) @@ -147,27 +163,32 @@ def get_metadata(self, bucket_name, path_in_bucket, timeout=_DEFAULT_TIMEOUT): "path_in_bucket": path_in_bucket, } - def delete(self, bucket_name, path_in_bucket, timeout=_DEFAULT_TIMEOUT): + def delete(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): """Delete the given file from the given bucket. - :param str bucket_name: - :param str path_in_bucket: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_in_bucket: :param float timeout: :return None: """ - blob = self._blob(bucket_name, path_in_bucket) + blob = self._blob(gs_path, bucket_name, path_in_bucket) blob.delete(timeout=timeout) logger.info("Deleted %r from Google Cloud.", blob.public_url) - def scandir(self, bucket_name, directory_path, filter=None, timeout=_DEFAULT_TIMEOUT): + def scandir(self, gs_path=None, bucket_name=None, directory_path=None, filter=None, timeout=_DEFAULT_TIMEOUT): """Yield the blobs belonging to the given "directory" in the given bucket. - :param str bucket_name: - :param str directory_path: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None directory_path: :param callable filter: :param float timeout: :yield google.cloud.storage.blob.Blob: """ + if gs_path: + bucket_name, path_in_bucket = split_bucket_name_from_gs_path(gs_path) + bucket = self.client.get_bucket(bucket_or_name=bucket_name) blobs = bucket.list_blobs(timeout=timeout) directory_path = self._strip_leading_slash(directory_path) @@ -185,13 +206,17 @@ def _strip_leading_slash(self, path): """ return path.lstrip("/") - def _blob(self, bucket_name, path_in_bucket): + def _blob(self, gs_path=None, bucket_name=None, path_in_bucket=None): """Instantiate a blob for the given bucket at the given path. Note that this is not synced up with Google Cloud. - :param str bucket_name: - :param str path_in_bucket: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_in_bucket: :return google.cloud.storage.blob.Blob: """ + if gs_path: + bucket_name, path_in_bucket = split_bucket_name_from_gs_path(gs_path) + bucket = self.client.get_bucket(bucket_or_name=bucket_name) return bucket.blob(blob_name=self._strip_leading_slash(path_in_bucket)) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 7d438a60f..2c636280f 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -160,8 +160,9 @@ def deserialise(cls, serialised_datafile, path_from=None): def from_cloud( cls, project_name, - bucket_name, - datafile_path, + gs_path=None, + bucket_name=None, + datafile_path=None, allow_overwrite=False, mode="r", update_cloud_metadata=True, @@ -175,8 +176,9 @@ def from_cloud( Note that a value provided for an attribute in kwargs will override any existing value for the attribute. :param str project_name: - :param str bucket_name: - :param str datafile_path: path to file represented by datafile + :param str|None gs_path: + :param str|None bucket_name: + :param str|None datafile_path: path to file represented by datafile :param bool allow_overwrite: if `True`, allow attributes of the datafile to be overwritten by values given in kwargs :param str mode: if using as a context manager, open the datafile for reading/editing in this mode (the mode @@ -185,8 +187,11 @@ def from_cloud( the datafile when the context is exited :return Datafile: """ - datafile = cls(path=storage.path.generate_gs_path(bucket_name, datafile_path)) - datafile.get_cloud_metadata(project_name, bucket_name, datafile_path) + if not gs_path: + gs_path = storage.path.generate_gs_path(bucket_name, datafile_path) + + datafile = cls(path=gs_path) + datafile.get_cloud_metadata(project_name, gs_path=gs_path) custom_metadata = datafile._cloud_metadata.get("custom_metadata", {}) if not allow_overwrite: @@ -198,7 +203,6 @@ def from_cloud( timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f%z") datafile._set_id(kwargs.pop("id", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__id", ID_DEFAULT))) - datafile.path = storage.path.generate_gs_path(bucket_name, datafile_path) datafile.timestamp = timestamp datafile.immutable_hash_value = datafile._cloud_metadata.get("crc32c", EMPTY_STRING_HASH_VALUE) datafile.cluster = kwargs.pop( @@ -211,23 +215,27 @@ def from_cloud( datafile._open_attributes = {"mode": mode, "update_cloud_metadata": update_cloud_metadata, **kwargs} return datafile - def to_cloud(self, project_name=None, bucket_name=None, path_in_bucket=None, update_cloud_metadata=True): + def to_cloud( + self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None, update_cloud_metadata=True + ): """Upload a datafile to Google Cloud Storage. :param str|None project_name: + :param str|None gs_path: :param str|None bucket_name: :param str|None path_in_bucket: :param bool update_cloud_metadata: :return str: gs:// path for datafile """ - project_name, bucket_name, path_in_bucket = self._get_cloud_location(project_name, bucket_name, path_in_bucket) - self.get_cloud_metadata(project_name, bucket_name, path_in_bucket) + project_name, bucket_name, path_in_bucket = self._get_cloud_location( + project_name, gs_path, bucket_name, path_in_bucket + ) - storage_client = GoogleCloudStorageClient(project_name=project_name) + self.get_cloud_metadata(project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket) # If the datafile's file has been changed locally, overwrite its cloud copy. if self._cloud_metadata.get("crc32c") != self.hash_value: - storage_client.upload_file( + GoogleCloudStorageClient(project_name=project_name).upload_file( local_path=self.get_local_path(), bucket_name=bucket_name, path_in_bucket=path_in_bucket, @@ -239,20 +247,26 @@ def to_cloud(self, project_name=None, bucket_name=None, path_in_bucket=None, upd local_metadata = self.metadata() if self._cloud_metadata.get("custom_metadata") != local_metadata: - self.update_cloud_metadata(project_name, bucket_name, path_in_bucket) + self.update_cloud_metadata(project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket) - return storage.path.generate_gs_path(bucket_name, path_in_bucket) + return gs_path or storage.path.generate_gs_path(bucket_name, path_in_bucket) - def get_cloud_metadata(self, project_name=None, bucket_name=None, path_in_bucket=None): + def get_cloud_metadata(self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None): """Get the cloud metadata for the datafile, casting the types of the cluster and sequence fields to integer. :param str|None project_name: + :param str|None gs_path: :param str|None bucket_name: :param str|None path_in_bucket: :return dict: """ - project_name, bucket_name, path_in_bucket = self._get_cloud_location(project_name, bucket_name, path_in_bucket) - cloud_metadata = GoogleCloudStorageClient(project_name).get_metadata(bucket_name, path_in_bucket) + project_name, bucket_name, path_in_bucket = self._get_cloud_location( + project_name, gs_path, bucket_name, path_in_bucket + ) + + cloud_metadata = GoogleCloudStorageClient(project_name).get_metadata( + bucket_name=bucket_name, path_in_bucket=path_in_bucket + ) if cloud_metadata is None: return None @@ -271,20 +285,23 @@ def get_cloud_metadata(self, project_name=None, bucket_name=None, path_in_bucket self._cloud_metadata = cloud_metadata - def update_cloud_metadata(self, project_name=None, bucket_name=None, path_in_bucket=None): + def update_cloud_metadata(self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None): """Update the cloud metadata for the datafile. :param str|None project_name: + :param str|None gs_path: :param str|None bucket_name: :param str|None path_in_bucket: :return None: """ - project_name, bucket_name, path_in_bucket = self._get_cloud_location(project_name, bucket_name, path_in_bucket) + project_name, bucket_name, path_in_bucket = self._get_cloud_location( + project_name, gs_path, bucket_name, path_in_bucket + ) GoogleCloudStorageClient(project_name=project_name).update_metadata( + metadata=self.metadata(), bucket_name=bucket_name, path_in_bucket=path_in_bucket, - metadata=self.metadata(), ) self._store_cloud_location(project_name, bucket_name, path_in_bucket) @@ -372,7 +389,7 @@ def get_local_path(self): temporary_local_path = tempfile.NamedTemporaryFile(delete=False).name GoogleCloudStorageClient(project_name=self._cloud_metadata["project_name"]).download_to_file( - *storage.path.split_bucket_name_from_gs_path(self.absolute_path), local_path=temporary_local_path + local_path=temporary_local_path, gs_path=self.absolute_path ) TEMPORARY_LOCAL_FILE_CACHE[self.absolute_path] = temporary_local_path @@ -407,16 +424,20 @@ def _calculate_hash(self): return super()._calculate_hash(hash) - def _get_cloud_location(self, project_name=None, bucket_name=None, path_in_bucket=None): + def _get_cloud_location(self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None): """Get the cloud location details for the bucket, allowing the keyword arguments to override any stored values. :param str|None project_name: + :param str|None gs_path: :param str|None bucket_name: :param str|None path_in_bucket: :raise octue.exceptions.CloudLocationNotSpecified: if an exact cloud location isn't provided and isn't available implicitly (i.e. the Datafile wasn't loaded from the cloud previously) :return (str, str, str): """ + if gs_path: + bucket_name, path_in_bucket = storage.path.split_bucket_name_from_gs_path(gs_path) + try: project_name = project_name or self._cloud_metadata["project_name"] bucket_name = bucket_name or self._cloud_metadata["bucket_name"] diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index 406a4b87d..fc5d5f358 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -55,18 +55,20 @@ def __len__(self): return len(self.files) @classmethod - def from_cloud(cls, project_name, bucket_name, path_to_dataset_directory): + def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_dataset_directory=None): """Instantiate a Dataset from Google Cloud storage. :param str project_name: - :param str bucket_name: - :param str path_to_dataset_directory: path to dataset directory (directory containing dataset's files) + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_to_dataset_directory: path to dataset directory (directory containing dataset's files) :return Dataset: """ - storage_client = GoogleCloudStorageClient(project_name=project_name) + if gs_path: + bucket_name, path_to_dataset_directory = storage.path.split_bucket_name_from_gs_path(gs_path) serialised_dataset = json.loads( - storage_client.download_as_string( + GoogleCloudStorageClient(project_name=project_name).download_as_string( bucket_name=bucket_name, path_in_bucket=storage.path.join(path_to_dataset_directory, definitions.DATASET_FILENAME), ) @@ -89,19 +91,25 @@ def from_cloud(cls, project_name, bucket_name, path_to_dataset_directory): files=datafiles, ) - def to_cloud(self, project_name, bucket_name, output_directory): + def to_cloud(self, project_name, gs_path=None, bucket_name=None, output_directory=None): """Upload a dataset to a cloud location. :param str project_name: - :param str bucket_name: - :param str output_directory: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None output_directory: :return str: gs:// path for dataset """ + if gs_path: + bucket_name, output_directory = storage.path.split_bucket_name_from_gs_path(gs_path) + files = [] for datafile in self.files: datafile_path = datafile.to_cloud( - project_name, bucket_name, path_in_bucket=storage.path.join(output_directory, self.name, datafile.name) + project_name, + bucket_name=bucket_name, + path_in_bucket=storage.path.join(output_directory, self.name, datafile.name), ) files.append(datafile_path) @@ -116,7 +124,7 @@ def to_cloud(self, project_name, bucket_name, output_directory): path_in_bucket=storage.path.join(output_directory, self.name, definitions.DATASET_FILENAME), ) - return storage.path.generate_gs_path(bucket_name, output_directory, self.name) + return gs_path or storage.path.generate_gs_path(bucket_name, output_directory, self.name) @property def name(self): diff --git a/octue/resources/manifest.py b/octue/resources/manifest.py index d5224fa8e..d389cb1fd 100644 --- a/octue/resources/manifest.py +++ b/octue/resources/manifest.py @@ -45,18 +45,22 @@ def __init__(self, id=None, logger=None, path=None, datasets=None, keys=None, ** vars(self).update(**kwargs) @classmethod - def from_cloud(cls, project_name, bucket_name, path_to_manifest_file): + def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_manifest_file=None): """Instantiate a Manifest from Google Cloud storage. :param str project_name: - :param str bucket_name: - :param str path_to_manifest_file: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_to_manifest_file: :return Dataset: """ - storage_client = GoogleCloudStorageClient(project_name=project_name) + if gs_path: + bucket_name, path_to_manifest_file = storage.path.split_bucket_name_from_gs_path(gs_path) serialised_manifest = json.loads( - storage_client.download_as_string(bucket_name=bucket_name, path_in_bucket=path_to_manifest_file) + GoogleCloudStorageClient(project_name=project_name).download_as_string( + bucket_name=bucket_name, path_in_bucket=path_to_manifest_file + ) ) datasets = [] @@ -77,23 +81,31 @@ def from_cloud(cls, project_name, bucket_name, path_to_manifest_file): keys=serialised_manifest["keys"], ) - def to_cloud(self, project_name, bucket_name, path_to_manifest_file, store_datasets=True): + def to_cloud(self, project_name, gs_path=None, bucket_name=None, path_to_manifest_file=None, store_datasets=True): """Upload a manifest to a cloud location, optionally uploading its datasets into the same directory. :param str project_name: - :param str bucket_name: - :param str path_to_manifest_file: + :param str|None gs_path: + :param str|None bucket_name: + :param str|None path_to_manifest_file: :param bool store_datasets: if True, upload datasets to same directory as manifest file :return str: gs:// path for manifest file """ + if gs_path: + bucket_name, path_to_manifest_file = storage.path.split_bucket_name_from_gs_path(gs_path) + datasets = [] output_directory = storage.path.dirname(path_to_manifest_file) for dataset in self.datasets: if store_datasets: - dataset_path = dataset.to_cloud(project_name, bucket_name, output_directory=output_directory) + dataset_path = dataset.to_cloud( + project_name, bucket_name=bucket_name, output_directory=output_directory + ) + datasets.append(dataset_path) + else: datasets.append(dataset.absolute_path) @@ -107,7 +119,7 @@ def to_cloud(self, project_name, bucket_name, path_to_manifest_file, store_datas path_in_bucket=path_to_manifest_file, ) - return storage.path.generate_gs_path(bucket_name, path_to_manifest_file) + return gs_path or storage.path.generate_gs_path(bucket_name, path_to_manifest_file) @property def all_datasets_are_in_cloud(self): diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index 7a62a88c2..fa3e4b0f4 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -203,10 +203,10 @@ def test_from_cloud_with_datafile(self): sequence=1, tags={"blah:shah:nah", "blib", "glib"}, ) + gs_path = f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}" + downloaded_datafile = Datafile.from_cloud(project_name, gs_path=gs_path) - downloaded_datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) - - self.assertEqual(downloaded_datafile.path, f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}") + self.assertEqual(downloaded_datafile.path, gs_path) self.assertEqual(downloaded_datafile.id, datafile.id) self.assertEqual(downloaded_datafile.timestamp, datafile.timestamp) self.assertEqual(downloaded_datafile.hash_value, datafile.hash_value) @@ -257,9 +257,11 @@ def test_to_cloud_updates_cloud_metadata(self): datafile, project_name, bucket_name, path_in_bucket, _ = self.create_datafile_in_cloud(cluster=0) datafile.cluster = 3 - datafile.to_cloud(project_name, bucket_name, path_in_bucket) + datafile.to_cloud(project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket) - self.assertEqual(Datafile.from_cloud(project_name, bucket_name, path_in_bucket).cluster, 3) + self.assertEqual( + Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket).cluster, 3 + ) def test_to_cloud_does_not_update_cloud_metadata_if_update_cloud_metadata_is_false(self): """Test that calling Datafile.to_cloud with `update_cloud_metadata=False` doesn't update the cloud metadata.""" @@ -267,16 +269,20 @@ def test_to_cloud_does_not_update_cloud_metadata_if_update_cloud_metadata_is_fal datafile.cluster = 3 with patch("octue.resources.datafile.Datafile.update_cloud_metadata") as mock: - datafile.to_cloud(project_name, bucket_name, path_in_bucket, update_cloud_metadata=False) + datafile.to_cloud( + project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket, update_cloud_metadata=False + ) self.assertFalse(mock.called) - self.assertEqual(Datafile.from_cloud(project_name, bucket_name, path_in_bucket).cluster, 0) + self.assertEqual( + Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket).cluster, 0 + ) def test_to_cloud_does_not_update_metadata_if_no_metadata_change_has_been_made(self): """Test that Datafile.to_cloud does not try to update cloud metadata if no metadata change has been made.""" _, project_name, bucket_name, path_in_bucket, _ = self.create_datafile_in_cloud(cluster=0) - datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) + datafile = Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket) with patch("octue.resources.datafile.Datafile.update_cloud_metadata") as mock: datafile.to_cloud() @@ -296,7 +302,7 @@ def test_to_cloud_works_with_implicit_cloud_location_if_cloud_location_previousl provided. """ _, project_name, bucket_name, path_in_bucket, _ = self.create_datafile_in_cloud() - datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) + datafile = Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket) datafile.to_cloud() def test_to_cloud_does_not_try_to_update_file_if_no_change_has_been_made_locally(self): @@ -312,9 +318,11 @@ def test_update_cloud_metadata(self): _, project_name, bucket_name, path_in_bucket, _ = self.create_datafile_in_cloud() new_datafile = Datafile(path="glib.txt", cluster=32) - new_datafile.update_cloud_metadata(project_name, bucket_name, path_in_bucket) + new_datafile.update_cloud_metadata(project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket) - self.assertEqual(Datafile.from_cloud(project_name, bucket_name, path_in_bucket).cluster, 32) + self.assertEqual( + Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket).cluster, 32 + ) def test_update_cloud_metadata_works_with_implicit_cloud_location_if_cloud_location_previously_provided(self): """Test that datafile.update_metadata works with an implicit cloud location if the cloud location has been @@ -322,11 +330,13 @@ def test_update_cloud_metadata_works_with_implicit_cloud_location_if_cloud_locat """ _, project_name, bucket_name, path_in_bucket, _ = self.create_datafile_in_cloud() - datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) + datafile = Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket) datafile.cluster = 32 datafile.update_cloud_metadata() - self.assertEqual(Datafile.from_cloud(project_name, bucket_name, path_in_bucket).cluster, 32) + self.assertEqual( + Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket).cluster, 32 + ) def test_update_cloud_metadata_raises_error_if_no_cloud_location_provided_and_datafile_not_from_cloud(self): """Test that trying to update a cloud datafile's metadata with no cloud location provided when the datafile was @@ -340,7 +350,7 @@ def test_update_cloud_metadata_raises_error_if_no_cloud_location_provided_and_da def test_get_local_path(self): """Test that a file in the cloud can be temporarily downloaded and its local path returned.""" _, project_name, bucket_name, path_in_bucket, contents = self.create_datafile_in_cloud() - datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) + datafile = Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket) with open(datafile.get_local_path()) as f: self.assertEqual(f.read(), contents) @@ -348,7 +358,7 @@ def test_get_local_path(self): def test_get_local_path_with_cached_file_avoids_downloading_again(self): """Test that attempting to download a cached file avoids downloading it again.""" _, project_name, bucket_name, path_in_bucket, _ = self.create_datafile_in_cloud() - datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) + datafile = Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket) # Download for first time. datafile.get_local_path() @@ -388,7 +398,7 @@ def test_open_with_writing_local_file(self): def test_open_with_reading_cloud_file(self): """Test that a cloud datafile can be opened for reading.""" _, project_name, bucket_name, path_in_bucket, contents = self.create_datafile_in_cloud() - datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) + datafile = Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket) with datafile.open() as f: self.assertEqual(f.read(), contents) @@ -396,7 +406,7 @@ def test_open_with_reading_cloud_file(self): def test_open_with_writing_to_cloud_file(self): """Test that a cloud datafile can be opened for writing and that both the remote and local copies are updated.""" _, project_name, bucket_name, path_in_bucket, original_contents = self.create_datafile_in_cloud() - datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) + datafile = Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket) new_file_contents = "nanana" @@ -499,12 +509,17 @@ def test_from_datafile_as_context_manager(self): new_contents = "Here is the new content." self.assertNotEqual(original_content, new_contents) - with Datafile.from_cloud(project_name, bucket_name, path_in_bucket, mode="w") as (datafile, f): + with Datafile.from_cloud(project_name, bucket_name=bucket_name, datafile_path=path_in_bucket, mode="w") as ( + datafile, + f, + ): datafile.add_tags("blue") f.write(new_contents) # Check that the cloud metadata has been updated. - re_downloaded_datafile = Datafile.from_cloud(project_name, bucket_name, path_in_bucket) + re_downloaded_datafile = Datafile.from_cloud( + project_name, bucket_name=bucket_name, datafile_path=path_in_bucket + ) self.assertTrue("blue" in re_downloaded_datafile.tags) # The file cache must be cleared so the modified cloud file is downloaded. diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index 96660f1df..43de56ad9 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -363,7 +363,7 @@ def test_to_cloud(self): } ) - dataset.to_cloud(project_name, TEST_BUCKET_NAME, output_directory) + dataset.to_cloud(project_name, bucket_name=TEST_BUCKET_NAME, output_directory=output_directory) storage_client = GoogleCloudStorageClient(project_name) diff --git a/tests/resources/test_manifest.py b/tests/resources/test_manifest.py index 3ae3c6acd..0ead268fc 100644 --- a/tests/resources/test_manifest.py +++ b/tests/resources/test_manifest.py @@ -79,7 +79,7 @@ def test_to_cloud(self): manifest.to_cloud( self.TEST_PROJECT_NAME, - TEST_BUCKET_NAME, + bucket_name=TEST_BUCKET_NAME, path_to_manifest_file=storage.path.join("blah", "manifest.json"), ) @@ -118,7 +118,7 @@ def test_to_cloud_without_storing_datasets(self): manifest.to_cloud( self.TEST_PROJECT_NAME, - TEST_BUCKET_NAME, + bucket_name=TEST_BUCKET_NAME, path_to_manifest_file=storage.path.join("my-manifests", "manifest.json"), store_datasets=False, ) @@ -156,7 +156,7 @@ def test_from_cloud(self): manifest = Manifest(datasets=[dataset], keys={"my-dataset": 0}) manifest.to_cloud( self.TEST_PROJECT_NAME, - TEST_BUCKET_NAME, + bucket_name=TEST_BUCKET_NAME, path_to_manifest_file=storage.path.join("my-directory", "manifest.json"), ) From a12f69fbee6aecccccfbe663f1397ba60d76a8f2 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 15:41:25 +0100 Subject: [PATCH 067/103] DOC: Add info on gs_path to docstrings --- octue/cloud/storage/client.py | 24 ++++++++++++++++-------- octue/resources/datafile.py | 10 ++++++++-- octue/resources/dataset.py | 6 ++++-- octue/resources/manifest.py | 6 ++++-- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/octue/cloud/storage/client.py b/octue/cloud/storage/client.py index 32df9500b..26f04ce9a 100644 --- a/octue/cloud/storage/client.py +++ b/octue/cloud/storage/client.py @@ -50,7 +50,8 @@ def create_bucket(self, name, location=None, allow_existing=False, timeout=_DEFA def upload_file( self, local_path, gs_path=None, bucket_name=None, path_in_bucket=None, metadata=None, timeout=_DEFAULT_TIMEOUT ): - """Upload a local file to a Google Cloud bucket at gs:///. + """Upload a local file to a Google Cloud bucket at gs:///. Either (`bucket_name` + and `path_in_bucket`) or `gs_path` must be provided. :param str local_path: :param str|None gs_path: @@ -73,7 +74,7 @@ def upload_from_string( self, string, gs_path=None, bucket_name=None, path_in_bucket=None, metadata=None, timeout=_DEFAULT_TIMEOUT ): """Upload serialised data in string form to a file in a Google Cloud bucket at - gs:///. + gs:///. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. :param str string: :param str|None gs_path: @@ -91,7 +92,8 @@ def upload_from_string( logger.info("Uploaded data to Google Cloud at %r.", blob.public_url) def update_metadata(self, metadata, gs_path=None, bucket_name=None, path_in_bucket=None): - """Update the metadata for the given cloud file. + """Update the metadata for the given cloud file. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must + be provided. :param dict metadata: :param str|None gs_path: @@ -105,7 +107,8 @@ def update_metadata(self, metadata, gs_path=None, bucket_name=None, path_in_buck def download_to_file( self, local_path, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT ): - """Download a file to a file from a Google Cloud bucket at gs:///. + """Download a file to a file from a Google Cloud bucket at gs:///. Either + (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. :param str local_path: :param str|None gs_path: @@ -119,7 +122,8 @@ def download_to_file( logger.info("Downloaded %r from Google Cloud to %r.", blob.public_url, local_path) def download_as_string(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): - """Download a file to a string from a Google Cloud bucket at gs:///. + """Download a file to a string from a Google Cloud bucket at gs:///. Either + (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. :param str|None gs_path: :param str|None bucket_name: @@ -133,7 +137,8 @@ def download_as_string(self, gs_path=None, bucket_name=None, path_in_bucket=None return data.decode() def get_metadata(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): - """Get the metadata of the given file in the given bucket. + """Get the metadata of the given file in the given bucket. Either (`bucket_name` and `path_in_bucket`) or + `gs_path` must be provided. :param str|None gs_path: :param str|None bucket_name: @@ -164,7 +169,8 @@ def get_metadata(self, gs_path=None, bucket_name=None, path_in_bucket=None, time } def delete(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): - """Delete the given file from the given bucket. + """Delete the given file from the given bucket. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must + be provided. :param str|None gs_path: :param str|None bucket_name: @@ -177,7 +183,8 @@ def delete(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_D logger.info("Deleted %r from Google Cloud.", blob.public_url) def scandir(self, gs_path=None, bucket_name=None, directory_path=None, filter=None, timeout=_DEFAULT_TIMEOUT): - """Yield the blobs belonging to the given "directory" in the given bucket. + """Yield the blobs belonging to the given "directory" in the given bucket. Either (`bucket_name` and + `path_in_bucket`) or `gs_path` must be provided. :param str|None gs_path: :param str|None bucket_name: @@ -208,6 +215,7 @@ def _strip_leading_slash(self, path): def _blob(self, gs_path=None, bucket_name=None, path_in_bucket=None): """Instantiate a blob for the given bucket at the given path. Note that this is not synced up with Google Cloud. + Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. :param str|None gs_path: :param str|None bucket_name: diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 2c636280f..adee22a01 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -175,6 +175,8 @@ def from_cloud( Note that a value provided for an attribute in kwargs will override any existing value for the attribute. + Either (`bucket_name` and `datafile_path`) or `gs_path` must be provided. + :param str project_name: :param str|None gs_path: :param str|None bucket_name: @@ -218,7 +220,8 @@ def from_cloud( def to_cloud( self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None, update_cloud_metadata=True ): - """Upload a datafile to Google Cloud Storage. + """Upload a datafile to Google Cloud Storage. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be + provided. :param str|None project_name: :param str|None gs_path: @@ -253,6 +256,7 @@ def to_cloud( def get_cloud_metadata(self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None): """Get the cloud metadata for the datafile, casting the types of the cluster and sequence fields to integer. + Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. :param str|None project_name: :param str|None gs_path: @@ -286,7 +290,8 @@ def get_cloud_metadata(self, project_name=None, gs_path=None, bucket_name=None, self._cloud_metadata = cloud_metadata def update_cloud_metadata(self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None): - """Update the cloud metadata for the datafile. + """Update the cloud metadata for the datafile. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be + provided. :param str|None project_name: :param str|None gs_path: @@ -426,6 +431,7 @@ def _calculate_hash(self): def _get_cloud_location(self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None): """Get the cloud location details for the bucket, allowing the keyword arguments to override any stored values. + Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. :param str|None project_name: :param str|None gs_path: diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index fc5d5f358..aa5e140bb 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -56,7 +56,8 @@ def __len__(self): @classmethod def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_dataset_directory=None): - """Instantiate a Dataset from Google Cloud storage. + """Instantiate a Dataset from Google Cloud storage. Either (`bucket_name` and `path_to_dataset_directory`) or + `gs_path` must be provided. :param str project_name: :param str|None gs_path: @@ -92,7 +93,8 @@ def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_datase ) def to_cloud(self, project_name, gs_path=None, bucket_name=None, output_directory=None): - """Upload a dataset to a cloud location. + """Upload a dataset to a cloud location. Either (`bucket_name` and `output_directory`) or `gs_path` must be + provided. :param str project_name: :param str|None gs_path: diff --git a/octue/resources/manifest.py b/octue/resources/manifest.py index d389cb1fd..051c7b4c9 100644 --- a/octue/resources/manifest.py +++ b/octue/resources/manifest.py @@ -46,7 +46,8 @@ def __init__(self, id=None, logger=None, path=None, datasets=None, keys=None, ** @classmethod def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_manifest_file=None): - """Instantiate a Manifest from Google Cloud storage. + """Instantiate a Manifest from Google Cloud storage. Either (`bucket_name` and `path_to_manifest_file`) or + `gs_path` must be provided. :param str project_name: :param str|None gs_path: @@ -82,7 +83,8 @@ def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_manife ) def to_cloud(self, project_name, gs_path=None, bucket_name=None, path_to_manifest_file=None, store_datasets=True): - """Upload a manifest to a cloud location, optionally uploading its datasets into the same directory. + """Upload a manifest to a cloud location, optionally uploading its datasets into the same directory. Either + (`bucket_name` and `path_to_manifest_file`) or `gs_path` must be provided. :param str project_name: :param str|None gs_path: From d07b80985d4a24834bc3193692de69a039fcb319 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 15:55:51 +0100 Subject: [PATCH 068/103] FIX: Use correct variable name --- octue/cloud/storage/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/octue/cloud/storage/client.py b/octue/cloud/storage/client.py index 26f04ce9a..7a6e93eb9 100644 --- a/octue/cloud/storage/client.py +++ b/octue/cloud/storage/client.py @@ -194,7 +194,7 @@ def scandir(self, gs_path=None, bucket_name=None, directory_path=None, filter=No :yield google.cloud.storage.blob.Blob: """ if gs_path: - bucket_name, path_in_bucket = split_bucket_name_from_gs_path(gs_path) + bucket_name, directory_path = split_bucket_name_from_gs_path(gs_path) bucket = self.client.get_bucket(bucket_or_name=bucket_name) blobs = bucket.list_blobs(timeout=timeout) From b7c55f9e1bac638f3fb424fad08a91710e4373b3 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 16:27:28 +0100 Subject: [PATCH 069/103] TST: Test cloud integrations with gs_path --- tests/cloud/storage/test_client.py | 20 ++++++ tests/resources/test_dataset.py | 103 +++++++++++++++++------------ tests/resources/test_manifest.py | 64 ++++++++++-------- 3 files changed, 117 insertions(+), 70 deletions(-) diff --git a/tests/cloud/storage/test_client.py b/tests/cloud/storage/test_client.py index 3e5ef3308..aa3b97158 100644 --- a/tests/cloud/storage/test_client.py +++ b/tests/cloud/storage/test_client.py @@ -170,6 +170,18 @@ def test_scandir(self): self.assertEqual(len(contents), 1) self.assertEqual(contents[0].name, storage.path.join(directory_path, self.FILENAME)) + def test_scandir_with_gs_path(self): + """Test that Google Cloud storage "directories"' contents can be listed when a GS path is used.""" + directory_path = storage.path.join("my", "path") + path_in_bucket = storage.path.join(directory_path, self.FILENAME) + gs_path = f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}" + + self.storage_client.upload_from_string(string=json.dumps({"height": 32}), gs_path=gs_path) + contents = list(self.storage_client.scandir(gs_path)) + + self.assertEqual(len(contents), 1) + self.assertEqual(contents[0].name, storage.path.join(directory_path, self.FILENAME)) + def test_scandir_with_empty_directory(self): """Test that an empty directory shows as such.""" directory_path = storage.path.join("another", "path") @@ -190,3 +202,11 @@ def test_get_metadata(self): ) self.assertTrue(len(metadata) > 0) + + def test_get_metadata_with_gs_path(self): + """Test that file metadata can be retrieved when a GS path is used.""" + gs_path = f"gs://{TEST_BUCKET_NAME}/{self.FILENAME}" + self.storage_client.upload_from_string(string=json.dumps({"height": 32}), gs_path=gs_path) + + metadata = self.storage_client.get_metadata(gs_path) + self.assertTrue(len(metadata) > 0) diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index 43de56ad9..89164f819 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -300,9 +300,9 @@ def test_is_in_cloud(self): self.assertTrue(Dataset(files=files).all_files_are_in_cloud) def test_from_cloud(self): - """Test that a Dataset in cloud storage can be accessed.""" - project_name = "test-project" - + """Test that a Dataset in cloud storage can be accessed via (`bucket_name`, `output_directory`) and via + `gs_path`. + """ with tempfile.TemporaryDirectory() as temporary_directory: file_0_path = os.path.join(temporary_directory, "file_0.txt") file_1_path = os.path.join(temporary_directory, "file_1.txt") @@ -321,31 +321,37 @@ def test_from_cloud(self): }, ) + project_name = "test-project" dataset.to_cloud(project_name=project_name, bucket_name=TEST_BUCKET_NAME, output_directory="a_directory") - persisted_dataset = Dataset.from_cloud( - project_name=project_name, - bucket_name=TEST_BUCKET_NAME, - path_to_dataset_directory=storage.path.join("a_directory", dataset.name), - ) + bucket_name = TEST_BUCKET_NAME + path_to_dataset_directory = storage.path.join("a_directory", dataset.name) + gs_path = f"gs://{bucket_name}/{path_to_dataset_directory}" + + for location_parameters in ( + {"bucket_name": bucket_name, "path_to_dataset_directory": path_to_dataset_directory, "gs_path": None}, + {"bucket_name": None, "path_to_dataset_directory": None, "gs_path": gs_path}, + ): - self.assertEqual(persisted_dataset.path, f"gs://{TEST_BUCKET_NAME}/a_directory/{dataset.name}") - self.assertEqual(persisted_dataset.id, dataset.id) - self.assertEqual(persisted_dataset.name, dataset.name) - self.assertEqual(persisted_dataset.hash_value, dataset.hash_value) - self.assertEqual(persisted_dataset.tags, dataset.tags) - self.assertEqual({file.name for file in persisted_dataset.files}, {file.name for file in dataset.files}) + persisted_dataset = Dataset.from_cloud( + project_name=project_name, + **location_parameters, + ) + + self.assertEqual(persisted_dataset.path, f"gs://{TEST_BUCKET_NAME}/a_directory/{dataset.name}") + self.assertEqual(persisted_dataset.id, dataset.id) + self.assertEqual(persisted_dataset.name, dataset.name) + self.assertEqual(persisted_dataset.hash_value, dataset.hash_value) + self.assertEqual(persisted_dataset.tags, dataset.tags) + self.assertEqual({file.name for file in persisted_dataset.files}, {file.name for file in dataset.files}) - for file in persisted_dataset: - self.assertEqual(file.path, f"gs://{TEST_BUCKET_NAME}/a_directory/{dataset.name}/{file.name}") + for file in persisted_dataset: + self.assertEqual(file.path, f"gs://{TEST_BUCKET_NAME}/a_directory/{dataset.name}/{file.name}") def test_to_cloud(self): - """Test that a dataset can be uploaded to the cloud, including all its files and a serialised JSON file of the - Datafile instance. + """Test that a dataset can be uploaded to the cloud via (`bucket_name`, `output_directory`) and via `gs_path`, + including all its files and a serialised JSON file of the Datafile instance. """ - project_name = "test-project" - output_directory = "my_datasets" - with tempfile.TemporaryDirectory() as temporary_directory: file_0_path = os.path.join(temporary_directory, "file_0.txt") file_1_path = os.path.join(temporary_directory, "file_1.txt") @@ -363,34 +369,43 @@ def test_to_cloud(self): } ) - dataset.to_cloud(project_name, bucket_name=TEST_BUCKET_NAME, output_directory=output_directory) + project_name = "test-project" + bucket_name = TEST_BUCKET_NAME + output_directory = "my_datasets" + gs_path = storage.path.generate_gs_path(bucket_name, output_directory) - storage_client = GoogleCloudStorageClient(project_name) + for location_parameters in ( + {"bucket_name": bucket_name, "output_directory": output_directory, "gs_path": None}, + {"bucket_name": None, "output_directory": None, "gs_path": gs_path}, + ): + dataset.to_cloud(project_name, **location_parameters) - persisted_file_0 = storage_client.download_as_string( - bucket_name=TEST_BUCKET_NAME, - path_in_bucket=storage.path.join(output_directory, dataset.name, "file_0.txt"), - ) + storage_client = GoogleCloudStorageClient(project_name) - self.assertEqual(persisted_file_0, "[1, 2, 3]") + persisted_file_0 = storage_client.download_as_string( + bucket_name=TEST_BUCKET_NAME, + path_in_bucket=storage.path.join(output_directory, dataset.name, "file_0.txt"), + ) - persisted_file_1 = storage_client.download_as_string( - bucket_name=TEST_BUCKET_NAME, - path_in_bucket=storage.path.join(output_directory, dataset.name, "file_1.txt"), - ) - self.assertEqual(persisted_file_1, "[4, 5, 6]") + self.assertEqual(persisted_file_0, "[1, 2, 3]") - persisted_dataset = json.loads( - storage_client.download_as_string( + persisted_file_1 = storage_client.download_as_string( bucket_name=TEST_BUCKET_NAME, - path_in_bucket=storage.path.join(output_directory, dataset.name, "dataset.json"), + path_in_bucket=storage.path.join(output_directory, dataset.name, "file_1.txt"), ) - ) + self.assertEqual(persisted_file_1, "[4, 5, 6]") - self.assertEqual( - persisted_dataset["files"], - [ - "gs://octue-test-bucket/my_datasets/octue-sdk-python/file_0.txt", - "gs://octue-test-bucket/my_datasets/octue-sdk-python/file_1.txt", - ], - ) + persisted_dataset = json.loads( + storage_client.download_as_string( + bucket_name=TEST_BUCKET_NAME, + path_in_bucket=storage.path.join(output_directory, dataset.name, "dataset.json"), + ) + ) + + self.assertEqual( + persisted_dataset["files"], + [ + "gs://octue-test-bucket/my_datasets/octue-sdk-python/file_0.txt", + "gs://octue-test-bucket/my_datasets/octue-sdk-python/file_1.txt", + ], + ) diff --git a/tests/resources/test_manifest.py b/tests/resources/test_manifest.py index 0ead268fc..79cd00e27 100644 --- a/tests/resources/test_manifest.py +++ b/tests/resources/test_manifest.py @@ -56,7 +56,9 @@ def test_deserialise(self): self.assertEqual(original_dataset.absolute_path, deserialised_dataset.absolute_path) def test_to_cloud(self): - """Test that a manifest can be uploaded to the cloud as a serialised JSON file of the Manifest instance. """ + """Test that a manifest can be uploaded to the cloud as a serialised JSON file of the Manifest instance via + (`bucket_name`, `output_directory`) and via `gs_path`. + """ with tempfile.TemporaryDirectory() as temporary_directory: file_0_path = os.path.join(temporary_directory, "file_0.txt") file_1_path = os.path.join(temporary_directory, "file_1.txt") @@ -77,11 +79,15 @@ def test_to_cloud(self): manifest = Manifest(datasets=[dataset], keys={"my-dataset": 0}) - manifest.to_cloud( - self.TEST_PROJECT_NAME, - bucket_name=TEST_BUCKET_NAME, - path_to_manifest_file=storage.path.join("blah", "manifest.json"), - ) + bucket_name = TEST_BUCKET_NAME + path_to_manifest_file = storage.path.join("blah", "manifest.json") + gs_path = storage.path.generate_gs_path(bucket_name, path_to_manifest_file) + + for location_parameters in ( + {"bucket_name": bucket_name, "path_to_manifest_file": path_to_manifest_file, "gs_path": None}, + {"bucket_name": None, "path_to_manifest_file": None, "gs_path": gs_path}, + ): + manifest.to_cloud(self.TEST_PROJECT_NAME, **location_parameters) persisted_manifest = json.loads( GoogleCloudStorageClient(self.TEST_PROJECT_NAME).download_as_string( @@ -134,7 +140,9 @@ def test_to_cloud_without_storing_datasets(self): self.assertEqual(persisted_manifest["keys"], {"my-dataset": 0}) def test_from_cloud(self): - """Test that a Manifest can be instantiated from the cloud.""" + """Test that a Manifest can be instantiated from the cloud via (`bucket_name`, `output_directory`) and via + `gs_path`. + """ with tempfile.TemporaryDirectory() as temporary_directory: file_0_path = os.path.join(temporary_directory, "file_0.txt") file_1_path = os.path.join(temporary_directory, "file_1.txt") @@ -160,22 +168,26 @@ def test_from_cloud(self): path_to_manifest_file=storage.path.join("my-directory", "manifest.json"), ) - persisted_manifest = Manifest.from_cloud( - project_name=self.TEST_PROJECT_NAME, - bucket_name=TEST_BUCKET_NAME, - path_to_manifest_file=storage.path.join("my-directory", "manifest.json"), - ) - - self.assertEqual(persisted_manifest.path, f"gs://{TEST_BUCKET_NAME}/my-directory/manifest.json") - self.assertEqual(persisted_manifest.id, manifest.id) - self.assertEqual(persisted_manifest.hash_value, manifest.hash_value) - self.assertEqual(persisted_manifest.keys, manifest.keys) - self.assertEqual( - {dataset.name for dataset in persisted_manifest.datasets}, - {dataset.name for dataset in manifest.datasets}, - ) - - for dataset in persisted_manifest.datasets: - self.assertEqual(dataset.path, f"gs://{TEST_BUCKET_NAME}/my-directory/{dataset.name}") - self.assertTrue(len(dataset.files), 2) - self.assertTrue(all(isinstance(file, Datafile) for file in dataset.files)) + bucket_name = TEST_BUCKET_NAME + path_to_manifest_file = storage.path.join("my-directory", "manifest.json") + gs_path = storage.path.generate_gs_path(bucket_name, path_to_manifest_file) + + for location_parameters in ( + {"bucket_name": bucket_name, "path_to_manifest_file": path_to_manifest_file, "gs_path": None}, + {"bucket_name": None, "path_to_manifest_file": None, "gs_path": gs_path}, + ): + persisted_manifest = Manifest.from_cloud(project_name=self.TEST_PROJECT_NAME, **location_parameters) + + self.assertEqual(persisted_manifest.path, f"gs://{TEST_BUCKET_NAME}/my-directory/manifest.json") + self.assertEqual(persisted_manifest.id, manifest.id) + self.assertEqual(persisted_manifest.hash_value, manifest.hash_value) + self.assertEqual(persisted_manifest.keys, manifest.keys) + self.assertEqual( + {dataset.name for dataset in persisted_manifest.datasets}, + {dataset.name for dataset in manifest.datasets}, + ) + + for dataset in persisted_manifest.datasets: + self.assertEqual(dataset.path, f"gs://{TEST_BUCKET_NAME}/my-directory/{dataset.name}") + self.assertTrue(len(dataset.files), 2) + self.assertTrue(all(isinstance(file, Datafile) for file in dataset.files)) From a9b81ff2af12dbffa0f7da434855e92adda3a73c Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 16:28:03 +0100 Subject: [PATCH 070/103] IMP: Add datetime encoding to OctueJSONEncoder --- octue/mixins/serialisable.py | 6 +++++- octue/utils/encoders.py | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/octue/mixins/serialisable.py b/octue/mixins/serialisable.py index 304138ab0..cac0d4db2 100644 --- a/octue/mixins/serialisable.py +++ b/octue/mixins/serialisable.py @@ -78,8 +78,12 @@ def __init__(self): self_as_primitive = {} for name in names_of_attributes_to_serialise: attribute = getattr(self, name, None) + # Serialise sets as sorted list (JSON doesn't support sets). - self_as_primitive[name] = sorted(attribute) if isinstance(attribute, set) else attribute + if isinstance(attribute, set): + self_as_primitive[name] = sorted(attribute) + else: + self_as_primitive[name] = attribute # TODO this conversion backward-and-forward is very inefficient but allows us to use the same encoder for # converting the object to a dict as to strings, which ensures that nested attributes are also cast to diff --git a/octue/utils/encoders.py b/octue/utils/encoders.py index 416ed7701..94b39d19d 100644 --- a/octue/utils/encoders.py +++ b/octue/utils/encoders.py @@ -1,3 +1,5 @@ +import datetime + from twined.utils import TwinedEncoder @@ -10,5 +12,8 @@ def default(self, obj): if hasattr(obj, "serialise"): return obj.serialise() + if isinstance(obj, datetime.datetime): + return str(obj) + # Otherwise let the base class default method raise the TypeError return TwinedEncoder.default(self, obj) From bfccca77791ddc3f6539a1c8550b1fded9a0919c Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 20:25:36 +0100 Subject: [PATCH 071/103] IMP: Support non-English characters in case-insensitive filtering --- octue/mixins/filterable.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index c4c1a28b9..2c0a0b964 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -28,8 +28,8 @@ } ICONTAINS_FILTER_ACTIONS = { - "icontains": lambda item, filter_value: filter_value.lower() in item.lower(), - "not_icontains": lambda item, filter_value: filter_value.lower() not in item.lower(), + "icontains": lambda item, filter_value: filter_value.casefold() in item.casefold(), + "not_icontains": lambda item, filter_value: filter_value.casefold() not in item.casefold(), } @@ -37,8 +37,8 @@ TYPE_FILTERS = { "bool": IS_FILTER_ACTIONS, "str": { - "iequals": lambda item, filter_value: filter_value.lower() == item.lower(), - "not_iequals": lambda item, filter_value: filter_value.lower() != item.lower(), + "iequals": lambda item, filter_value: filter_value.casefold() == item.casefold(), + "not_iequals": lambda item, filter_value: filter_value.casefold() != item.casefold(), "starts_with": lambda item, filter_value: item.startswith(filter_value), "not_starts_with": lambda item, filter_value: not item.startswith(filter_value), "ends_with": lambda item, filter_value: item.endswith(filter_value), From 05e6d5d801ec15837cd44a277815c56b77a771c1 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 20:37:31 +0100 Subject: [PATCH 072/103] REF: Base filter containers on new FilterContainer abstract class --- octue/resources/filter_containers.py | 53 ++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 28c2148b4..7c2ec1c2e 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -1,3 +1,4 @@ +from abc import ABC from collections import UserDict from octue import exceptions @@ -5,12 +6,37 @@ from octue.utils.objects import get_nested_attribute +class FilterContainer(ABC): + def filter(self, ignore_items_without_attribute=True, **kwargs): + """Return a new instance of the container containing only the `Filterable`s to which the given filter criteria + are `True`. + + :param bool ignore_items_without_attribute: + :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the + values to filter for + :return octue.resources.filter_containers.FilterContainer: + """ + pass + + def order_by(self, attribute_name, reverse=False): + """Order the `Filterable`s in the container by an attribute with the given name, returning them as a new + `FilterList` regardless of the type of filter container begun with. + + :param str attribute_name: + :param bool reverse: + :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the + container's members + :return FilterList: + """ + pass + + def _filter(self, ignore_items_without_attribute=True, **kwargs): """Return a new instance containing only the Filterables to which the given filter criteria apply. :param bool ignore_items_without_attribute: - :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whose value is the value - to filter for + :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the + values to filter for :return octue.resources.filter_containers.FilterSet: """ if any(not isinstance(item, Filterable) for item in self): @@ -35,8 +61,14 @@ def _filter(self, ignore_items_without_attribute=True, **kwargs): def _order_by(self, attribute_name, reverse=False): - """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList (not a - FilterSet. + """Order the `Filterable`s in the container by an attribute with the given name, returning them as a new + `FilterList` regardless of the type of filter container begun with. + + :param str attribute_name: + :param bool reverse: + :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the + container's members + :return FilterList: """ try: return FilterList(sorted(self, key=lambda item: getattr(item, attribute_name), reverse=reverse)) @@ -46,24 +78,24 @@ def _order_by(self, attribute_name, reverse=False): ) -class FilterSet(set): +class FilterSet(FilterContainer, set): filter = _filter order_by = _order_by -class FilterList(list): +class FilterList(FilterContainer, list): filter = _filter order_by = _order_by -class FilterDict(UserDict): +class FilterDict(FilterContainer, UserDict): def filter(self, ignore_items_without_attribute=True, **kwargs): """Return a new instance containing only the Filterables for which the given filter criteria apply are satisfied. :param bool ignore_items_without_attribute: - :param {str: any} kwargs: a single keyword argument whose key is the name of the filter and whose value is the - value to filter for + :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the + values to filter for :return FilterDict: """ if any(not isinstance(item, Filterable) for item in self.values()): @@ -87,8 +119,7 @@ def filter(self, ignore_items_without_attribute=True, **kwargs): return self.filter(raise_error_if_filter_is_invalid, **{filter_name: filter_value}).filter(**kwargs) def order_by(self, attribute_name, reverse=False): - """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList (not a - FilterSet. + """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList. :param str attribute_name: a dot-separated (optionally nested) attribute name e.g. "a", "a.b", "a.b.c" :param bool reverse: From 044649832d37fd11e22d0e4a4bda02f522fd2f1a Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 20:51:35 +0100 Subject: [PATCH 073/103] IMP: Return items when ordering FilterDict rather than just values --- octue/resources/filter_containers.py | 2 +- tests/resources/test_filter_containers.py | 32 ++++++++++++++++++++--- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 7c2ec1c2e..cc767d264 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -129,7 +129,7 @@ def order_by(self, attribute_name, reverse=False): """ try: return FilterList( - sorted(self.values(), key=lambda item: get_nested_attribute(item, attribute_name), reverse=reverse) + sorted(self.items(), key=lambda item: get_nested_attribute(item[1], attribute_name), reverse=reverse) ) except AttributeError: diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index 9c859ae9e..cd94cda25 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -177,26 +177,50 @@ def test_order_by_with_string_attribute(self): """Test that ordering a FilterDict by a string attribute returns an appropriately ordered FilterList.""" self.assertEqual( self.ANIMALS.order_by("name"), - FilterList((self.ANIMALS["cat"], self.ANIMALS["another_dog"], self.ANIMALS["dog"])), + FilterList( + ( + ("cat", self.ANIMALS["cat"]), + ("another_dog", self.ANIMALS["another_dog"]), + ("dog", self.ANIMALS["dog"]), + ) + ), ) def test_order_by_with_int_attribute(self): """ Test ordering a FilterDict by an integer attribute returns an appropriately ordered FilterList. """ self.assertEqual( self.ANIMALS.order_by("age"), - FilterList((self.ANIMALS["cat"], self.ANIMALS["dog"], self.ANIMALS["another_dog"])), + FilterList( + ( + ("cat", self.ANIMALS["cat"]), + ("dog", self.ANIMALS["dog"]), + ("another_dog", self.ANIMALS["another_dog"]), + ) + ), ) def test_order_by_list_attribute(self): """Test that ordering by list attributes orders members alphabetically by the first element of each list.""" self.assertEqual( self.ANIMALS.order_by("previous_names"), - FilterList((self.ANIMALS["dog"], self.ANIMALS["another_dog"], self.ANIMALS["cat"])), + FilterList( + ( + ("dog", self.ANIMALS["dog"]), + ("another_dog", self.ANIMALS["another_dog"]), + ("cat", self.ANIMALS["cat"]), + ) + ), ) def test_order_by_in_reverse(self): """ Test ordering in reverse works correctly. """ self.assertEqual( self.ANIMALS.order_by("age", reverse=True), - FilterList((self.ANIMALS["another_dog"], self.ANIMALS["dog"], self.ANIMALS["cat"])), + FilterList( + ( + ("another_dog", self.ANIMALS["another_dog"]), + ("dog", self.ANIMALS["dog"]), + ("cat", self.ANIMALS["cat"]), + ) + ), ) From 2b56d4409bcb0be4153144e48dc2096b0e739b2e Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 21:02:14 +0100 Subject: [PATCH 074/103] DOC: Update filter containers documentation --- docs/source/filter_containers.rst | 70 +++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/docs/source/filter_containers.rst b/docs/source/filter_containers.rst index a4087fd34..304cf9ba1 100644 --- a/docs/source/filter_containers.rst +++ b/docs/source/filter_containers.rst @@ -4,43 +4,61 @@ Filter containers ================= -A filter container is just a regular python container that has some extra methods for filtering or ordering its +A filter container is just a regular python container that has some extra methods for filtering and ordering its elements. It has the same interface (i.e. attributes and methods) as the primitive python type it inherits from, with these extra methods: - ``filter`` - ``order_by`` -There are two types of filter containers currently implemented: +There are three types of filter containers currently implemented: - ``FilterSet`` - ``FilterList`` +- ``FilterDict`` -``FilterSets`` are currently used in: +``FilterSets`` are currently used in ``Dataset.files`` to store ``Datafiles`` and make them filterable, which is useful +for dealing with a large number of datasets, while ``FilterList`` is returned when ordering any filter container. -- ``Dataset.files`` to store ``Datafiles`` -- ``Labelset.labels`` to store ``Labels`` - -You can see filtering in action on the files of a ``Dataset`` :doc:`here `. +You can see an example of filtering of a ``Dataset``'s files :doc:`here `. --------- Filtering --------- -Filters are named as ``"__"``, and any attribute of a member of the -``FilterSet`` whose type or interface is supported can be filtered. +Key points: + +* Any attribute of a member of a filter container whose type or interface is supported can be used when filtering +* Filters are named as ``"__"`` +* Multiple filters can be specified at once for chained filtering +* ```` can be a single attribute name or a double-underscore-separated string of nested attribute names +* Nested attribute names work for real attributes as well as dictionary keys (in any combination and to any depth) .. code-block:: python filter_set = FilterSet( - {Datafile(timestamp=time.time(), path="my_file.csv"), Datafile(timestamp=time.time(), path="your_file.txt"), Datafile(timestamp=time.time(), path="another_file.csv")} + { + Datafile(path="my_file.csv", cluster=0, tags={"manufacturer": "Vestas"}), + Datafile(path="your_file.txt", cluster=1, tags={"manufacturer": "Vergnet"}), + Datafile(path="another_file.csv", cluster=2, tags={"manufacturer": "Enercon"}) + } ) - filter_set.filter(filter_name="name__ends_with", filter_value=".csv") + # Single filter, non-nested attribute. + filter_set.filter(name__ends_with=".csv") >>> , })> -The following filters are implemented for the following types: + # Two filters, non-nested attributes. + filter_set.filter(name__ends_with=".csv", cluster__gt=1) + >>> })> + + # Single filter, nested attribute. + filter_set.filter(tags__manufacturer__startswith("V")) + >>> , })> + + +These filters are currently available for the following types: - ``bool``: @@ -73,12 +91,14 @@ The following filters are implemented for the following types: * ``is`` * ``is_not`` -- ``Labelset``: +- ``LabelSet``: * ``is`` * ``is_not`` * ``equals`` * ``not_equals`` + * ``contains`` + * ``not_contains`` * ``any_label_contains`` * ``not_any_label_contains`` * ``any_label_starts_with`` @@ -87,7 +107,6 @@ The following filters are implemented for the following types: * ``not_any_label_ends_with`` - Additionally, these filters are defined for the following *interfaces* (duck-types). : - Numbers: @@ -118,14 +137,31 @@ list of filters. -------- Ordering -------- -As sets are inherently orderless, ordering a ``FilterSet`` results in a new ``FilterList``, which has the same extra -methods and behaviour as a ``FilterSet``, but is based on the ``list`` type instead - meaning it can be ordered and -indexed etc. A ``FilterSet`` or ``FilterList`` can be ordered by any of the attributes of its members: +As sets and dictionaries are inherently orderless, ordering any filter container results in a new ``FilterList``, which +has the same methods and behaviour but is based on ``list`` instead, meaning it can be ordered and indexed etc. A +filter container can be ordered by any of the attributes of its members: .. code-block:: python filter_set.order_by("name") >>> , , ])> + filter_set.order_by("cluster") + >>> , , ])> + The ordering can also be carried out in reverse (i.e. descending order) by passing ``reverse=True`` as a second argument to the ``order_by`` method. + + +----------- +FilterDicts +----------- +The keys of a ``FilterDict`` can be anything, but each value must be a ``Filterable``. Hence, a ``FilterDict`` is +filtered and ordered by its values' attributes; when ordering, its items (key-value tuples) are returned in a +``FilterList``. + +----------------------- +Using for your own data +----------------------- +If using filter containers for your own data, all the members must inherit from ``octue.mixins.filterable.Filterable`` +to be filterable and orderable. From 55035fc366762dcaa2a231c17ab3ecb2b7e538f3 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 21:18:17 +0100 Subject: [PATCH 075/103] DOC: Update other documentation skip_ci_tests --- docs/source/datafile.rst | 4 ++-- docs/source/dataset.rst | 19 +++++++++++-------- docs/source/filter_containers.rst | 6 +++--- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/docs/source/datafile.rst b/docs/source/datafile.rst index 32703245a..42a11880e 100644 --- a/docs/source/datafile.rst +++ b/docs/source/datafile.rst @@ -44,7 +44,7 @@ Example A bucket_name = "my-bucket", datafile_path = "path/to/data.csv" - with Datafile.from_cloud(project_name, bucket_name, datafile_path, mode="r") as datafile, f: + with Datafile.from_cloud(project_name, bucket_name, datafile_path, mode="r") as (datafile, f): data = f.read() new_metadata = metadata_calculating_function(data) @@ -129,7 +129,7 @@ For creating new data in a new local file: labels = {"Vestas"} - with Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags, labels=labels, mode="w") as datafile, f: + with Datafile(path="path/to/local/file.dat", sequence=sequence, tags=tags, labels=labels, mode="w") as (datafile, f): f.write("This is some cleaned data.") datafile.to_cloud(project_name="my-project", bucket_name="my-bucket", path_in_bucket="path/to/data.dat") diff --git a/docs/source/dataset.rst b/docs/source/dataset.rst index bdc770168..f41c979ea 100644 --- a/docs/source/dataset.rst +++ b/docs/source/dataset.rst @@ -11,7 +11,7 @@ A ``Dataset`` contains any number of ``Datafiles`` along with the following meta - ``labels`` The files are stored in a ``FilterSet``, meaning they can be easily filtered according to any attribute of the -:doc:`Datafile ` instances it contains. +:doc:`Datafile ` instances contained. -------------------------------- @@ -24,23 +24,26 @@ You can filter a ``Dataset``'s files as follows: dataset = Dataset( files=[ - Datafile(timestamp=time.time(), path="path-within-dataset/my_file.csv", labels="one a2 b3 all"), - Datafile(timestamp=time.time(), path="path-within-dataset/your_file.txt", labels="two a2 b3 all"), - Datafile(timestamp=time.time(), path="path-within-dataset/another_file.csv", labels="three all"), + Datafile(path="path-within-dataset/my_file.csv", labels=["one", "a", "b" "all"]), + Datafile(path="path-within-dataset/your_file.txt", labels=["two", "a", "b", "all"), + Datafile(path="path-within-dataset/another_file.csv", labels=["three", "all"]), ] ) - dataset.files.filter(filter_name="name__ends_with", filter_value=".csv") + dataset.files.filter(name__ends_with=".csv") >>> , })> - dataset.files.filter("labels__contains", filter_value="a:2") + dataset.files.filter(labels__contains="a") >>> , })> -You can also chain filters indefinitely: +You can also chain filters indefinitely, or specify them all at the same time: .. code-block:: python - dataset.files.filter(filter_name="name__ends_with", filter_value=".csv").filter("labels__contains", filter_value="a2") + dataset.files.filter(name__ends_with=".csv").filter(labels__contains="a") + >>> })> + + dataset.files.filter(name__ends_with=".csv", labels__contains="a") >>> })> Find out more about ``FilterSets`` :doc:`here `, including all the possible filters available for each type of object stored on diff --git a/docs/source/filter_containers.rst b/docs/source/filter_containers.rst index 304cf9ba1..59f988c6f 100644 --- a/docs/source/filter_containers.rst +++ b/docs/source/filter_containers.rst @@ -153,9 +153,9 @@ The ordering can also be carried out in reverse (i.e. descending order) by passi to the ``order_by`` method. ------------ -FilterDicts ------------ +-------------- +``FilterDict`` +-------------- The keys of a ``FilterDict`` can be anything, but each value must be a ``Filterable``. Hence, a ``FilterDict`` is filtered and ordered by its values' attributes; when ordering, its items (key-value tuples) are returned in a ``FilterList``. From d0b48dc99f7180c50c3dcaa29486a63bea50c0fa Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 25 May 2021 21:52:41 +0100 Subject: [PATCH 076/103] CLN: Remove unnecessary pass statements --- octue/resources/filter_containers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index cc767d264..01d4fe34d 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -16,7 +16,6 @@ def filter(self, ignore_items_without_attribute=True, **kwargs): values to filter for :return octue.resources.filter_containers.FilterContainer: """ - pass def order_by(self, attribute_name, reverse=False): """Order the `Filterable`s in the container by an attribute with the given name, returning them as a new @@ -28,7 +27,6 @@ def order_by(self, attribute_name, reverse=False): container's members :return FilterList: """ - pass def _filter(self, ignore_items_without_attribute=True, **kwargs): From d9f134bcaae290fb93f2ee714cd83404d23999dd Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 31 May 2021 20:52:57 +0100 Subject: [PATCH 077/103] IMP: Add datetime filters and in-range filters --- octue/mixins/filterable.py | 41 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index a6b66d468..2d97615e7 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -31,6 +31,11 @@ "not_icontains": lambda item, filter_value: filter_value.lower() not in item.lower(), } +IN_RANGE_FILTER_ACTIONS = { + "in_range": lambda item, filter_value: filter_value[0] <= item <= filter_value[1], + "not_in_range": lambda item, filter_value: item < filter_value[0] or item > filter_value[1], +} + # Filters for specific types e.g. list or int. TYPE_FILTERS = { @@ -43,12 +48,39 @@ "ends_with": lambda item, filter_value: item.endswith(filter_value), "not_ends_with": lambda item, filter_value: not item.endswith(filter_value), **EQUALS_FILTER_ACTIONS, - **COMPARISON_FILTER_ACTIONS, **IS_FILTER_ACTIONS, + **COMPARISON_FILTER_ACTIONS, **CONTAINS_FILTER_ACTIONS, **ICONTAINS_FILTER_ACTIONS, + **IN_RANGE_FILTER_ACTIONS, }, "NoneType": IS_FILTER_ACTIONS, + "datetime": { + **EQUALS_FILTER_ACTIONS, + **IS_FILTER_ACTIONS, + **COMPARISON_FILTER_ACTIONS, + **IN_RANGE_FILTER_ACTIONS, + "year_equals": lambda item, filter_value: item.year == filter_value, + "year_in": lambda item, filter_value: item.year in filter_value, + "month_equals": lambda item, filter_value: item.month == filter_value, + "month_in": lambda item, filter_value: item.month in filter_value, + "day_equals": lambda item, filter_value: item.day == filter_value, + "day_in": lambda item, filter_value: item.day in filter_value, + "weekday_equals": lambda item, filter_value: item.weekday() == filter_value, + "weekday_in": lambda item, filter_value: item.weekday() in filter_value, + "iso_weekday_equals": lambda item, filter_value: item.isoweekday() == filter_value, + "iso_weekday_in": lambda item, filter_value: item.isoweekday() in filter_value, + "time_equals": lambda item, filter_value: item.time() == filter_value, + "time_in": lambda item, filter_value: item.time() in filter_value, + "hour_equals": lambda item, filter_value: item.hour == filter_value, + "hour_in": lambda item, filter_value: item.hour in filter_value, + "minute_equals": lambda item, filter_value: item.minute == filter_value, + "minute_in": lambda item, filter_value: item.minute in filter_value, + "second_equals": lambda item, filter_value: item.second == filter_value, + "second_in": lambda item, filter_value: item.second in filter_value, + "in_date_range": lambda item, filter_value: filter_value[0] <= item.date() <= filter_value[1], + "in_time_range": lambda item, filter_value: filter_value[0] <= item.time() <= filter_value[1], + }, "TagSet": { "any_tag_contains": lambda item, filter_value: item.any_tag_contains(filter_value), "not_any_tag_contains": lambda item, filter_value: not item.any_tag_contains(filter_value), @@ -64,7 +96,12 @@ # Filters for interfaces e.g. iterables or numbers. INTERFACE_FILTERS = { - numbers.Number: {**EQUALS_FILTER_ACTIONS, **COMPARISON_FILTER_ACTIONS, **IS_FILTER_ACTIONS}, + numbers.Number: { + **EQUALS_FILTER_ACTIONS, + **COMPARISON_FILTER_ACTIONS, + **IS_FILTER_ACTIONS, + **IN_RANGE_FILTER_ACTIONS, + }, collections.abc.Iterable: { **EQUALS_FILTER_ACTIONS, **CONTAINS_FILTER_ACTIONS, From e3fbfa32225cb92b9453de324c03f12de1b8ab9e Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 31 May 2021 20:53:28 +0100 Subject: [PATCH 078/103] TST: Test in-range filters --- tests/mixins/test_filterable.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index 14242c99c..7021966fc 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -81,6 +81,10 @@ def test_str_filters(self): self.assertFalse(filterable_thing.satisfies("name__gt", "Noel")) self.assertTrue(filterable_thing.satisfies("name__gte", "Michael")) self.assertFalse(filterable_thing.satisfies("name__gte", "Noel")) + self.assertTrue(filterable_thing.satisfies("name__in_range", ("Amy", "Zoe"))) + self.assertFalse(filterable_thing.satisfies("name__in_range", ("Noel", "Peter"))) + self.assertTrue(filterable_thing.satisfies("name__not_in_range", ("Noel", "Peter"))) + self.assertFalse(filterable_thing.satisfies("name__not_in_range", ("Amy", "Zoe"))) def test_none_filters(self): """ Test that the None filters work as expected. """ @@ -110,6 +114,10 @@ def test_number_filters_with_integers_and_floats(self): self.assertFalse(filterable_thing.satisfies("age__is", 63)) self.assertTrue(filterable_thing.satisfies("age__is_not", 63)) self.assertFalse(filterable_thing.satisfies("age__is_not", age)) + self.assertTrue(filterable_thing.satisfies("age__in_range", (0, 10))) + self.assertFalse(filterable_thing.satisfies("age__in_range", (0, 3))) + self.assertTrue(filterable_thing.satisfies("age__not_in_range", (0, 3))) + self.assertFalse(filterable_thing.satisfies("age__not_in_range", (0, 10))) def test_iterable_filters(self): """ Test that the iterable filters work as expected with lists, sets, and tuples. """ From f6c774c06c2e4c1f6549a505fa882dd75ef78b00 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 31 May 2021 21:29:41 +0100 Subject: [PATCH 079/103] TST: Test some of the datetime filters --- tests/mixins/test_filterable.py | 55 ++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index 7021966fc..f16243647 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -1,3 +1,5 @@ +from datetime import datetime + from octue import exceptions from octue.mixins.filterable import Filterable from octue.resources.tag import TagSet @@ -5,12 +7,13 @@ class FilterableSubclass(Filterable): - def __init__(self, name=None, is_alive=None, iterable=None, age=None, owner=None): + def __init__(self, name=None, is_alive=None, iterable=None, age=None, owner=None, timestamp=None): self.name = name self.is_alive = is_alive self.iterable = iterable self.age = age self.owner = owner + self.timestamp = timestamp class TestFilterable(BaseTestCase): @@ -132,6 +135,56 @@ def test_iterable_filters(self): self.assertTrue(filterable_thing.satisfies("iterable__is_not", None)) self.assertFalse(filterable_thing.satisfies("iterable__is_not", iterable)) + def test_datetime_filters(self): + my_datetime = datetime(2000, 1, 1) + filterable_thing = FilterableSubclass(timestamp=my_datetime) + self.assertTrue(filterable_thing.satisfies("timestamp__equals", my_datetime)) + self.assertFalse(filterable_thing.satisfies("timestamp__equals", datetime(2, 2, 2))) + self.assertTrue(filterable_thing.satisfies("timestamp__not_equals", datetime(2, 2, 2))) + self.assertFalse(filterable_thing.satisfies("timestamp__not_equals", my_datetime)) + self.assertTrue(filterable_thing.satisfies("timestamp__is", my_datetime)) + self.assertFalse(filterable_thing.satisfies("timestamp__is", datetime(2, 2, 2))) + self.assertTrue(filterable_thing.satisfies("timestamp__is_not", datetime(2, 2, 2))) + self.assertFalse(filterable_thing.satisfies("timestamp__is_not", my_datetime)) + self.assertTrue(filterable_thing.satisfies("timestamp__gt", datetime(1900, 1, 2))) + self.assertFalse(filterable_thing.satisfies("timestamp__gt", datetime(3000, 1, 2))) + self.assertTrue(filterable_thing.satisfies("timestamp__gte", my_datetime)) + self.assertFalse(filterable_thing.satisfies("timestamp__gte", datetime(3000, 1, 2))) + self.assertTrue(filterable_thing.satisfies("timestamp__lt", datetime(3000, 1, 2))) + self.assertFalse(filterable_thing.satisfies("timestamp__lt", datetime(1990, 1, 2))) + self.assertTrue(filterable_thing.satisfies("timestamp__lte", my_datetime)) + self.assertFalse(filterable_thing.satisfies("timestamp__lte", datetime(1900, 1, 2))) + self.assertTrue(filterable_thing.satisfies("timestamp__in_range", (datetime(1900, 1, 2), datetime(3000, 1, 2)))) + self.assertFalse( + filterable_thing.satisfies("timestamp__in_range", (datetime(2100, 1, 2), datetime(3000, 1, 2))) + ) + self.assertTrue( + filterable_thing.satisfies("timestamp__not_in_range", (datetime(2100, 1, 2), datetime(3000, 1, 2))) + ) + self.assertFalse( + filterable_thing.satisfies("timestamp__not_in_range", (datetime(1900, 1, 2), datetime(3000, 1, 2))) + ) + self.assertTrue(filterable_thing.satisfies("timestamp__year_equals", 2000)) + self.assertFalse(filterable_thing.satisfies("timestamp__year_equals", 3000)) + self.assertTrue(filterable_thing.satisfies("timestamp__year_in", {2000, 3000, 4000})) + self.assertFalse(filterable_thing.satisfies("timestamp__year_in", {3000, 4000})) + self.assertTrue(filterable_thing.satisfies("timestamp__month_equals", 1)) + self.assertFalse(filterable_thing.satisfies("timestamp__month_equals", 9)) + self.assertTrue(filterable_thing.satisfies("timestamp__month_in", {1, 2, 3})) + self.assertFalse(filterable_thing.satisfies("timestamp__month_in", {2, 3})) + self.assertTrue(filterable_thing.satisfies("timestamp__day_equals", 1)) + self.assertFalse(filterable_thing.satisfies("timestamp__day_equals", 2)) + self.assertTrue(filterable_thing.satisfies("timestamp__day_in", {1, 2, 3})) + self.assertFalse(filterable_thing.satisfies("timestamp__day_in", {2, 3})) + self.assertTrue(filterable_thing.satisfies("timestamp__weekday_equals", 5)) + self.assertFalse(filterable_thing.satisfies("timestamp__weekday_equals", 3)) + self.assertTrue(filterable_thing.satisfies("timestamp__weekday_in", {5, 6, 7})) + self.assertFalse(filterable_thing.satisfies("timestamp__weekday_in", {6, 7})) + self.assertTrue(filterable_thing.satisfies("timestamp__iso_weekday_equals", 6)) + self.assertFalse(filterable_thing.satisfies("timestamp__iso_weekday_equals", 4)) + self.assertTrue(filterable_thing.satisfies("timestamp__iso_weekday_in", {5, 6, 7})) + self.assertFalse(filterable_thing.satisfies("timestamp__iso_weekday_in", {7, 8})) + def test_tag_set_filters(self): """ Test the filters for TagSet. """ filterable_thing = FilterableSubclass(iterable=TagSet({"fred", "charlie"})) From 3f4af3285754440923eb62007174b91183c3ac1c Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 31 May 2021 22:25:27 +0100 Subject: [PATCH 080/103] TST: Test rest of datetime filters --- tests/mixins/test_filterable.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/mixins/test_filterable.py b/tests/mixins/test_filterable.py index f16243647..f1e8ed902 100644 --- a/tests/mixins/test_filterable.py +++ b/tests/mixins/test_filterable.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import date, datetime, time from octue import exceptions from octue.mixins.filterable import Filterable @@ -184,6 +184,24 @@ def test_datetime_filters(self): self.assertFalse(filterable_thing.satisfies("timestamp__iso_weekday_equals", 4)) self.assertTrue(filterable_thing.satisfies("timestamp__iso_weekday_in", {5, 6, 7})) self.assertFalse(filterable_thing.satisfies("timestamp__iso_weekday_in", {7, 8})) + self.assertTrue(filterable_thing.satisfies("timestamp__time_equals", time(0, 0, 0))) + self.assertFalse(filterable_thing.satisfies("timestamp__time_equals", time(1, 2, 3))) + self.assertTrue(filterable_thing.satisfies("timestamp__hour_equals", 0)) + self.assertFalse(filterable_thing.satisfies("timestamp__hour_equals", 1)) + self.assertTrue(filterable_thing.satisfies("timestamp__hour_in", {0, 1, 2})) + self.assertFalse(filterable_thing.satisfies("timestamp__hour_in", {1, 2})) + self.assertTrue(filterable_thing.satisfies("timestamp__minute_equals", 0)) + self.assertFalse(filterable_thing.satisfies("timestamp__minute_equals", 1)) + self.assertTrue(filterable_thing.satisfies("timestamp__minute_in", {0, 1, 2})) + self.assertFalse(filterable_thing.satisfies("timestamp__minute_in", {1, 2})) + self.assertTrue(filterable_thing.satisfies("timestamp__second_equals", 0)) + self.assertFalse(filterable_thing.satisfies("timestamp__second_equals", 1)) + self.assertTrue(filterable_thing.satisfies("timestamp__second_in", {0, 1, 2})) + self.assertFalse(filterable_thing.satisfies("timestamp__second_in", {1, 2})) + self.assertTrue(filterable_thing.satisfies("timestamp__in_date_range", (date(1000, 1, 4), date(3000, 7, 10)))) + self.assertFalse(filterable_thing.satisfies("timestamp__in_date_range", (date(2000, 1, 4), date(3000, 7, 10)))) + self.assertTrue(filterable_thing.satisfies("timestamp__in_time_range", (time(0, 0, 0), time(13, 2, 22)))) + self.assertFalse(filterable_thing.satisfies("timestamp__in_time_range", (time(0, 0, 1), time(13, 2, 22)))) def test_tag_set_filters(self): """ Test the filters for TagSet. """ From e7d5a2afbe2ced08c031aab637d19c436435984f Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Mon, 31 May 2021 22:54:20 +0100 Subject: [PATCH 081/103] REF: Generate not filters automatically --- octue/mixins/filterable.py | 114 ++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/octue/mixins/filterable.py b/octue/mixins/filterable.py index 2d97615e7..327f38bb8 100644 --- a/octue/mixins/filterable.py +++ b/octue/mixins/filterable.py @@ -4,36 +4,42 @@ from octue import exceptions -IS_FILTER_ACTIONS = { - "is": lambda item, filter_value: item is filter_value, - "is_not": lambda item, filter_value: item is not filter_value, -} +def generate_complementary_filters(name, func): + """Use a filter to generate its complementary filter, then return them together mapped to their names in a + dictionary. The complementary filter is named f"not_{name}" or, if the name is "is", "is_not". -EQUALS_FILTER_ACTIONS = { - "equals": lambda item, filter_value: filter_value == item, - "not_equals": lambda item, filter_value: filter_value != item, -} + :param str name: + :param callable func: + :return dict: + """ + filter_action = {name: func} -COMPARISON_FILTER_ACTIONS = { - "lt": lambda item, filter_value: item < filter_value, - "lte": lambda item, filter_value: item <= filter_value, - "gt": lambda item, filter_value: item > filter_value, - "gte": lambda item, filter_value: item >= filter_value, -} + if name == "is": + not_filter_name = "is_not" + else: + not_filter_name = f"not_{name}" -CONTAINS_FILTER_ACTIONS = { - "contains": lambda item, filter_value: filter_value in item, - "not_contains": lambda item, filter_value: filter_value not in item, -} + not_filter_action = { + not_filter_name: lambda item, value: not action(item, value) for name, action in filter_action.items() + } -ICONTAINS_FILTER_ACTIONS = { - "icontains": lambda item, filter_value: filter_value.lower() in item.lower(), - "not_icontains": lambda item, filter_value: filter_value.lower() not in item.lower(), -} + return {**filter_action, **not_filter_action} + + +IS_FILTER_ACTIONS = generate_complementary_filters("is", lambda item, value: item is value) +EQUALS_FILTER_ACTIONS = generate_complementary_filters("equals", lambda item, value: value == item) +CONTAINS_FILTER_ACTIONS = generate_complementary_filters("contains", lambda item, value: value in item) +IN_RANGE_FILTER_ACTIONS = generate_complementary_filters("in_range", lambda item, value: value[0] <= item <= value[1]) -IN_RANGE_FILTER_ACTIONS = { - "in_range": lambda item, filter_value: filter_value[0] <= item <= filter_value[1], - "not_in_range": lambda item, filter_value: item < filter_value[0] or item > filter_value[1], +ICONTAINS_FILTER_ACTIONS = generate_complementary_filters( + "icontains", lambda item, value: value.lower() in item.lower() +) + +COMPARISON_FILTER_ACTIONS = { + "lt": lambda item, value: item < value, + "lte": lambda item, value: item <= value, + "gt": lambda item, value: item > value, + "gte": lambda item, value: item >= value, } @@ -41,12 +47,9 @@ TYPE_FILTERS = { "bool": IS_FILTER_ACTIONS, "str": { - "iequals": lambda item, filter_value: filter_value.lower() == item.lower(), - "not_iequals": lambda item, filter_value: filter_value.lower() != item.lower(), - "starts_with": lambda item, filter_value: item.startswith(filter_value), - "not_starts_with": lambda item, filter_value: not item.startswith(filter_value), - "ends_with": lambda item, filter_value: item.endswith(filter_value), - "not_ends_with": lambda item, filter_value: not item.endswith(filter_value), + **generate_complementary_filters("iequals", lambda item, value: value.lower() == item.lower()), + **generate_complementary_filters("starts_with", lambda item, value: item.startswith(value)), + **generate_complementary_filters("ends_with", lambda item, value: item.endswith(value)), **EQUALS_FILTER_ACTIONS, **IS_FILTER_ACTIONS, **COMPARISON_FILTER_ACTIONS, @@ -60,34 +63,31 @@ **IS_FILTER_ACTIONS, **COMPARISON_FILTER_ACTIONS, **IN_RANGE_FILTER_ACTIONS, - "year_equals": lambda item, filter_value: item.year == filter_value, - "year_in": lambda item, filter_value: item.year in filter_value, - "month_equals": lambda item, filter_value: item.month == filter_value, - "month_in": lambda item, filter_value: item.month in filter_value, - "day_equals": lambda item, filter_value: item.day == filter_value, - "day_in": lambda item, filter_value: item.day in filter_value, - "weekday_equals": lambda item, filter_value: item.weekday() == filter_value, - "weekday_in": lambda item, filter_value: item.weekday() in filter_value, - "iso_weekday_equals": lambda item, filter_value: item.isoweekday() == filter_value, - "iso_weekday_in": lambda item, filter_value: item.isoweekday() in filter_value, - "time_equals": lambda item, filter_value: item.time() == filter_value, - "time_in": lambda item, filter_value: item.time() in filter_value, - "hour_equals": lambda item, filter_value: item.hour == filter_value, - "hour_in": lambda item, filter_value: item.hour in filter_value, - "minute_equals": lambda item, filter_value: item.minute == filter_value, - "minute_in": lambda item, filter_value: item.minute in filter_value, - "second_equals": lambda item, filter_value: item.second == filter_value, - "second_in": lambda item, filter_value: item.second in filter_value, - "in_date_range": lambda item, filter_value: filter_value[0] <= item.date() <= filter_value[1], - "in_time_range": lambda item, filter_value: filter_value[0] <= item.time() <= filter_value[1], + "year_equals": lambda item, value: item.year == value, + "year_in": lambda item, value: item.year in value, + "month_equals": lambda item, value: item.month == value, + "month_in": lambda item, value: item.month in value, + "day_equals": lambda item, value: item.day == value, + "day_in": lambda item, value: item.day in value, + "weekday_equals": lambda item, value: item.weekday() == value, + "weekday_in": lambda item, value: item.weekday() in value, + "iso_weekday_equals": lambda item, value: item.isoweekday() == value, + "iso_weekday_in": lambda item, value: item.isoweekday() in value, + "time_equals": lambda item, value: item.time() == value, + "time_in": lambda item, value: item.time() in value, + "hour_equals": lambda item, value: item.hour == value, + "hour_in": lambda item, value: item.hour in value, + "minute_equals": lambda item, value: item.minute == value, + "minute_in": lambda item, value: item.minute in value, + "second_equals": lambda item, value: item.second == value, + "second_in": lambda item, value: item.second in value, + "in_date_range": lambda item, value: value[0] <= item.date() <= value[1], + "in_time_range": lambda item, value: value[0] <= item.time() <= value[1], }, "TagSet": { - "any_tag_contains": lambda item, filter_value: item.any_tag_contains(filter_value), - "not_any_tag_contains": lambda item, filter_value: not item.any_tag_contains(filter_value), - "any_tag_starts_with": lambda item, filter_value: item.any_tag_starts_with(filter_value), - "not_any_tag_starts_with": lambda item, filter_value: not item.any_tag_starts_with(filter_value), - "any_tag_ends_with": lambda item, filter_value: item.any_tag_ends_with(filter_value), - "not_any_tag_ends_with": lambda item, filter_value: not item.any_tag_ends_with(filter_value), + **generate_complementary_filters("any_tag_contains", lambda item, value: item.any_tag_contains(value)), + **generate_complementary_filters("any_tag_starts_with", lambda item, value: item.any_tag_starts_with(value)), + **generate_complementary_filters("any_tag_ends_with", lambda item, value: item.any_tag_ends_with(value)), **EQUALS_FILTER_ACTIONS, **CONTAINS_FILTER_ACTIONS, **IS_FILTER_ACTIONS, From 34c10cda5ad8492896e7ce00abd608246f3ea70b Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 12:03:14 +0100 Subject: [PATCH 082/103] REF: Rename gs_path to cloud_path --- octue/cloud/storage/client.py | 84 ++++++++++++++++-------------- octue/resources/datafile.py | 52 +++++++++--------- octue/resources/dataset.py | 22 ++++---- octue/resources/manifest.py | 24 +++++---- tests/cloud/storage/test_client.py | 4 +- tests/resources/test_datafile.py | 2 +- tests/resources/test_dataset.py | 12 +++-- tests/resources/test_manifest.py | 8 +-- 8 files changed, 110 insertions(+), 98 deletions(-) diff --git a/octue/cloud/storage/client.py b/octue/cloud/storage/client.py index 7a6e93eb9..1d93b33fc 100644 --- a/octue/cloud/storage/client.py +++ b/octue/cloud/storage/client.py @@ -48,20 +48,26 @@ def create_bucket(self, name, location=None, allow_existing=False, timeout=_DEFA self.client.create_bucket(bucket_or_name=name, location=location, timeout=timeout) def upload_file( - self, local_path, gs_path=None, bucket_name=None, path_in_bucket=None, metadata=None, timeout=_DEFAULT_TIMEOUT + self, + local_path, + cloud_path=None, + bucket_name=None, + path_in_bucket=None, + metadata=None, + timeout=_DEFAULT_TIMEOUT, ): """Upload a local file to a Google Cloud bucket at gs:///. Either (`bucket_name` - and `path_in_bucket`) or `gs_path` must be provided. + and `path_in_bucket`) or `cloud_path` must be provided. :param str local_path: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :param dict metadata: :param float timeout: :return None: """ - blob = self._blob(gs_path, bucket_name, path_in_bucket) + blob = self._blob(cloud_path, bucket_name, path_in_bucket) with open(local_path) as f: blob.crc32c = self._compute_crc32c_checksum(f.read()) @@ -71,83 +77,83 @@ def upload_file( logger.info("Uploaded %r to Google Cloud at %r.", local_path, blob.public_url) def upload_from_string( - self, string, gs_path=None, bucket_name=None, path_in_bucket=None, metadata=None, timeout=_DEFAULT_TIMEOUT + self, string, cloud_path=None, bucket_name=None, path_in_bucket=None, metadata=None, timeout=_DEFAULT_TIMEOUT ): """Upload serialised data in string form to a file in a Google Cloud bucket at - gs:///. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. + gs:///. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. :param str string: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :param dict metadata: :param float timeout: :return None: """ - blob = self._blob(gs_path, bucket_name, path_in_bucket) + blob = self._blob(cloud_path, bucket_name, path_in_bucket) blob.crc32c = self._compute_crc32c_checksum(string) blob.upload_from_string(data=string, timeout=timeout) self._update_metadata(blob, metadata) logger.info("Uploaded data to Google Cloud at %r.", blob.public_url) - def update_metadata(self, metadata, gs_path=None, bucket_name=None, path_in_bucket=None): - """Update the metadata for the given cloud file. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must + def update_metadata(self, metadata, cloud_path=None, bucket_name=None, path_in_bucket=None): + """Update the metadata for the given cloud file. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. :param dict metadata: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :return None: """ - blob = self._blob(gs_path, bucket_name, path_in_bucket) + blob = self._blob(cloud_path, bucket_name, path_in_bucket) self._update_metadata(blob, metadata) def download_to_file( - self, local_path, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT + self, local_path, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT ): """Download a file to a file from a Google Cloud bucket at gs:///. Either - (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. + (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. :param str local_path: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :param float timeout: :return None: """ - blob = self._blob(gs_path, bucket_name, path_in_bucket) + blob = self._blob(cloud_path, bucket_name, path_in_bucket) blob.download_to_filename(local_path, timeout=timeout) logger.info("Downloaded %r from Google Cloud to %r.", blob.public_url, local_path) - def download_as_string(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): + def download_as_string(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): """Download a file to a string from a Google Cloud bucket at gs:///. Either - (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. + (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :param float timeout: :return str: """ - blob = self._blob(gs_path, bucket_name, path_in_bucket) + blob = self._blob(cloud_path, bucket_name, path_in_bucket) data = blob.download_as_bytes(timeout=timeout) logger.info("Downloaded %r from Google Cloud to as string.", blob.public_url) return data.decode() - def get_metadata(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): + def get_metadata(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): """Get the metadata of the given file in the given bucket. Either (`bucket_name` and `path_in_bucket`) or - `gs_path` must be provided. + `cloud_path` must be provided. - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :param float timeout: :return dict: """ - if gs_path: - bucket_name, path_in_bucket = split_bucket_name_from_gs_path(gs_path) + if cloud_path: + bucket_name, path_in_bucket = split_bucket_name_from_gs_path(cloud_path) bucket = self.client.get_bucket(bucket_or_name=bucket_name) blob = bucket.get_blob(blob_name=self._strip_leading_slash(path_in_bucket), timeout=timeout) @@ -168,33 +174,33 @@ def get_metadata(self, gs_path=None, bucket_name=None, path_in_bucket=None, time "path_in_bucket": path_in_bucket, } - def delete(self, gs_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): - """Delete the given file from the given bucket. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must + def delete(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): + """Delete the given file from the given bucket. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :param float timeout: :return None: """ - blob = self._blob(gs_path, bucket_name, path_in_bucket) + blob = self._blob(cloud_path, bucket_name, path_in_bucket) blob.delete(timeout=timeout) logger.info("Deleted %r from Google Cloud.", blob.public_url) - def scandir(self, gs_path=None, bucket_name=None, directory_path=None, filter=None, timeout=_DEFAULT_TIMEOUT): + def scandir(self, cloud_path=None, bucket_name=None, directory_path=None, filter=None, timeout=_DEFAULT_TIMEOUT): """Yield the blobs belonging to the given "directory" in the given bucket. Either (`bucket_name` and - `path_in_bucket`) or `gs_path` must be provided. + `path_in_bucket`) or `cloud_path` must be provided. - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None directory_path: :param callable filter: :param float timeout: :yield google.cloud.storage.blob.Blob: """ - if gs_path: - bucket_name, directory_path = split_bucket_name_from_gs_path(gs_path) + if cloud_path: + bucket_name, directory_path = split_bucket_name_from_gs_path(cloud_path) bucket = self.client.get_bucket(bucket_or_name=bucket_name) blobs = bucket.list_blobs(timeout=timeout) @@ -213,17 +219,17 @@ def _strip_leading_slash(self, path): """ return path.lstrip("/") - def _blob(self, gs_path=None, bucket_name=None, path_in_bucket=None): + def _blob(self, cloud_path=None, bucket_name=None, path_in_bucket=None): """Instantiate a blob for the given bucket at the given path. Note that this is not synced up with Google Cloud. - Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. + Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :return google.cloud.storage.blob.Blob: """ - if gs_path: - bucket_name, path_in_bucket = split_bucket_name_from_gs_path(gs_path) + if cloud_path: + bucket_name, path_in_bucket = split_bucket_name_from_gs_path(cloud_path) bucket = self.client.get_bucket(bucket_or_name=bucket_name) return bucket.blob(blob_name=self._strip_leading_slash(path_in_bucket)) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index adee22a01..35f529731 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -160,7 +160,7 @@ def deserialise(cls, serialised_datafile, path_from=None): def from_cloud( cls, project_name, - gs_path=None, + cloud_path=None, bucket_name=None, datafile_path=None, allow_overwrite=False, @@ -175,10 +175,10 @@ def from_cloud( Note that a value provided for an attribute in kwargs will override any existing value for the attribute. - Either (`bucket_name` and `datafile_path`) or `gs_path` must be provided. + Either (`bucket_name` and `datafile_path`) or `cloud_path` must be provided. :param str project_name: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None datafile_path: path to file represented by datafile :param bool allow_overwrite: if `True`, allow attributes of the datafile to be overwritten by values given in @@ -189,11 +189,11 @@ def from_cloud( the datafile when the context is exited :return Datafile: """ - if not gs_path: - gs_path = storage.path.generate_gs_path(bucket_name, datafile_path) + if not cloud_path: + cloud_path = storage.path.generate_gs_path(bucket_name, datafile_path) - datafile = cls(path=gs_path) - datafile.get_cloud_metadata(project_name, gs_path=gs_path) + datafile = cls(path=cloud_path) + datafile.get_cloud_metadata(project_name, cloud_path=cloud_path) custom_metadata = datafile._cloud_metadata.get("custom_metadata", {}) if not allow_overwrite: @@ -218,20 +218,20 @@ def from_cloud( return datafile def to_cloud( - self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None, update_cloud_metadata=True + self, project_name=None, cloud_path=None, bucket_name=None, path_in_bucket=None, update_cloud_metadata=True ): - """Upload a datafile to Google Cloud Storage. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be + """Upload a datafile to Google Cloud Storage. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. :param str|None project_name: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :param bool update_cloud_metadata: :return str: gs:// path for datafile """ project_name, bucket_name, path_in_bucket = self._get_cloud_location( - project_name, gs_path, bucket_name, path_in_bucket + project_name, cloud_path, bucket_name, path_in_bucket ) self.get_cloud_metadata(project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket) @@ -252,20 +252,20 @@ def to_cloud( if self._cloud_metadata.get("custom_metadata") != local_metadata: self.update_cloud_metadata(project_name, bucket_name=bucket_name, path_in_bucket=path_in_bucket) - return gs_path or storage.path.generate_gs_path(bucket_name, path_in_bucket) + return cloud_path or storage.path.generate_gs_path(bucket_name, path_in_bucket) - def get_cloud_metadata(self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None): + def get_cloud_metadata(self, project_name=None, cloud_path=None, bucket_name=None, path_in_bucket=None): """Get the cloud metadata for the datafile, casting the types of the cluster and sequence fields to integer. - Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. + Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. :param str|None project_name: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :return dict: """ project_name, bucket_name, path_in_bucket = self._get_cloud_location( - project_name, gs_path, bucket_name, path_in_bucket + project_name, cloud_path, bucket_name, path_in_bucket ) cloud_metadata = GoogleCloudStorageClient(project_name).get_metadata( @@ -289,18 +289,18 @@ def get_cloud_metadata(self, project_name=None, gs_path=None, bucket_name=None, self._cloud_metadata = cloud_metadata - def update_cloud_metadata(self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None): - """Update the cloud metadata for the datafile. Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be + def update_cloud_metadata(self, project_name=None, cloud_path=None, bucket_name=None, path_in_bucket=None): + """Update the cloud metadata for the datafile. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. :param str|None project_name: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :return None: """ project_name, bucket_name, path_in_bucket = self._get_cloud_location( - project_name, gs_path, bucket_name, path_in_bucket + project_name, cloud_path, bucket_name, path_in_bucket ) GoogleCloudStorageClient(project_name=project_name).update_metadata( @@ -394,7 +394,7 @@ def get_local_path(self): temporary_local_path = tempfile.NamedTemporaryFile(delete=False).name GoogleCloudStorageClient(project_name=self._cloud_metadata["project_name"]).download_to_file( - local_path=temporary_local_path, gs_path=self.absolute_path + local_path=temporary_local_path, cloud_path=self.absolute_path ) TEMPORARY_LOCAL_FILE_CACHE[self.absolute_path] = temporary_local_path @@ -429,20 +429,20 @@ def _calculate_hash(self): return super()._calculate_hash(hash) - def _get_cloud_location(self, project_name=None, gs_path=None, bucket_name=None, path_in_bucket=None): + def _get_cloud_location(self, project_name=None, cloud_path=None, bucket_name=None, path_in_bucket=None): """Get the cloud location details for the bucket, allowing the keyword arguments to override any stored values. - Either (`bucket_name` and `path_in_bucket`) or `gs_path` must be provided. + Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. :param str|None project_name: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_in_bucket: :raise octue.exceptions.CloudLocationNotSpecified: if an exact cloud location isn't provided and isn't available implicitly (i.e. the Datafile wasn't loaded from the cloud previously) :return (str, str, str): """ - if gs_path: - bucket_name, path_in_bucket = storage.path.split_bucket_name_from_gs_path(gs_path) + if cloud_path: + bucket_name, path_in_bucket = storage.path.split_bucket_name_from_gs_path(cloud_path) try: project_name = project_name or self._cloud_metadata["project_name"] diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index aa5e140bb..ef37bfbf2 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -55,18 +55,18 @@ def __len__(self): return len(self.files) @classmethod - def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_dataset_directory=None): + def from_cloud(cls, project_name, cloud_path=None, bucket_name=None, path_to_dataset_directory=None): """Instantiate a Dataset from Google Cloud storage. Either (`bucket_name` and `path_to_dataset_directory`) or - `gs_path` must be provided. + `cloud_path` must be provided. :param str project_name: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_to_dataset_directory: path to dataset directory (directory containing dataset's files) :return Dataset: """ - if gs_path: - bucket_name, path_to_dataset_directory = storage.path.split_bucket_name_from_gs_path(gs_path) + if cloud_path: + bucket_name, path_to_dataset_directory = storage.path.split_bucket_name_from_gs_path(cloud_path) serialised_dataset = json.loads( GoogleCloudStorageClient(project_name=project_name).download_as_string( @@ -92,18 +92,18 @@ def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_datase files=datafiles, ) - def to_cloud(self, project_name, gs_path=None, bucket_name=None, output_directory=None): - """Upload a dataset to a cloud location. Either (`bucket_name` and `output_directory`) or `gs_path` must be + def to_cloud(self, project_name, cloud_path=None, bucket_name=None, output_directory=None): + """Upload a dataset to a cloud location. Either (`bucket_name` and `output_directory`) or `cloud_path` must be provided. :param str project_name: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None output_directory: :return str: gs:// path for dataset """ - if gs_path: - bucket_name, output_directory = storage.path.split_bucket_name_from_gs_path(gs_path) + if cloud_path: + bucket_name, output_directory = storage.path.split_bucket_name_from_gs_path(cloud_path) files = [] @@ -126,7 +126,7 @@ def to_cloud(self, project_name, gs_path=None, bucket_name=None, output_director path_in_bucket=storage.path.join(output_directory, self.name, definitions.DATASET_FILENAME), ) - return gs_path or storage.path.generate_gs_path(bucket_name, output_directory, self.name) + return cloud_path or storage.path.generate_gs_path(bucket_name, output_directory, self.name) @property def name(self): diff --git a/octue/resources/manifest.py b/octue/resources/manifest.py index 051c7b4c9..8525fec30 100644 --- a/octue/resources/manifest.py +++ b/octue/resources/manifest.py @@ -45,18 +45,18 @@ def __init__(self, id=None, logger=None, path=None, datasets=None, keys=None, ** vars(self).update(**kwargs) @classmethod - def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_manifest_file=None): + def from_cloud(cls, project_name, cloud_path=None, bucket_name=None, path_to_manifest_file=None): """Instantiate a Manifest from Google Cloud storage. Either (`bucket_name` and `path_to_manifest_file`) or - `gs_path` must be provided. + `cloud_path` must be provided. :param str project_name: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_to_manifest_file: :return Dataset: """ - if gs_path: - bucket_name, path_to_manifest_file = storage.path.split_bucket_name_from_gs_path(gs_path) + if cloud_path: + bucket_name, path_to_manifest_file = storage.path.split_bucket_name_from_gs_path(cloud_path) serialised_manifest = json.loads( GoogleCloudStorageClient(project_name=project_name).download_as_string( @@ -82,19 +82,21 @@ def from_cloud(cls, project_name, gs_path=None, bucket_name=None, path_to_manife keys=serialised_manifest["keys"], ) - def to_cloud(self, project_name, gs_path=None, bucket_name=None, path_to_manifest_file=None, store_datasets=True): + def to_cloud( + self, project_name, cloud_path=None, bucket_name=None, path_to_manifest_file=None, store_datasets=True + ): """Upload a manifest to a cloud location, optionally uploading its datasets into the same directory. Either - (`bucket_name` and `path_to_manifest_file`) or `gs_path` must be provided. + (`bucket_name` and `path_to_manifest_file`) or `cloud_path` must be provided. :param str project_name: - :param str|None gs_path: + :param str|None cloud_path: :param str|None bucket_name: :param str|None path_to_manifest_file: :param bool store_datasets: if True, upload datasets to same directory as manifest file :return str: gs:// path for manifest file """ - if gs_path: - bucket_name, path_to_manifest_file = storage.path.split_bucket_name_from_gs_path(gs_path) + if cloud_path: + bucket_name, path_to_manifest_file = storage.path.split_bucket_name_from_gs_path(cloud_path) datasets = [] output_directory = storage.path.dirname(path_to_manifest_file) @@ -121,7 +123,7 @@ def to_cloud(self, project_name, gs_path=None, bucket_name=None, path_to_manifes path_in_bucket=path_to_manifest_file, ) - return gs_path or storage.path.generate_gs_path(bucket_name, path_to_manifest_file) + return cloud_path or storage.path.generate_gs_path(bucket_name, path_to_manifest_file) @property def all_datasets_are_in_cloud(self): diff --git a/tests/cloud/storage/test_client.py b/tests/cloud/storage/test_client.py index aa3b97158..f17ce11ed 100644 --- a/tests/cloud/storage/test_client.py +++ b/tests/cloud/storage/test_client.py @@ -176,7 +176,7 @@ def test_scandir_with_gs_path(self): path_in_bucket = storage.path.join(directory_path, self.FILENAME) gs_path = f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}" - self.storage_client.upload_from_string(string=json.dumps({"height": 32}), gs_path=gs_path) + self.storage_client.upload_from_string(string=json.dumps({"height": 32}), cloud_path=gs_path) contents = list(self.storage_client.scandir(gs_path)) self.assertEqual(len(contents), 1) @@ -206,7 +206,7 @@ def test_get_metadata(self): def test_get_metadata_with_gs_path(self): """Test that file metadata can be retrieved when a GS path is used.""" gs_path = f"gs://{TEST_BUCKET_NAME}/{self.FILENAME}" - self.storage_client.upload_from_string(string=json.dumps({"height": 32}), gs_path=gs_path) + self.storage_client.upload_from_string(string=json.dumps({"height": 32}), cloud_path=gs_path) metadata = self.storage_client.get_metadata(gs_path) self.assertTrue(len(metadata) > 0) diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index fa3e4b0f4..8426f156f 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -204,7 +204,7 @@ def test_from_cloud_with_datafile(self): tags={"blah:shah:nah", "blib", "glib"}, ) gs_path = f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}" - downloaded_datafile = Datafile.from_cloud(project_name, gs_path=gs_path) + downloaded_datafile = Datafile.from_cloud(project_name, cloud_path=gs_path) self.assertEqual(downloaded_datafile.path, gs_path) self.assertEqual(downloaded_datafile.id, datafile.id) diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index 89164f819..c7fffdabf 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -329,8 +329,12 @@ def test_from_cloud(self): gs_path = f"gs://{bucket_name}/{path_to_dataset_directory}" for location_parameters in ( - {"bucket_name": bucket_name, "path_to_dataset_directory": path_to_dataset_directory, "gs_path": None}, - {"bucket_name": None, "path_to_dataset_directory": None, "gs_path": gs_path}, + { + "bucket_name": bucket_name, + "path_to_dataset_directory": path_to_dataset_directory, + "cloud_path": None, + }, + {"bucket_name": None, "path_to_dataset_directory": None, "cloud_path": gs_path}, ): persisted_dataset = Dataset.from_cloud( @@ -375,8 +379,8 @@ def test_to_cloud(self): gs_path = storage.path.generate_gs_path(bucket_name, output_directory) for location_parameters in ( - {"bucket_name": bucket_name, "output_directory": output_directory, "gs_path": None}, - {"bucket_name": None, "output_directory": None, "gs_path": gs_path}, + {"bucket_name": bucket_name, "output_directory": output_directory, "cloud_path": None}, + {"bucket_name": None, "output_directory": None, "cloud_path": gs_path}, ): dataset.to_cloud(project_name, **location_parameters) diff --git a/tests/resources/test_manifest.py b/tests/resources/test_manifest.py index 79cd00e27..b423ac5f6 100644 --- a/tests/resources/test_manifest.py +++ b/tests/resources/test_manifest.py @@ -84,8 +84,8 @@ def test_to_cloud(self): gs_path = storage.path.generate_gs_path(bucket_name, path_to_manifest_file) for location_parameters in ( - {"bucket_name": bucket_name, "path_to_manifest_file": path_to_manifest_file, "gs_path": None}, - {"bucket_name": None, "path_to_manifest_file": None, "gs_path": gs_path}, + {"bucket_name": bucket_name, "path_to_manifest_file": path_to_manifest_file, "cloud_path": None}, + {"bucket_name": None, "path_to_manifest_file": None, "cloud_path": gs_path}, ): manifest.to_cloud(self.TEST_PROJECT_NAME, **location_parameters) @@ -173,8 +173,8 @@ def test_from_cloud(self): gs_path = storage.path.generate_gs_path(bucket_name, path_to_manifest_file) for location_parameters in ( - {"bucket_name": bucket_name, "path_to_manifest_file": path_to_manifest_file, "gs_path": None}, - {"bucket_name": None, "path_to_manifest_file": None, "gs_path": gs_path}, + {"bucket_name": bucket_name, "path_to_manifest_file": path_to_manifest_file, "cloud_path": None}, + {"bucket_name": None, "path_to_manifest_file": None, "cloud_path": gs_path}, ): persisted_manifest = Manifest.from_cloud(project_name=self.TEST_PROJECT_NAME, **location_parameters) From 87d1a53f423cbf7c890f1668c4d6932f593c70e6 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 12:20:44 +0100 Subject: [PATCH 083/103] DOC: Add parameter descriptions to cloud methods --- octue/resources/datafile.py | 27 ++++++++++++--------------- octue/resources/dataset.py | 18 +++++++++--------- octue/resources/manifest.py | 16 ++++++++-------- 3 files changed, 29 insertions(+), 32 deletions(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 35f529731..4f9f08c84 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -177,16 +177,13 @@ def from_cloud( Either (`bucket_name` and `datafile_path`) or `cloud_path` must be provided. - :param str project_name: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None datafile_path: path to file represented by datafile - :param bool allow_overwrite: if `True`, allow attributes of the datafile to be overwritten by values given in - kwargs - :param str mode: if using as a context manager, open the datafile for reading/editing in this mode (the mode - options are the same as for the builtin open function) - :param bool update_cloud_metadata: if using as a context manager and this is True, update the cloud metadata of - the datafile when the context is exited + :param str project_name: name of Google Cloud project datafile is stored in + :param str|None cloud_path: full path to datafile in cloud storage (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket datafile is stored in + :param str|None datafile_path: cloud storage path of datafile (e.g. `path/to/file.csv`) + :param bool allow_overwrite: if `True`, allow attributes of the datafile to be overwritten by values given in kwargs + :param str mode: if using as a context manager, open the datafile for reading/editing in this mode (the mode options are the same as for the builtin open function) + :param bool update_cloud_metadata: if using as a context manager and this is True, update the cloud metadata of the datafile when the context is exited :return Datafile: """ if not cloud_path: @@ -223,11 +220,11 @@ def to_cloud( """Upload a datafile to Google Cloud Storage. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None project_name: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param bool update_cloud_metadata: + :param str|None project_name: name of Google Cloud project to store datafile in + :param str|None cloud_path: full path to cloud storage location to store datafile at (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket to store datafile in + :param str|None path_in_bucket: cloud storage path to store datafile at (e.g. `path/to/file.csv`) + :param bool update_cloud_metadata: if `True`, update the metadata of the datafile in the cloud at upload time :return str: gs:// path for datafile """ project_name, bucket_name, path_in_bucket = self._get_cloud_location( diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index ef37bfbf2..edc753eb9 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -59,10 +59,10 @@ def from_cloud(cls, project_name, cloud_path=None, bucket_name=None, path_to_dat """Instantiate a Dataset from Google Cloud storage. Either (`bucket_name` and `path_to_dataset_directory`) or `cloud_path` must be provided. - :param str project_name: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_to_dataset_directory: path to dataset directory (directory containing dataset's files) + :param str project_name: name of Google Cloud project dataset is stored in + :param str|None cloud_path: full path to dataset in cloud storage (e.g. `gs://bucket_name/path/to/dataset`) + :param str|None bucket_name: name of bucket dataset is stored in + :param str|None path_to_dataset_directory: path to dataset directory (containing dataset's files) in cloud (e.g. `path/to/dataset`) :return Dataset: """ if cloud_path: @@ -96,11 +96,11 @@ def to_cloud(self, project_name, cloud_path=None, bucket_name=None, output_direc """Upload a dataset to a cloud location. Either (`bucket_name` and `output_directory`) or `cloud_path` must be provided. - :param str project_name: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None output_directory: - :return str: gs:// path for dataset + :param str project_name: name of Google Cloud project to store dataset in + :param str|None cloud_path: full cloud storage path to store dataset at (e.g. `gs://bucket_name/path/to/dataset`) + :param str|None bucket_name: name of bucket to store dataset in + :param str|None output_directory: path to output directory in cloud storage (e.g. `path/to/dataset`) + :return str: cloud path for dataset """ if cloud_path: bucket_name, output_directory = storage.path.split_bucket_name_from_gs_path(cloud_path) diff --git a/octue/resources/manifest.py b/octue/resources/manifest.py index 8525fec30..fd84e1815 100644 --- a/octue/resources/manifest.py +++ b/octue/resources/manifest.py @@ -49,10 +49,10 @@ def from_cloud(cls, project_name, cloud_path=None, bucket_name=None, path_to_man """Instantiate a Manifest from Google Cloud storage. Either (`bucket_name` and `path_to_manifest_file`) or `cloud_path` must be provided. - :param str project_name: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_to_manifest_file: + :param str project_name: name of Google Cloud project manifest is stored in + :param str|None cloud_path: full path to manifest in cloud storage (e.g. `gs://bucket_name/path/to/manifest.json`) + :param str|None bucket_name: name of bucket manifest is stored in + :param str|None path_to_manifest_file: path to manifest in cloud storage e.g. `path/to/manifest.json` :return Dataset: """ if cloud_path: @@ -88,10 +88,10 @@ def to_cloud( """Upload a manifest to a cloud location, optionally uploading its datasets into the same directory. Either (`bucket_name` and `path_to_manifest_file`) or `cloud_path` must be provided. - :param str project_name: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_to_manifest_file: + :param str project_name: name of Google Cloud project to store manifest in + :param str|None cloud_path: full path to cloud storage location to store manifest at (e.g. `gs://bucket_name/path/to/manifest.json`) + :param str|None bucket_name: name of bucket to store manifest in + :param str|None path_to_manifest_file: cloud storage path to store manifest at e.g. `path/to/manifest.json` :param bool store_datasets: if True, upload datasets to same directory as manifest file :return str: gs:// path for manifest file """ From 6400f95c537359b1ce9630d57141e6ad5a73067e Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 13:06:46 +0100 Subject: [PATCH 084/103] IMP: Serialise datetimes in ISO format --- octue/utils/encoders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/octue/utils/encoders.py b/octue/utils/encoders.py index 94b39d19d..59604bf27 100644 --- a/octue/utils/encoders.py +++ b/octue/utils/encoders.py @@ -13,7 +13,7 @@ def default(self, obj): return obj.serialise() if isinstance(obj, datetime.datetime): - return str(obj) + return obj.isoformat() # Otherwise let the base class default method raise the TypeError return TwinedEncoder.default(self, obj) From a40126abb9d58eae834d17300513247abf0b174e Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 13:23:38 +0100 Subject: [PATCH 085/103] DEP: Use latest version of twined --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1cac8df8b..46fb79e56 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ "google-cloud-storage>=1.35.1", "google-crc32c>=1.1.2", "gunicorn", - "twined>=0.0.19", + "twined>=0.0.20", ], url="https://www.github.com/octue/octue-sdk-python", license="MIT", From 1189a39a8c9083e569f8a4f67ff9a56bb00a0e6b Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 13:39:10 +0100 Subject: [PATCH 086/103] IMP: Add octue SDK version to datafile metadata --- octue/resources/datafile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 6dcf81a0a..3c12aee90 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -3,6 +3,7 @@ import logging import os import tempfile +import pkg_resources from google_crc32c import Checksum from octue.cloud import storage @@ -530,6 +531,7 @@ def metadata(self, use_octue_namespace=True): "sequence": self.sequence, "labels": self.labels.serialise(to_string=True), **self.tags, + "sdk_version": pkg_resources.get_distribution("octue").version, } if not use_octue_namespace: From a2c3d98a9b03b88fcc02b11f7ae5fca1f29ab305 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 14:52:03 +0100 Subject: [PATCH 087/103] IMP: Add `one` method to filter containers --- octue/resources/dataset.py | 13 ++------- octue/resources/filter_containers.py | 20 ++++++++++++++ tests/resources/test_dataset.py | 2 +- tests/resources/test_filter_containers.py | 33 +++++++++++++++++++++++ 4 files changed, 56 insertions(+), 12 deletions(-) diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index dd19fa2de..39aa22917 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -6,7 +6,7 @@ from octue import definitions from octue.cloud import storage from octue.cloud.storage import GoogleCloudStorageClient -from octue.exceptions import BrokenSequenceException, InvalidInputException, UnexpectedNumberOfResultsException +from octue.exceptions import BrokenSequenceException, InvalidInputException from octue.mixins import Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable, Taggable from octue.resources.datafile import Datafile from octue.resources.filter_containers import FilterSet @@ -218,13 +218,4 @@ def get_file_by_label(self, label_string): :param str label_string: :return octue.resources.datafile.DataFile: """ - results = self.files.filter(labels__contains=label_string) - - if len(results) > 1: - raise UnexpectedNumberOfResultsException( - f"More than one result found when searching for a file by label {label_string!r}." - ) - elif len(results) == 0: - raise UnexpectedNumberOfResultsException(f"No files found with label {label_string!r}.") - - return results.pop() + return self.files.one(labels__contains=label_string) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 01d4fe34d..296983fe9 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -28,6 +28,26 @@ def order_by(self, attribute_name, reverse=False): :return FilterList: """ + def one(self, **kwargs): + """If a single result exists for the given filters, return it. Otherwise, raise an error. + + :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the values to filter for + :raise UnexpectedNumberOfResultsException: if zero or more than one results satisfy the filters + :return octue.resources.mixins.filterable.Filterable: + """ + results = self.filter(**kwargs) + + if len(results) > 1: + raise exceptions.UnexpectedNumberOfResultsException(f"More than one result found for filters {kwargs}.") + + if len(results) == 0: + raise exceptions.UnexpectedNumberOfResultsException(f"No results found for filters {kwargs}.") + + if isinstance(self, UserDict): + return results.popitem() + + return results.pop() + def _filter(self, ignore_items_without_attribute=True, **kwargs): """Return a new instance containing only the Filterables to which the given filter criteria apply. diff --git a/tests/resources/test_dataset.py b/tests/resources/test_dataset.py index 9b4c8ffa8..695e43941 100644 --- a/tests/resources/test_dataset.py +++ b/tests/resources/test_dataset.py @@ -191,7 +191,7 @@ def test_get_file_by_label(self): with self.assertRaises(exceptions.UnexpectedNumberOfResultsException) as e: resource.get_file_by_label("billyjeanisnotmylover") - self.assertIn("No files found with label", e.exception.args[0]) + self.assertIn("No results found for filters {'labels__contains': 'billyjeanisnotmylover'}", e.exception.args[0]) def test_filter_by_sequence_not_none(self): """Ensures that filter works with sequence lookups""" diff --git a/tests/resources/test_filter_containers.py b/tests/resources/test_filter_containers.py index cd94cda25..6c8f2a3db 100644 --- a/tests/resources/test_filter_containers.py +++ b/tests/resources/test_filter_containers.py @@ -50,6 +50,25 @@ def test_filtering_with_multiple_filters(self): filterables = {FilterableThing(a=3, b=2), FilterableThing(a=3, b=99), FilterableThing(a=77)} self.assertEqual(FilterSet(filterables).filter(a__equals=3, b__gt=80), {FilterableThing(a=3, b=99)}) + def test_one_fails_if_no_results(self): + """Test that the `one` method raises an error if there are no results.""" + filterables = FilterSet({FilterableThing(a=3, b=2), FilterableThing(a=3, b=99), FilterableThing(a=77)}) + + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException): + filterables.one(a__equals=10) + + def test_one_fails_if_more_than_one_result(self): + """Test that the `one` method raises an error if there is more than one result.""" + filterables = FilterSet({FilterableThing(a=3, b=2), FilterableThing(a=3, b=99), FilterableThing(a=77)}) + + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException): + filterables.one(a__equals=3) + + def test_one(self): + """Test that the `one` method works and returns one result.""" + filterables = FilterSet({FilterableThing(a=3, b=2), FilterableThing(a=3, b=99), FilterableThing(a=77)}) + self.assertEqual(filterables.one(a__equals=77), FilterableThing(a=77)) + def test_ordering_by_a_non_existent_attribute(self): """ Ensure an error is raised if ordering is attempted by a non-existent attribute. """ filter_set = FilterSet([FilterableThing(age=5), FilterableThing(age=4), FilterableThing(age=3)]) @@ -168,6 +187,20 @@ def test_filtering_with_multiple_filters(self): """Test that multiple filters can be specified in FilterDict.filter at once.""" self.assertEqual(self.ANIMALS.filter(size__equals="small", age__lt=5), {"cat": self.ANIMALS["cat"]}) + def test_one_fails_if_no_results(self): + """Test that the `one` method raises an error if there are no results.""" + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException): + self.ANIMALS.one(age__equals=10) + + def test_one_fails_if_more_than_one_result(self): + """Test that the `one` method raises an error if there is more than one result.""" + with self.assertRaises(exceptions.UnexpectedNumberOfResultsException): + self.ANIMALS.one(size__equals="small") + + def test_one(self): + """Test that the `one` method works and returns one result.""" + self.assertEqual(self.ANIMALS.one(age__equals=91), ("another_dog", self.ANIMALS["another_dog"])) + def test_ordering_by_a_non_existent_attribute(self): """Ensure an error is raised if ordering is attempted by a non-existent attribute.""" with self.assertRaises(exceptions.InvalidInputException): From e160ed7ead387a341da07c1f232b0990197e8844 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 14:52:47 +0100 Subject: [PATCH 088/103] TST: Update tests --- tests/resources/test_datafile.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index 681c3e268..95a5b150f 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -215,7 +215,7 @@ def test_from_cloud_with_datafile(self): self.assertEqual(downloaded_datafile.hash_value, datafile.hash_value) self.assertEqual(downloaded_datafile.cluster, datafile.cluster) self.assertEqual(downloaded_datafile.sequence, datafile.sequence) - self.assertEqual(downloaded_datafile.tags, {"good": "True", "how_good": "very"}) + self.assertEqual(downloaded_datafile.tags, {"sdk_version": "0.1.18", "good": "True", "how_good": "very"}) self.assertEqual(downloaded_datafile.labels, datafile.labels) self.assertEqual(downloaded_datafile.size_bytes, datafile.size_bytes) self.assertTrue(isinstance(downloaded_datafile._last_modified, float)) @@ -535,9 +535,17 @@ def test_metadata(self): self.assertEqual( datafile.metadata().keys(), - {"octue__id", "octue__timestamp", "octue__cluster", "octue__sequence", "octue__labels"}, + { + "octue__id", + "octue__timestamp", + "octue__cluster", + "octue__sequence", + "octue__labels", + "octue__sdk_version", + }, ) self.assertEqual( - datafile.metadata(use_octue_namespace=False).keys(), {"id", "timestamp", "cluster", "sequence", "labels"} + datafile.metadata(use_octue_namespace=False).keys(), + {"id", "timestamp", "cluster", "sequence", "labels", "sdk_version"}, ) From 6ad724ea526d57bf704126696f2884d8333047d2 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 14:59:57 +0100 Subject: [PATCH 089/103] REF: Move filter and order methods into FilterContainer --- octue/resources/filter_containers.py | 93 ++++++++++------------------ 1 file changed, 32 insertions(+), 61 deletions(-) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index 296983fe9..bdeea7dc4 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -8,14 +8,31 @@ class FilterContainer(ABC): def filter(self, ignore_items_without_attribute=True, **kwargs): - """Return a new instance of the container containing only the `Filterable`s to which the given filter criteria - are `True`. + """Return a new instance containing only the `Filterable`s to which the given filter criteria are `True`. :param bool ignore_items_without_attribute: - :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the - values to filter for + :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the values to filter for :return octue.resources.filter_containers.FilterContainer: """ + if any(not isinstance(item, Filterable) for item in self): + raise TypeError(f"All items in a {type(self).__name__} must be of type {Filterable.__name__}.") + + raise_error_if_filter_is_invalid = not ignore_items_without_attribute + + if len(kwargs) == 1: + return type(self)( + ( + item + for item in self + if item.satisfies(raise_error_if_filter_is_invalid=raise_error_if_filter_is_invalid, **kwargs) + ) + ) + + filter_names = list(kwargs) + + for filter_name in filter_names: + filter_value = kwargs.pop(filter_name) + return self.filter(raise_error_if_filter_is_invalid, **{filter_name: filter_value}).filter(**kwargs) def order_by(self, attribute_name, reverse=False): """Order the `Filterable`s in the container by an attribute with the given name, returning them as a new @@ -23,10 +40,15 @@ def order_by(self, attribute_name, reverse=False): :param str attribute_name: :param bool reverse: - :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the - container's members + :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the container's members :return FilterList: """ + try: + return FilterList(sorted(self, key=lambda item: getattr(item, attribute_name), reverse=reverse)) + except AttributeError: + raise exceptions.InvalidInputException( + f"An attribute named {attribute_name!r} does not exist on one or more members of {self!r}." + ) def one(self, **kwargs): """If a single result exists for the given filters, return it. Otherwise, raise an error. @@ -49,61 +71,12 @@ def one(self, **kwargs): return results.pop() -def _filter(self, ignore_items_without_attribute=True, **kwargs): - """Return a new instance containing only the Filterables to which the given filter criteria apply. - - :param bool ignore_items_without_attribute: - :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the - values to filter for - :return octue.resources.filter_containers.FilterSet: - """ - if any(not isinstance(item, Filterable) for item in self): - raise TypeError(f"All items in a {type(self).__name__} must be of type {Filterable.__name__}.") - - raise_error_if_filter_is_invalid = not ignore_items_without_attribute - - if len(kwargs) == 1: - return type(self)( - ( - item - for item in self - if item.satisfies(raise_error_if_filter_is_invalid=raise_error_if_filter_is_invalid, **kwargs) - ) - ) - - filter_names = list(kwargs) - - for filter_name in filter_names: - filter_value = kwargs.pop(filter_name) - return _filter(self, raise_error_if_filter_is_invalid, **{filter_name: filter_value}).filter(**kwargs) - - -def _order_by(self, attribute_name, reverse=False): - """Order the `Filterable`s in the container by an attribute with the given name, returning them as a new - `FilterList` regardless of the type of filter container begun with. - - :param str attribute_name: - :param bool reverse: - :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the - container's members - :return FilterList: - """ - try: - return FilterList(sorted(self, key=lambda item: getattr(item, attribute_name), reverse=reverse)) - except AttributeError: - raise exceptions.InvalidInputException( - f"An attribute named {attribute_name!r} does not exist on one or more members of {self!r}." - ) - - class FilterSet(FilterContainer, set): - filter = _filter - order_by = _order_by + pass class FilterList(FilterContainer, list): - filter = _filter - order_by = _order_by + pass class FilterDict(FilterContainer, UserDict): @@ -112,8 +85,7 @@ def filter(self, ignore_items_without_attribute=True, **kwargs): satisfied. :param bool ignore_items_without_attribute: - :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the - values to filter for + :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the values to filter for :return FilterDict: """ if any(not isinstance(item, Filterable) for item in self.values()): @@ -141,8 +113,7 @@ def order_by(self, attribute_name, reverse=False): :param str attribute_name: a dot-separated (optionally nested) attribute name e.g. "a", "a.b", "a.b.c" :param bool reverse: - :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the - FilterDict's values + :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the FilterDict's values :return FilterList: """ try: From b7b920e3ea7d042ff3e6bf199df730be41317449 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 16:51:26 +0100 Subject: [PATCH 090/103] IMP: JSON-encode cloud storage custom metadata --- octue/cloud/storage/client.py | 88 +++++++++++++++++++------------- octue/mixins/serialisable.py | 5 +- octue/resources/datafile.py | 10 +--- octue/utils/decoders.py | 33 +++++------- octue/utils/encoders.py | 2 +- tests/resources/test_datafile.py | 4 +- 6 files changed, 72 insertions(+), 70 deletions(-) diff --git a/octue/cloud/storage/client.py b/octue/cloud/storage/client.py index 1d93b33fc..e7fbf9227 100644 --- a/octue/cloud/storage/client.py +++ b/octue/cloud/storage/client.py @@ -1,4 +1,5 @@ import base64 +import json import logging from google.cloud import storage from google.cloud.storage.constants import _DEFAULT_TIMEOUT @@ -6,6 +7,8 @@ from octue.cloud.credentials import GCPCredentialsManager from octue.cloud.storage.path import split_bucket_name_from_gs_path +from octue.utils.decoders import OctueJSONDecoder +from octue.utils.encoders import OctueJSONEncoder logger = logging.getLogger(__name__) @@ -97,6 +100,40 @@ def upload_from_string( self._update_metadata(blob, metadata) logger.info("Uploaded data to Google Cloud at %r.", blob.public_url) + def get_metadata(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): + """Get the metadata of the given file in the given bucket. Either (`bucket_name` and `path_in_bucket`) or + `cloud_path` must be provided. + + :param str|None cloud_path: + :param str|None bucket_name: + :param str|None path_in_bucket: + :param float timeout: + :return dict: + """ + if cloud_path: + bucket_name, path_in_bucket = split_bucket_name_from_gs_path(cloud_path) + + bucket = self.client.get_bucket(bucket_or_name=bucket_name) + blob = bucket.get_blob(blob_name=self._strip_leading_slash(path_in_bucket), timeout=timeout) + + if blob is None: + return None + + custom_metadata = blob.metadata or {} + + return { + "custom_metadata": {key: json.loads(value, cls=OctueJSONDecoder) for key, value in custom_metadata.items()}, + "crc32c": blob.crc32c, + "size": blob.size, + "updated": blob.updated, + "time_created": blob.time_created, + "time_deleted": blob.time_deleted, + "custom_time": blob.custom_time, + "project_name": self.project_name, + "bucket_name": bucket_name, + "path_in_bucket": path_in_bucket, + } + def update_metadata(self, metadata, cloud_path=None, bucket_name=None, path_in_bucket=None): """Update the metadata for the given cloud file. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. @@ -142,38 +179,6 @@ def download_as_string(self, cloud_path=None, bucket_name=None, path_in_bucket=N logger.info("Downloaded %r from Google Cloud to as string.", blob.public_url) return data.decode() - def get_metadata(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): - """Get the metadata of the given file in the given bucket. Either (`bucket_name` and `path_in_bucket`) or - `cloud_path` must be provided. - - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param float timeout: - :return dict: - """ - if cloud_path: - bucket_name, path_in_bucket = split_bucket_name_from_gs_path(cloud_path) - - bucket = self.client.get_bucket(bucket_or_name=bucket_name) - blob = bucket.get_blob(blob_name=self._strip_leading_slash(path_in_bucket), timeout=timeout) - - if blob is None: - return None - - return { - "custom_metadata": blob.metadata or {}, - "crc32c": blob.crc32c, - "size": blob.size, - "updated": blob.updated, - "time_created": blob.time_created, - "time_deleted": blob.time_deleted, - "custom_time": blob.custom_time, - "project_name": self.project_name, - "bucket_name": bucket_name, - "path_in_bucket": path_in_bucket, - } - def delete(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): """Delete the given file from the given bucket. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. @@ -250,6 +255,19 @@ def _update_metadata(self, blob, metadata): :param dict metadata: :return None: """ - if metadata is not None: - blob.metadata = metadata - blob.patch() + if not metadata: + return None + + blob.metadata = self._encode_metadata(metadata) + blob.patch() + + def _encode_metadata(self, metadata): + """Encode metadata as a dictionary of JSON strings. + + :param dict metadata: + :return dict: + """ + if not isinstance(metadata, dict): + raise TypeError(f"Metadata for Google Cloud storage should be a dictionary; received {metadata!r}") + + return {key: json.dumps(value, cls=OctueJSONEncoder) for key, value in metadata.items()} diff --git a/octue/mixins/serialisable.py b/octue/mixins/serialisable.py index a6987cae9..85865077d 100644 --- a/octue/mixins/serialisable.py +++ b/octue/mixins/serialisable.py @@ -1,5 +1,6 @@ import json +from octue.utils.decoders import OctueJSONDecoder from octue.utils.encoders import OctueJSONEncoder @@ -27,7 +28,7 @@ def deserialise(cls, serialised_object, from_string=False): :return any: """ if from_string: - serialised_object = json.loads(serialised_object) + serialised_object = json.loads(serialised_object, cls=OctueJSONDecoder) return cls(**serialised_object) @@ -94,4 +95,4 @@ def __init__(self): if to_string: return string - return json.loads(string) + return json.loads(string, cls=OctueJSONDecoder) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 19423947b..a66740efb 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -207,14 +207,6 @@ def from_cloud( if not allow_overwrite: cls._check_for_attribute_conflict(custom_metadata, **kwargs) - timestamp = kwargs.get("timestamp", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__timestamp")) - - if isinstance(timestamp, str): - try: - timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f") - except ValueError: - timestamp = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f%z") - datafile.tags = ( kwargs.pop("tags", None) or TagDict( @@ -227,8 +219,8 @@ def from_cloud( ) datafile._set_id(kwargs.pop("id", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__id", ID_DEFAULT))) - datafile.timestamp = timestamp datafile.immutable_hash_value = datafile._cloud_metadata.get("crc32c", EMPTY_STRING_HASH_VALUE) + datafile.timestamp = kwargs.get("timestamp", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__timestamp")) datafile.cluster = kwargs.pop( "cluster", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__cluster", CLUSTER_DEFAULT) diff --git a/octue/utils/decoders.py b/octue/utils/decoders.py index 70d2db914..42e19bf30 100644 --- a/octue/utils/decoders.py +++ b/octue/utils/decoders.py @@ -1,28 +1,19 @@ +import json from json import JSONDecoder +import dateutil.parser -from octue.resources import Datafile, Dataset, Manifest +class OctueJSONDecoder(JSONDecoder): + """A JSON Decoder to convert default json objects into their Datafile, Dataset or Manifest classes as appropriate""" -def default_object_hook(obj): - """A hook to convert default json objects into their Datafile, Dataset or Manifest class as appropriate""" - - # object hooks are called whenever a json object is created. When nested, this is done from innermost (deepest - # nesting) out so it's safe to work at multiple levels here - - if "files" in obj: - files = [Datafile(**df) for df in obj.pop("files")] - return {**obj, "files": files} - - if "datasets" in obj: - datasets = [Dataset(**ds) for ds in obj.pop("datasets")] - return Manifest(**obj, datasets=datasets) - - return obj + def __init__(self, *args, object_hook=None, **kwargs): + json.JSONDecoder.__init__(self, object_hook=object_hook or self.object_hook, *args, **kwargs) + def object_hook(self, obj): + if "_type" not in obj: + return obj -class OctueJSONDecoder(JSONDecoder): - """A JSON Decoder to convert default json objects into their Datafile, Dataset or Manifest classes as appropriate""" + if obj["_type"] == "datetime": + return dateutil.parser.parse(obj["value"]) - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.object_hook = self.object_hook or default_object_hook + return obj diff --git a/octue/utils/encoders.py b/octue/utils/encoders.py index 59604bf27..899108543 100644 --- a/octue/utils/encoders.py +++ b/octue/utils/encoders.py @@ -13,7 +13,7 @@ def default(self, obj): return obj.serialise() if isinstance(obj, datetime.datetime): - return obj.isoformat() + return {"_type": "datetime", "value": obj.isoformat()} # Otherwise let the base class default method raise the TypeError return TwinedEncoder.default(self, obj) diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index 83c056f79..384153953 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -215,7 +215,7 @@ def test_from_cloud_with_datafile(self): self.assertEqual(downloaded_datafile.hash_value, datafile.hash_value) self.assertEqual(downloaded_datafile.cluster, datafile.cluster) self.assertEqual(downloaded_datafile.sequence, datafile.sequence) - self.assertEqual(downloaded_datafile.tags, {"sdk_version": "0.1.18", "good": "True", "how_good": "very"}) + self.assertEqual(downloaded_datafile.tags, {"sdk_version": "0.1.18", "good": True, "how_good": "very"}) self.assertEqual(downloaded_datafile.labels, datafile.labels) self.assertEqual(downloaded_datafile.size_bytes, datafile.size_bytes) self.assertTrue(isinstance(downloaded_datafile._last_modified, float)) @@ -247,7 +247,7 @@ def test_each_tag_is_stored_as_custom_metadata_entry_in_cloud(self): ) datafile.get_cloud_metadata() - self.assertEqual(datafile._cloud_metadata["custom_metadata"]["octue__good"], "True") + self.assertEqual(datafile._cloud_metadata["custom_metadata"]["octue__good"], True) self.assertEqual(datafile._cloud_metadata["custom_metadata"]["octue__how_good"], "very") def test_from_cloud_with_overwrite_when_disallowed_results_in_error(self): From 18912eaf43f5e1e38c1f2e9f00b159edb009e9d5 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 17:03:45 +0100 Subject: [PATCH 091/103] REV: Store tags in tags field of cloud metadata again --- octue/resources/datafile.py | 21 +++------------------ tests/resources/test_datafile.py | 16 +++------------- 2 files changed, 6 insertions(+), 31 deletions(-) diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index a66740efb..718547f5b 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -12,7 +12,6 @@ from octue.exceptions import AttributeConflict, CloudLocationNotSpecified, FileNotFoundException, InvalidInputException from octue.mixins import Filterable, Hashable, Identifiable, Labelable, Loggable, Pathable, Serialisable, Taggable from octue.mixins.hashable import EMPTY_STRING_HASH_VALUE -from octue.resources.tag import TagDict from octue.utils import isfile from octue.utils.time import convert_from_posix_time, convert_to_posix_time @@ -29,10 +28,6 @@ TAGS_DEFAULT = None LABELS_DEFAULT = None -NON_TAG_METADATA = { - f"{OCTUE_METADATA_NAMESPACE}__{name}" for name in ("id", "timestamp", "cluster", "sequence", "labels") -} - class Datafile(Labelable, Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashable, Filterable): """Class for representing data files on the Octue system. @@ -207,20 +202,10 @@ def from_cloud( if not allow_overwrite: cls._check_for_attribute_conflict(custom_metadata, **kwargs) - datafile.tags = ( - kwargs.pop("tags", None) - or TagDict( - { - tag_name.replace(f"{OCTUE_METADATA_NAMESPACE}__", ""): custom_metadata[tag_name] - for tag_name in custom_metadata.keys() - NON_TAG_METADATA - } - ) - or TAGS_DEFAULT - ) - datafile._set_id(kwargs.pop("id", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__id", ID_DEFAULT))) datafile.immutable_hash_value = datafile._cloud_metadata.get("crc32c", EMPTY_STRING_HASH_VALUE) datafile.timestamp = kwargs.get("timestamp", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__timestamp")) + datafile.tags = kwargs.pop("tags", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__tags", TAGS_DEFAULT)) datafile.cluster = kwargs.pop( "cluster", custom_metadata.get(f"{OCTUE_METADATA_NAMESPACE}__cluster", CLUSTER_DEFAULT) @@ -545,8 +530,8 @@ def metadata(self, use_octue_namespace=True): "timestamp": self.timestamp, "cluster": self.cluster, "sequence": self.sequence, - "labels": self.labels.serialise(to_string=True), - **self.tags, + "labels": self.labels, + "tags": self.tags, "sdk_version": pkg_resources.get_distribution("octue").version, } diff --git a/tests/resources/test_datafile.py b/tests/resources/test_datafile.py index 384153953..8ea8d135a 100644 --- a/tests/resources/test_datafile.py +++ b/tests/resources/test_datafile.py @@ -215,7 +215,7 @@ def test_from_cloud_with_datafile(self): self.assertEqual(downloaded_datafile.hash_value, datafile.hash_value) self.assertEqual(downloaded_datafile.cluster, datafile.cluster) self.assertEqual(downloaded_datafile.sequence, datafile.sequence) - self.assertEqual(downloaded_datafile.tags, {"sdk_version": "0.1.18", "good": True, "how_good": "very"}) + self.assertEqual(downloaded_datafile.tags, datafile.tags) self.assertEqual(downloaded_datafile.labels, datafile.labels) self.assertEqual(downloaded_datafile.size_bytes, datafile.size_bytes) self.assertTrue(isinstance(downloaded_datafile._last_modified, float)) @@ -239,17 +239,6 @@ def test_from_cloud_with_overwrite(self): self.assertEqual(downloaded_datafile.id, new_id) self.assertNotEqual(datafile.id, downloaded_datafile.id) - def test_each_tag_is_stored_as_custom_metadata_entry_in_cloud(self): - """Test that each tag on a datafile is stored as a separate piece of custom metadata on the Google Cloud - Storage file.""" - datafile, project_name, bucket_name, path_in_bucket, _ = self.create_datafile_in_cloud( - tags={"good": True, "how_good": "very"}, - ) - - datafile.get_cloud_metadata() - self.assertEqual(datafile._cloud_metadata["custom_metadata"]["octue__good"], True) - self.assertEqual(datafile._cloud_metadata["custom_metadata"]["octue__how_good"], "very") - def test_from_cloud_with_overwrite_when_disallowed_results_in_error(self): """Test that attempting to overwrite the attributes of a datafile instantiated from the cloud when not allowed results in an error. @@ -555,6 +544,7 @@ def test_metadata(self): "octue__timestamp", "octue__cluster", "octue__sequence", + "octue__tags", "octue__labels", "octue__sdk_version", }, @@ -562,5 +552,5 @@ def test_metadata(self): self.assertEqual( datafile.metadata(use_octue_namespace=False).keys(), - {"id", "timestamp", "cluster", "sequence", "labels", "sdk_version"}, + {"id", "timestamp", "cluster", "sequence", "tags", "labels", "sdk_version"}, ) From 97c5aab0f9074f5427c6356a17dc759cbae00822 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 18:31:36 +0100 Subject: [PATCH 092/103] REF: Rename GoogleCloudStorageClient methods; update docstrings --- octue/cloud/storage/client.py | 96 +++++++++++++++++------------------ octue/resources/datafile.py | 2 +- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/octue/cloud/storage/client.py b/octue/cloud/storage/client.py index e7fbf9227..5ee6377ff 100644 --- a/octue/cloud/storage/client.py +++ b/octue/cloud/storage/client.py @@ -62,12 +62,12 @@ def upload_file( """Upload a local file to a Google Cloud bucket at gs:///. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str local_path: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param dict metadata: - :param float timeout: + :param str local_path: path to local file + :param str|None cloud_path: full cloud path to upload file to (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket to store file in + :param str|None path_in_bucket: path to upload file to (e.g. `path/to/file.csv`) + :param dict metadata: key-value pairs to associate with the cloud file as metadata + :param float timeout: time in seconds to allow for the upload to complete :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) @@ -76,7 +76,7 @@ def upload_file( blob.crc32c = self._compute_crc32c_checksum(f.read()) blob.upload_from_filename(filename=local_path, timeout=timeout) - self._update_metadata(blob, metadata) + self._overwrite_blob_custom_metadata(blob, metadata) logger.info("Uploaded %r to Google Cloud at %r.", local_path, blob.public_url) def upload_from_string( @@ -85,29 +85,29 @@ def upload_from_string( """Upload serialised data in string form to a file in a Google Cloud bucket at gs:///. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str string: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param dict metadata: - :param float timeout: + :param str string: string to upload as file + :param str|None cloud_path: full cloud path to upload as file to (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket to store as file in + :param str|None path_in_bucket: path to upload as file to (e.g. `path/to/file.csv`) + :param dict metadata: key-value pairs to associate with the cloud file as metadata + :param float timeout: time in seconds to allow for the upload to complete :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) blob.crc32c = self._compute_crc32c_checksum(string) blob.upload_from_string(data=string, timeout=timeout) - self._update_metadata(blob, metadata) + self._overwrite_blob_custom_metadata(blob, metadata) logger.info("Uploaded data to Google Cloud at %r.", blob.public_url) def get_metadata(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT): """Get the metadata of the given file in the given bucket. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param float timeout: + :param str|None cloud_path: full cloud path to file (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket where cloud file is located + :param str|None path_in_bucket: path to cloud file (e.g. `path/to/file.csv`) + :param float timeout: time in seconds to allow for the request to complete :return dict: """ if cloud_path: @@ -134,18 +134,18 @@ def get_metadata(self, cloud_path=None, bucket_name=None, path_in_bucket=None, t "path_in_bucket": path_in_bucket, } - def update_metadata(self, metadata, cloud_path=None, bucket_name=None, path_in_bucket=None): - """Update the metadata for the given cloud file. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must - be provided. + def overwrite_custom_metadata(self, metadata, cloud_path=None, bucket_name=None, path_in_bucket=None): + """Overwrite the custom metadata for the given cloud file. Either (`bucket_name` and `path_in_bucket`) or + `cloud_path` must be provided. - :param dict metadata: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: + :param dict metadata: key-value pairs to set as the new custom metadata + :param str|None cloud_path: full cloud path to file (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket where cloud file is located + :param str|None path_in_bucket: path to cloud file (e.g. `path/to/file.csv`) :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) - self._update_metadata(blob, metadata) + self._overwrite_blob_custom_metadata(blob, metadata) def download_to_file( self, local_path, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout=_DEFAULT_TIMEOUT @@ -153,11 +153,11 @@ def download_to_file( """Download a file to a file from a Google Cloud bucket at gs:///. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str local_path: - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param float timeout: + :param str local_path: path to download to + :param str|None cloud_path: full cloud path to download from (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket cloud file is stored in + :param str|None path_in_bucket: path to download from (e.g. `path/to/file.csv`) + :param float timeout: time in seconds to allow for the download to complete :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) @@ -168,10 +168,10 @@ def download_as_string(self, cloud_path=None, bucket_name=None, path_in_bucket=N """Download a file to a string from a Google Cloud bucket at gs:///. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param float timeout: + :param str|None cloud_path: full cloud path to download from (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket cloud file is stored in + :param str|None path_in_bucket: path to download from (e.g. `path/to/file.csv`) + :param float timeout: time in seconds to allow for the download to complete :return str: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) @@ -183,10 +183,10 @@ def delete(self, cloud_path=None, bucket_name=None, path_in_bucket=None, timeout """Delete the given file from the given bucket. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None path_in_bucket: - :param float timeout: + :param str|None cloud_path: full cloud path to file to delete (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket cloud file is stored in + :param str|None path_in_bucket: path to file to delete (e.g. `path/to/file.csv`) + :param float timeout: time in seconds to allow for the request to complete :return None: """ blob = self._blob(cloud_path, bucket_name, path_in_bucket) @@ -197,11 +197,11 @@ def scandir(self, cloud_path=None, bucket_name=None, directory_path=None, filter """Yield the blobs belonging to the given "directory" in the given bucket. Either (`bucket_name` and `path_in_bucket`) or `cloud_path` must be provided. - :param str|None cloud_path: - :param str|None bucket_name: - :param str|None directory_path: - :param callable filter: - :param float timeout: + :param str|None cloud_path: full cloud path of directory to scan (e.g. `gs://bucket_name/path/to/file.csv`) + :param str|None bucket_name: name of bucket cloud directory is located in + :param str|None directory_path: path of cloud directory to scan (e.g. `path/to/file.csv`) + :param callable filter: blob filter to constrain the yielded results + :param float timeout: time in seconds to allow for the request to complete :yield google.cloud.storage.blob.Blob: """ if cloud_path: @@ -248,11 +248,11 @@ def _compute_crc32c_checksum(self, string): checksum = Checksum(string.encode()) return base64.b64encode(checksum.digest()).decode("utf-8") - def _update_metadata(self, blob, metadata): - """Update the metadata for the given blob. Note that this is synced up with Google Cloud. + def _overwrite_blob_custom_metadata(self, blob, metadata): + """Overwrite the custom metadata for the given blob. Note that this is synced up with Google Cloud. - :param google.cloud.storage.blob.Blob blob: - :param dict metadata: + :param google.cloud.storage.blob.Blob blob: Google Cloud Storage blob to update + :param dict metadata: key-value pairs of metadata to overwrite the blob's metadata with :return None: """ if not metadata: diff --git a/octue/resources/datafile.py b/octue/resources/datafile.py index 718547f5b..358c8c13f 100644 --- a/octue/resources/datafile.py +++ b/octue/resources/datafile.py @@ -308,7 +308,7 @@ def update_cloud_metadata(self, project_name=None, cloud_path=None, bucket_name= project_name, cloud_path, bucket_name, path_in_bucket ) - GoogleCloudStorageClient(project_name=project_name).update_metadata( + GoogleCloudStorageClient(project_name=project_name).overwrite_custom_metadata( metadata=self.metadata(), bucket_name=bucket_name, path_in_bucket=path_in_bucket, From d1f26c9841f4fe558d1874ceecd343cba8937520 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 20:04:43 +0100 Subject: [PATCH 093/103] DOC: Update filter container docstrings --- octue/resources/filter_containers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index bdeea7dc4..f99611770 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -10,7 +10,7 @@ class FilterContainer(ABC): def filter(self, ignore_items_without_attribute=True, **kwargs): """Return a new instance containing only the `Filterable`s to which the given filter criteria are `True`. - :param bool ignore_items_without_attribute: + :param bool ignore_items_without_attribute: if True, just ignore any members of the container without a filtered-for attribute rather than raising an error :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the values to filter for :return octue.resources.filter_containers.FilterContainer: """ @@ -38,8 +38,8 @@ def order_by(self, attribute_name, reverse=False): """Order the `Filterable`s in the container by an attribute with the given name, returning them as a new `FilterList` regardless of the type of filter container begun with. - :param str attribute_name: - :param bool reverse: + :param str attribute_name: name of attribute (optionally nested) to order by e.g. "a", "a.b", "a.b.c" + :param bool reverse: if True, reverse the ordering :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the container's members :return FilterList: """ @@ -54,7 +54,7 @@ def one(self, **kwargs): """If a single result exists for the given filters, return it. Otherwise, raise an error. :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the values to filter for - :raise UnexpectedNumberOfResultsException: if zero or more than one results satisfy the filters + :raise octue.exceptions.UnexpectedNumberOfResultsException: if zero or more than one results satisfy the filters :return octue.resources.mixins.filterable.Filterable: """ results = self.filter(**kwargs) @@ -84,7 +84,7 @@ def filter(self, ignore_items_without_attribute=True, **kwargs): """Return a new instance containing only the Filterables for which the given filter criteria apply are satisfied. - :param bool ignore_items_without_attribute: + :param bool ignore_items_without_attribute: if True, just ignore any members of the container without a filtered-for attribute rather than raising an error :param {str: any} kwargs: keyword arguments whose keys are the name of the filter and whose values are the values to filter for :return FilterDict: """ @@ -111,8 +111,8 @@ def filter(self, ignore_items_without_attribute=True, **kwargs): def order_by(self, attribute_name, reverse=False): """Order the instance by the given attribute_name, returning the instance's elements as a new FilterList. - :param str attribute_name: a dot-separated (optionally nested) attribute name e.g. "a", "a.b", "a.b.c" - :param bool reverse: + :param str attribute_name: name of attribute (optionally nested) to order by e.g. "a", "a.b", "a.b.c" + :param bool reverse: if True, reverse the ordering :raise octue.exceptions.InvalidInputException: if an attribute with the given name doesn't exist on any of the FilterDict's values :return FilterList: """ From af3a2ac44ffbc19ded698a7045e0486fa20c3266 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 20:06:49 +0100 Subject: [PATCH 094/103] FIX: Allow ordering by nested attributes in other FilterContainers --- octue/resources/filter_containers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/octue/resources/filter_containers.py b/octue/resources/filter_containers.py index f99611770..194c020da 100644 --- a/octue/resources/filter_containers.py +++ b/octue/resources/filter_containers.py @@ -44,7 +44,10 @@ def order_by(self, attribute_name, reverse=False): :return FilterList: """ try: - return FilterList(sorted(self, key=lambda item: getattr(item, attribute_name), reverse=reverse)) + return FilterList( + sorted(self, key=lambda item: get_nested_attribute(item, attribute_name), reverse=reverse) + ) + except AttributeError: raise exceptions.InvalidInputException( f"An attribute named {attribute_name!r} does not exist on one or more members of {self!r}." From cd90140b66857ef01d1a0aa143ac1cd199c23081 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 20:08:05 +0100 Subject: [PATCH 095/103] REF: Refactor Dataset.get_file_by_label --- octue/resources/dataset.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/octue/resources/dataset.py b/octue/resources/dataset.py index 682097600..cdf7d0583 100644 --- a/octue/resources/dataset.py +++ b/octue/resources/dataset.py @@ -220,12 +220,11 @@ def get_file_sequence(self, strict=True, **kwargs): return results - def get_file_by_label(self, label_string): - """Get a single datafile from a dataset by searching for files with the provided label(s). + def get_file_by_label(self, label): + """Get a single datafile from a dataset by filtering for files with the provided label. - Gets exclusively one file; if no file or more than one file is found this results in an error. - - :param str label_string: + :param str label: the label to filter for + :raise octue.exceptions.UnexpectedNumberOfResultsException: if zero or more than one results satisfy the filters :return octue.resources.datafile.DataFile: """ - return self.files.one(labels__contains=label_string) + return self.files.one(labels__contains=label) From 87e0f2b6377bbe11059c1b8dc97a3640adeda15d Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 20:17:12 +0100 Subject: [PATCH 096/103] IMP: Allow UserStrings to be JSON-encoded by default --- octue/resources/label.py | 6 +----- octue/utils/encoders.py | 6 +++++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/octue/resources/label.py b/octue/resources/label.py index 782d240b7..fa0587526 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -14,16 +14,12 @@ class Label(UserString): """A label starts and ends with a character in [A-Za-z0-9] and can contain hyphens e.g. angry-marmaduke - :param str name: + :param str name: the text of the label :return None: """ def __init__(self, name): super().__init__(self._clean(name)) - self.name = self.data - - def serialise(self): - return self.name @staticmethod def _clean(name): diff --git a/octue/utils/encoders.py b/octue/utils/encoders.py index 899108543..c7e562b2d 100644 --- a/octue/utils/encoders.py +++ b/octue/utils/encoders.py @@ -1,10 +1,11 @@ import datetime +from collections import UserString from twined.utils import TwinedEncoder class OctueJSONEncoder(TwinedEncoder): - """A JSON Encoder which allows objects having a `serialise()` method to control their own conversion to primitives""" + """A JSON Encoder which allows objects having a `serialise()` method to control their own conversion to primitives.""" def default(self, obj): @@ -12,6 +13,9 @@ def default(self, obj): if hasattr(obj, "serialise"): return obj.serialise() + if isinstance(obj, UserString): + return str(obj) + if isinstance(obj, datetime.datetime): return {"_type": "datetime", "value": obj.isoformat()} From 52355b2e7bec17cd6f4b931ba4d36c5720292820 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 21:14:44 +0100 Subject: [PATCH 097/103] IMP: Add set serialisation to en/decoders --- octue/utils/decoders.py | 3 +++ octue/utils/encoders.py | 4 ++++ tests/resources/test_label.py | 19 +++++++++++++++---- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/octue/utils/decoders.py b/octue/utils/decoders.py index 42e19bf30..35ecfc9ec 100644 --- a/octue/utils/decoders.py +++ b/octue/utils/decoders.py @@ -13,6 +13,9 @@ def object_hook(self, obj): if "_type" not in obj: return obj + if obj["_type"] == "set": + return set(obj["items"]) + if obj["_type"] == "datetime": return dateutil.parser.parse(obj["value"]) diff --git a/octue/utils/encoders.py b/octue/utils/encoders.py index c7e562b2d..f263839ce 100644 --- a/octue/utils/encoders.py +++ b/octue/utils/encoders.py @@ -13,6 +13,10 @@ def default(self, obj): if hasattr(obj, "serialise"): return obj.serialise() + # Serialise sets as sorted list (JSON doesn't support sets). + if isinstance(obj, set): + return {"_type": "set", "items": sorted(obj)} + if isinstance(obj, UserString): return str(obj) diff --git a/tests/resources/test_label.py b/tests/resources/test_label.py index ab03bed7e..544e3c72b 100644 --- a/tests/resources/test_label.py +++ b/tests/resources/test_label.py @@ -1,6 +1,10 @@ +import json + from octue import exceptions from octue.resources.filter_containers import FilterSet from octue.resources.label import Label, LabelSet +from octue.utils.decoders import OctueJSONDecoder +from octue.utils.encoders import OctueJSONEncoder from tests.base import BaseTestCase @@ -102,6 +106,7 @@ def test_instantiation_from_label_set(self): def test_equality(self): """ Ensure two LabelSets with the same labels compare equal. """ self.assertTrue(self.LABEL_SET == LabelSet(labels="a b-c d-e-f")) + self.assertTrue(self.LABEL_SET == {"a", "b-c", "d-e-f"}) def test_inequality(self): """ Ensure two LabelSets with different labels compare unequal. """ @@ -152,17 +157,23 @@ def test_any_label_ends_swith(self): def test_serialise(self): """Ensure that LabelSets serialise to a list.""" - self.assertEqual(self.LABEL_SET.serialise(), ["a", "b-c", "d-e-f"]) + self.assertEqual( + json.dumps(self.LABEL_SET, cls=OctueJSONEncoder), + json.dumps({"_type": "set", "items": ["a", "b-c", "d-e-f"]}), + ) def test_serialise_orders_labels(self): """Ensure that serialising a LabelSet results in a sorted list.""" label_set = LabelSet("z hello a c-no") - self.assertEqual(label_set.serialise(), ["a", "c-no", "hello", "z"]) + self.assertEqual( + json.dumps(label_set, cls=OctueJSONEncoder), + json.dumps({"_type": "set", "items": ["a", "c-no", "hello", "z"]}), + ) def test_deserialise(self): """Test that serialisation is reversible.""" - serialised_label_set = self.LABEL_SET.serialise() - deserialised_label_set = LabelSet.deserialise(serialised_label_set) + serialised_label_set = json.dumps(self.LABEL_SET, cls=OctueJSONEncoder) + deserialised_label_set = LabelSet(json.loads(serialised_label_set, cls=OctueJSONDecoder)) self.assertEqual(deserialised_label_set, self.LABEL_SET) def test_repr(self): From 1793d1e9076ee4ca26e4c3651a097ea811ed3fd8 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 21:17:13 +0100 Subject: [PATCH 098/103] REF: Remove unnecessary methods from LabelSet --- octue/resources/label.py | 41 ---------------------------------------- 1 file changed, 41 deletions(-) diff --git a/octue/resources/label.py b/octue/resources/label.py index fa0587526..1dfc01dd8 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -1,11 +1,9 @@ import json import re from collections import UserString -from collections.abc import Iterable from octue.exceptions import InvalidLabelException from octue.resources.filter_containers import FilterSet -from octue.utils.encoders import OctueJSONEncoder LABEL_PATTERN = re.compile(r"^[a-z0-9][a-z0-9-]*(? Date: Wed, 2 Jun 2021 21:45:22 +0100 Subject: [PATCH 099/103] DOC: Document label module --- octue/resources/label.py | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/octue/resources/label.py b/octue/resources/label.py index 1dfc01dd8..9f99820f7 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -21,7 +21,11 @@ def __init__(self, name): @staticmethod def _clean(name): - """Ensure the label name is a string and conforms to the label regex pattern.""" + """Clean the label name, making sure it is a string and conforms to the label regex pattern. + + :param str name: name of the label to check and clean + :return str: + """ if not isinstance(name, str): raise InvalidLabelException("Labels must be expressed as a string.") @@ -37,7 +41,11 @@ def _clean(name): class LabelSet(set): - """Class to handle a set of labels as a string.""" + """Class to handle a set of labels. + + :param iter(str|Label)|LabelSet labels: labels to add to the set + :return None: + """ def __init__(self, labels=None): # TODO Call the superclass with *args and **kwargs, then update everything to using ResourceBase @@ -65,17 +73,33 @@ def __init__(self, labels=None): super().__init__(labels) def add_labels(self, *args): - """Adds one or more new label strings to the object labels. New labels will be cleaned and validated.""" + """Add one or more new label strings to the object labels. New labels will be cleaned and validated. + + :param str *labels: a variable number of string labels + :return None: + """ self.update({Label(arg) for arg in args}) def any_label_starts_with(self, value): - """ Implement a startswith method that returns true if any of the labels starts with value """ + """Return `True` if any of the labels starts with the value. + + :param str value: value to check + :return bool: + """ return any(label.startswith(value) for label in self) def any_label_ends_with(self, value): - """ Implement an endswith method that returns true if any of the labels endswith value. """ + """Return `True` if any of the labels ends with the value. + + :param str value: value to check + :return bool: + """ return any(label.endswith(value) for label in self) def any_label_contains(self, value): - """ Return True if any of the labels contains value. """ + """Return `True` if any of the labels contains the value. + + :param str value: value to check + :return bool: + """ return any(value in label for label in self) From 8c5bdfa1e3346a8dce18f257a22c4867decf4681 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 21:46:20 +0100 Subject: [PATCH 100/103] REF: Remove method from TagDict; document methods --- octue/resources/tag.py | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/octue/resources/tag.py b/octue/resources/tag.py index f342f4e4f..27470874c 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -1,31 +1,29 @@ -import json import re from collections import UserDict from octue.exceptions import InvalidTagException from octue.mixins import Serialisable -from octue.utils.encoders import OctueJSONEncoder TAG_NAME_PATTERN = re.compile(r"^[a-z0-9][a-z0-9_]*(? Date: Wed, 2 Jun 2021 22:05:39 +0100 Subject: [PATCH 101/103] FIX: Restore required method --- octue/resources/tag.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/octue/resources/tag.py b/octue/resources/tag.py index 27470874c..d8ce473d3 100644 --- a/octue/resources/tag.py +++ b/octue/resources/tag.py @@ -1,8 +1,10 @@ +import json import re from collections import UserDict from octue.exceptions import InvalidTagException from octue.mixins import Serialisable +from octue.utils.encoders import OctueJSONEncoder TAG_NAME_PATTERN = re.compile(r"^[a-z0-9][a-z0-9_]*(? Date: Wed, 2 Jun 2021 22:11:07 +0100 Subject: [PATCH 102/103] REF: Rename add_labels method and add `add` method to Label --- octue/mixins/labelable.py | 2 +- octue/resources/label.py | 12 ++++++++++-- tests/resources/test_label.py | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/octue/mixins/labelable.py b/octue/mixins/labelable.py index b50500dbd..54ee9dc0a 100644 --- a/octue/mixins/labelable.py +++ b/octue/mixins/labelable.py @@ -10,7 +10,7 @@ def __init__(self, *args, labels=None, **kwargs): def add_labels(self, *args): """Add one or more new labels to the object. New labels will be cleaned and validated.""" - self.labels.add_labels(*args) + self.labels.update(*args) @property def labels(self): diff --git a/octue/resources/label.py b/octue/resources/label.py index 9f99820f7..986562994 100644 --- a/octue/resources/label.py +++ b/octue/resources/label.py @@ -72,13 +72,21 @@ def __init__(self, labels=None): super().__init__(labels) - def add_labels(self, *args): + def add(self, label): + """Add a label string to the set. + + :param str label: the label to add + :return None: + """ + super().add(Label(label)) + + def update(self, *labels): """Add one or more new label strings to the object labels. New labels will be cleaned and validated. :param str *labels: a variable number of string labels :return None: """ - self.update({Label(arg) for arg in args}) + super().update({Label(label) for label in labels}) def any_label_starts_with(self, value): """Return `True` if any of the labels starts with the value. diff --git a/tests/resources/test_label.py b/tests/resources/test_label.py index 544e3c72b..5c32e14a1 100644 --- a/tests/resources/test_label.py +++ b/tests/resources/test_label.py @@ -139,6 +139,24 @@ def test_contains_only_matches_full_labels(self): for label in "b", "c", "d", "e", "f": self.assertFalse(label in self.LABEL_SET) + def test_add(self): + """Test that the add method adds a valid label but raises an error for an invalid label.""" + label_set = LabelSet({"a", "b"}) + label_set.add("c") + self.assertEqual(label_set, {"a", "b", "c"}) + + with self.assertRaises(exceptions.InvalidLabelException): + label_set.add("d_") + + def test_update(self): + """Test that the update method adds valid labels but raises an error for invalid labels.""" + label_set = LabelSet({"a", "b"}) + label_set.update("c", "d") + self.assertEqual(label_set, {"a", "b", "c", "d"}) + + with self.assertRaises(exceptions.InvalidLabelException): + label_set.update("e", "f_") + def test_any_label_starts_with(self): """ Ensure starts_with only checks the starts of labels, and doesn't check the starts of sublabels. """ for label in "a", "b", "d": From bd726c651f7e0981eba231df35b5781c40a7ee51 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Wed, 2 Jun 2021 22:35:32 +0100 Subject: [PATCH 103/103] DOC: Add new filters to filters documentation skip_ci_tests --- docs/source/filter_containers.rst | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/source/filter_containers.rst b/docs/source/filter_containers.rst index 59f988c6f..badcfa70e 100644 --- a/docs/source/filter_containers.rst +++ b/docs/source/filter_containers.rst @@ -85,6 +85,8 @@ These filters are currently available for the following types: * ``not_starts_with`` * ``ends_with`` * ``not_ends_with`` + * ``in_range`` + * ``not_in_range`` - ``NoneType``: @@ -106,6 +108,38 @@ These filters are currently available for the following types: * ``any_label_ends_with`` * ``not_any_label_ends_with`` +- ``datetime.datetime``: + * ``is`` + * ``is_not`` + * ``equals`` + * ``not_equals`` + * ``lt`` (less than) + * ``lte`` (less than or equal) + * ``gt`` (greater than) + * ``gte`` (greater than or equal) + * ``in_range`` + * ``not_in_range`` + * ``year_equals`` + * ``year_in`` + * ``month_equals`` + * ``month_in`` + * ``day_equals`` + * ``day_in`` + * ``weekday_equals`` + * ``weekday_in`` + * ``iso_weekday_equals`` + * ``iso_weekday_in`` + * ``time_equals`` + * ``time_in`` + * ``hour_equals`` + * ``hour_in`` + * ``minute_equals`` + * ``minute_in`` + * ``second_equals`` + * ``second_in`` + * ``in_date_range`` + * ``in_time_range`` + Additionally, these filters are defined for the following *interfaces* (duck-types). : @@ -119,6 +153,8 @@ Additionally, these filters are defined for the following *interfaces* (duck-typ * ``lte`` * ``gt`` * ``gte`` + * ``in_range`` + * ``not_in_range`` - Iterables: