From e5fa9c423cad59d44430cfccc15da77ce6d8bd82 Mon Sep 17 00:00:00 2001 From: Rohan McGovern Date: Wed, 14 Aug 2019 16:50:37 -0400 Subject: [PATCH 1/2] Fix inconsistent handling of bad criteria With the real client, if you'd passed a bad criteria object, you'd immediately get an exception. On the fake client, you'd rather get a future resolved with an exception. It's best to be consistent between the two clients, so let's adjust the fake client to also raise immediately. We can share the real client code to do this. --- CHANGELOG.md | 5 +++++ pubtools/pulplib/_impl/fake/client.py | 8 ++++++++ pubtools/pulplib/_impl/fake/match.py | 2 -- tests/fake/test_fake_search.py | 10 +++++++--- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d67bb59d..9447c59a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Introduced ``Repository.is_temporary`` attribute +### Fixed +- Fixed inconsistency between real and fake clients: both clients now immediately raise + if a search is attempted with invalid criteria. Previously, the fake client would + instead return a failed `Future`. + ## [1.2.1] - 2019-08-12 ### Fixed diff --git a/pubtools/pulplib/_impl/fake/client.py b/pubtools/pulplib/_impl/fake/client.py index 34ab5755..d43943de 100644 --- a/pubtools/pulplib/_impl/fake/client.py +++ b/pubtools/pulplib/_impl/fake/client.py @@ -9,6 +9,7 @@ from more_executors.futures import f_return, f_return_error, f_flat_map from pubtools.pulplib import Page, PulpException, Criteria, Task +from pubtools.pulplib._impl.client.search import filters_for_criteria from .. import compat_attr as attr from .match import match_object @@ -39,6 +40,13 @@ def search_repository(self, criteria=None): criteria = criteria or Criteria.true() repos = [] + # Pass the criteria through the code used by the real client to build + # up the Pulp query. We don't actually *use* the resulting query since + # we're not accessing a real Pulp server. The point is to ensure the + # same validation and error behavior as used by the real client also + # applies to the fake. + filters_for_criteria(criteria) + try: for repo in self._repositories: if match_object(criteria, repo): diff --git a/pubtools/pulplib/_impl/fake/match.py b/pubtools/pulplib/_impl/fake/match.py index 4e5e331f..79b53395 100644 --- a/pubtools/pulplib/_impl/fake/match.py +++ b/pubtools/pulplib/_impl/fake/match.py @@ -33,8 +33,6 @@ def match_object(*args, **kwargs): if isinstance(dispatch, klass): return func(*args, **kwargs) - raise TypeError("Unsupported criteria/matcher: %s" % repr(dispatch)) - @visit(TrueCriteria) def match_true(*_): diff --git a/tests/fake/test_fake_search.py b/tests/fake/test_fake_search.py index d95e9788..3c3ef7f7 100644 --- a/tests/fake/test_fake_search.py +++ b/tests/fake/test_fake_search.py @@ -1,5 +1,7 @@ import datetime +import pytest + from pubtools.pulplib import FakeController, Repository, Criteria, Matcher @@ -169,9 +171,11 @@ def test_search_bad_criteria(): controller.insert_repository(repo1) client = controller.client - assert "Unsupported criteria" in str( - client.search_repository("not a valid criteria").exception() - ) + + with pytest.raises(Exception) as exc: + client.search_repository("not a valid criteria") + + assert "Not a criteria" in str(exc.value) def test_search_created_timestamp(): From 7df1023caea200e4452c07b3a25a6c8474df8404 Mon Sep 17 00:00:00 2001 From: Rohan McGovern Date: Wed, 14 Aug 2019 16:43:08 -0400 Subject: [PATCH 2/2] Allow searching on model fields rather than (only) raw Pulp fields Previously, it was necessary always to use Pulp field names in repo searches. This was awkward, as it means the caller needs to know and deal with both the Pulp and the model field names for the same piece of data. For example, to find Repository objects with eng_product_id=123, the caller would have to know that this field is stored as a string under "notes.eng_product", thus the model wouldn't fully encapsulate the Pulp implementation details. Improve this by making it possible to provide the model field names to searches. If the caller wants to find repositories with (for example) eng_product_id=123, they can now directly search with that field name and value. This should be preferred, as the caller no longer has to know exactly how a field has been stored within Pulp. However, since it's not usable under all circumstances, search on raw Pulp fields will remain supported. --- CHANGELOG.md | 4 +- docs/api/searching.rst | 29 +++++++++ pubtools/pulplib/_impl/client/client.py | 2 +- pubtools/pulplib/_impl/client/search.py | 45 +++++++++++++- pubtools/pulplib/_impl/compat_attr.py | 1 + pubtools/pulplib/_impl/criteria.py | 23 ++++++- pubtools/pulplib/_impl/fake/client.py | 4 +- pubtools/pulplib/_impl/fake/match.py | 35 +++++++++-- pubtools/pulplib/_impl/model/attr.py | 10 ++- .../pulplib/_impl/model/repository/base.py | 7 ++- tests/client/test_search_repo_conversions.py | 61 +++++++++++++++++++ tests/fake/test_fake_search.py | 60 ++++++++++++++++++ 12 files changed, 268 insertions(+), 13 deletions(-) create mode 100644 tests/client/test_search_repo_conversions.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9447c59a..662dc6a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Introduced ``Repository.is_temporary`` attribute +- Introduced ``Repository.is_temporary`` attribute. +- Extended search functionality; it is now possible to search using fields defined + on the `PulpObject` classes. Searching on raw Pulp fields remains supported. ### Fixed - Fixed inconsistency between real and fake clients: both clients now immediately raise diff --git a/docs/api/searching.rst b/docs/api/searching.rst index bb8f3bfa..9da8e01a 100644 --- a/docs/api/searching.rst +++ b/docs/api/searching.rst @@ -1,6 +1,35 @@ Searching ========= +.. _model_fields: + +Model vs Pulp fields +.................... + +.. versionadded:: 1.3.0 + +The :class:`~pubtools.pulplib.Criteria` and :class:`~pubtools.pulplib.Matcher` +classes are able to operate on two types of fields: + +- Model fields: fields documented on the :class:`~pubtools.pulplib.PulpObject` + class hierarchy. ``eng_product_id`` from the + :class:`~pubtools.pulplib.Repository` class is an example of a model field. + +- Pulp fields: any arbitrary fields within the Pulp 2.x database. + ``notes.eng_product`` is an example of a Pulp field. + +Generally, searching on model fields should be preferred when possible, +as this allows your code to avoid a dependency on Pulp implementation details +and allows you to use the same field names everywhere. + +However, not all model fields support this, as not every model field has +a direct mapping with a Pulp field. Attempting to search on an unsupported +model field will raise an exception. + + +Class reference +............... + .. autoclass:: pubtools.pulplib.Criteria :members: diff --git a/pubtools/pulplib/_impl/client/client.py b/pubtools/pulplib/_impl/client/client.py index 5c65e616..3de9e083 100644 --- a/pubtools/pulplib/_impl/client/client.py +++ b/pubtools/pulplib/_impl/client/client.py @@ -168,7 +168,7 @@ def search_repository(self, criteria=None): pulp_crit = { "skip": 0, "limit": self._PAGE_SIZE, - "filters": filters_for_criteria(criteria), + "filters": filters_for_criteria(criteria, Repository), } search = {"criteria": pulp_crit, "distributors": True} diff --git a/pubtools/pulplib/_impl/client/search.py b/pubtools/pulplib/_impl/client/search.py index 335fed0c..ab84e92b 100644 --- a/pubtools/pulplib/_impl/client/search.py +++ b/pubtools/pulplib/_impl/client/search.py @@ -9,10 +9,51 @@ ExistsMatcher, ) +from pubtools.pulplib._impl import compat_attr as attr +from pubtools.pulplib._impl.model.attr import PULP2_FIELD, PY_PULP2_CONVERTER -def filters_for_criteria(criteria): + +def all_subclasses(klass): + out = set() + out.add(klass) + for subclass in klass.__subclasses__(): + out.update(all_subclasses(subclass)) + return out + + +def map_field_for_type(field_name, matcher, type_hint): + if not type_hint: + return (field_name, matcher) + + attrs_classes = all_subclasses(type_hint) + attrs_classes = [cls for cls in attrs_classes if attr.has(cls)] + for klass in attrs_classes: + # Does the class have this field? + klass_fields = attr.fields(klass) + if not hasattr(klass_fields, field_name): + continue + field = getattr(klass_fields, field_name) + metadata = field.metadata + if PULP2_FIELD in metadata: + field_name = metadata[PULP2_FIELD] + converter = metadata.get(PY_PULP2_CONVERTER, lambda x: x) + return (field_name, matcher._map(converter) if matcher else None) + + # Field was found on the model, but we don't support mapping it to + # a Pulp field. + raise NotImplementedError("Searching on field %s is not supported" % field_name) + + # No match => no change, search exactly what was requested + return (field_name, matcher) + + +def filters_for_criteria(criteria, type_hint=None): # convert a Criteria object to a filters dict as used in the Pulp 2.x API: # https://docs.pulpproject.org/dev-guide/conventions/criteria.html#search-criteria + # + # type_hint optionally provides the class expected to be found by this search. + # This can impact the application of certain criteria, e.g. it will affect + # field mappings looked up by FieldMatchCriteria. if criteria is None or isinstance(criteria, TrueCriteria): return {} @@ -26,6 +67,8 @@ def filters_for_criteria(criteria): field = criteria._field matcher = criteria._matcher + field, matcher = map_field_for_type(field, matcher, type_hint) + return {field: field_match(matcher)} raise TypeError("Not a criteria: %s" % repr(criteria)) diff --git a/pubtools/pulplib/_impl/compat_attr.py b/pubtools/pulplib/_impl/compat_attr.py index 93c096e8..988df9ed 100644 --- a/pubtools/pulplib/_impl/compat_attr.py +++ b/pubtools/pulplib/_impl/compat_attr.py @@ -46,3 +46,4 @@ def fields_dict(cls): Factory = attr.Factory fields = attr.fields evolve = attr.evolve +has = attr.has diff --git a/pubtools/pulplib/_impl/criteria.py b/pubtools/pulplib/_impl/criteria.py index b60ffb36..8eeb3886 100644 --- a/pubtools/pulplib/_impl/criteria.py +++ b/pubtools/pulplib/_impl/criteria.py @@ -65,8 +65,12 @@ def with_field(cls, field_name, field_value): field_name (str) The name of a field. - Field names may contain a "." to indicate nested fields, - such as ``notes.created``. + Supported field names include both model fields + and Pulp fields. See :ref:`model_fields` for information + about these two types of fields. + + When Pulp fields are used, field names may contain a "." to + indicate nesting, such as ``notes.created``. field_value :class:`Matcher` @@ -215,6 +219,12 @@ def in_(cls, values): """ return InMatcher(values) + def _map(self, _fn): + # Internal-only: return self with matched value mapped through + # the given function. Intended to be overridden in subclasses + # to support field conversions between Pulp and Python. + return self + @attr.s class RegexMatcher(Matcher): @@ -224,11 +234,17 @@ class RegexMatcher(Matcher): def _check_pattern(self, _, pattern): re.compile(pattern) + # Note: regex matcher does not implement _map since regex is defined only + # in terms of strings, there are no meaningful conversions. + @attr.s class EqMatcher(Matcher): _value = attr.ib() + def _map(self, fn): + return attr.evolve(self, value=fn(self._value)) + @attr.s class InMatcher(Matcher): @@ -240,6 +256,9 @@ def _check_values(self, _, values): return raise ValueError("Must be an iterable: %s" % repr(values)) + def _map(self, fn): + return attr.evolve(self, values=[fn(x) for x in self._values]) + @attr.s class ExistsMatcher(Matcher): diff --git a/pubtools/pulplib/_impl/fake/client.py b/pubtools/pulplib/_impl/fake/client.py index d43943de..ae44d8cb 100644 --- a/pubtools/pulplib/_impl/fake/client.py +++ b/pubtools/pulplib/_impl/fake/client.py @@ -8,7 +8,7 @@ import six from more_executors.futures import f_return, f_return_error, f_flat_map -from pubtools.pulplib import Page, PulpException, Criteria, Task +from pubtools.pulplib import Page, PulpException, Criteria, Task, Repository from pubtools.pulplib._impl.client.search import filters_for_criteria from .. import compat_attr as attr @@ -45,7 +45,7 @@ def search_repository(self, criteria=None): # we're not accessing a real Pulp server. The point is to ensure the # same validation and error behavior as used by the real client also # applies to the fake. - filters_for_criteria(criteria) + filters_for_criteria(criteria, Repository) try: for repo in self._repositories: diff --git a/pubtools/pulplib/_impl/fake/match.py b/pubtools/pulplib/_impl/fake/match.py index 79b53395..dc2190fb 100644 --- a/pubtools/pulplib/_impl/fake/match.py +++ b/pubtools/pulplib/_impl/fake/match.py @@ -3,6 +3,7 @@ from pubtools.pulplib._impl import compat_attr as attr from pubtools.pulplib._impl.client.errors import PulpException +from pubtools.pulplib._impl.client.search import map_field_for_type from pubtools.pulplib._impl.criteria import ( TrueCriteria, AndCriteria, @@ -34,6 +35,32 @@ def match_object(*args, **kwargs): return func(*args, **kwargs) +def get_field(field, obj): + # Obtain a named field from a model object; + # 'field' may be either a field name used in Pulp or a field name used + # by our model. + + # Determine whether this field name refers to a field on the model. + # Note that we don't care about conversion on the matcher here because: + # - If it's a field on the model, no conversion is needed since we already + # are storing plain objects from the model + # - If it's a Pulp field, conversion will be handled in pulp_value + mapped_field, _ = map_field_for_type(field, matcher=None, type_hint=obj.__class__) + + # Are we looking for a field on our model, or a raw Pulp field? + using_model_field = mapped_field is not field + + if using_model_field: + # If matching a field on the model, we can simply grab and compare + # the attribute directly. + return getattr(obj, field, ABSENT) + + # Otherwise, the user passed a Pulp field name (e.g. notes.eng_product_id). + # Then we delegate to pulp_value, which can look up the corresponding model + # field and do conversions. + return pulp_value(field, obj) + + @visit(TrueCriteria) def match_true(*_): return True @@ -70,13 +97,13 @@ def match_field(criteria, obj): @visit(EqMatcher) def match_field_eq(matcher, field, obj): - value = pulp_value(field, obj) + value = get_field(field, obj) return value == matcher._value @visit(RegexMatcher) def match_field_regex(matcher, field, obj): - value = pulp_value(field, obj) + value = get_field(field, obj) if value is ABSENT: return False return re.search(matcher._pattern, value) @@ -84,13 +111,13 @@ def match_field_regex(matcher, field, obj): @visit(ExistsMatcher) def match_field_exists(_matcher, field, obj): - value = pulp_value(field, obj) + value = get_field(field, obj) return value is not ABSENT @visit(InMatcher) def match_in(matcher, field, obj): - value = pulp_value(field, obj) + value = get_field(field, obj) for elem in matcher._values: if elem == value: return True diff --git a/pubtools/pulplib/_impl/model/attr.py b/pubtools/pulplib/_impl/model/attr.py index 2ab00e71..c4dae2ec 100644 --- a/pubtools/pulplib/_impl/model/attr.py +++ b/pubtools/pulplib/_impl/model/attr.py @@ -13,9 +13,14 @@ # from_data methods. PULP2_PY_CONVERTER = "_pubtools.pulplib.pulp2_to_py_converter" +# Inverse of the above: converter for Python value into Pulp2 value. +PY_PULP2_CONVERTER = "_pubtools.pulplib.py_to_pulp2_converter" + # make usage of the above less ugly -def pulp_attrib(pulp_field=None, pulp_py_converter=None, **kwargs): +def pulp_attrib( + pulp_field=None, pulp_py_converter=None, py_pulp_converter=None, **kwargs +): metadata = kwargs.get("metadata") or {} if pulp_field: @@ -24,5 +29,8 @@ def pulp_attrib(pulp_field=None, pulp_py_converter=None, **kwargs): if pulp_py_converter: metadata[PULP2_PY_CONVERTER] = pulp_py_converter + if py_pulp_converter: + metadata[PY_PULP2_CONVERTER] = py_pulp_converter + kwargs["metadata"] = metadata return attr.ib(**kwargs) diff --git a/pubtools/pulplib/_impl/model/repository/base.py b/pubtools/pulplib/_impl/model/repository/base.py index 272f4159..63c0ed6d 100644 --- a/pubtools/pulplib/_impl/model/repository/base.py +++ b/pubtools/pulplib/_impl/model/repository/base.py @@ -101,7 +101,11 @@ class Repository(PulpObject): """ eng_product_id = pulp_attrib( - default=None, type=int, pulp_field="notes.eng_product", pulp_py_converter=int + default=None, + type=int, + pulp_field="notes.eng_product", + pulp_py_converter=int, + py_pulp_converter=str, ) """ID of the product to which this repository belongs (if any).""" @@ -134,6 +138,7 @@ class Repository(PulpObject): type=list, pulp_field="notes.signatures", pulp_py_converter=lambda sigs: sigs.split(","), + py_pulp_converter=",".join, converter=lambda keys: [k.strip() for k in keys], ) """A list of GPG signing key IDs used to sign content in this repository.""" diff --git a/tests/client/test_search_repo_conversions.py b/tests/client/test_search_repo_conversions.py new file mode 100644 index 00000000..41a709e4 --- /dev/null +++ b/tests/client/test_search_repo_conversions.py @@ -0,0 +1,61 @@ +import pytest + +from pubtools.pulplib import Criteria, Matcher, Repository + +from pubtools.pulplib._impl.client.search import filters_for_criteria + + +def test_eng_product_in(): + """eng_product is mapped correctly""" + crit = Criteria.with_field_in("eng_product_id", [12, 34, 56]) + assert filters_for_criteria(crit, Repository) == { + "notes.eng_product": {"$in": ["12", "34", "56"]} + } + + +def test_is_temporary(): + """is_temporary is mapped correctly""" + crit = Criteria.with_field("is_temporary", True) + assert filters_for_criteria(crit, Repository) == { + "notes.pub_temp_repo": {"$eq": True} + } + + +def test_type(): + """type is mapped correctly""" + crit = Criteria.with_field("type", Matcher.regex("foobar")) + assert filters_for_criteria(crit, Repository) == { + "notes._repo-type": {"$regex": "foobar"} + } + + +def test_signing_keys(): + """signing_keys are mapped correctly""" + crit = Criteria.with_field("signing_keys", ["abc", "def", "123"]) + assert filters_for_criteria(crit, Repository) == { + "notes.signatures": {"$eq": "abc,def,123"} + } + + +def test_created(): + """created is mapped correctly""" + # TODO: there is no datetime => string implicit conversion right now. + # + # Should we do that? Right now, it doesn't seem all that useful because + # there's probably no usecase to search for an exact datetime anyway. + # + # In practice, searching by datetime probably is useful only with $lt + # or $gt, which is not supported by this library at all, at the time + # of writing. + crit = Criteria.with_field("created", "20190814T15:16Z") + assert filters_for_criteria(crit, Repository) == { + "notes.created": {"$eq": "20190814T15:16Z"} + } + + +def test_unsearchable(): + """passing a field which can't be mapped directly to Pulp raises an error""" + crit = Criteria.with_field("relative_url", "foobar") + with pytest.raises(NotImplementedError) as exc: + filters_for_criteria(crit, Repository) + assert "Searching on field relative_url is not supported" in str(exc.value) diff --git a/tests/fake/test_fake_search.py b/tests/fake/test_fake_search.py index 3c3ef7f7..c0aab8a7 100644 --- a/tests/fake/test_fake_search.py +++ b/tests/fake/test_fake_search.py @@ -199,6 +199,66 @@ def test_search_created_timestamp(): assert sorted(found) == [repo2] +def test_search_mapped_field_eq(): + """Can do equality search with fields subject to Python<=>Pulp conversion.""" + controller = FakeController() + + repo1 = Repository(id="repo1", eng_product_id=888) + repo2 = Repository(id="repo2", signing_keys=["foo", "bar"]) + repo3 = Repository(id="repo3", eng_product_id=123) + + controller.insert_repository(repo1) + controller.insert_repository(repo2) + controller.insert_repository(repo3) + + client = controller.client + keys_crit = Criteria.with_field("signing_keys", ["foo", "bar"]) + product_crit = Criteria.with_field("eng_product_id", 123) + found_by_keys = client.search_repository(keys_crit).result().data + found_by_product = client.search_repository(product_crit).result().data + + assert found_by_keys == [repo2] + assert found_by_product == [repo3] + + +def test_search_mapped_field_in(): + """Can do 'in' search with fields subject to Python<=>Pulp conversion.""" + controller = FakeController() + + repo1 = Repository(id="repo1", eng_product_id=888) + repo2 = Repository(id="repo2", eng_product_id=123) + repo3 = Repository(id="repo3", eng_product_id=456) + + controller.insert_repository(repo1) + controller.insert_repository(repo2) + controller.insert_repository(repo3) + + client = controller.client + crit = Criteria.with_field("eng_product_id", Matcher.in_([123, 456])) + found = client.search_repository(crit).result().data + + assert sorted(found) == [repo2, repo3] + + +def test_search_mapped_field_regex(): + """Can do regex search with fields subject to Python<=>Pulp conversion.""" + controller = FakeController() + + repo1 = Repository(id="repo1", type="foobar") + repo2 = Repository(id="repo2", type="foobaz") + repo3 = Repository(id="repo3", type="quux") + + controller.insert_repository(repo1) + controller.insert_repository(repo2) + controller.insert_repository(repo3) + + client = controller.client + crit = Criteria.with_field("type", Matcher.regex("fooba[rz]")) + found = client.search_repository(crit).result().data + + assert sorted(found) == [repo1, repo2] + + def test_search_created_regex(): """Can search using regular expressions."""