From c94e3518e78d5459429f4f83bf8ffd51669d3356 Mon Sep 17 00:00:00 2001 From: Rohan McGovern Date: Tue, 1 Jun 2021 15:47:37 +1000 Subject: [PATCH] refactor: move modulemd filtering from errata source to koji source ET requests modules by filename, e.g. modulemd.x86_64.txt. Previously, we had errata source query koji source and then filter the returned values according to ET's desired filenames. Let's instead make koji source natively support filtering by way of a new parameter. Motivation: koji source will soon need to start parsing these modulemd files so that the NSVCA can be put onto push items. If we do this for files which will be then filtered by the errata source, we're negatively impacting the performance and reliability by loading data we don't need. It's better if we can tell koji source in advance exactly which files we're interested in. --- CHANGELOG.md | 5 ++ docs/sources/koji.rst | 6 ++ src/pushsource/_impl/backend/errata_source.py | 9 +- src/pushsource/_impl/backend/koji_source.py | 38 ++++++++- tests/koji/conftest.py | 17 ++++ tests/koji/test_koji.py | 18 +--- tests/koji/test_koji_module.py | 84 +++++++++++++++++++ 7 files changed, 152 insertions(+), 25 deletions(-) create mode 100644 tests/koji/conftest.py create mode 100644 tests/koji/test_koji_module.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a9b02f7..00e17a5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix usage of `errata` source when the an Errata Tool URL includes a path component. +### Added + +- Added `module_filter_filename` parameter to koji source, to select only a subset + of modulemd files from a build (e.g. limit to certain arches). + ### Changed - On `ErratumPushItem`, the `type` attribute will now be automatically converted from legacy values found in the wild such as "RHBA", "RHSA". Values are diff --git a/docs/sources/koji.rst b/docs/sources/koji.rst index 508be10c..4c9ad418 100644 --- a/docs/sources/koji.rst +++ b/docs/sources/koji.rst @@ -54,6 +54,12 @@ text files as archives (shown as "Module Archives" in the Koji UI). ``koji:https://koji.fedoraproject.org/kojihub?module_build=flatpak-common-f32-3220200518173809.caf21102`` +If you want to process only a subset of the modules on the build +(i.e. select only certain arches), you may use ``module_filter_filename`` to select +the desired modulemd files, as in example: + +``koji:https://koji.fedoraproject.org/kojihub?module_build=flatpak-common-f32-3220200518173809.caf21102&module_filter_filename=modulemd.x86_64.txt,modulemd.s390x.txt`` + Setting the destination for push items ...................................... diff --git a/src/pushsource/_impl/backend/errata_source.py b/src/pushsource/_impl/backend/errata_source.py index 3d595a41..f6806395 100644 --- a/src/pushsource/_impl/backend/errata_source.py +++ b/src/pushsource/_impl/backend/errata_source.py @@ -152,16 +152,13 @@ def _module_push_items_from_build(self, erratum, build_nvr, build_info): modules = build_info.get("modules") or {} # Get a koji source which will yield all modules from the build - koji_source = self._koji_source(module_build=[build_nvr]) + koji_source = self._koji_source( + module_build=[build_nvr], module_filter_filename=list(modules.keys()) + ) out = [] for push_item in koji_source: - # The koji source yielded *all* modulemds on the build. - # We filter to only those requested by Errata Tool. - if push_item.name not in modules: - continue - dest = modules[push_item.name] # Fill in more push item details based on the info provided by ET. diff --git a/src/pushsource/_impl/backend/koji_source.py b/src/pushsource/_impl/backend/koji_source.py index 0c4896a5..9aab3477 100644 --- a/src/pushsource/_impl/backend/koji_source.py +++ b/src/pushsource/_impl/backend/koji_source.py @@ -102,6 +102,7 @@ def __init__( dest=None, rpm=None, module_build=None, + module_filter_filename=None, signing_key=None, basedir=None, threads=4, @@ -125,8 +126,21 @@ def __init__( module_build (list[str, int]) Build identifier(s). Can be koji IDs (integers) or build NVRs. - The source will yield all modulemd files belonging to these - build(s). + The source will yield modulemd files belonging to these + build(s), filtered by ``module_filter_filename`` (if given). + + module_filter_filename (list[str]) + Filename(s) of modulemd archives to include in output. + + When ``module_build`` is given, modulemd files are located as + koji archives with names of the form ``modulemd..txt``. + By default, all of these archives will be processed. + + Providing a non-empty list for ``module_filter_filename`` will + instead only process modulemd files with those filenames. This + can be used to select only certain arches. + + Has no effect if ``module_build`` is not provided. signing_key (list[str]) GPG signing key ID(s). If provided, content must be signed @@ -161,6 +175,13 @@ def __init__( self._url = url self._rpm = [try_int(x) for x in list_argument(rpm)] self._module_build = [try_int(x) for x in list_argument(module_build)] + self._module_filter_filename = ( + # retain None values so we can differentiate between + # "caller asked for empty filter" vs "caller did not ask for any filter" + list_argument(module_filter_filename) + if module_filter_filename is not None + else None + ) self._signing_key = list_argument(signing_key) self._dest = list_argument(dest) self._timeout = timeout @@ -244,6 +265,15 @@ def _push_items_from_rpm_meta(self, rpm, meta): ) ] + def _module_filtered(self, file_path): + ok_names = self._module_filter_filename + filename = os.path.basename(file_path) + if ok_names is not None and filename not in ok_names: + LOG.debug("Skipping module %s due to module_filter_filename", file_path) + return True + + return False + def _push_items_from_module_build(self, nvr, meta): LOG.debug("Looking for modulemd on %s, %s", nvr, meta) @@ -261,6 +291,10 @@ def _push_items_from_module_build(self, nvr, meta): file_path = os.path.join( self._pathinfo.typedir(meta, "module"), module["filename"] ) + + if self._module_filtered(file_path): + continue + # Possible TODO: koji also provides a checksum, which could be filled # in here. However, it seems to be only MD5? out.append( diff --git a/tests/koji/conftest.py b/tests/koji/conftest.py new file mode 100644 index 00000000..eb8b54d7 --- /dev/null +++ b/tests/koji/conftest.py @@ -0,0 +1,17 @@ +from pytest import fixture +from mock import patch + +from .fake_koji import FakeKojiController + + +@fixture +def fake_koji(): + controller = FakeKojiController() + with patch("koji.ClientSession") as mock_session: + mock_session.side_effect = controller.session + yield controller + + +@fixture +def koji_dir(tmpdir): + yield str(tmpdir.mkdir("koji")) diff --git a/tests/koji/test_koji.py b/tests/koji/test_koji.py index 4849aaa8..1a89f372 100644 --- a/tests/koji/test_koji.py +++ b/tests/koji/test_koji.py @@ -1,25 +1,9 @@ import os -from pytest import raises, fixture -from mock import patch +from pytest import raises from pushsource import Source, RpmPushItem -from .fake_koji import FakeKojiController - - -@fixture -def fake_koji(): - controller = FakeKojiController() - with patch("koji.ClientSession") as mock_session: - mock_session.side_effect = controller.session - yield controller - - -@fixture -def koji_dir(tmpdir): - yield str(tmpdir.mkdir("koji")) - def test_koji_needs_url(): """Can't obtain source without giving URL""" diff --git a/tests/koji/test_koji_module.py b/tests/koji/test_koji_module.py new file mode 100644 index 00000000..df04e52c --- /dev/null +++ b/tests/koji/test_koji_module.py @@ -0,0 +1,84 @@ +import os + +from pytest import raises, fixture +from mock import patch + +from pushsource import Source, ModuleMdPushItem + +from .fake_koji import FakeKojiController + + +def test_koji_modules(fake_koji, koji_dir): + """Koji source yields requested modules""" + + source = Source.get( + "koji:https://koji.example.com/?module_build=foo-1.0-1", basedir=koji_dir + ) + + fake_koji.insert_rpms(["foo-1.0-1.x86_64.rpm"], build_nvr="foo-1.0-1") + fake_koji.insert_modules( + ["modulemd.x86_64.txt", "modulemd.s390x.txt"], build_nvr="foo-1.0-1" + ) + + # Eagerly fetch + items = list(source) + + # It should have returned push items for the two modules + assert len(items) == 2 + + items = sorted(items, key=lambda pi: pi.name) + + assert items[0] == ModuleMdPushItem( + name="modulemd.s390x.txt", + state="PENDING", + src=os.path.join( + koji_dir, "packages/foo/1.0/1/files/module/modulemd.s390x.txt" + ), + dest=[], + md5sum=None, + sha256sum=None, + origin=None, + build="foo-1.0-1", + signing_key=None, + ) + + assert items[1] == ModuleMdPushItem( + name="modulemd.x86_64.txt", + state="PENDING", + src=os.path.join( + koji_dir, "packages/foo/1.0/1/files/module/modulemd.x86_64.txt" + ), + dest=[], + md5sum=None, + sha256sum=None, + origin=None, + build="foo-1.0-1", + signing_key=None, + ) + + +def test_koji_modules_filter_filename(fake_koji, koji_dir): + """Koji source can filter modules by filename""" + + source = Source.get( + "koji:https://koji.example.com/?module_build=foo-1.0-1", + basedir=koji_dir, + module_filter_filename="modulemd.x86_64.txt,modulemd.aarch64.txt", + ) + + fake_koji.insert_rpms(["foo-1.0-1.x86_64.rpm"], build_nvr="foo-1.0-1") + fake_koji.insert_modules( + ["modulemd.x86_64.txt", "modulemd.aarch64.txt", "modulemd.s390x.txt"], + build_nvr="foo-1.0-1", + ) + + # Eagerly fetch + items = list(source) + + # It should have returned push items for the two modules which matched filter + assert len(items) == 2 + + item_names = sorted([item.name for item in items]) + + # Should be only those two matching the filter + assert item_names == ["modulemd.aarch64.txt", "modulemd.x86_64.txt"]