diff --git a/lib/spack/spack/oci/oci.py b/lib/spack/spack/oci/oci.py
index 9dd035da5cdc00..09e79df347dcd7 100644
--- a/lib/spack/spack/oci/oci.py
+++ b/lib/spack/spack/oci/oci.py
@@ -70,15 +70,16 @@ def with_query_param(url: str, param: str, value: str) -> str:
     )
 
 
-def list_tags(ref: ImageReference) -> List[str]:
+def list_tags(ref: ImageReference, _urlopen: spack.oci.opener.MaybeOpen = None) -> List[str]:
     """Retrieves the list of tags associated with an image, handling pagination."""
+    _urlopen = _urlopen or spack.oci.opener.urlopen
     tags = set()
     fetch_url = ref.tags_url()
 
     while True:
         # Fetch tags
         request = Request(url=fetch_url)
-        response = spack.oci.opener.urlopen(request)
+        response = _urlopen(request)
         spack.oci.opener.ensure_status(request, response, 200)
         tags.update(json.load(response)["tags"])
 
diff --git a/lib/spack/spack/test/oci/mock_registry.py b/lib/spack/spack/test/oci/mock_registry.py
index 4be131cbdada18..288598089d7052 100644
--- a/lib/spack/spack/test/oci/mock_registry.py
+++ b/lib/spack/spack/test/oci/mock_registry.py
@@ -151,7 +151,9 @@ class InMemoryOCIRegistry(DummyServer):
     A third option is to use the chunked upload, but this is not implemented here, because
     it's typically a major performance hit in upload speed, so we're not using it in Spack."""
 
-    def __init__(self, domain: str, allow_single_post: bool = True) -> None:
+    def __init__(
+        self, domain: str, allow_single_post: bool = True, tags_per_page: int = 100
+    ) -> None:
         super().__init__(domain)
         self.router.register("GET", r"/v2/", self.index)
         self.router.register("HEAD", r"/v2/(?P<name>.+)/blobs/(?P<digest>.+)", self.head_blob)
@@ -165,6 +167,9 @@ def __init__(self, domain: str, allow_single_post: bool = True) -> None:
         # If True, allow single POST upload, not all registries support this
         self.allow_single_post = allow_single_post
 
+        # How many tags are returned in a single request
+        self.tags_per_page = tags_per_page
+
         # Used for POST + PUT upload. This is a map from session ID to image name
         self.sessions: Dict[str, str] = {}
 
@@ -280,33 +285,35 @@ def handle_upload(self, req: Request, name: str, digest: Digest):
         return MockHTTPResponse(201, "Created", headers={"Location": f"/v2/{name}/blobs/{digest}"})
 
     def list_tags(self, req: Request, name: str):
+        """Serve one page of sorted tags for ``name``, linking to the next page if any."""
+        # Paginate using Link headers; this was added to the spec in the following commit:
+        # https://github.com/opencontainers/distribution-spec/commit/2ed79d930ecec11dd755dc8190409a3b10f01ca9
+
         # List all tags, exclude digests.
-        tags = [_tag for _name, _tag in self.manifests.keys() if _name == name and ":" not in _tag]
-        tags.sort()
+        all_tags = sorted(
+            _tag for _name, _tag in self.manifests.keys() if _name == name and ":" not in _tag
+        )
 
-        # Handle pagination as described in the distribution spec:
-        # https://github.com/opencontainers/distribution-spec/blob/v1.0.1/spec.md#content-discovery
-        url = urllib.parse.urlparse(req.full_url)
-        query = urllib.parse.parse_qs(url.query)
-        n = int(query["n"][0]) if "n" in query else 3
-        last = query["last"][0] if "last" in query else None
+        query = urllib.parse.parse_qs(urllib.parse.urlparse(req.full_url).query)
+
+        n = int(query["n"][0]) if "n" in query else self.tags_per_page
 
-        index_of_first = 0
-        if last:
+        if "last" in query:
             try:
-                index_of_first = tags.index(last) + 1
+                offset = all_tags.index(query["last"][0]) + 1
             except ValueError:
                 return MockHTTPResponse(404, "Not found")
+        else:
+            offset = 0
 
-        slice_end = index_of_first + n
-        returned_tags = tags[index_of_first:slice_end]
+        tags = all_tags[offset : offset + n]
 
-        headers = None
-        if len(tags) > slice_end:
-            last_tag = returned_tags[-1]
-            headers = {"Link": f'</v2/{name}/tags/list?last={last_tag}&n={n}>; rel="next"'}
+        if offset + n < len(all_tags):
+            headers = {"Link": f'</v2/{name}/tags/list?last={tags[-1]}&n={n}>; rel="next"'}
+        else:
+            headers = None
 
-        return MockHTTPResponse.with_json(200, "OK", headers=headers, body={"tags": returned_tags})
+        return MockHTTPResponse.with_json(200, "OK", headers=headers, body={"tags": tags})
 
 
 class DummyServerUrllibHandler(urllib.request.BaseHandler):
diff --git a/lib/spack/spack/test/oci/urlopen.py b/lib/spack/spack/test/oci/urlopen.py
index 78d713f7e84af2..efc3f3c2b06fb4 100644
--- a/lib/spack/spack/test/oci/urlopen.py
+++ b/lib/spack/spack/test/oci/urlopen.py
@@ -6,6 +6,7 @@
 
 import hashlib
 import json
+import random
 import urllib.error
 import urllib.parse
 import urllib.request
@@ -19,6 +20,7 @@
     copy_missing_layers,
     get_manifest_and_config,
     image_from_mirror,
+    list_tags,
     upload_blob,
     upload_manifest,
 )
@@ -670,3 +672,32 @@ def test_retry(url, max_retries, expect_failure, expect_requests):
 
     assert len(server.requests) == expect_requests
     assert sleep_time == [2**i for i in range(expect_requests - 1)]
+
+
+def test_list_tags():
+    """list_tags should collect the tags from every page and return them in sorted order."""
+    # Follows a relatively new rewording of the OCI distribution spec, which is not yet tagged.
+    # https://github.com/opencontainers/distribution-spec/commit/2ed79d930ecec11dd755dc8190409a3b10f01ca9
+    N = 20
+    urlopen = create_opener(InMemoryOCIRegistry("example.com", tags_per_page=5)).open
+    image = ImageReference.from_string("example.com/image")
+    to_tag = lambda i: f"tag-{i:02}"
+
+    # Create N tags in arbitrary order
+    _tags_to_create = [to_tag(i) for i in range(N)]
+    random.shuffle(_tags_to_create)
+    for tag in _tags_to_create:
+        upload_manifest(image.with_tag(tag), default_manifest(), tag=True, _urlopen=urlopen)
+
+    # list_tags should return all tags from all pages in order
+    tags = list_tags(image, urlopen)
+    assert len(tags) == N
+    assert [to_tag(i) for i in range(N)] == tags
+
+    # Test a single request, which should give the first 5 tags
+    assert json.loads(urlopen(image.tags_url()).read())["tags"] == [to_tag(i) for i in range(5)]
+
+    # Test response at an offset, which should exclude the `last` tag.
+    assert json.loads(urlopen(image.tags_url() + f"?last={to_tag(N - 3)}").read())["tags"] == [
+        to_tag(i) for i in range(N - 2, N)
+    ]