From da71c5e6b9d0f7b0a4be74f325125825691c4992 Mon Sep 17 00:00:00 2001 From: Rohan McGovern Date: Tue, 9 Jan 2024 13:48:43 +1000 Subject: [PATCH] Fix missing files from kickstart indexes [RHELDST-22387] RHELDST-22148 introduced the concept of a partial autoindex, which is triggered as soon as entry points are added onto a publish without waiting until commit. This introduced a problem for kickstart repos. The problem is that a kickstart repo is simultaneously a valid yum repo, containing both yum and kickstart entrypoints. There is no guarantee in which order the client will add these onto a publish. If the client added yum repodata first, partial autoindex could be triggered and successfully parse the repo as a yum repo (only). If the client then later added kickstart repodata, the autoindex would not be regenerated to include kickstart files, because the index is treated as immutable and assumed valid if it already exists on the publish. This commit fixes the problem by effectively disabling partial autoindex for kickstart repos, allowing them to instead generate indexes at commit time as it worked originally. This can be considered more of a workaround than a proper fix. A better fix might be done later to make autoindex_partial work correctly for kickstart repos rather than disabling it. --- exodus_gw/routers/publish.py | 15 ++++++++++- exodus_gw/settings.py | 5 ++++ tests/routers/test_publish.py | 49 +++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/exodus_gw/routers/publish.py b/exodus_gw/routers/publish.py index b5f58a53..7096af8d 100644 --- a/exodus_gw/routers/publish.py +++ b/exodus_gw/routers/publish.py @@ -339,10 +339,23 @@ def update_publish_items( if item.object_key == "absent": # deleted items don't get indexed continue + basename = os.path.basename(item.web_uri) for entrypoint_basename in settings.entry_point_files: if basename == entrypoint_basename: - entrypoint_paths.add(item.web_uri) + if any( + [ + exclude in item.web_uri + for exclude in settings.autoindex_partial_excludes + ] + ): + # Not eligible for partial autoindex, e.g. /kickstart/ repos + # because the kickstart and yum repodata might arrive separately. + LOG.info( + "%s: excluded from partial autoindex", item.web_uri + ) + else: + entrypoint_paths.add(item.web_uri) if entrypoint_paths: msg = worker.autoindex_partial.send( diff --git a/exodus_gw/settings.py b/exodus_gw/settings.py index 5cc8751e..e8b56331 100644 --- a/exodus_gw/settings.py +++ b/exodus_gw/settings.py @@ -170,6 +170,11 @@ class Settings(BaseSettings): Can be set to an empty string to disable generation of indexes. """ + autoindex_partial_excludes: List[str] = ["/kickstart/"] + """Background processing of autoindexes will be disabled for paths matching + any of these values. + """ + config_cache_ttl: int = 2 """Time (in minutes) config is expected to live in components that consume it. diff --git a/tests/routers/test_publish.py b/tests/routers/test_publish.py index 66f8b1ab..85ec2c65 100644 --- a/tests/routers/test_publish.py +++ b/tests/routers/test_publish.py @@ -1,4 +1,5 @@ import json +import logging from datetime import datetime, timedelta import mock @@ -290,6 +291,54 @@ def test_update_publish_items_autoindex(db, auth_header): ] +def test_update_publish_items_autoindex_excluded( + db, auth_header, caplog: pytest.LogCaptureFixture +): + """PUTting items including entry points under an excluded path will NOT trigger + partial autoindex. + """ + + publish_id = "11224567-e89b-12d3-a456-426614174000" + + publish = Publish(id=publish_id, env="test", state="PENDING") + + with TestClient(app) as client: + # Ensure a publish object exists + db.add(publish) + db.commit() + + # Try to add some items to it + r = client.put( + "/test/publish/%s" % publish_id, + json=[ + { + "web_uri": "/some/kickstart/repo1/repodata/repomd.xml", + "object_key": "1" * 64, + }, + { + "web_uri": "/some/kickstart/repo1/other", + "object_key": "2" * 64, + }, + ], + headers=auth_header(roles=["test-publisher"]), + ) + + # It should have succeeded + assert r.status_code == 200 + + # Check the enqueued messages... + messages: list[DramatiqMessage] = db.query(DramatiqMessage).all() + + # It should have enqueued no messages + assert len(messages) == 0 + + # And it should have logged about the exclusion + assert ( + "/some/kickstart/repo1/repodata/repomd.xml: excluded from partial autoindex" + in caplog.text + ) + + def test_update_publish_items_path_normalization(db, auth_header): """URI and link target paths are normalized in PUT items."""