From 0ae5049a0a453a1ae471e4047edfd44cb8b5b031 Mon Sep 17 00:00:00 2001 From: Rohan McGovern Date: Wed, 8 May 2024 09:51:00 +1000 Subject: [PATCH] cache: skip non-kickstart treeinfo [RHELDST-24308] It turns out there is a special case in the current CDN config forcing a fast 404 response on treeinfo URLs not ending in "/kickstart/treeinfo". A side-effect of forcing that response is that any requests to cache flush on such a path will fail, e.g. flush /some/kickstart/treeinfo => OK flush /some/treeinfo => always fails As such, we'll need a special case to filter out these paths from any attempted cache flushes. There is never any need for cache flush if the response is forced to 404 anyway. Note this is unlikely to come up in any real publishes of kickstart repos since those are always using /kickstart in the path. It came up in the context of migrating some old content to exodus-cdn, because some files named "treeinfo" exist underneath /origin/files which will trigger the usual cache flush attempts. --- exodus_gw/worker/cache.py | 14 +++++++++++++- tests/worker/test_cdn_cache.py | 8 ++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/exodus_gw/worker/cache.py b/exodus_gw/worker/cache.py index c0471fe3..333a56a6 100644 --- a/exodus_gw/worker/cache.py +++ b/exodus_gw/worker/cache.py @@ -18,6 +18,18 @@ LOG = logging.getLogger("exodus-gw") +def exclude_path(path: str) -> bool: + # Returns True for certain paths which should be excluded from cache flushing. + if path.endswith("/treeinfo") and not path.endswith("/kickstart/treeinfo"): + # RHELDST-24308: paths matching these conditions get a forced 404 response + # without going to the CDN origin. This has the side-effect of breaking + # cache flushing for those paths - if we request flush for these paths + # we'll get an error. + LOG.debug("Skipping %s: treeinfo fast-404 case", path) + return True + return False + + class Flusher: def __init__( self, @@ -26,7 +38,7 @@ def __init__( env: str, aliases: list[tuple[str, str]], ): - self.paths = paths + self.paths = [p for p in paths if not exclude_path(p)] self.settings = settings self.aliases = aliases diff --git a/tests/worker/test_cdn_cache.py b/tests/worker/test_cdn_cache.py index b43f310c..2453899b 100644 --- a/tests/worker/test_cdn_cache.py +++ b/tests/worker/test_cdn_cache.py @@ -208,9 +208,12 @@ def test_flush_cdn_cache_typical( # - different TTL values for different types of file # - leading "/" vs no leading "/" - both should be tolerated # - alias resolution + # - treeinfo special case "/path/one/repodata/repomd.xml", "path/two/listing", "third/path", + "/some/misc/treeinfo", + "/some/kickstart/treeinfo", ], env="cachetest", settings=settings, @@ -242,11 +245,14 @@ def test_flush_cdn_cache_typical( # after alias resolution are flushed. "S/=/123/4567/10m/cdn1.example.com/path/two-dest/listing cid=///", "S/=/123/4567/10m/cdn1.example.com/path/two/listing cid=///", + # note only the kickstart treeinfo appears, the other is filtered. + "S/=/123/4567/30d/cdn1.example.com/some/kickstart/treeinfo cid=///", "S/=/123/4567/30d/cdn1.example.com/third/path cid=///", "S/=/123/4567/4h/cdn1.example.com/path/one-dest/repodata/repomd.xml cid=///", "S/=/123/4567/4h/cdn1.example.com/path/one/repodata/repomd.xml cid=///", "S/=/234/6677/10m/cdn2.example.com/other/path/two-dest/listing x/y/z", "S/=/234/6677/10m/cdn2.example.com/other/path/two/listing x/y/z", + "S/=/234/6677/30d/cdn2.example.com/other/some/kickstart/treeinfo x/y/z", "S/=/234/6677/30d/cdn2.example.com/other/third/path x/y/z", "S/=/234/6677/4h/cdn2.example.com/other/path/one-dest/repodata/repomd.xml x/y/z", "S/=/234/6677/4h/cdn2.example.com/other/path/one/repodata/repomd.xml x/y/z", @@ -255,11 +261,13 @@ def test_flush_cdn_cache_typical( "https://cdn1.example.com/path/one/repodata/repomd.xml", "https://cdn1.example.com/path/two-dest/listing", "https://cdn1.example.com/path/two/listing", + "https://cdn1.example.com/some/kickstart/treeinfo", "https://cdn1.example.com/third/path", # Used the CDN URL which had a leading path. "https://cdn2.example.com/root/path/one-dest/repodata/repomd.xml", "https://cdn2.example.com/root/path/one/repodata/repomd.xml", "https://cdn2.example.com/root/path/two-dest/listing", "https://cdn2.example.com/root/path/two/listing", + "https://cdn2.example.com/root/some/kickstart/treeinfo", "https://cdn2.example.com/root/third/path", ]