Skip to content

Commit

Permalink
Merge pull request #688 from rohanpm/cache-flush-alias
Browse files Browse the repository at this point in the history
Add alias resolution to cache flush endpoint [RHELDST-23276]
  • Loading branch information
rohanpm committed Apr 8, 2024
2 parents e61a55c + cc72019 commit 16ceae1
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 5 deletions.
51 changes: 47 additions & 4 deletions exodus_gw/worker/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@
import os
import re
from datetime import datetime
from typing import Any

import dramatiq
import fastpurge
from dramatiq.middleware import CurrentMessage
from sqlalchemy.orm import Session

from exodus_gw import models
from exodus_gw.aws.dynamodb import DynamoDB
from exodus_gw.aws.util import uri_alias
from exodus_gw.database import db_engine
from exodus_gw.schemas import TaskStates
from exodus_gw.settings import Settings
from exodus_gw.settings import Settings, get_environment

LOG = logging.getLogger("exodus-gw")

Expand All @@ -22,16 +25,26 @@ def __init__(
paths: list[str],
settings: Settings,
env: str,
cdn_definitions: dict[str, Any],
):
self.paths = [p.removeprefix("/") for p in paths]
self.paths = paths
self.settings = settings
self.cdn_definitions = cdn_definitions

for environment in settings.environments:
if environment.name == env:
self.env = environment

assert self.env

@property
def aliases(self):
    """Return every alias entry from the CDN definitions as one flat list.

    Only the "origin_alias", "releasever_alias" and "rhui_alias" keys of
    ``self.cdn_definitions`` are consulted; any other key is ignored.
    Entries are concatenated in the order those keys occur in the
    definitions mapping, and in their original order within each key.
    The result is what ``urls_for_flush`` hands to ``uri_alias``.
    """
    # NOTE(review): each entry is presumably a {"src": ..., "dest": ...}
    # mapping, as in the tests - confirm against uri_alias.
    alias_keys = ("origin_alias", "releasever_alias", "rhui_alias")
    return [
        entry
        for key, entries in self.cdn_definitions.items()
        if key in alias_keys
        for entry in entries
    ]

def arl_ttl(self, path: str):
# Return an appropriate TTL value for certain paths.
#
Expand All @@ -57,12 +70,32 @@ def arl_ttl(self, path: str):
def urls_for_flush(self):
out: list[str] = []

paths = set()

# Use aliases to expand the set of paths to flush.
# e.g. if there is a path of /foo/bar/8/baz and there is an alias
# of /foo/bar/8 => /foo/bar/8.9, then 'paths' should contain both
# the original path and its alias-resolved form, /foo/bar/8.9/baz.
for path in self.paths:
# We accept paths both with and without a leading '/'; normalize by stripping it.
path = path.removeprefix("/")

# This path always goes into the set we'll process.
paths.add(path)

# The path after alias resolution also goes into the set.
# Alias resolution needs the leading '/'.
path_resolved = uri_alias("/" + path, self.aliases)
paths.add(path_resolved.removeprefix("/"))

path_list = sorted(paths)

for cdn_base_url in self.env.cache_flush_urls:
for path in self.paths:
for path in path_list:
out.append(os.path.join(cdn_base_url, path))

for arl_template in self.env.cache_flush_arl_templates:
for path in self.paths:
for path in path_list:
out.append(
arl_template.format(
path=path,
Expand Down Expand Up @@ -149,10 +182,20 @@ def flush_cdn_cache(
db.commit()
return

# The CDN config is needed for alias resolution.
ddb = DynamoDB(
env=env,
settings=settings,
from_date=str(datetime.utcnow()),
env_obj=get_environment(env, settings),
)
definitions = ddb.query_definitions()

flusher = Flusher(
paths=paths,
settings=settings,
env=env,
cdn_definitions=definitions,
)
flusher.run()

Expand Down
35 changes: 34 additions & 1 deletion tests/worker/test_cdn_cache.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import pathlib
from datetime import datetime, timedelta

Expand Down Expand Up @@ -129,6 +130,7 @@ def test_flush_cdn_cache_fastpurge_disabled(
def test_flush_cdn_cache_typical(
db: Session,
caplog: pytest.LogCaptureFixture,
mock_boto3_client,
fake_message_id: str,
tmp_path: pathlib.Path,
monkeypatch: pytest.MonkeyPatch,
Expand Down Expand Up @@ -178,12 +180,34 @@ def test_flush_cdn_cache_typical(
db.add(task)
db.commit()

# Set up some aliases to exercise alias resolution.
mock_boto3_client.query.return_value = {
"Items": [
{
"config": {
"S": json.dumps(
{
"origin_alias": [],
"releasever_alias": [
{"src": "/path/one", "dest": "/path/one-dest"},
],
"rhui_alias": [
{"src": "/path/two", "dest": "/path/two-dest"},
],
}
)
}
}
]
}

# It should run to completion...
flush_cdn_cache(
paths=[
# Paths here are chosen to exercise:
# - different TTL values for different types of file
# - leading "/" vs no leading "/" - both should be tolerated
# - alias resolution
"/path/one/repodata/repomd.xml",
"path/two/listing",
"third/path",
Expand Down Expand Up @@ -214,19 +238,28 @@ def test_flush_cdn_cache_typical(
# using both the CDN root URLs and the ARL templates
assert sorted(fp_client._purged_urls) == [
# Used the ARL templates. Note the different TTL values
# for different paths.
# for different paths, and also the paths both before and
# after alias resolution are flushed.
"S/=/123/4567/10m/cdn1.example.com/path/two-dest/listing cid=///",
"S/=/123/4567/10m/cdn1.example.com/path/two/listing cid=///",
"S/=/123/4567/30d/cdn1.example.com/third/path cid=///",
"S/=/123/4567/4h/cdn1.example.com/path/one-dest/repodata/repomd.xml cid=///",
"S/=/123/4567/4h/cdn1.example.com/path/one/repodata/repomd.xml cid=///",
"S/=/234/6677/10m/cdn2.example.com/other/path/two-dest/listing x/y/z",
"S/=/234/6677/10m/cdn2.example.com/other/path/two/listing x/y/z",
"S/=/234/6677/30d/cdn2.example.com/other/third/path x/y/z",
"S/=/234/6677/4h/cdn2.example.com/other/path/one-dest/repodata/repomd.xml x/y/z",
"S/=/234/6677/4h/cdn2.example.com/other/path/one/repodata/repomd.xml x/y/z",
# Used the CDN URL which didn't have a leading path.
"https://cdn1.example.com/path/one-dest/repodata/repomd.xml",
"https://cdn1.example.com/path/one/repodata/repomd.xml",
"https://cdn1.example.com/path/two-dest/listing",
"https://cdn1.example.com/path/two/listing",
"https://cdn1.example.com/third/path",
# Used the CDN URL which had a leading path.
"https://cdn2.example.com/root/path/one-dest/repodata/repomd.xml",
"https://cdn2.example.com/root/path/one/repodata/repomd.xml",
"https://cdn2.example.com/root/path/two-dest/listing",
"https://cdn2.example.com/root/path/two/listing",
"https://cdn2.example.com/root/third/path",
]

0 comments on commit 16ceae1

Please sign in to comment.