Skip to content

Commit

Permalink
Merge pull request #686 from rohanpm/cdn-flush
Browse files Browse the repository at this point in the history
Add cache flush endpoint [RHELDST-23276]
  • Loading branch information
rohanpm committed Apr 5, 2024
2 parents d39baca + a8df9ad commit e61a55c
Show file tree
Hide file tree
Showing 18 changed files with 1,025 additions and 36 deletions.
7 changes: 7 additions & 0 deletions .safety-policy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,11 @@ security:
sqlalchemy 2 at time of writing.
See RHELDST-15252.
expires: '2023-03-01'
65213:
# CVE-2023-6129, pyopenssl>=22.0.0,
# POLY1305 MAC issue on PowerPC CPUs
reason: >-
Vulnerability is specific to PPC architecture, which is not
used or relevant for this service.
expires: '2025-04-04'
continue-on-vulnerability-error: False
60 changes: 60 additions & 0 deletions docs/deployment.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,63 @@ users may specify a logger name and the level at which to set said logger.
exodus-gw = INFO
s3 = DEBUG
...
CDN cache flush
...............

exodus-gw supports flushing the cache of an Akamai CDN edge via
the `Fast Purge API <https://techdocs.akamai.com/purge-cache/reference/api>`_.

This feature is optional. If configuration is not provided, related APIs in
exodus-gw will continue to function but will skip cache flush operations.

Enabling the feature requires the deployment of two sets of configuration.

Firstly, in the ``exodus-gw.ini`` section for the relevant environment,
set ``cache_flush_urls`` to enable cache flush by URL and/or
``cache_flush_arl_templates`` to enable cache flushing by ARL. Both options
can be used together as needed.

.. code-block:: ini

    [env.live]
    # Root URL(s) of CDN properties for which to flush cache.
    # Several can be provided.
    cache_flush_urls =
        https://cdn1.example.com
        https://cdn2.example.com

    # Templates of ARL(s) for which to flush cache.
    # Templates can use placeholders:
    # - path: path of a file under CDN root
    # - ttl (optional): a TTL value will be substituted
    cache_flush_arl_templates =
        S/=/123/22334455/{ttl}/cdn1.example.com/{path}
        S/=/123/22334455/{ttl}/cdn2.example.com/{path}

Secondly, use environment variables to deploy credentials for the
Fast Purge API, according to the below table. The fields here correspond
to those used by the `.edgerc file <https://techdocs.akamai.com/developer/docs/set-up-authentication-credentials>`_
as found in Akamai's documentation.

Note that "<env>" should be replaced with the specific corresponding
environment name, e.g. ``EXODUS_GW_FASTPURGE_HOST_LIVE`` for a ``live``
environment.

.. list-table:: Fast Purge credentials

* - Variable
- ``.edgerc`` field
- Example
* - ``EXODUS_GW_FASTPURGE_CLIENT_SECRET_<env>``
- ``client_secret``
- ``abcdEcSnaAt123FNkBxy456z25qx9Yp5CPUxlEfQeTDkfh4QA=I``
* - ``EXODUS_GW_FASTPURGE_HOST_<env>``
- ``host``
- ``akab-lmn789n2k53w7qrs10cxy-nfkxaa4lfk3kd6ym.luna.akamaiapis.net``
* - ``EXODUS_GW_FASTPURGE_ACCESS_TOKEN_<env>``
- ``access_token``
- ``akab-zyx987xa6osbli4k-e7jf5ikib5jknes3``
* - ``EXODUS_GW_FASTPURGE_CLIENT_TOKEN_<env>``
- ``client_token``
- ``akab-nomoflavjuc4422-fa2xznerxrm3teg7``
111 changes: 111 additions & 0 deletions examples/exodus-flush
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env python3
#
# Flush CDN cache for certain path(s).
#
# Example:
#
# $ examples/exodus-flush /some/path /another/path ...
#

import argparse
import logging
import os
import sys
import time
from urllib.parse import urljoin

import requests

# Logger for this script. Named after the script itself; the original
# name "exodus-publish" was copy-pasted from a sibling example script.
LOG = logging.getLogger("exodus-flush")

# Default exodus-gw endpoint (a local development instance).
DEFAULT_URL = "https://localhost:8010"


def assert_success(response: requests.Response):
    """Raise if 'response' was not successful.

    This is the same as response.raise_for_status(), merely wrapping it
    to ensure the body is logged when possible."""

    try:
        response.raise_for_status()
    except Exception as outer:
        # Narrowed from a bare "except:": a bare except would also
        # swallow KeyboardInterrupt/SystemExit raised while reading
        # the body, masking the user's Ctrl-C behind the HTTP error.
        try:
            body = response.json()
        except Exception:
            # Body is not JSON (or could not be read); re-raise the
            # original HTTP error without logging a body.
            raise outer

        LOG.error("Unsuccessful response from exodus-gw: %s", body)
        raise


def flush_cache(args):
    """Request a CDN cache flush for args.path via exodus-gw, then poll
    the resulting task until it completes.

    Exits the process with code 41 if the task fails."""
    session = requests.Session()
    if args.cert:
        session.cert = (args.cert, args.key)

    # Build the endpoint URL with plain string formatting rather than
    # os.path.join: os.path.join is platform-dependent (it would insert
    # backslashes on Windows) and is not meant for URLs.
    url = f"{args.url.rstrip('/')}/{args.env}/cdn-flush"
    r = session.post(url, json=[{"web_uri": path} for path in args.path])
    assert_success(r)

    # We have a task, now wait for it to complete.
    task = r.json()

    task_id = task["id"]
    task_url = urljoin(args.url, task["links"]["self"])
    task_state = task["state"]

    while task_state not in ["COMPLETE", "FAILED"]:
        LOG.info("Task %s: %s", task_id, task_state)
        time.sleep(5)

        r = session.get(task_url)
        assert_success(r)

        task = r.json()
        task_state = task["state"]

    LOG.info("Task %s: %s", task_id, task_state)

    if task_state == "COMPLETE":
        LOG.info("Cache flush completed at %s", task["updated"])
    else:
        LOG.error("Cache flush failed!")
        sys.exit(41)


def _build_parser():
    # Construct the argument parser for this script (paths to flush plus
    # exodus-gw connection settings).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--debug", action="store_true", help="Enable verbose logging"
    )
    parser.add_argument("path", nargs="+", help="Path(s) to flush")

    gw = parser.add_argument_group("exodus-gw settings")

    gw.add_argument(
        "--cert",
        default=os.path.expandvars("${HOME}/certs/${USER}.crt"),
        help="Certificate for HTTPS authentication with exodus-gw (must match --key)",
    )
    gw.add_argument(
        "--key",
        default=os.path.expandvars("${HOME}/certs/${USER}.key"),
        help="Private key for HTTPS authentication with exodus-gw (must match --cert)",
    )
    gw.add_argument("--url", default=DEFAULT_URL)
    gw.add_argument("--env", default="test")

    return parser


def main():
    """Entry point: parse arguments, configure logging, flush cache."""
    parsed = _build_parser().parse_args()

    if parsed.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        # Quiet by default, but keep this script's own progress messages.
        logging.basicConfig(level=logging.WARN, format="%(message)s")
        LOG.setLevel(logging.INFO)

    return flush_cache(parsed)


if __name__ == "__main__":
    main()
30 changes: 29 additions & 1 deletion exodus_gw/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import logging
import sys
from asyncio import LifoQueue
from datetime import datetime, timedelta

from fastapi import Depends, Path, Request
from fastapi import Depends, HTTPException, Path, Query, Request

from .auth import call_context as get_call_context
from .aws.client import S3ClientWrapper
Expand Down Expand Up @@ -87,6 +88,32 @@ async def get_s3_client(
await queue.put(client)


async def get_deadline_from_query(
    deadline: str | None = Query(
        default=None,
        examples=["2022-07-25T15:47:47Z"],
        description=(
            "A timestamp by which this task may be abandoned if not completed.\n\n"
            "When omitted, a server default will apply."
        ),
    ),
    settings: Settings = Depends(get_settings),
) -> datetime:
    """Resolve the task deadline for a request.

    Parses the optional ``deadline`` query parameter, which must use the
    exact UTC format ``YYYY-MM-DDTHH:MM:SSZ``. When the parameter is
    omitted, the deadline defaults to ``settings.task_deadline`` hours
    from now.

    Raises:
        HTTPException: 400 if ``deadline`` is provided but malformed.
    """
    if deadline is None:
        # No explicit deadline requested; apply the server default.
        return datetime.utcnow() + timedelta(hours=settings.task_deadline)

    try:
        return datetime.strptime(deadline, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError as exc_info:
        # Narrowed from "except Exception": strptime on a str can only
        # raise ValueError here, and narrower handling avoids masking
        # unrelated failures as a client error.
        raise HTTPException(
            status_code=400, detail=repr(exc_info)
        ) from exc_info


# These are the preferred objects for use in endpoints,
# e.g.
#
Expand All @@ -95,5 +122,6 @@ async def get_s3_client(
db = Depends(get_db)
call_context = Depends(get_call_context)
env = Depends(get_environment_from_path)
deadline = Depends(get_deadline_from_query)
settings = Depends(get_settings)
s3_client = Depends(get_s3_client)
7 changes: 6 additions & 1 deletion exodus_gw/dramatiq/middleware/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ def before_declare_actor(self, broker, actor):

@wraps(original_fn)
def new_fn(*args, **kwargs):
kwargs["settings"] = self.__settings()
# Settings are automatically injected if there is no
# value present.
# If a value is present, it's not overwritten; this allows
# calling actors with specific settings during tests.
if not kwargs.get("settings"):
kwargs["settings"] = self.__settings()
return original_fn(*args, **kwargs)

actor.fn = new_fn
2 changes: 2 additions & 0 deletions exodus_gw/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def __init__(self, datefmt=None):
"publish_id": "publish_id",
"message_id": "message_id",
"duration_ms": "duration_ms",
"url": "url",
"response": "response",
}
self.datefmt = datefmt

Expand Down
68 changes: 66 additions & 2 deletions exodus_gw/routers/cdn.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding
from fastapi import APIRouter, HTTPException, Path, Query
from fastapi import APIRouter, Body, HTTPException, Path, Query
from fastapi.responses import Response
from sqlalchemy.orm import Session

from exodus_gw import auth, schemas
from exodus_gw import auth, models, schemas, worker

from .. import deps
from ..settings import Environment, Settings
Expand Down Expand Up @@ -283,3 +284,66 @@ def cdn_access(
"expires": expires.isoformat(timespec="minutes") + "Z",
"cookie": cookie_str,
}


@router.post(
    "/{env}/cdn-flush",
    summary="Flush cache",
    status_code=200,
    dependencies=[auth.needs_role("cdn-flusher")],
    response_model=schemas.Task,
)
def flush_cdn_cache(
    items: list[schemas.FlushItem] = Body(
        ...,
        examples=[
            [
                {
                    "web_uri": "/some/path/i/want/to/flush",
                },
                {
                    "web_uri": "/another/path/i/want/to/flush",
                },
            ]
        ],
    ),
    deadline: datetime = deps.deadline,
    env: Environment = deps.env,
    db: Session = deps.db,
) -> models.Task:
    """Flush given paths from CDN cache(s) corresponding to this environment.
    This API may be used to request CDN edge servers downstream from exodus-gw
    and exodus-cdn to discard cached versions of content, ensuring that
    subsequent requests will receive up-to-date content.
    The API is provided for troubleshooting and for scenarios where it's
    known that explicit cache flushes are needed. It's not necessary to use
    this API during a typical upload and publish workflow.
    Returns a task. Successful completion of the task indicates that CDN
    caches have been flushed.
    **Required roles**: `{env}-cdn-flusher`
    """
    # De-duplicate the requested paths; sort for deterministic ordering.
    flush_paths = sorted({item.web_uri for item in items})

    # Hand the actual flush work off to the background worker.
    message = worker.flush_cdn_cache.send(
        env=env.name,
        paths=flush_paths,
    )

    LOG.info(
        "Enqueued cache flush for %s path(s) (%s, ...)",
        len(flush_paths),
        flush_paths[0] if flush_paths else "<empty>",
    )

    # Record a task tied to the enqueued message so callers can poll
    # for completion.
    flush_task = models.Task(
        id=message.message_id,
        state="NOT_STARTED",
        deadline=deadline,
    )
    db.add(flush_task)

    return flush_task
18 changes: 3 additions & 15 deletions exodus_gw/routers/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@

import logging
import os
from datetime import datetime, timedelta
from datetime import datetime
from uuid import uuid4

from fastapi import APIRouter, Body, HTTPException, Query
Expand Down Expand Up @@ -382,9 +382,7 @@ def commit_publish(
env: Environment = deps.env,
db: Session = deps.db,
settings: Settings = deps.settings,
deadline: str | None = Query(
default=None, examples=["2022-07-25T15:47:47Z"]
),
deadline: datetime = deps.deadline,
commit_mode: models.CommitModes | None = Query(
default=None,
title="commit mode",
Expand Down Expand Up @@ -435,16 +433,6 @@ def commit_publish(
commit_mode_str = (commit_mode or models.CommitModes.phase2).value
now = datetime.utcnow()

if isinstance(deadline, str):
try:
deadline_obj = datetime.strptime(deadline, "%Y-%m-%dT%H:%M:%SZ")
except Exception as exc_info:
raise HTTPException(
status_code=400, detail=repr(exc_info)
) from exc_info
else:
deadline_obj = now + timedelta(hours=settings.task_deadline)

db_publish = (
db.query(models.Publish)
# Publish should be locked, but if doing a phase1 commit we will only
Expand Down Expand Up @@ -508,7 +496,7 @@ def commit_publish(
id=msg.message_id,
publish_id=msg.kwargs["publish_id"],
state="NOT_STARTED",
deadline=deadline_obj,
deadline=deadline,
commit_mode=commit_mode,
)
db.add(task)
Expand Down

0 comments on commit e61a55c

Please sign in to comment.