Skip to content

Commit

Permalink
Make it possible to bypass /origin/files policy [RHELDST-22253]
Browse files Browse the repository at this point in the history
RHELDST-23443 started to enforce that content published under
/origin/files must abide by the established naming scheme (e.g. checksum
in path must match checksum of content).

Problem: content from legacy storage is still being migrated via
exodus-gw, and some of that content fails to comply. We should permit
migration of such content as-is.

As there is a use-case for bypassing these checks, but only for the user
performing the migration, add a new role supporting this. If the calling
user has e.g. "live-ignore-policy", they will be permitted to bypass
this specific check in "live". This will be granted to the user
performing the migration.

The relevant validation code was refactored and moved alongside the other
validation logic so that the resulting exception can be caught and ignored
cleanly where the bypass applies.
  • Loading branch information
rohanpm committed May 13, 2024
1 parent 34af728 commit cf6858c
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 70 deletions.
2 changes: 2 additions & 0 deletions exodus_gw/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from fastapi import Depends, HTTPException, Path, Query, Request

from .auth import call_context as get_call_context
from .auth import caller_roles as get_caller_roles
from .aws.client import S3ClientWrapper
from .settings import Environment, Settings, get_environment

Expand Down Expand Up @@ -121,6 +122,7 @@ async def get_deadline_from_query(
#
db = Depends(get_db)
call_context = Depends(get_call_context)
caller_roles = Depends(get_caller_roles)
env = Depends(get_environment_from_path)
deadline = Depends(get_deadline_from_query)
settings = Depends(get_settings)
Expand Down
80 changes: 11 additions & 69 deletions exodus_gw/routers/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def update_publish_items(
db: Session = deps.db,
settings: Settings = deps.settings,
call_context: auth.CallContext = deps.call_context,
caller_roles: set[str] = deps.caller_roles,
) -> dict[None, None]:
"""Add publish items to an existing publish object.
Expand Down Expand Up @@ -273,75 +274,16 @@ def update_publish_items(

db_publish.resolve_links(ln_items=resolvable)

# Enforce correct usage of the /origin/files directory layout.
#
# Paths published under /origin/files must always match the format:
# /origin/files/sha256/(first two characters of sha256sum)/(full sha256sum)/(basename)
#
# Additionally, every object_key must either be "absent" or equal to the full sha256sum
# present in the web_uri.
origin_base_regex = re.compile("^(/content)?/origin/files/sha256/.*$")
origin_pattern = (
"^(/content)?/origin/files/sha256/[0-f]{2}/[0-f]{64}/[^/]{1,300}$"
)
origin_regex = re.compile(origin_pattern)
origin_items = [i for i in items if re.match(origin_base_regex, i.web_uri)]
for i in origin_items:
# All content under /origin/files/sha256 must match the regex
if not re.match(origin_regex, i.web_uri):
LOG.error(
"Origin path %s does not match regex %s",
i.web_uri,
origin_pattern,
extra={
"publish_id": publish_id,
"event": "publish",
"success": False,
},
)
raise HTTPException(
400,
detail="Origin path %s does not match regex %s"
% (i.web_uri, origin_pattern),
)
# Verify that the two-character partial sha256sum matches the first two characters of the
# full sha256sum.
parts = i.web_uri.partition("/files/sha256/")[2].split("/")
if not parts[1].startswith(parts[0]):
LOG.error(
"Origin path %s contains mismatched sha256sum (%s, %s)",
i.web_uri,
parts[0],
parts[1],
extra={
"publish_id": publish_id,
"event": "publish",
"success": False,
},
)
raise HTTPException(
400,
detail="Origin path %s contains mismatched sha256sum (%s, %s)"
% (i.web_uri, parts[0], parts[1]),
)
# Ensure every object_key is either "absent" or is equal to the full sha256sum
# present in the web_uri.
if not i.object_key in ("absent", parts[1]):
LOG.error(
"Invalid object_key %s for web_uri %s",
i.object_key,
i.web_uri,
extra={
"publish_id": publish_id,
"event": "publish",
"success": False,
},
)
raise HTTPException(
400,
detail="Invalid object_key %s for web_uri %s"
% (i.object_key, i.web_uri),
)
# Apply additional policy checks to the incoming items (e.g. do paths
# abide by certain conventions). Users with a certain role are allowed
# to bypass these checks.
can_ignore_policy = f"{env.name}-ignore-policy" in caller_roles
for item in items:
try:
item.validate_policy()
except schemas.ItemPolicyError:
if not can_ignore_policy:
raise

# Prevent unauthorized users from publishing to restricted paths within
# a particular CDN environment.
Expand Down
71 changes: 70 additions & 1 deletion exodus_gw/schemas.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import logging
import re
from datetime import datetime
from enum import Enum
from os.path import join, normpath
from uuid import UUID

from fastapi import Path
from fastapi import HTTPException, Path
from pydantic import BaseModel, Field, model_validator

from .settings import Settings

LOG = logging.getLogger("exodus-gw")

PathPublishId = Path(
...,
title="publish ID",
Expand All @@ -32,11 +35,30 @@ def normalize_path(path: str):
# TYPE/SUBTYPE[+SUFFIX][;PARAMETER=VALUE]
MIMETYPE_PATTERN = re.compile(r"^[-\w]+/[-.\w]+(\+[-\w]*)?(;[-\w]+=[-\w]+)?")

# Pattern matching anything under /origin/files/sha256 subtree
ORIGIN_FILES_BASE_PATTERN = re.compile("^(/content)?/origin/files/sha256/.*$")

# Pattern which all files under the above base *should* match in order to avoid
# a validation error
ORIGIN_FILES_PATTERN = re.compile(
"^(/content)?/origin/files/sha256/[0-f]{2}/[0-f]{64}/[^/]{1,300}$"
)


# Note: it would be preferable if we could reuse a settings object loaded by the
# app, however we need this value from within a @classmethod validator.
AUTOINDEX_FILENAME = Settings().autoindex_filename


class ItemPolicyError(HTTPException):
    """Raised when an item is structurally valid but fails to comply with
    certain publish policies (e.g. the /origin/files layout rules).

    Unless caught and deliberately ignored, this surfaces to the caller as
    an HTTP 400 response carrying the policy violation message.
    """

    def __init__(self, message: str):
        super().__init__(status_code=400, detail=message)


class ItemBase(BaseModel):
web_uri: str = Field(
...,
Expand Down Expand Up @@ -114,6 +136,53 @@ def validate_item(self) -> "ItemBase":

return self

def validate_policy(self):
    """Validate additional properties of this item against certain
    embedded policies, raising ItemPolicyError on violation.

    It's a little clumsy that this cannot happen in the @model_validator
    above. The point is that certain users are allowed to bypass the
    policy here, whereas the @model_validator is applied too early and
    too strictly to allow any bypassing.
    """
    self.validate_origin_files()

def validate_origin_files(self):
    """Enforce correct usage of the /origin/files directory layout.

    Items whose web_uri is outside the /origin/files/sha256 subtree pass
    trivially. Items inside it must match the format:

        /origin/files/sha256/(first two chars of sha256sum)/(full sha256sum)/(basename)

    and object_key must be either "absent" or equal to the full sha256sum
    appearing in the web_uri.

    Raises:
        ItemPolicyError: on any violation (surfaces as HTTP 400 unless the
            caller is permitted to bypass policy). A warning is logged for
            every violation, even when later bypassed.
    """
    if not ORIGIN_FILES_BASE_PATTERN.match(self.web_uri):
        # Not under /origin/files => passes this validation
        return

    def policy_error(message: str):
        # Log-and-raise helper: the warning is recorded even for callers
        # who will catch and ignore the exception.
        LOG.warning(message)
        raise ItemPolicyError(message)

    # All content under /origin/files/sha256 must match the full pattern.
    # (Use the already-compiled pattern directly rather than re.match.)
    if not ORIGIN_FILES_PATTERN.match(self.web_uri):
        policy_error(
            f"Origin path {self.web_uri} does not match regex {ORIGIN_FILES_PATTERN.pattern}"
        )

    # Verify that the two-character partial sha256sum matches the first two
    # characters of the full sha256sum. Reaching this point implies the full
    # pattern matched, so both path components are guaranteed to exist.
    parts = self.web_uri.partition("/files/sha256/")[2].split("/")
    if not parts[1].startswith(parts[0]):
        policy_error(
            f"Origin path {self.web_uri} contains mismatched sha256sum "
            f"({parts[0]}, {parts[1]})"
        )

    # Additionally, every object_key must either be "absent" or equal to the
    # full sha256sum present in the web_uri.
    if self.object_key not in ("absent", parts[1]):
        policy_error(
            f"Invalid object_key {self.object_key} for web_uri {self.web_uri}"
        )


class Item(ItemBase):
publish_id: UUID = Field(
Expand Down
43 changes: 43 additions & 0 deletions tests/routers/test_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -1499,6 +1499,49 @@ def test_update_invalid_path_unmatched_regex(db, auth_header):
}


def test_update_invalid_origin_files_bypassed(
    db, auth_header, caplog: pytest.LogCaptureFixture
):
    """A policy-violating item under /content/origin/ is accepted when the
    caller holds the {env}-ignore-policy role, with a warning logged.
    """

    publish_id = "11224567-e89b-12d3-a456-426614174000"

    db.add(Publish(id=publish_id, env="test", state="PENDING"))
    db.commit()

    # This path violates the /origin/files layout: the prefix "01" does not
    # match the first two characters of the full sha256sum.
    bad_uri = (
        "/content/origin/files/sha256/01/44/"
        "0144062dca731c0d5c24148722537e181d752ca8cda0097005f9268a51658b0a/test.rpm"
    )
    bad_item = {
        "web_uri": bad_uri,
        "object_key": "0144062dca731c0d5c24148722537e181d752ca8cda0097005f9268a51658b0a",
        "content_type": "application/octet-stream",
    }

    with TestClient(app) as client:
        # Try to add the violating item with the bypass role present.
        response = client.put(
            f"/test/publish/{publish_id}",
            json=[bad_item],
            headers=auth_header(roles=["test-publisher", "test-ignore-policy"]),
        )

    # It should have succeeded...
    assert response.status_code == 200

    # ...but at least warned about the suspicious path.
    expected_warning = (
        f"Origin path {bad_uri} does not match regex "
        "^(/content)?/origin/files/sha256/[0-f]{2}/[0-f]{64}/[^/]{1,300}$"
    )
    assert expected_warning in caplog.messages


def test_update_invalid_path_sha256sum_mismatch(db, auth_header):
"""When a user publishes to a /content/origin/ path, ensure that the two-character
portion of the web_uri matches the first two characters of the sha256sum portion of the
Expand Down

0 comments on commit cf6858c

Please sign in to comment.