Skip to content

Commit

Permalink
As a user, I can perform a content-only mirror of a repo.
Browse files Browse the repository at this point in the history
backports #9316

closes: #9620
https://pulp.plan.io/issues/9620

(cherrypicked from commit 3b275d9)
  • Loading branch information
dralley committed Dec 21, 2021
1 parent 9493d73 commit 2803223
Show file tree
Hide file tree
Showing 8 changed files with 185 additions and 59 deletions.
2 changes: 2 additions & 0 deletions CHANGES/9620.feature
@@ -0,0 +1,2 @@
Added a `sync_policy` parameter to the `/sync/` endpoint which replaces the `mirror` parameter and provides options for how the sync should be carried out. The `mirror` parameter is deprecated but will retain its current function.
(backported from #9316)
10 changes: 7 additions & 3 deletions docs/workflows/create_sync_publish.rst
Expand Up @@ -146,9 +146,13 @@ Sync repository ``foo`` using remote ``bar``
.. literalinclude:: ../_scripts/sync.sh
:language: bash

You can specify ``mirror=True`` for a mirror mode. It means Pulp won't update
repository using previous repository version but create a new copy of remote
repository as a new repository version.
There are 3 sync modes to choose from, using the ``sync_policy`` option.

- ``additive`` (the default) will retain the existing contents of the Pulp repository and add the contents of the remote repository being synced.
- ``mirror_content_only`` will synchronize the Pulp repository to contain the same content as the one remote repository being synced - removing any existing content that isn't present in the remote repo.
- ``mirror_complete`` will act as ``mirror_content_only`` does, but additionally it will automatically create a publication that will be an *exact* bit-for-bit copy of the remote repository being synced, rather than requiring a separate step (or ``autopublish``) to generate the metadata later. This will keep repo metadata checksums intact, but is not possible for all repositories, as some use features which are incompatible with creating local clones that are exact copies.

The ``mirror`` option is deprecated; ``sync_policy`` should be used instead. If the ``mirror`` option is used, a value of ``true`` will change the default ``sync_policy`` to ``mirror_complete``, while a value of ``false`` will not change the default ``sync_policy``.

Optionally, you can skip ``SRPM`` packages by using ``skip_types:="[\"srpm\"]"``
option.
Expand Down
12 changes: 12 additions & 0 deletions pulp_rpm/app/constants.py
Expand Up @@ -26,6 +26,18 @@
ALLOWED_CHECKSUM_ERROR_MSG = """Checksum must be one of the allowed checksum types.
You can adjust these with the 'ALLOWED_CONTENT_CHECKSUMS' setting."""

# Names of the supported sync modes, addressable by attribute.
SYNC_POLICIES = SimpleNamespace(
    ADDITIVE="additive",
    MIRROR_COMPLETE="mirror_complete",
    MIRROR_CONTENT_ONLY="mirror_content_only",
)

# (value, label) pairs for a choice field; each policy string serves as both.
SYNC_POLICY_CHOICES = tuple(
    (policy, policy)
    for policy in (
        SYNC_POLICIES.ADDITIVE,
        SYNC_POLICIES.MIRROR_COMPLETE,
        SYNC_POLICIES.MIRROR_CONTENT_ONLY,
    )
)

CR_PACKAGE_ATTRS = SimpleNamespace(
ARCH="arch",
CHANGELOGS="changelogs",
Expand Down
40 changes: 40 additions & 0 deletions pulp_rpm/app/serializers/repository.py
Expand Up @@ -24,6 +24,7 @@
ALLOWED_CHECKSUM_ERROR_MSG,
CHECKSUM_CHOICES,
SKIP_TYPES,
SYNC_POLICY_CHOICES,
)
from pulp_rpm.app.models import (
RpmDistribution,
Expand Down Expand Up @@ -308,6 +309,29 @@ class RpmRepositorySyncURLSerializer(RepositorySyncURLSerializer):
Serializer for RPM Sync.
"""

mirror = serializers.BooleanField(
required=False,
allow_null=True,
help_text=_(
"DEPRECATED: If ``True``, ``sync_policy`` will default to 'mirror_complete' "
"instead of 'additive'."
),
)
sync_policy = serializers.ChoiceField(
help_text=_(
"Options: 'additive', 'mirror_complete', 'mirror_content_only'. Default: 'additive'. "
"Modifies how the sync is performed. 'mirror_complete' will clone the original "
"metadata and create an automatic publication from it, but comes with some "
"limitations and does not work for certain repositories. 'mirror_content_only' will "
"change the repository contents to match the remote but the metadata will be "
"regenerated and will not be bit-for-bit identical. 'additive' will retain the "
"existing contents of the repository and add the contents of the repository being "
"synced."
),
choices=SYNC_POLICY_CHOICES,
required=False,
allow_null=True,
)
skip_types = serializers.ListField(
help_text=_("List of content types to skip during sync."),
required=False,
Expand All @@ -318,6 +342,22 @@ class RpmRepositorySyncURLSerializer(RepositorySyncURLSerializer):
help_text=_("Whether or not to optimize sync."), required=False, default=True
)

def validate(self, data):
    """
    Validate sync parameters.

    Rejects requests that set both the deprecated ``mirror`` option and ``sync_policy``.
    Both fields are declared with ``allow_null=True``, so an explicit ``null`` is treated
    the same as leaving the option out.

    Args:
        data (dict): The field values validated so far.

    Returns:
        dict: The validated data, as returned by the parent serializer.

    Raises:
        serializers.ValidationError: If both options carry a non-null value.
    """
    data = super().validate(data)

    # A key holding None means the client sent an explicit null, not a real choice.
    mirror_given = data.get("mirror") is not None
    sync_policy_given = data.get("sync_policy") is not None

    if mirror_given and sync_policy_given:
        raise serializers.ValidationError(
            _(
                "Cannot use 'mirror' and 'sync_policy' options simultaneously. The 'mirror' "
                "option is deprecated, please use 'sync_policy' only."
            )
        )

    return data


class CopySerializer(serializers.Serializer):
"""
Expand Down
48 changes: 28 additions & 20 deletions pulp_rpm/app/tasks/synchronizing.py
Expand Up @@ -52,6 +52,7 @@
PACKAGE_REPODATA,
PULP_MODULE_ATTR,
PULP_MODULEDEFAULTS_ATTR,
SYNC_POLICIES,
UPDATE_REPODATA,
)
from pulp_rpm.app.models import (
Expand Down Expand Up @@ -340,24 +341,26 @@ def is_optimized_sync(repository, remote, url):
return is_optimized


def synchronize(remote_pk, repository_pk, mirror, skip_types, optimize):
def synchronize(remote_pk, repository_pk, sync_policy, skip_types, optimize):
"""
Sync content from the remote repository.
Create a new version of the repository that is synchronized with the remote.
If mirror=True, a publication will be created with a copy of the original metadata.
In this event, SRPMs and other types listed in "skip_types" will *not* be skipped.
If sync_policy=mirror_complete, a publication will be created with a copy of the original
metadata. This comes with some limitations, namely:
If mirror=True and the repository uses the xml:base / location_base feature, then
the sync will fail. This feature is incompatible with the intentions of most Pulp
users, as it will tell clients to look for metadata / packages from a source outside
of the repository.
* SRPMs and other types listed in "skip_types" will *not* be skipped.
* If the repository uses the xml:base / location_base feature, then the sync will fail.
This feature is incompatible with the intentions of most Pulp users, because the metadata
will tell clients to look for files at some source outside of the Pulp-hosted repo.
* If the repository uses Delta RPMs, the sync will fail, because Pulp does not support them,
and cannot change the repository metadata to remove them.
Args:
remote_pk (str): The remote PK.
repository_pk (str): The repository PK.
mirror (bool): Mirror mode.
sync_policy (str): How to perform the sync.
skip_types (list): List of content to skip.
optimize(bool): Optimize mode.
Expand Down Expand Up @@ -435,6 +438,9 @@ def get_treeinfo_data(remote, remote_url):

sub_repos = []

mirror = sync_policy.startswith("mirror")
mirror_metadata = sync_policy == SYNC_POLICIES.MIRROR_COMPLETE

if treeinfo:
treeinfo["repositories"] = {}
for repodata in set(treeinfo["download"]["repodatas"]):
Expand Down Expand Up @@ -464,12 +470,12 @@ def get_treeinfo_data(remote, remote_url):
remote,
sub_repo,
deferred_download,
mirror,
mirror_metadata,
skip_types=skip_types,
new_url=new_url,
namespace=directory,
)
dv = RpmDeclarativeVersion(first_stage=stage, repository=sub_repo)
dv = RpmDeclarativeVersion(first_stage=stage, repository=sub_repo, mirror=mirror)
subrepo_version = dv.create()
if subrepo_version:
sub_repo.last_sync_remote = remote
Expand All @@ -481,7 +487,7 @@ def get_treeinfo_data(remote, remote_url):
remote,
repository,
deferred_download,
mirror,
mirror_metadata,
skip_types=skip_types,
treeinfo=treeinfo,
new_url=remote_url,
Expand All @@ -492,7 +498,7 @@ def get_treeinfo_data(remote, remote_url):
repository.last_sync_remote = remote
repository.last_sync_repo_version = version.number
repository.save()
if mirror:
if mirror_metadata:
with RpmPublication.create(version, pass_through=False) as publication:
add_metadata_to_publication(publication, version)
for (name, subrepo_version) in sub_repos:
Expand Down Expand Up @@ -546,7 +552,7 @@ def __init__(
remote,
repository,
deferred_download,
mirror,
mirror_metadata,
skip_types=None,
new_url=None,
treeinfo=None,
Expand All @@ -558,8 +564,10 @@ def __init__(
Args:
remote (RpmRemote or UlnRemote): The remote data to be used when syncing
repository (RpmRepository): The repository to be compared when optimizing sync
deferred_download (bool): if True the downloading will not happen now. If False, it will
deferred_download (bool): If True the downloading will not happen now. If False, it will
happen immediately.
mirror_metadata (bool): Influences which metadata files are downloaded and what
is done with them.
Keyword Args:
skip_types (list): List of content to skip
Expand All @@ -573,7 +581,7 @@ def __init__(
self.remote = remote
self.repository = repository
self.deferred_download = deferred_download
self.mirror = mirror
self.mirror_metadata = mirror_metadata

# How many directories deep this repo is nested within another repo (if at all).
# Backwards relative paths that are shallower than this depth are permitted (in mirror
Expand Down Expand Up @@ -655,14 +663,14 @@ async def run_repomdrecord_download(name, location_href, downloader):
checksum_types[record.type] = record_checksum_type
record.checksum_type = record_checksum_type

if self.mirror:
if self.mirror_metadata:
uses_base_url = record.location_base
illegal_relative_path = self.is_illegal_relative_path(record.location_href)

if uses_base_url or illegal_relative_path or record.type == "prestodelta":
raise ValueError(MIRROR_INCOMPATIBLE_REPO_ERR_MSG)

if not self.mirror and record.type not in types_to_download:
if not self.mirror_metadata and record.type not in types_to_download:
continue

base_url = record.location_base or self.remote_url
Expand All @@ -688,7 +696,7 @@ async def run_repomdrecord_download(name, location_href, downloader):
except FileNotFoundError:
raise

if self.mirror:
if self.mirror_metadata:
# optional signature and key files for repomd metadata
for file_href in ["repodata/repomd.xml.asc", "repodata/repomd.xml.key"]:
try:
Expand Down Expand Up @@ -1041,15 +1049,15 @@ async def parse_packages(self, primary_xml, filelists_xml, other_xml, file_exten

with ProgressReport(**progress_data) as packages_pb:
# skip SRPM if defined
skip_srpms = "srpm" in self.skip_types and not self.mirror
skip_srpms = "srpm" in self.skip_types and not self.mirror_metadata

async def on_package(pkg):
"""Callback when handling a completed package.
Args:
pkg (createrepo_c.Package): A completed createrepo_c package.
"""
if self.mirror:
if self.mirror_metadata:
uses_base_url = pkg.location_base
illegal_relative_path = self.is_illegal_relative_path(pkg.location_href)

Expand Down
21 changes: 18 additions & 3 deletions pulp_rpm/app/viewsets.py
Expand Up @@ -25,6 +25,7 @@
)

from pulp_rpm.app import tasks
from pulp_rpm.app.constants import SYNC_POLICIES
from pulp_rpm.app.models import (
DistributionTree,
Modulemd,
Expand Down Expand Up @@ -129,17 +130,31 @@ def sync(self, request, pk):
serializer.is_valid(raise_exception=True)
remote = serializer.validated_data.get("remote", repository.remote)
mirror = serializer.validated_data.get("mirror")
sync_policy = serializer.validated_data.get("sync_policy")
skip_types = serializer.validated_data.get("skip_types")
optimize = serializer.validated_data.get("optimize")

if repository.retain_package_versions > 0 and mirror:
raise DRFValidationError("Cannot use 'retain_package_versions' with mirror-mode sync")
if not sync_policy:
sync_policy = SYNC_POLICIES.ADDITIVE if not mirror else SYNC_POLICIES.MIRROR_COMPLETE

# validate some invariants that involve repository-wide settings.
if sync_policy in (SYNC_POLICIES.MIRROR_COMPLETE, SYNC_POLICIES.MIRROR_CONTENT_ONLY):
err_msg = (
"Cannot use '{}' in combination with a 'mirror_complete' or "
"'mirror_content_only' sync policy."
)
if repository.retain_package_versions > 0:
raise DRFValidationError(err_msg.format("retain_package_versions"))
elif sync_policy == SYNC_POLICIES.MIRROR_COMPLETE:
err_msg = "Cannot use '{}' in combination with a 'mirror_complete' sync policy."
if repository.autopublish:
raise DRFValidationError(err_msg.format("autopublish"))

result = dispatch(
tasks.synchronize,
[repository, remote],
kwargs={
"mirror": mirror,
"sync_policy": sync_policy,
"remote_pk": str(remote.pk),
"repository_pk": str(repository.pk),
"skip_types": skip_types,
Expand Down

0 comments on commit 2803223

Please sign in to comment.