Skip to content

Commit

Permalink
Make "is this version valid" checks more lenient.
Browse files Browse the repository at this point in the history
Handles the case of "same name, multiple locations, diff content".

fixes #7208
  • Loading branch information
ggainey committed Jul 28, 2021
1 parent 7673cab commit 0152ff4
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 42 deletions.
1 change: 1 addition & 0 deletions CHANGES/7208.bugfix
@@ -0,0 +1 @@
Taught pulp_rpm to be more lenient in the face of non-standard repos.
29 changes: 27 additions & 2 deletions pulp_rpm/app/models/repository.py
Expand Up @@ -21,7 +21,11 @@
Distribution,
Task,
)
from pulpcore.plugin.repo_version_utils import remove_duplicates, validate_repo_version
from pulpcore.plugin.repo_version_utils import (
remove_duplicates,
validate_duplicate_content,
validate_version_paths,
)

from pulp_rpm.app.constants import CHECKSUM_CHOICES
from pulp_rpm.app.models import (
Expand Down Expand Up @@ -344,7 +348,28 @@ def finalize_new_version(self, new_version):
from pulp_rpm.app.advisory import resolve_advisories # avoid circular import

resolve_advisories(new_version, previous_version)
validate_repo_version(new_version)

#
# Some repositories are odd. A given NEVRA with different checksums can appear at
# different locations in the repo, or a single Artifact can be referenced by more than one
# name.
#
# validate_duplicate_content() takes repo-keys into account - so same-NEVRA, diff-location
# passes the test.
#
# The validate_version_paths() test checks for different-nevras, but same relative-path,
# and raises an exception. Because of these odd repositories, this can't be fatal - so
# we warn about it, but continue. At publish, we will have to pick one.
validate_duplicate_content(new_version)
try:
validate_version_paths(new_version)
except ValueError as ve:
log.warning(
_(
"New version of repository {repo} reports duplicate/overlap errors : "
"{value_errors}"
).format(repo=new_version.repository.name, value_errors=str(ve))
)

def _apply_retention_policy(self, new_version):
"""Apply the repository's "retain_package_versions" settings to the new version.
Expand Down
34 changes: 33 additions & 1 deletion pulp_rpm/app/tasks/publishing.py
Expand Up @@ -117,18 +117,51 @@ def publish_artifacts(self, content, prefix=""):
content__pulp_type=Package.get_pulp_type()
)

paths = set()
duplicated_paths = []
for content_artifact in contentartifact_qs.values("pk", "relative_path").iterator():
relative_path = content_artifact["relative_path"]
relative_path = os.path.join(
prefix, PACKAGES_DIRECTORY, relative_path.lower()[0], relative_path
)
#
# Some Suboptimal Repos have the 'same' artifact living in multiple places.
# Specifically, the same NEVRA, in more than once place, **with different checksums**
# (since if all that was different was location_href there would be only one
# ContentArtifact in the first place).
#
# pulp_rpm wants to publish a 'canonical' repository-layout, under which an RPM
# "name-version-release-arch" appears at "Packages/n/name-version-release-arch.rpm".
# Because the assumption is that Packages don't "own" their path, only the filename
# is kept as relative_path.
#
# In this case, we have to pick one - which is essentially what the rest of the RPM
# Ecosystem does when faced with the impossible. This code takes the first-found. We
# could implement something more complicated, if there are better options
# (choose by last-created maybe?)
#
# Note that this only impacts user-created publications, which produce the "standard"
# RPM layout of repo/Packages/f/foo.rpm. A publication created by mirror-sync retains
# whatever layout their "upstream" repo-metadata dictates.
#
if relative_path in paths:
duplicated_paths.append(f'{relative_path}:{content_artifact["pk"]}')
continue
else:
paths.add(relative_path)
published_artifacts.append(
PublishedArtifact(
relative_path=relative_path,
publication=self.publication,
content_artifact_id=content_artifact["pk"],
)
)
if duplicated_paths:
log.warning(
_("Duplicate paths found at publish : {problems} ").format(
problems="; ".join(duplicated_paths)
)
)

# Handle everything else
is_treeinfo = Q(relative_path__in=["treeinfo", ".treeinfo"])
Expand Down Expand Up @@ -301,7 +334,6 @@ def publish(
version=repository_version.number,
)
)

with tempfile.TemporaryDirectory("."):
with RpmPublication.create(repository_version) as publication:
publication.metadata_checksum_type = get_checksum_type("primary", checksum_types)
Expand Down
39 changes: 0 additions & 39 deletions pulp_rpm/tests/functional/api/test_crud_content_unit.py
Expand Up @@ -20,7 +20,6 @@
RPM_PACKAGE_FILENAME,
RPM_PACKAGE_FILENAME2,
RPM_SIGNED_URL,
RPM_SIGNED_URL2,
)
from pulp_rpm.tests.functional.utils import set_up_module as setUpModule # noqa:F401

Expand Down Expand Up @@ -132,44 +131,6 @@ def test_05_duplicate_raise_error(self):
msg = "There is already a package with"
self.assertTrue(msg in task_result["error"]["description"])

def test_06_second_unit_raises_error(self):
"""
Create a duplicate content unit with different ``artifacts`` and same ``repo_key_fields``.
"""
delete_orphans()
client = gen_rpm_client()
repo_api = RepositoriesRpmApi(client)

repo = repo_api.create(gen_repo())
self.addCleanup(repo_api.delete, repo.pulp_href)

artifact = gen_artifact()

# create first content unit.
content_attrs = {"artifact": artifact["pulp_href"], "relative_path": "test_package"}
response = self.rpm_content_api.create(**content_attrs)
monitor_task(response.task)

artifact = gen_artifact(url=RPM_SIGNED_URL2)

# create second content unit.
second_content_attrs = {"artifact": artifact["pulp_href"]}
second_content_attrs["relative_path"] = content_attrs["relative_path"]
response = self.rpm_content_api.create(**second_content_attrs)
monitor_task(response.task)

data = {"add_content_units": [c.pulp_href for c in self.rpm_content_api.list().results]}
response = repo_api.modify(repo.pulp_href, data)
with self.assertRaises(PulpTaskError) as cm:
monitor_task(response.task)
task = cm.exception.task.to_dict()

error_message = "Cannot create repository version. Path is duplicated: {}.".format(
content_attrs["relative_path"]
)

self.assertEqual(task["error"]["description"], error_message)


class ContentUnitRemoveTestCase(PulpTestCase):
"""
Expand Down
17 changes: 17 additions & 0 deletions pulp_rpm/tests/functional/api/test_sync.py
Expand Up @@ -23,6 +23,7 @@
from pulp_smash.utils import get_pulp_setting

from pulp_rpm.tests.functional.constants import (
CENTOS7_OPSTOOLS,
PULP_TYPE_ADVISORY,
PULP_TYPE_MODULEMD,
PULP_TYPE_PACKAGE,
Expand Down Expand Up @@ -1240,6 +1241,22 @@ def test_sync_packages_with_unsupported_checksum_type(self):
ctx.exception.task.error["description"],
)

def test_one_nevra_two_locations_and_checksums(self):
"""Sync a repository known to have one nevra, in two locations, with different content.
While 'odd', this is a real-world occurrence.
"""
repo = self.repo_api.create(gen_repo())
self.addCleanup(self.repo_api.delete, repo.pulp_href)

body = gen_rpm_remote(url=CENTOS7_OPSTOOLS, policy="on_demand")
remote = self.remote_api.create(body)
self.addCleanup(self.remote_api.delete, remote.pulp_href)

repository_sync_data = RpmRepositorySyncURL(remote=remote.pulp_href)
sync_response = self.repo_api.sync(repo.pulp_href, repository_sync_data)
monitor_task(sync_response.task)

@skip_if(bool, "md5_allowed", True)
def test_sync_metadata_with_unsupported_checksum_type(self):
"""
Expand Down
3 changes: 3 additions & 0 deletions pulp_rpm/tests/functional/constants.py
Expand Up @@ -483,12 +483,15 @@

RPM_MD5_REPO_FIXTURE_URL = urljoin(PULP_FIXTURES_BASE_URL, "rpm-with-md5/")

RPM_DIFF_NAME_SAME_CONTENT_URL = urljoin(PULP_FIXTURES_BASE_URL, "diff-name-same-content/")

CENTOS6_URL = "http://mirror.centos.org/centos-6/6.10/os/x86_64/"
CENTOS7_URL = "http://mirror.centos.org/centos-7/7/os/x86_64/"
CENTOS8_KICKSTART_APP_URL = "http://mirror.centos.org/centos-8/8/AppStream/x86_64/kickstart/"
CENTOS8_KICKSTART_BASEOS_URL = "http://mirror.centos.org/centos-8/8/BaseOS/x86_64/kickstart/"
CENTOS8_APPSTREAM_URL = "http://mirror.centos.org/centos-8/8/AppStream/x86_64/os/"
CENTOS8_BASEOS_URL = "http://mirror.centos.org/centos-8/8/BaseOS/x86_64/os/"
CENTOS7_OPSTOOLS = "http://ftp.cs.stanford.edu/centos/7/opstools/x86_64/"
EPEL7_URL = "https://dl.fedoraproject.org/pub/epel/7/x86_64/"
RPM_CDN_APPSTREAM_URL = "https://cdn.redhat.com/content/dist/rhel8/8.2/x86_64/appstream/os/"
RPM_CDN_BASEOS_URL = "https://cdn.redhat.com/content/dist/rhel8/8.2/x86_64/baseos/os/"
Expand Down

0 comments on commit 0152ff4

Please sign in to comment.