Skip to content

Commit

Permalink
Fix .treeinfo metadata being written improperly for Pulp users
Browse files Browse the repository at this point in the history
backports: #8622
https://pulp.plan.io/issues/8622

fixes #9218

(cherry picked from commit 00853cb)
  • Loading branch information
dralley committed Aug 11, 2021
1 parent 55340c7 commit 8602488
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 122 deletions.
2 changes: 2 additions & 0 deletions CHANGES/9218.bugfix
@@ -0,0 +1,2 @@
Fixed multiple bugs in distribution tree metadata generation regarding "variant" and "variants" metadata.
(backported from #8622)
3 changes: 2 additions & 1 deletion coverage.md
Expand Up @@ -26,7 +26,8 @@ This file contains list of features and their test coverage.
| As a user, I can re-sync custom repository metadata when it was the only change in a repository | YES | |
| As a user, I can sync an existing advisory whose dates are timestamps (as opposed to datetimes) | NO | Example: https://updates.suse.com/SUSE/Updates/SLE-SERVER/12-SP5/x86_64/update/ |
| As a user, I can sync repos with repomd.xml files whose filenames contain dashes (e.g., app-icons.tar.gz) | NO | Example: https://updates.suse.com/SUSE/Updates/SLE-SERVER/12-SP5/x86_64/update/ |
| As a user, the metadata being produced by Pulp (and being saved to Pulp) is correct | PART | Primary, Filelists and Other metadata is mostly covered. Updateinfo, Groups, and Distribution Tree metadata is not covered. "Weird" cases not covered. |
| As a user, the content metadata being saved to Pulp is correct | PART | Packages and distribution trees are covered. Updateinfo and Groups metadata is not covered. "Weird" cases not covered. |
| As a user, the metadata being produced by Pulp is correct | PART | Primary, Filelists, and Other metadata is covered, Distribution Tree (.treeinfo) metadata is covered, Updateinfo and Groups metadata is not covered. "Weird" cases not covered. |
| **Duplicates** | | |
| As a user, I have only one advisory with the same id in a repo version | YES | |
| As a user, I have only one module with the same NSVCA in a repo version | NO | |
Expand Down
9 changes: 9 additions & 0 deletions pulp_rpm/app/kickstart/treeinfo.py
Expand Up @@ -120,6 +120,15 @@ def parsed_sections(self):

return parser._sections

def rewrite_subrepo_paths(self, treeinfo_data):
    """Rewrite the variant repository paths to be local.

    Ensure that the variant / sub-repo path is in a sub-directory, using the
    per-variant "repository" and "packages" paths computed by *treeinfo_data*.
    """
    local_variant_paths = treeinfo_data.variants
    for variant in self.variants.get_variants():
        rewritten = local_variant_paths[variant.id]
        variant.paths.repository = rewritten["repository"]
        variant.paths.packages = rewritten["packages"]


class TreeinfoData:
"""
Expand Down
18 changes: 12 additions & 6 deletions pulp_rpm/app/tasks/publishing.py
Expand Up @@ -209,13 +209,19 @@ def handle_sub_repos(self, distribution_tree):
temp_file.flush()
treeinfo = PulpTreeInfo()
treeinfo.load(f=temp_file.name)
treeinfo_parsed = treeinfo.parsed_sections()
treeinfodata = TreeinfoData(treeinfo_parsed)
for variant in treeinfo.variants.get_variants():
variant.paths.repository = treeinfodata.variants[variant.id]["repository"]
variant.paths.packages = treeinfodata.variants[variant.id]["packages"]
treeinfo_data = TreeinfoData(treeinfo.parsed_sections())

# rewrite the treeinfo file such that the variant repository and package location
# is a relative subtree
treeinfo.rewrite_subrepo_paths(treeinfo_data)

# TODO: better way to do this?
main_variant = treeinfo.original_parser._sections.get("general", {}).get(
"variant", None
)
treeinfo_file = tempfile.NamedTemporaryFile()
treeinfo.dump(treeinfo_file.name)
treeinfo.dump(treeinfo_file.name, main_variant=main_variant)

PublishedMetadata.create_from_file(
relative_path=original_treeinfo_content_artifact.relative_path,
publication=self.publication,
Expand Down
46 changes: 32 additions & 14 deletions pulp_rpm/app/tasks/synchronizing.py
Expand Up @@ -103,16 +103,16 @@
pkgid_to_location_href = collections.defaultdict(dict)


def store_metadata_for_mirroring(repo, dl_result, relative_path):
def store_metadata_for_mirroring(repo, md_path, relative_path):
"""Used to store data about the downloaded metadata for mirror-publishing after the sync.
Args:
repo: Which repository the metadata is associated with
dl_result: The DownloadResult from downloading the metadata
md_path: The path to the metadata file
relative_path: The relative path to the metadata file within the repository
"""
global metadata_files_for_mirroring
metadata_files_for_mirroring[str(repo.pk)][relative_path] = dl_result
metadata_files_for_mirroring[str(repo.pk)][relative_path] = md_path


def store_package_for_mirroring(repo, pkgid, location_href):
Expand Down Expand Up @@ -149,9 +149,9 @@ def add_metadata_to_publication(publication, version, prefix=""):
publication.repo_gpgcheck = has_repomd_signature
publication.sqlite_metadata = has_sqlite

for (relative_path, result) in repo_metadata_files.items():
for (relative_path, metadata_file_path) in repo_metadata_files.items():
PublishedMetadata.create_from_file(
file=File(open(result.path, "rb")),
file=File(open(metadata_file_path, "rb")),
relative_path=os.path.join(prefix, relative_path),
publication=publication,
)
Expand Down Expand Up @@ -344,17 +344,33 @@ def get_treeinfo_data(remote, remote_url):

try:
result = downloader.fetch()
store_metadata_for_mirroring(repository, result, namespace)
except FileNotFoundError:
continue

treeinfo = PulpTreeInfo()
treeinfo.load(f=result.path)
treeinfo_parsed = treeinfo.parsed_sections()
sha256 = result.artifact_attributes["sha256"]
treeinfo_serialized = TreeinfoData(treeinfo_parsed).to_dict(
hash=sha256, filename=namespace
treeinfo_data = TreeinfoData(treeinfo.parsed_sections())

# import pydevd_pycharm
# pydevd_pycharm.settrace(
# "localhost", port=12735, stdoutToServer=True, stderrToServer=True
# )

# get the data we need before changing the original
treeinfo_serialized = treeinfo_data.to_dict(hash=sha256, filename=namespace)

# rewrite the treeinfo file such that the variant repository and package location
# is a relative subtree
treeinfo.rewrite_subrepo_paths(treeinfo_data)

# TODO: better way to do this?
main_variant = treeinfo.original_parser._sections.get("general", {}).get(
"variant", None
)
treeinfo_file = tempfile.NamedTemporaryFile(delete=False)
treeinfo.dump(treeinfo_file.name, main_variant=main_variant)
store_metadata_for_mirroring(repository, treeinfo_file.name, namespace)
break

return treeinfo_serialized
Expand Down Expand Up @@ -607,7 +623,7 @@ async def run(self):
url=urlpath_sanitize(self.remote_url, "repodata/repomd.xml")
)
result = await downloader.run()
store_metadata_for_mirroring(self.repository, result, "repodata/repomd.xml")
store_metadata_for_mirroring(self.repository, result.path, "repodata/repomd.xml")
metadata_pb.increment()

repomd_path = result.path
Expand Down Expand Up @@ -657,7 +673,7 @@ async def run_repomdrecord_download(name, location_href, downloader):
try:
for future in asyncio.as_completed(list(repomd_downloaders.values())):
name, location_href, result = await future
store_metadata_for_mirroring(self.repository, result, location_href)
store_metadata_for_mirroring(self.repository, result.path, location_href)
repomd_files[name] = result
metadata_pb.increment()
except ClientResponseError as exc:
Expand All @@ -673,7 +689,7 @@ async def run_repomdrecord_download(name, location_href, downloader):
url=urlpath_sanitize(self.remote_url, file_href)
)
result = await downloader.run()
store_metadata_for_mirroring(self.repository, result, file_href)
store_metadata_for_mirroring(self.repository, result.path, file_href)
metadata_pb.increment()
except (ClientResponseError, FileNotFoundError):
pass
Expand All @@ -684,7 +700,9 @@ async def run_repomdrecord_download(name, location_href, downloader):
url=urlpath_sanitize(self.remote_url, "extra_files.json")
)
result = await downloader.run()
store_metadata_for_mirroring(self.repository, result, "extra_files.json")
store_metadata_for_mirroring(
self.repository, result.path, "extra_files.json"
)
metadata_pb.increment()
except (ClientResponseError, FileNotFoundError):
pass
Expand All @@ -705,7 +723,7 @@ async def run_repomdrecord_download(name, location_href, downloader):
)
result = await downloader.run()
store_metadata_for_mirroring(
self.repository, result, data["file"]
self.repository, result.path, data["file"]
)
metadata_pb.increment()
except ClientResponseError as exc:
Expand Down
191 changes: 117 additions & 74 deletions pulp_rpm/tests/functional/api/test_publish.py
Expand Up @@ -23,7 +23,6 @@
from pulp_rpm.tests.functional.constants import (
RPM_ALT_LAYOUT_FIXTURE_URL,
RPM_COMPLEX_FIXTURE_URL,
RPM_COMPLEX_PACKAGE_DATA,
RPM_FIXTURE_SUMMARY,
RPM_KICKSTART_FIXTURE_URL,
RPM_KICKSTART_REPOSITORY_ROOT_CONTENT,
Expand Down Expand Up @@ -185,78 +184,6 @@ def do_test(self, url):
self.assertIsNotNone(publication_href)


class DistributionTreeMetadataTestCase(PulpTestCase):
"""Publish repository and validate the metadata is the same.
This Test does the following:
1. Create a rpm repo and a remote.
2. Sync the repo with the remote.
3. Publish and distribute the repo.
4. Download the metadata of both the original and Pulp-created repos.
5. Convert it into a canonical form (XML -> JSON -> Dict -> apply a sorting order).
6. Compare the metadata.
"""

@classmethod
def setUpClass(cls):
"""Create class-wide variables."""
cls.cfg = config.get_config()
cls.client = gen_rpm_client()
cls.repo_api = RepositoriesRpmApi(cls.client)
cls.remote_api = RemotesRpmApi(cls.client)
cls.publications = PublicationsRpmApi(cls.client)
cls.distributions = DistributionsRpmApi(cls.client)

def test_complex_repo(self):
"""Test the "complex" fixture that covers more of the metadata cases.
The standard fixtures have no changelogs and don't cover "ghost" files. The repo
with the "complex-package" does, and also does a better job of covering rich deps
and other atypical metadata.
"""
delete_orphans()
self.do_test(RPM_KICKSTART_FIXTURE_URL)

def do_test(self, repo_url):
"""Sync and publish an RPM repository and verify the metadata is what was expected."""
# 1. create repo and remote
repo = self.repo_api.create(gen_repo())
self.addCleanup(self.repo_api.delete, repo.pulp_href)

body = gen_rpm_remote(repo_url, policy="on_demand")
remote = self.remote_api.create(body)
self.addCleanup(self.remote_api.delete, remote.pulp_href)

# 2. Sync it
repository_sync_data = RpmRepositorySyncURL(remote=remote.pulp_href)
sync_response = self.repo_api.sync(repo.pulp_href, repository_sync_data)
monitor_task(sync_response.task)

# 3. Publish and distribute
publish_data = RpmRpmPublication(repository=repo.pulp_href)
publish_response = self.publications.create(publish_data)
created_resources = monitor_task(publish_response.task).created_resources
publication_href = created_resources[0]
self.addCleanup(self.publications.delete, publication_href)

body = gen_distribution()
body["publication"] = publication_href
distribution_response = self.distributions.create(body)
created_resources = monitor_task(distribution_response.task).created_resources
distribution = self.distributions.read(created_resources[0])
self.addCleanup(self.distributions.delete, distribution.pulp_href)

# 4. Download and parse the metadata.
original_treeinfo = ElementTree.fromstring(
http_get(os.path.join(repo_url, ".treeinfo"))
)

reproduced_treeinfo = ElementTree.fromstring(
http_get(os.path.join(distribution.base_url, ".treeinfo"))
)


# TODO: check the updateinfo metadata to confirm the references are there.
class SyncPublishReferencesUpdateTestCase(PulpTestCase):
"""Sync/publish a repo that ``updateinfo.xml`` contains references."""
Expand Down Expand Up @@ -388,7 +315,7 @@ def _get_updateinfo_xml_path(root_elem):
return data_elems[0].find(xpath).get("href")


class MetadataTestCase(PulpTestCase):
class CoreMetadataTestCase(PulpTestCase):
"""Publish repository and validate the metadata is the same.
This Test does the following:
Expand Down Expand Up @@ -565,6 +492,122 @@ def compare_metadata_file(self, original_metadata_text, generated_metadata_text,
self.assertListEqual(list(diff), [], list(diff))


class DistributionTreeMetadataTestCase(PulpTestCase):
    """Publish repository and validate the distribution tree metadata is correct.

    This test does the following:

    1. Create a rpm repo and a remote.
    2. Sync the repo with the remote.
    3. Publish and distribute the repo.
    4. Download the treeinfo metadata of both the original and Pulp-created repos.
    5. Sort fields so that it is directly comparable.
    6. Compare the metadata, ignoring the differences that are expected, such as path rewriting.
    """

    @classmethod
    def setUpClass(cls):
        """Create class-wide variables."""
        cls.cfg = config.get_config()
        cls.client = gen_rpm_client()
        cls.repo_api = RepositoriesRpmApi(cls.client)
        cls.remote_api = RemotesRpmApi(cls.client)
        cls.publications = PublicationsRpmApi(cls.client)
        cls.distributions = DistributionsRpmApi(cls.client)

    def test_mirror_publish(self):
        """Sync the kickstart fixture in mirror mode and verify the generated .treeinfo."""
        delete_orphans()
        self.do_test(True)

    def test_standard_publish(self):
        """Sync the kickstart fixture in standard mode and verify the generated .treeinfo."""
        delete_orphans()
        self.do_test(False)

    def do_test(self, mirror):
        """Sync and publish an RPM repository and verify the metadata is what was expected.

        Args:
            mirror: Whether the repository should be synced in mirror mode.
        """
        from configparser import ConfigParser

        # 1. create repo and remote
        repo = self.repo_api.create(gen_repo())
        self.addCleanup(self.repo_api.delete, repo.pulp_href)

        body = gen_rpm_remote(RPM_KICKSTART_FIXTURE_URL, policy="on_demand")
        remote = self.remote_api.create(body)
        self.addCleanup(self.remote_api.delete, remote.pulp_href)

        # 2. Sync it
        repository_sync_data = RpmRepositorySyncURL(remote=remote.pulp_href, mirror=mirror)
        sync_response = self.repo_api.sync(repo.pulp_href, repository_sync_data)
        monitor_task(sync_response.task)

        # 3. Publish and distribute
        publish_data = RpmRpmPublication(repository=repo.pulp_href)
        publish_response = self.publications.create(publish_data)
        created_resources = monitor_task(publish_response.task).created_resources
        publication_href = created_resources[0]
        self.addCleanup(self.publications.delete, publication_href)

        body = gen_distribution()
        body["publication"] = publication_href
        distribution_response = self.distributions.create(body)
        created_resources = monitor_task(distribution_response.task).created_resources
        distribution = self.distributions.read(created_resources[0])
        self.addCleanup(self.distributions.delete, distribution.pulp_href)

        # 4. Download and parse the metadata.
        original_treeinfo = http_get(os.path.join(RPM_KICKSTART_FIXTURE_URL, ".treeinfo"))
        generated_treeinfo = http_get(os.path.join(distribution.base_url, ".treeinfo"))

        # Named `parser` (not `config`) to avoid shadowing the module-level
        # pulp_smash `config` used in setUpClass. A fresh parser is needed for
        # each file: read_string() into one parser would merge the sections.
        parser = ConfigParser()
        parser.read_string(original_treeinfo.decode("utf-8"))
        original_treeinfo = parser._sections

        parser = ConfigParser()
        parser.read_string(generated_treeinfo.decode("utf-8"))
        generated_treeinfo = parser._sections

        # 5. Re-arrange the metadata so that it can be compared.
        # TODO: These really should be in the same order they were in originally.
        # https://pulp.plan.io/issues/9208
        for metadata_dict in [original_treeinfo, generated_treeinfo]:
            metadata_dict["general"]["variants"] = ",".join(
                sorted(metadata_dict["general"]["variants"].split(","))
            )
            metadata_dict["tree"]["variants"] = ",".join(
                sorted(metadata_dict["tree"]["variants"].split(","))
            )

        differences = []

        # skip any differences that are "correct" i.e. rewritten "repository" and "packages" paths
        for d in dictdiffer.diff(original_treeinfo, generated_treeinfo):
            # Example: ('change', 'variant-Land.packages', ('Packages', 'Land/Packages'))
            diff_type, diff_name = d[0], d[1]
            if diff_type == "change":
                # Only 'change' entries carry an (old, new) pair. Indexing
                # d[2][1] unconditionally (as before) raised IndexError on a
                # single-item 'add'/'remove' entry instead of reporting it as
                # a failed assertion below.
                new_value = d[2][1]
                if diff_name.endswith((".packages", ".repository")):
                    # TODO: this is ignoring problems with the generated metadata
                    # https://pulp.plan.io/issues/9208
                    if "../" not in new_value:
                        continue

            differences.append(d)

        self.assertListEqual(differences, [], differences)


class ChecksumTypeTestCase(PulpTestCase):
"""Publish repository and validate the updateinfo.
Expand Down

0 comments on commit 8602488

Please sign in to comment.