Skip to content

Commit

Permalink
Regenerate PULP_DISTRIBUTION.xml on publish if necessary
Browse files Browse the repository at this point in the history
The PULP_DISTRIBUTION.xml file used to be saved from an upstream
repository and republished without modification. This is problematic
because files referenced by that file are filtered out during a publish.
This commit is a short-term work-around to that problematic workflow.
Without it, Pulp (or anything else using PULP_DISTRIBUTION.xml) will
attempt to download files that don't exist in the published repository.

fixes #1843
  • Loading branch information
jeremycline committed Apr 19, 2016
1 parent 551f282 commit 9f97669
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 13 deletions.
55 changes: 49 additions & 6 deletions plugins/pulp_rpm/plugins/distributors/yum/publish.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import copy
from gettext import gettext as _
import os
import subprocess
from gettext import gettext as _
from xml.etree import cElementTree

import mongoengine
from pulp.common import dateutils
Expand Down Expand Up @@ -786,19 +787,61 @@ def _publish_distribution_files(self, distribution_unit):
logger.warning(msg)
return

distro_files = distribution_unit.files
# The files from `treeinfo` and `PULP_DISTRIBUTION.xml` are mashed into the
# files list on the unit. This is a hacky work-around to unmash them, filter
# out anything that is in the main repodata directory (which is potentially not
# safe, but was happening before I got here and we don't have time to fix this
# properly right now), and generate a new `PULP_DISTRIBUTION.xml` that doesn't
# reference files that don't exist in this publish.
pulp_distribution_file = False
distro_files = [f['relativepath'] for f in distribution_unit.files]
if constants.DISTRIBUTION_XML in distro_files:
distro_files.remove(constants.DISTRIBUTION_XML)
pulp_distribution_file = True
distro_files = filter(lambda f: not f.startswith('repodata/'), distro_files)
total_files = len(distro_files)
logger.debug("Found %s distribution files to symlink" % total_files)

source_path_dir = distribution_unit._storage_path
symlink_dir = self.get_working_dir()
for dfile in distro_files:
if dfile['relativepath'].startswith('repodata/'):
continue
source_path = os.path.join(source_path_dir, dfile['relativepath'])
symlink_path = os.path.join(symlink_dir, dfile['relativepath'])
source_path = os.path.join(source_path_dir, dfile)
symlink_path = os.path.join(symlink_dir, dfile)
plugin_misc.create_symlink(source_path, symlink_path)

# Not all repositories have this file so this is only done if the upstream repo
# had the file.
if pulp_distribution_file:
xml_file_path = os.path.join(source_path_dir, constants.DISTRIBUTION_XML)
self._write_pulp_distribution_file(distro_files, xml_file_path)

def _write_pulp_distribution_file(self, distro_files, old_xml_file_path):
"""
This method re-creates the `PULP_DISTRIBUTION.xml` file.
It only adds a file to the new `PULP_DISTRIBUTION.xml` if the file existed in the
old one fetched from the upstream repository. This is to stop it from including
files added to the Distribution from the treeinfo file. It's messy and it should
go away as soon as we re-work Distributions.
:param distro_files: A list of file paths pulled from a `Distribution` unit.
:type distro_files: list of str
:param old_xml_file_path: The absolute path to the old PULP_DISTRIBUTION.xml
from upstream. The files referenced in here act as
a filter for the new PULP_DISTRIBUTION.xml file.
"""
old_xml_tree = cElementTree.parse(old_xml_file_path)
old_xml_root = old_xml_tree.getroot()
old_files = [old_element.text for old_element in old_xml_root.iter('file')]
distro_files = filter(lambda f: f in old_files, distro_files)
new_xml_root = cElementTree.Element("pulp_distribution", {'version': '1'})
for distro_file in distro_files:
element = cElementTree.SubElement(new_xml_root, 'file')
element.text = distro_file

tree = cElementTree.ElementTree(new_xml_root)
tree.write(os.path.join(self.get_working_dir(), constants.DISTRIBUTION_XML))

def _publish_distribution_packages_link(self, distribution_unit):
"""
Create a Packages directory in the repo that is a sym link back to the root directory
Expand Down
13 changes: 8 additions & 5 deletions plugins/pulp_rpm/plugins/importers/yum/parse/treeinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def _run(self, tmp_dir):
return

# Process the distribution
dist_files = self.process_distribution(tmp_dir)
dist_files, pulp_dist_xml_path = self.process_distribution(tmp_dir)
files.extend(dist_files)

self.update_unit_files(unit, files)
Expand All @@ -184,9 +184,11 @@ def _run(self, tmp_dir):
# Update deferred downloading catalog
self.update_catalog_entries(unit, files)

# The treeinfo file is always imported into platform
# The treeinfo and PULP_DISTRIBTION.xml files are always imported into platform
# # storage regardless of the download policy
unit.safe_import_content(treeinfo_path, os.path.basename(treeinfo_path))
if pulp_dist_xml_path is not None:
unit.safe_import_content(pulp_dist_xml_path, os.path.basename(pulp_dist_xml_path))

# The downloaded files are imported into platform storage.
if downloaded:
Expand Down Expand Up @@ -403,8 +405,9 @@ def process_distribution(self, tmp_dir):
:param tmp_dir: The absolute path to the temporary directory
:type tmp_dir: str
:return: A list of file dictionaries
:rtype: list
:return: A tuple that contains the list of file dictionaries and the absolute path
to the distribution file, or None if no PULP_DISTRIBUTION.xml was found.
:rtype: (list, basestring)
"""
# Get the Distribution file
result = self.get_distribution_file(tmp_dir)
Expand Down Expand Up @@ -440,7 +443,7 @@ def process_distribution(self, tmp_dir):
CHECKSUM: None,
CHECKSUM_TYPE: None,
})
return files
return files, result

def get_distribution_file(self, tmp_dir):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,9 @@ def test_parse_good_file(self, mock_get_dist):
tmp_dir = Mock()
parent = Mock()
dist = DistSync(parent, '')
files = dist.process_distribution(tmp_dir)
files, dist_file = dist.process_distribution(tmp_dir)

self.assertEqual(dist_file, DISTRIBUTION_GOOD_FILE)
self.assertEquals(3, len(files))
self.assertEquals('foo/bar.txt', files[0]['relativepath'])
self.assertEquals('baz/qux.txt', files[1]['relativepath'])
Expand All @@ -108,9 +109,10 @@ def test_no_distribution(self, mock_get_dist):
parent = Mock()
tmp_dir = Mock()
dist = DistSync(parent, '')
files = dist.process_distribution(tmp_dir)
files, dist_file = dist.process_distribution(tmp_dir)

self.assertEquals(0, len(files))
self.assertEqual(dist_file, None)

@patch('pulp_rpm.plugins.importers.yum.parse.treeinfo.DistSync.get_distribution_file',
return_value=DISTRIBUTION_BAD_SYNTAX_FILE)
Expand Down

0 comments on commit 9f97669

Please sign in to comment.