Skip to content

Commit

Permalink
published updateinfo only contains units in repo
Browse files Browse the repository at this point in the history
Errata units in Pulp contain all units in all repos that are linked to
errata with the same id, which was resulting in published errata
referencing packages that weren't actually available in the published
repo. This limits packages in published errata updateinfo XML to only
the packages that are contained in the published repo.

fixes #1366
https://pulp.plan.io/issues/1366

fixes #1548
https://pulp.plan.io/issues/1548
  • Loading branch information
Sean Myers committed Mar 14, 2016
1 parent b78c1c0 commit 73d67dd
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 3 deletions.
74 changes: 73 additions & 1 deletion plugins/pulp_rpm/plugins/distributors/yum/metadata/updateinfo.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import os
from xml.etree import ElementTree

import mongoengine
from pulp.plugins.util.metadata_writer import XmlFileContext
from pulp.server.db.model import RepositoryContentUnit

from pulp_rpm.plugins.distributors.yum.metadata.metadata import REPO_DATA_DIR_NAME
from pulp_rpm.plugins.db import models
from pulp_rpm.yum_plugin import util


Expand All @@ -13,12 +16,65 @@


class UpdateinfoXMLFileContext(XmlFileContext):
def __init__(self, working_dir, checksum_type=None, conduit=None):
    """
    Set up the context that writes the updateinfo XML file under the repodata directory.

    :param working_dir: directory under which the repodata directory and the
                        updateinfo file are located
    :type working_dir: str
    :param checksum_type: passed through unchanged to the parent XmlFileContext
    :param conduit: optional publish conduit; when it exposes a ``repo_id`` attribute,
                    add_unit_metadata uses that id to limit each erratum's package list
                    to packages that are actually in the repo. When omitted (the
                    default), no filtering is done, which keeps older two-argument
                    callers working.
    """
    metadata_file_path = os.path.join(working_dir, REPO_DATA_DIR_NAME,
                                      UPDATE_INFO_XML_FILE_NAME)
    self.conduit = conduit
    super(UpdateinfoXMLFileContext, self).__init__(
        metadata_file_path, 'updates', checksum_type=checksum_type)

def _repo_unit_nevra(self, erratum_unit, repo_id):
"""
Return a list of NEVRA dicts for units in a single repo referenced by the given errata.
Pulp errata units combine the known packages from all synced repos. Given an errata unit
and a repo, return a list of NEVRA dicts that can be used to filter out packages not
linked to that repo when generating a repo's updateinfo XML file. While returning that
list of NEVRA dicts is the main goal, doing so quickly and without running out of memory
is what makes this a little bit tricky.
Build up a super-fancy query to get the unit ids for all NEVRA seen in these errata
check repo/unit associations for this errata to limit the packages in the published
updateinfo to the units in the repo being currently published.
:param erratum_unit: The erratum unit that should be written to updateinfo.xml.
:type erratum_unit: pulp_rpm.plugins.db.models.Errata
:param repo_id: The repo_id of a pulp repository in which to find units
:type repo_id: str
:return: a list of NEVRA dicts for units in a single repo referenced by the given errata
:rtype: list
"""
nevra_fields = ('name', 'epoch', 'version', 'release', 'arch')
nevra_q = mongoengine.Q()
for pkglist in erratum_unit.pkglist:
for pkg in pkglist['packages']:
pkg_nevra = dict((field, pkg[field]) for field in nevra_fields)
nevra_q |= mongoengine.Q(**pkg_nevra)

# Aim the super-fancy query at mongo to get the units that this errata refers to
# The scaler method on the end returns a list of tuples to try to save some memory
# and also cut down on mongoengine model instance hydration costs.
nevra_units = models.RPM.objects.filter(nevra_q).scalar('id', *nevra_fields)

# Split up the nevra unit entries into a mapping of the unit id to its nevra fields
nevra_unit_map = dict((nevra_unit[0], nevra_unit[1:]) for nevra_unit in nevra_units)

# Get all of the unit ids from this errata that are associated with the current repo.
# Cast this as a set for speedier lookups when iterating of the nevra unit map.
repo_unit_ids = set(RepositoryContentUnit.objects.filter(
unit_id__in=nevra_unit_map.keys(), repo_id=repo_id).scalar('unit_id'))

# Finally(!), intersect the repo unit ids with the unit nevra ids to
# create a list of nevra dicts that can be easily compared to the
# errata package nevra and exclude unrelated packages
repo_unit_nevra = []
for nevra_unit_id, nevra_field_values in nevra_unit_map.items():
# based on the args to scalar when nevra_units was created:
if nevra_unit_id in repo_unit_ids:
repo_unit_nevra.append(dict(zip(nevra_fields, nevra_field_values)))

return repo_unit_nevra

def add_unit_metadata(self, item):
"""
Write the XML representation of erratum_unit to self.metadata_file_handle
Expand Down Expand Up @@ -80,6 +136,12 @@ def add_unit_metadata(self, item):
'href': reference['href']}
ElementTree.SubElement(references_element, 'reference', reference_attributes)

# If we can pull a repo_id off the conduit, use that to generate repo-specific nevra
if self.conduit and hasattr(self.conduit, 'repo_id'):
repo_unit_nevra = self._repo_unit_nevra(erratum_unit, self.conduit.repo_id)
else:
repo_unit_nevra = None

for pkglist in erratum_unit.pkglist:

pkglist_element = ElementTree.SubElement(update_element, 'pkglist')
Expand All @@ -102,6 +164,16 @@ def add_unit_metadata(self, item):
'epoch': package['epoch'] or '0',
'arch': package['arch'],
'src': package.get('src', '') or ''}

if repo_unit_nevra is not None:
# If repo_unit_nevra can be used for comparison, take the src attr out of a
# copy of this package's attrs to get a nevra dict for comparison
package_nevra = package_attributes.copy()
del(package_nevra['src'])
if package_nevra not in repo_unit_nevra:
# current package not in the specified repo, don't add it to the output
continue

package_element = ElementTree.SubElement(collection_element, 'package',
package_attributes)

Expand Down
3 changes: 2 additions & 1 deletion plugins/pulp_rpm/plugins/distributors/yum/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,8 @@ def initialize(self):
one that is built into the UpdateinfoXMLFileContext
"""
checksum_type = self.parent.get_checksum_type()
self.context = UpdateinfoXMLFileContext(self.get_working_dir(), checksum_type)
self.context = UpdateinfoXMLFileContext(self.get_working_dir(), checksum_type,
self.get_conduit())
self.context.initialize()
# set the self.process_unit method to the corresponding method on the
# UpdateInfoXMLFileContext as there is no other processing to be done for each unit.
Expand Down
111 changes: 110 additions & 1 deletion plugins/test/unit/plugins/distributors/yum/metadata/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import unittest
from xml.etree import cElementTree as et

from mock import patch
from mock import Mock, patch
from mongoengine.queryset.visitor import QCombination
from pulp.plugins.model import Unit

from pulp_rpm.common.ids import TYPE_ID_RPM
Expand Down Expand Up @@ -396,6 +397,114 @@ def test_updateinfo_unit_metadata(self):
self.assertEqual(content.count('<package'), 2)
self.assertEqual(content.count('<sum type="md5">f3c197a29d9b66c5b65c5d62b25db5b4</sum>'), 1)

@patch.object(UpdateinfoXMLFileContext, '_repo_unit_nevra')
def test_updateinfo_unit_metadata_with_repo(self, repo_unit_nevra):
    """
    When the conduit provides a repo_id, only packages reported by _repo_unit_nevra
    are written into the erratum's package list.
    """
    path = os.path.join(self.metadata_file_dir,
                        REPO_DATA_DIR_NAME,
                        UPDATE_INFO_XML_FILE_NAME)

    # mock out the repo/unit nevra matcher so that only one unit in the referenced errata
    # is included in the output updateinfo XML
    repo_unit_nevra.return_value = [
        {'name': 'patb', 'epoch': '0', 'version': '0.1',
         'release': '2', 'arch': 'x86_64'},
    ]

    # Use a context manager so the fixture file is closed even if parsing fails; the
    # first unit is pulled while the handle is still open.
    with open(os.path.join(DATA_DIR, 'updateinfo.xml'), 'r') as handle:
        generator = packages.package_list_generator(handle, 'update',
                                                    updateinfo.process_package_element)
        erratum_unit = next(generator)

    # just checking
    self.assertEqual(erratum_unit.unit_key['errata_id'], 'RHEA-2010:9999')

    mock_conduit = Mock()
    mock_conduit.repo_id = 'mock_conduit_repo'
    context = UpdateinfoXMLFileContext(self.metadata_file_dir, conduit=mock_conduit)
    context._open_metadata_file_handle()
    context.add_unit_metadata(erratum_unit)
    context._close_metadata_file_handle()

    self.assertNotEqual(os.path.getsize(path), 0)

    # try/finally rather than "with": gzip file objects are not context managers on
    # every Python 2 release.
    updateinfo_handle = gzip.open(path, 'r')
    try:
        content = updateinfo_handle.read()
    finally:
        updateinfo_handle.close()

    self.assertEqual(content.count('from="enhancements@redhat.com"'), 1)
    self.assertEqual(content.count('status="final"'), 1)
    self.assertEqual(content.count('type="enhancements"'), 1)
    self.assertEqual(content.count('version="1"'), 1)
    self.assertEqual(content.count('<id>RHEA-2010:9999</id>'), 1)
    self.assertEqual(content.count('<collection short="F13PTP">'), 1)
    # only the single package reported by the mocked _repo_unit_nevra is published
    self.assertEqual(content.count('<package'), 1)
    self.assertEqual(content.count('<sum type="md5">f3c197a29d9b66c5b65c5d62b25db5b4</sum>'), 1)

@patch('pulp_rpm.plugins.db.models.RPM')
@patch('pulp_rpm.plugins.distributors.yum.metadata.updateinfo.RepositoryContentUnit')
def test_updateinfo_repo_unit_nevra_q_filter(self, mock_rcu, mock_rpm):
    """
    The RPM lookup is filtered with an OR-combined Q object holding one child per package.
    """
    # A mongoengine "QCombination" object is used to efficiently search for units
    # by nevra. This checks that the QCombination object is properly created based
    # on the errata unit parsed from the test updateinfo XML.
    #
    # RepositoryContentUnit is patched as well so that the repo association lookup
    # performed by the method under test never reaches a real database; only the RPM
    # query is inspected here.
    with open(os.path.join(DATA_DIR, 'updateinfo.xml'), 'r') as handle:
        generator = packages.package_list_generator(
            handle, 'update', updateinfo.process_package_element)
        erratum_unit = next(generator)

    context = UpdateinfoXMLFileContext(self.metadata_file_dir)
    context._repo_unit_nevra(erratum_unit, 'mock_repo')

    # Call 0 to mock_rpm's filter should have one arg, which should be the QCombination
    # object that is built with an OR operator, with two children (one for each package
    # in the errata unit that was passed to the method under test).
    qcombination = mock_rpm.objects.filter.call_args_list[0][0][0]
    self.assertTrue(isinstance(qcombination, QCombination))
    self.assertEqual(qcombination.operation, qcombination.OR)
    self.assertEqual(len(qcombination.children), 2)

@patch('pulp_rpm.plugins.db.models.RPM')
@patch('pulp_rpm.plugins.distributors.yum.metadata.updateinfo.RepositoryContentUnit')
def test_updateinfo_repo_unit_nevra_return(self, mock_rcu, mock_rpm):
    """
    Only the NEVRA of units associated with the requested repo are returned.
    """
    # Build up the mock data as well as the expected returns
    nevra_fields = ('name', 'epoch', 'version', 'release', 'arch')
    unit1_nevra = ('n1', 'e1', 'v1', 'r1', 'a1')
    unit1_nevra_dict = dict(zip(nevra_fields, unit1_nevra))
    unit2_nevra = ('n2', 'e2', 'v2', 'r2', 'a2')
    unit2_nevra_dict = dict(zip(nevra_fields, unit2_nevra))

    # This is the result to the query for all units with a given nevra
    # The expected value is a list of tuples containing unit ids and nevra fields;
    mock_rpm.objects.filter().scalar.return_value = [
        ('id1',) + unit1_nevra,
        ('id2',) + unit2_nevra,
    ]
    # The expected value here is a list of unit IDs from the previous query that are
    # associated with our mock repo.
    mock_rcu.objects.filter().scalar.return_value = ['id1']

    # Load the updateinfo XML to get an erratum unit to process
    with open(os.path.join(DATA_DIR, 'updateinfo.xml'), 'r') as handle:
        generator = packages.package_list_generator(
            handle, 'update', updateinfo.process_package_element)
        erratum_unit = next(generator)

    context = UpdateinfoXMLFileContext(self.metadata_file_dir)
    repo_unit_nevra = context._repo_unit_nevra(erratum_unit, 'mock_repo')

    # The bare filter() call above (used to reach the scalar mock) is recorded too, so
    # pick out the call made for our repo. The unit ids come from the keys of a dict, so
    # their order is arbitrary; compare them sorted instead of pinning one ordering.
    repo_filter_kwargs = [kwargs for _, kwargs in mock_rcu.objects.filter.call_args_list
                          if kwargs.get('repo_id') == 'mock_repo']
    self.assertEqual(len(repo_filter_kwargs), 1)
    self.assertEqual(sorted(repo_filter_kwargs[0]['unit_id__in']), ['id1', 'id2'])

    # And finally, make sure the return value is actually good!
    # We made the RPM mock simulate two units known to pulp with the nevra seen in our errata.
    # Then, we made the RepositoryContentUnit mock simulate that only one of those units is
    # associated with the passed-in repo. The return value should be a list with only the
    # single matching unit's nevra dict in it.
    self.assertEqual(len(repo_unit_nevra), 1)
    self.assertTrue(unit1_nevra_dict in repo_unit_nevra)
    self.assertTrue(unit2_nevra_dict not in repo_unit_nevra)

# -- prestodelta.xml testing -----------------------------------------------

@skip_broken
Expand Down

0 comments on commit 73d67dd

Please sign in to comment.