Skip to content

Commit

Permalink
Use aggregation to identify unique errata pkglists
Browse files Browse the repository at this point in the history
To improve both performance and memory consumption of celery
workers during applicability regeneration.
The serializer for Errata now deals with unique pkglists as well.

closes #3172
https://pulp.plan.io/issues/3172

(cherry picked from commit 6a40090)
  • Loading branch information
goosemania authored and pcreech committed Jun 18, 2018
1 parent 2b0e459 commit 759ff1c
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 14 deletions.
20 changes: 20 additions & 0 deletions plugins/pulp_rpm/plugins/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,6 +1163,26 @@ def rpm_search_dicts(self):
ret.append(unit_key)
return ret

@classmethod
def get_unique_pkglists(cls, errata_id):
    """
    Generate a list of unique pkglists for a specified erratum.

    The uniqueness is computed on the database side with an aggregation
    pipeline: all ErratumPkglist documents with the given ``errata_id`` are
    grouped together and the ``collections.packages`` value of each document
    is accumulated with ``$addToSet``, so identical values are kept only once.
    Those pkglists contain only information about packages.

    :param errata_id: The erratum to generate a unique set of pkglists for
    :type errata_id: str
    :return: unique pkglists for a specified erratum; each entry is the
             ``collections.packages`` value of one pkglist document. An empty
             list is returned when no pkglist exists for the erratum.
    :rtype: list
    """
    match_stage = {'$match': {'errata_id': errata_id}}
    group_stage = {'$group': {'_id': '$errata_id',
                              'pkglists': {'$addToSet': '$collections.packages'}}}
    cursor = ErratumPkglist.objects.aggregate(match_stage, group_stage,
                                              allowDiskUse=True,
                                              batchSize=5)
    # The $group stage emits one document per distinct errata_id that passed
    # $match, so the cursor holds at most one result. When nothing matched,
    # the cursor is empty and an unconditional .next() would raise
    # StopIteration; fall back to "no pkglists" instead.
    grouped = next(cursor, None)
    if grouped is None:
        return []
    return grouped['pkglists']

def create_legacy_metadata_dict(self):
"""
Generate metadata dict and add erratum pkglist to it since it is stored
Expand Down
22 changes: 11 additions & 11 deletions plugins/pulp_rpm/plugins/profilers/yum.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from pulp.server.db.model.criteria import UnitAssociationCriteria

from pulp_rpm.common.ids import TYPE_ID_ERRATA, TYPE_ID_RPM
from pulp_rpm.plugins.db import models
from pulp_rpm.yum_plugin import util


_logger = util.getLogger(__name__)

NVREA_KEYS = ['name', 'version', 'release', 'epoch', 'arch']
Expand Down Expand Up @@ -309,24 +309,24 @@ def _is_errata_applicable(errata, profile_lookup_table, available_rpm_nevras):
:param profile_lookup_table: lookup table of a unit profile keyed by "name arch"
:type profile_lookup_table: dict
:param available_rpm_nevras: NEVRA of packages available in a repo
:type available_rpm_nevras: list of tuples
:return: true if applicable, false otherwise
:rtype: boolean
"""
# Get rpms from errata
errata_rpms = YumProfiler._get_rpms_from_errata(errata)
pkglists = models.Errata.get_unique_pkglists(errata.unit_key.get('errata_id'))

# RHBZ #1171280: ensure we are only checking applicability against RPMs
# we have access to in the repo. This is to prevent a RHEL6 machine
# from finding RHEL7 packages, for example.
available_errata_rpms = []
for errata_rpm in errata_rpms:
if YumProfiler._create_nevra(errata_rpm) in available_rpm_nevras:
available_errata_rpms.append(errata_rpm)

# Check if any rpm from errata is applicable to the consumer
for errata_rpm in available_errata_rpms:
if YumProfiler._is_rpm_applicable(errata_rpm, profile_lookup_table):
return True
for pkglist in pkglists:
for collection in pkglist:
for errata_rpm in collection:
if YumProfiler._create_nevra(errata_rpm) in available_rpm_nevras and \
YumProfiler._is_rpm_applicable(errata_rpm, profile_lookup_table):
return True

# Return false if none of the errata rpms are applicable
return False
Expand Down
7 changes: 4 additions & 3 deletions plugins/pulp_rpm/plugins/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,15 @@ def serialize(self, unit):
Convert a single unit to its dictionary form.
Add to erratum unit its pkglist.
Duplicated pkglists are eliminated.
:param unit: The object to be converted
:type unit: object
"""
from pulp_rpm.plugins.db.models import ErratumPkglist
from pulp_rpm.plugins.db import models

pkglists = ErratumPkglist.objects(errata_id=unit.get('errata_id'))
unit['pkglist'] = [coll for pkglist in pkglists for coll in pkglist['collections']]
pkglists = models.Errata.get_unique_pkglists(unit.get('errata_id'))
unit['pkglist'] = [coll for pkglist in pkglists for coll in pkglist]
return super(Errata, self).serialize(unit)


Expand Down

0 comments on commit 759ff1c

Please sign in to comment.