Skip to content

Commit

Permalink
Add a migration for the "vendor" field
Browse files Browse the repository at this point in the history
  • Loading branch information
dralley committed Sep 4, 2018
1 parent eb8a6d0 commit 458efe5
Showing 1 changed file with 66 additions and 0 deletions.
66 changes: 66 additions & 0 deletions plugins/pulp_rpm/plugins/migrations/0045_populate_vendor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import gzip
import sys

from pulp.server.db.connection import get_collection
from pulp.server.db.migrations.lib import utils

if sys.version_info < (2, 7):
from xml.etree import ElementTree as ET
else:
from xml.etree import cElementTree as ET


_NAMESPACES = {
'common': "http://linux.duke.edu/metadata/common",
'rpm': 'http://linux.duke.edu/metadata/rpm',
}

# The unit metadata snippets miss header/encoding and namespace references
# which aren't included until publish time. This breaks the parsing.
_HEADER = ('<?mxl version="1.0" encoding="UTF-8"?>\n'
'<metadata xmlns="http://linux.duke.edu/metadata/common" '
'xmlns:rpm="http://linux.duke.edu/metadata/rpm">\n {}'
'</metadata>\n')


def migrate_rpm(collection, unit):
"""
Uncompress single rpm unit primary metadata and populate the unit vendor attribute
accordingly.
:param collection: a collection of RPM units
:type collection: pymongo.collection.Collection
:param unit: the RPM unit being migrated
:type unit: dict
"""
primary_xml = unit.get('repodata', {}).get('primary', '')
primary_xml = _HEADER.format(gzip.zlib.decompress(primary_xml))
root_element = ET.fromstring(primary_xml)
delta = {}
delta['vendor'] = root_element.find(
'./common:package/common:format/rpm:vendor', _NAMESPACES
).text
collection.update_one({'_id': unit['_id']}, {'$set': delta})


def migrate(*args, **kwargs):
"""
Populate the RPM unit vendor attribute.
Migration can be safely re-run multiple times.
:param args: unused
:type args: list
:param kwargs: unused
type kwargs: dict
"""
rpm_collection = get_collection('units_rpm')
# select only units without the vendor attribute, fetch just the
# 'repodata.primary' attribute; the _id is always included
rpm_selection = rpm_collection.find(
{'vendor': {'$exists': False}}, ['repodata.primary']
).batch_size(100)
total_rpm_units = rpm_selection.count()
with utils.MigrationProgressLog('RPM', total_rpm_units) as progress_log:
for rpm in rpm_selection:
migrate_rpm(rpm_collection, rpm)
progress_log.progress()

0 comments on commit 458efe5

Please sign in to comment.