Skip to content

Commit

Permalink
Fix RemoveOldRepodataStep for yum publisher
Browse files Browse the repository at this point in the history
Previously RemoveOldRepodataStep step didn't actually
check the files within the repodata.xml to find the files
that should be deleted. Instead it made the assumption
that the latest one of each file type (to be removed) was
present, and removed the files older than the threshold

fixes #3551
https://pulp.plan.io/issues/3551

Co-authored-by: Blake McIvor <bmcivor@redhat.com>
  • Loading branch information
Muhammad Tahir and bmcivor committed Jun 21, 2018
1 parent 67c7836 commit 3774e38
Show file tree
Hide file tree
Showing 21 changed files with 137 additions and 77 deletions.
8 changes: 7 additions & 1 deletion plugins/pulp_rpm/plugins/distributors/yum/metadata/repomd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, working_dir, checksum_type=CONFIG_DEFAULT_CHECKSUM,
super(RepomdXMLFileContext, self).__init__(metadata_file_path, checksum_type)
self.gpg_sign = gpg_sign
self.sign_options = sign_options
self.metadata_file_locations = []

def __exit__(self, exc_type, exc_val, exc_tb):

Expand All @@ -39,8 +40,11 @@ def finalize(self):

super(RepomdXMLFileContext, self).finalize()

repomd = os.path.basename(self.metadata_file_path)
self.metadata_file_locations.append(repomd)
if self.gpg_sign:
assert self.sign_options
self.metadata_file_locations.append(repomd + '.asc')
signer = util.Signer(options=self.sign_options)
signer.sign(self.metadata_file_path)

Expand Down Expand Up @@ -83,8 +87,10 @@ def add_metadata_file_metadata(self, data_type, file_path, precalculated_checksu
data_attributes = {'type': data_type}
data_element = ElementTree.Element('data', data_attributes)

location_attributes = {'href': os.path.join(REPO_DATA_DIR_NAME, file_name)}
location = os.path.join(REPO_DATA_DIR_NAME, file_name)
location_attributes = {'href': location}
ElementTree.SubElement(data_element, 'location', location_attributes)
self.metadata_file_locations.append(location)

timestamp_element = ElementTree.SubElement(data_element, 'timestamp')
# Convert the float mtime to an integer before stringifying since
Expand Down
67 changes: 25 additions & 42 deletions plugins/pulp_rpm/plugins/distributors/yum/publish.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import copy
import datetime
import glob
import itertools
import os
import re
import subprocess
from gettext import gettext as _
from xml.etree import cElementTree
Expand Down Expand Up @@ -1136,7 +1134,7 @@ def process_main(self, item=None):

class RemoveOldRepodataStep(platform_steps.PluginStep):
"""
Remove repodata files that are not in repomd and are older then 14 day.
Remove repodata files that are not in repomd and are older than 14 days.
"""
def __init__(self, content_dir, **kwargs):
"""
Expand All @@ -1155,7 +1153,8 @@ def __init__(self, content_dir, **kwargs):
def is_skipped(self):
"""
Check the repo for the config option to remove old repodata.
Skip clean up if retain_old_repodata is true
Clean up if remove_old_repodata is True, which is the default
behaviour.
:returns: return if step should clean old repodata or not
:rtype: bool
Expand All @@ -1168,50 +1167,34 @@ def remove_repodata_file(self, repodata_file):
msg = _('Removed outdated %s' % os.path.basename(repodata_file))
logger.info(msg)

def filter_old_repodata(self, files):
exp = re.compile("([a-z0-9A-Z]+)-([a-z]+)(\.xml|\.xml\.gz|\.sqlite.bz2)")
to_remove = {}
groupped = {}
for f in files:
fname = os.path.basename(f)
if fname in ["repodata.xml"]:
continue
matched = exp.match(fname)
if matched:
delta = datetime.datetime.today() -\
datetime.datetime.fromtimestamp(os.path.getmtime(f))
def filter_old_repodata(self):
repomd = self.parent.repomd_file_context.metadata_file_path
repodata_dir = os.path.dirname(repomd)

files_in_dir = [os.path.join(repodata_dir, f) for f in os.listdir(repodata_dir)
if os.path.isfile(os.path.join(repodata_dir, f))]

to_keep = self.parent.repomd_file_context.metadata_file_locations

groupped.setdefault(matched.group(2), [])
groupped[matched.group(2)].append((f, delta))
threshold = self.get_config().get(
'remove_old_repodata_threshold',
datetime.timedelta(days=14).total_seconds())

threshold = self.get_config().get(
'remove_old_repodata_threshold',
1209600)
to_remove = []
for f in files_in_dir:
delta = datetime.datetime.today() - \
datetime.datetime.fromtimestamp(os.path.getmtime(f))
if delta.total_seconds() > threshold and f not in to_keep:
to_remove.append(f)

# 2 weeks
if delta.days * 24 * 60 * 60 + delta.seconds > threshold:
to_remove.setdefault(matched.group(2), [])
# [type] = (filename, timedelta)
to_remove[matched.group(2)].append((f, delta))
return groupped, to_remove
return to_remove

def process_main(self):
"""
Check files times and remove ones older then 14 days.
Check files times and remove ones older than 14 days.
"""
files = glob.glob(os.path.join(self.content_dir, "repodata", "*.xml*")) +\
glob.glob(os.path.join(self.content_dir, "repodata", "*.sqlite*"))

groupped, to_remove = self.filter_old_repodata(files)

for xkey, val in to_remove.items():
to_remove[xkey] = sorted(val, key=lambda x: x[1], reverse=True)

for key, val in to_remove.iteritems():
# preserve at least one file of each kind - pop out latest
if not set(groupped[key]) - set(val):
val.pop(0)
for f in val:
self.remove_repodata_file(f[0])
to_remove = self.filter_old_repodata()
for f in to_remove:
self.remove_repodata_file(f)

self.total_units = self.progress_successes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -496,3 +496,46 @@ def test_finalize_sign(self, _signer):
_signer.assert_called_once_with(options=sign_options)
_signer.return_value.sign.assert_called_once_with(
os.path.join(self.metadata_file_dir, "repodata", "repomd.xml"))

@mock.patch("pulp_rpm.yum_plugin.util.Signer")
def test_signed_repomd_is_added_to_metadata_file_locations(self, _signer):
sign_options = mock.MagicMock()
context = RepomdXMLFileContext(self.metadata_file_dir,
gpg_sign=True,
sign_options=sign_options)
context.finalize()
expected_file_locations = ['repomd.xml', 'repomd.xml.asc']

self.assertEqual(expected_file_locations, context.metadata_file_locations)

@mock.patch('os.path.getmtime')
@mock.patch('os.path.getsize')
def test_repomd_metadata_adds_metadata_file_locations(self, mock_getmtime,
mock_getsize):
"""
Check that as add_metadata_file_metadata is called it adds the location
to metadata_file_locations.
"""
path = os.path.join(self.metadata_file_dir,
REPO_DATA_DIR_NAME,
REPOMD_FILE_NAME)

file_list_name = os.path.basename(tempfile.NamedTemporaryFile().name)
update_list_name = os.path.basename(tempfile.NamedTemporaryFile().name)
file_list_path = os.path.join('repodata', file_list_name)
update_list_path = os.path.join('repodata', update_list_name)

# file_locations in RepomdXMLFileContext adds
expected_locations = [file_list_path, update_list_path]

context = RepomdXMLFileContext(path)
context.metadata_file_handle = mock.Mock()

# add supporting metadata files with arbitrary values for
# precalculated_checksum as we aren't interested in opening these files
context.add_metadata_file_metadata('file_list', file_list_path, 'bcc5c')
context.add_metadata_file_metadata('update', update_list_path, 'bcc5c')

# should contain 2 file_locations
self.assertEqual(len(context.metadata_file_locations), 2)
self.assertEqual(expected_locations, context.metadata_file_locations)
96 changes: 62 additions & 34 deletions plugins/test/unit/plugins/distributors/yum/test_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import os
import shutil
import tempfile
import inspect
import test
from xml.etree import cElementTree as ET

from pulp.common.compat import json, unittest
Expand Down Expand Up @@ -134,6 +136,7 @@ def add_mock_context_to_step(self, step):

class BaseYumRepoPublisherTests(BaseYumDistributorPublishTests):

@mock.patch('pulp_rpm.plugins.distributors.yum.publish.RemoveOldRepodataStep')
@mock.patch('pulp_rpm.plugins.distributors.yum.publish.GenerateSqliteForRepoStep')
@mock.patch('pulp_rpm.plugins.distributors.yum.publish.PublishCompsStep')
@mock.patch('pulp_rpm.plugins.distributors.yum.publish.Publisher._build_final_report')
Expand All @@ -145,7 +148,7 @@ class BaseYumRepoPublisherTests(BaseYumDistributorPublishTests):
def test_publish(self, mock_publish_distribution, mock_publish_rpms, mock_publish_drpms,
mock_publish_errata, mock_publish_metadata,
mock_build_final_report, mock_publish_comps,
mock_generate_sqlite):
mock_generate_sqlite, mock_remove_old_repodata):

self._init_publisher()
self.publisher.repo.content_unit_counts = {}
Expand All @@ -161,6 +164,7 @@ def test_publish(self, mock_publish_distribution, mock_publish_rpms, mock_publis
mock_build_final_report.assert_called_once()
mock_publish_comps.assert_called_once_with()
mock_generate_sqlite.assert_called_once_with(self.working_dir)
mock_remove_old_repodata.assert_called_once_with(self.working_dir)

@mock.patch('pulp_rpm.plugins.distributors.yum.publish.configuration.get_repo_checksum_type')
def test_get_checksum_type(self, mock_get_checksum):
Expand Down Expand Up @@ -1364,13 +1368,17 @@ def test_is_skipped_config_true(self):

class RemoveOldRepodataStepTests(BaseYumDistributorPublishStepTests):
"""
Test RepoOldRepodataStep of the publish process.
Test RemoveOldRepodataStep of the publish process.
"""

def setUp(self):
super(RemoveOldRepodataStepTests, self).setUp()
file_map = {"0abcde-primary.xml.gz": 1497601154,
"1abcde-primary.xml.gz": 1496395553}

def fake_mtime(test_file):
if '-tmp' in os.path.basename(test_file):
# more than 14 days old
return 1496395553
else:
return 1497601154

self.dtpatcher = mock.patch(
'pulp_rpm.plugins.distributors.yum.publish.datetime.datetime',
Expand All @@ -1379,45 +1387,65 @@ def setUp(self):
self.mtime_patcher = mock.patch(
'pulp_rpm.plugins.distributors.yum.publish.os.path.getmtime'
)
self.glob_patcher = mock.patch(
'pulp_rpm.plugins.distributors.yum.publish.glob.glob'
)

mocked_dt = self.dtpatcher.start()
mocked_mtime = self.mtime_patcher.start()
mocked_glob = self.glob_patcher.start()
self.mocked_dt = self.dtpatcher.start()
self.mocked_mtime = self.mtime_patcher.start()

mocked_dt.today.return_value = datetime.datetime.fromtimestamp(1497605154)
mocked_glob.return_value = file_map.keys()
mocked_mtime.side_effect = lambda x: file_map[x]
self.mocked_dt.today.return_value = datetime.datetime.fromtimestamp(1497605154)
self.mocked_mtime.side_effect = fake_mtime

def test_process_main(self):
def test_remove_old_repodata(self):
"""
Test that repoview tool was called with proper parameters.
Test that RemoveOldRepoData step is called with the correct filtered
repodata files to remove.
"""
publisher_conf = {}
def test_data_path():
return '%s/data' % os.path.dirname(inspect.getfile(test))

step = publish.RemoveOldRepodataStep('/foo')
step.config = publisher_conf
step.parent = mock.MagicMock()
step.parent.get_config.return_value.get.side_effect = publisher_conf.get

filter_old_repodata_mock = mock.Mock(side_effect=step.filter_old_repodata)
with mock.patch.multiple(
"pulp_rpm.plugins.distributors.yum.publish.RemoveOldRepodataStep",
remove_repodata_file=mock.DEFAULT,
filter_old_repodata=filter_old_repodata_mock
) as mocked_step_dict:
step.process_main()
filter_old_repodata_mock.assert_called_with(
['1abcde-primary.xml.gz', '0abcde-primary.xml.gz',
'1abcde-primary.xml.gz', '0abcde-primary.xml.gz'])
mocked_step_dict["remove_repodata_file"].assert_called_with("1abcde-primary.xml.gz")
repo_root = os.path.join(test_data_path(), self._testMethodName)

self._init_publisher()
remove_step = publish.RemoveOldRepodataStep(repo_root)
remove_step.parent = self.publisher

metadata_file_path = os.path.join(repo_root, 'repodata', 'repomd.xml')

# file_paths under plugins/test/data/test_remove_old_repodata "to_keep"
metadata_file_locations = [
metadata_file_path,
os.path.join(repo_root, 'repodata', 'repomd-tmp.xml'),
os.path.join(repo_root, 'repodata', 'repomd.xml.asc'),
os.path.join(repo_root, 'repodata', 'repomd-tmp.xml.asc')
]

self.publisher.repomd_file_context.metadata_file_path = metadata_file_path
self.publisher.repomd_file_context.metadata_file_locations = metadata_file_locations

with mock.patch(
'pulp_rpm.plugins.distributors.yum.publish.'
'RemoveOldRepodataStep.remove_repodata_file'
) as mock_remove:
remove_step.process_main()
calls = mock_remove.mock_calls
# while repomd-tmp.xml and repomd-tmp.xml.asc have been patched with
# a modified timestamp, they are to be kept and should not be removed
expected_filenames = set([
'filelists-tmp.sqlite.bz2',
'primary-tmp.xml.gz',
'464c4a0ef57d030eff7d8700f8f26fd5c0b965449438cef3ea462adb21a055fa-tmp'])

self.assertEqual(len(calls), 3)

actual_filenames = set()
for call in calls:
_, args, kwargs = call
actual_filenames.add(os.path.basename(args[0]))

self.assertEqual(actual_filenames, expected_filenames)

def tearDown(self):
self.dtpatcher.stop()
self.mtime_patcher.stop()
self.glob_patcher.stop()


class GenerateRepoviewStepTests(BaseYumDistributorPublishStepTests):
Expand Down

0 comments on commit 3774e38

Please sign in to comment.