diff --git a/CHANGES/plugin_api/5559.feature b/CHANGES/plugin_api/5559.feature new file mode 100644 index 00000000000..5bc72255da3 --- /dev/null +++ b/CHANGES/plugin_api/5559.feature @@ -0,0 +1,3 @@ +Added artifact path overlap checks for repo versions and publications. Plugin writers should call +``validate_version_paths()`` or ``validate_publication_paths()`` during the finalize step when +creating RepositoryVersions or Publications (respectively). diff --git a/doc_requirements.txt b/doc_requirements.txt index c60b55d1c89..0a60a2d768a 100644 --- a/doc_requirements.txt +++ b/doc_requirements.txt @@ -11,6 +11,7 @@ drf-yasg dynaconf plantuml pyyaml +pygtrie rq sphinx sphinx-rtd-theme diff --git a/pulpcore/app/files.py b/pulpcore/app/files.py index ba284174e59..4662a1cb6c3 100644 --- a/pulpcore/app/files.py +++ b/pulpcore/app/files.py @@ -1,8 +1,10 @@ import hashlib import os +from gettext import gettext as _ from django.core.files.uploadedfile import TemporaryUploadedFile from django.core.files.uploadhandler import TemporaryFileUploadHandler +from pygtrie import StringTrie class PulpTemporaryUploadedFile(TemporaryUploadedFile): @@ -90,3 +92,41 @@ def __init__(self, file, name=None): if name is None: name = getattr(file, 'name', None) self.name = name + + +def validate_file_paths(paths): + """ + Check for valid POSIX paths (ie ones that aren't duplicated and don't overlap). + + Overlapping paths are where one path terminates inside another (e.g. a/b and a/b/c). + + This function will raise an exception at the first dupe or overlap it detects. We use a trie (or + prefix tree) to keep track of which paths we've already seen. + + Args: + paths (list): A list of strings each representing a relative path + + Raises: + ValueError: If any path overlaps another + """ + overlap_error = _("The path for file '{path}' overlaps: {conflicts}") + + path_trie = StringTrie(separator="/") + for path in paths: + if path_trie.has_key(path): + # path duplicates a path already in the trie + raise ValueError(_("Path is duplicated: {path}").format(path=path)) + + if path_trie.has_subtrie(path): + # overlap where path is 'a/b' and trie has 'a/b/c' + conflicts = [item[0] for item in path_trie.items(prefix=path)] + raise ValueError(overlap_error.format(path=path, conflicts=(", ").join(conflicts))) + + prefixes = list(path_trie.prefixes(path)) + if prefixes: + # overlap where path is 'a/b/c' and trie has 'a/b' + conflicts = [prefix.key for prefix in prefixes] + raise ValueError(overlap_error.format(path=path, conflicts=(", ").join(conflicts))) + + # if there are no overlaps, add it to our trie and continue + path_trie[path] = True diff --git a/pulpcore/plugin/publication_utils.py b/pulpcore/plugin/publication_utils.py new file mode 100644 index 00000000000..8e3bbcad17c --- /dev/null +++ b/pulpcore/plugin/publication_utils.py @@ -0,0 +1,22 @@ +from pulpcore.app.models import ContentArtifact +from pulpcore.app.files import validate_file_paths + + +def validate_publication_paths(publication): + """ + Validate artifact relative paths for dupes or overlap (e.g. a/b and a/b/c). + + Raises: + ValueError: If two artifact relative paths are dupes or overlap + """ + paths = list(publication.published_artifact.values_list("relative_path", flat=True)) + + if publication.pass_through: + paths += ContentArtifact.objects. \ + filter(content__pk__in=publication.repository_version.content). \ + values_list("relative_path", flat=True) + + try: + validate_file_paths(paths) + except ValueError as e: + raise ValueError(_("Cannot create publication. {err}.").format(err=e)) diff --git a/pulpcore/plugin/repo_version_utils.py b/pulpcore/plugin/repo_version_utils.py index 1736c69fb3f..494355f0db5 100644 --- a/pulpcore/plugin/repo_version_utils.py +++ b/pulpcore/plugin/repo_version_utils.py @@ -4,6 +4,9 @@ from django.db.models import Q +from pulpcore.app.models import ContentArtifact +from pulpcore.app.files import validate_file_paths + _logger = logging.getLogger(__name__) @@ -41,3 +44,20 @@ def remove_duplicates(repository_version): _logger.debug(_("Removing duplicates for type: {}".format(model))) qs = model.objects.filter(query_for_repo_duplicates_by_type[model]) repository_version.remove_content(qs) + + +def validate_version_paths(version): + """ + Validate artifact relative paths for dupes or overlap (e.g. a/b and a/b/c). + + Raises: + ValueError: If two artifact relative paths overlap + """ + paths = ContentArtifact.objects. \ + filter(content__pk__in=version.content). \ + values_list("relative_path", flat=True) + + try: + validate_file_paths(paths) + except ValueError as e: + raise ValueError(_("Cannot create repository version. {err}.").format(err=e)) diff --git a/pulpcore/tests/unit/test_files.py b/pulpcore/tests/unit/test_files.py new file mode 100644 index 00000000000..c76239d6d00 --- /dev/null +++ b/pulpcore/tests/unit/test_files.py @@ -0,0 +1,36 @@ +from unittest import TestCase + +from pulpcore.app.files import validate_file_paths + + +class TestValidateFilePaths(TestCase): + def test_valid_paths(self): + """ + Test for valid paths. + """ + paths = ['a/b', 'a/c/b', 'PULP_MANIFEST', 'b'] + validate_file_paths(paths) + + def test_dupes(self): + """ + Test for two duplicate paths. + """ + paths = ['a/b', 'PULP_MANIFEST', 'PULP_MANIFEST'] + with self.assertRaisesRegex(ValueError, "Path is duplicated: PULP_MANIFEST"): + validate_file_paths(paths) + + def test_overlaps(self): + """ + Test for overlapping paths. + """ + paths = ['a/b', 'a/b/c'] + with self.assertRaises(ValueError): + validate_file_paths(paths) + + paths = ['a/b/c', 'a/b'] + with self.assertRaises(ValueError): + validate_file_paths(paths) + + paths = ['b/c', 'a/b', 'b'] + with self.assertRaises(ValueError): + validate_file_paths(paths) diff --git a/setup.py b/setup.py index 59b7cf57c47..fbb2fb88d31 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ 'setuptools>=39.2.0,<42.1.0', 'dynaconf>=2.2,<2.3', 'whitenoise~=4.1.3', + 'pygtrie~=2.3.2', ] setup(