Skip to content

Commit

Permalink
Add validation to check for file path overlaps
Browse files Browse the repository at this point in the history
  • Loading branch information
David Davis committed Dec 2, 2019
1 parent b617ad4 commit eda7d3d
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 0 deletions.
3 changes: 3 additions & 0 deletions CHANGES/plugin_api/5559.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added artifact path overlap checks for repo versions and publications. Plugin writers should call
``validate_version_paths()`` or ``validate_publication_paths()`` during the finalize step when
creating RepositoryVersions or Publications (respectively).
1 change: 1 addition & 0 deletions doc_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ drf-yasg
dynaconf
plantuml
pyyaml
pygtrie
rq
sphinx
sphinx-rtd-theme
Expand Down
40 changes: 40 additions & 0 deletions pulpcore/app/files.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import hashlib
import os
from gettext import gettext as _

from django.core.files.uploadedfile import TemporaryUploadedFile
from django.core.files.uploadhandler import TemporaryFileUploadHandler
from pygtrie import StringTrie


class PulpTemporaryUploadedFile(TemporaryUploadedFile):
Expand Down Expand Up @@ -90,3 +92,41 @@ def __init__(self, file, name=None):
if name is None:
name = getattr(file, 'name', None)
self.name = name


def validate_file_paths(paths):
"""
Check for valid POSIX paths (ie ones that aren't duplicated and don't overlap).
Overlapping paths are where one path terminates inside another (e.g. a/b and a/b/c).
This function will raise an exception at the first dupe or overlap it detects. We use a trie (or
prefix tree) to keep track of which paths we've already seen.
Args:
paths (list): A list of strings each representing a relative path
Raises:
ValueError: If any path overlaps another
"""
overlap_error = _("The path for file '{path}' overlaps: {conflicts}")

path_trie = StringTrie(separator="/")
for path in paths:
if path_trie.has_key(path):
# path duplicates a path already in the trie
raise ValueError(_("Path is duplicated: {path}").format(path=path))

if path_trie.has_subtrie(path):
# overlap where path is 'a/b' and trie has 'a/b/c'
conflicts = [item[0] for item in path_trie.items(prefix=path)]
raise ValueError(overlap_error.format(path=path, conflicts=(", ").join(conflicts)))

prefixes = list(path_trie.prefixes(path))
if prefixes:
# overlap where path is 'a/b/c' and trie has 'a/b'
conflicts = [prefix.key for prefix in prefixes]
raise ValueError(overlap_error.format(path=path, conflicts=(", ").join(conflicts)))

# if there are no overlaps, add it to our trie and continue
path_trie[path] = True
22 changes: 22 additions & 0 deletions pulpcore/plugin/publication_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from pulpcore.app.models import ContentArtifact
from pulpcore.app.files import validate_file_paths


def validate_publication_paths(publication):
"""
Validate artifact relative paths for dupes or overlap (e.g. a/b and a/b/c).
Raises:
ValueError: If two artifact relative paths are dupes or overlap
"""
paths = list(publication.published_artifact.values_list("relative_path", flat=True))

if publication.pass_through:
paths += ContentArtifact.objects. \
filter(content__pk__in=publication.repository_version.content). \
values_list("relative_path", flat=True)

try:
validate_file_paths(paths)
except ValueError as e:
raise ValueError(_("Cannot create publication. {err}.").format(err=e))
20 changes: 20 additions & 0 deletions pulpcore/plugin/repo_version_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

from django.db.models import Q

from pulpcore.app.models import ContentArtifact
from pulpcore.app.files import validate_file_paths


_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -41,3 +44,20 @@ def remove_duplicates(repository_version):
_logger.debug(_("Removing duplicates for type: {}".format(model)))
qs = model.objects.filter(query_for_repo_duplicates_by_type[model])
repository_version.remove_content(qs)


def validate_version_paths(version):
"""
Validate artifact relative paths for dupes or overlap (e.g. a/b and a/b/c).
Raises:
ValueError: If two artifact relative paths overlap
"""
paths = ContentArtifact.objects. \
filter(content__pk__in=version.content). \
values_list("relative_path", flat=True)

try:
validate_file_paths(paths)
except ValueError as e:
raise ValueError(_("Cannot create repository version. {err}.").format(err=e))
36 changes: 36 additions & 0 deletions pulpcore/tests/unit/test_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from unittest import TestCase

from pulpcore.app.files import validate_file_paths


class TestValidateFilePaths(TestCase):
def test_valid_paths(self):
"""
Test for valid paths.
"""
paths = ['a/b', 'a/c/b', 'PULP_MANIFEST', 'b']
validate_file_paths(paths)

def test_dupes(self):
"""
Test for two duplicate paths.
"""
paths = ['a/b', 'PULP_MANIFEST', 'PULP_MANIFEST']
with self.assertRaisesRegex(ValueError, "Path is duplicated: PULP_MANIFEST"):
validate_file_paths(paths)

def test_overlaps(self):
"""
Test for overlapping paths.
"""
paths = ['a/b', 'a/b/c']
with self.assertRaises(ValueError):
validate_file_paths(paths)

paths = ['a/b/c', 'a/b']
with self.assertRaises(ValueError):
validate_file_paths(paths)

paths = ['b/c', 'a/b', 'b']
with self.assertRaises(ValueError):
validate_file_paths(paths)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
'setuptools>=39.2.0,<42.1.0',
'dynaconf>=2.2,<2.3',
'whitenoise~=4.1.3',
'pygtrie~=2.3.2',
]

setup(
Expand Down

0 comments on commit eda7d3d

Please sign in to comment.