/
repo_version_utils.py
138 lines (107 loc) · 4.98 KB
/
repo_version_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
from gettext import gettext as _
import logging
from django.db.models import Q
from pulpcore.app.files import validate_file_paths
from pulpcore.app.models import Content, ContentArtifact
from pulpcore.app.util import batch_qs
# Module-level logger, named after this module's import path.
_logger = logging.getLogger(__name__)
# Names exported by a star-import of this module; only ``remove_duplicates`` is listed, so the
# ``validate_*`` functions defined below have to be imported by name.
__all__ = ["remove_duplicates"]
def remove_duplicates(repository_version):
    """
    Inspect content additions in the `RepositoryVersion` and remove existing repository duplicates.

    This function will inspect the content being added to a repo version and remove any existing
    content which would collide with the content being added to the repository version. It does not
    inspect the content being added for duplicates.

    Some content can have two instances A and B which are unique, but cannot both exist together in
    one repository. For example, pulp_file's content has `relative_path` for that file within the
    repository.

    Any content newly added to the :class:`~pulpcore.plugin.models.RepositoryVersion` is checked
    against the content that already exists in the version it builds on, and newer "repository
    duplicates" replace existing "repository duplicates". Each Content model can define a
    `repo_key_fields` attribute with the field names to be compared. If all `repo_key_fields`
    contain the same value for two content units, they are considered "repository duplicates".

    Args:
        repository_version: The :class:`~pulpcore.plugin.models.RepositoryVersion` to be checked
            and possibly modified.
    """
    added_content = repository_version.added(base_version=repository_version.base_version)

    # "Existing" content comes from the explicit base version when one is set, otherwise from the
    # previous version; with neither available there is nothing to collide with.
    if repository_version.base_version:
        existing_content = repository_version.base_version.content
    else:
        try:
            existing_content = repository_version.previous().content
        except repository_version.DoesNotExist:
            existing_content = Content.objects.none()

    repository = repository_version.repository.cast()
    content_types = {type_obj.get_pulp_type(): type_obj for type_obj in repository.CONTENT_TYPES}

    for pulp_type, type_obj in content_types.items():
        repo_key_fields = type_obj.repo_key_fields
        # Types without repo key fields cannot have repository duplicates; skip them before
        # building any queryset for them.
        if repo_key_fields == ():
            continue

        new_content_qs = type_obj.objects.filter(
            pk__in=added_content.filter(pulp_type=pulp_type)
        ).values(*repo_key_fields)

        # Only existence matters here, so avoid counting every row.
        if not (new_content_qs.exists() and existing_content.exists()):
            continue

        # Translate the template first, then interpolate; formatting before the gettext lookup
        # would make the msgid unmatchable.
        _logger.debug(_("Removing duplicates for type: {}").format(pulp_type))

        for batch in batch_qs(new_content_qs):
            # OR together one exact-match condition per newly added unit in this batch.
            find_dup_qs = Q()
            for content_dict in batch:
                find_dup_qs |= Q(**content_dict)

            # An empty Q places no restriction on the query, so filtering with it would select
            # (and remove) every existing unit of this type. Skip batches that came back empty.
            if not find_dup_qs:
                continue

            duplicates_qs = (
                type_obj.objects.filter(pk__in=existing_content).filter(find_dup_qs).only("pk")
            )
            repository_version.remove_content(duplicates_qs)
def validate_duplicate_content(version):
    """
    Ensure no two content units in a repository version share the same repo key.

    For every content type of the version's repository that declares ``repo_key_fields``, the
    number of units of that type in the version is compared with the number of units that are
    distinct on those fields. A smaller distinct count means at least two units collide.

    Args:
        version: The repository version to inspect.

    Raises:
        ValueError: If repo version has duplicate content.
    """
    problems = []

    for content_model in version.repository.CONTENT_TYPES:
        key_fields = content_model.repo_key_fields
        if key_fields == ():
            # This type declares no repo key fields, so there is nothing to compare.
            continue

        type_name = content_model.get_pulp_type()
        # Rows of this concrete model that belong to the version's content.
        units_in_version = content_model.objects.filter(
            pk__in=version.content.filter(pulp_type=type_name)
        )
        total = units_in_version.count()
        distinct_total = units_in_version.distinct(*key_fields).count()

        if distinct_total >= total:
            continue

        template = _("More than one {pulp_type} content with the duplicate values for {fields}.")
        problems.append(template.format(pulp_type=type_name, fields=", ".join(key_fields)))

    if not problems:
        return

    raise ValueError(_("Cannot create repository version. {msg}").format(msg=", ".join(problems)))
def validate_version_paths(version):
    """
    Validate artifact relative paths for dupes or overlap (e.g. a/b and a/b/c).

    Collects the ``relative_path`` of every ``ContentArtifact`` whose content is part of
    ``version`` and hands the paths to ``validate_file_paths``.

    Args:
        version: The repository version whose artifact paths are checked.

    Raises:
        ValueError: If two artifact relative paths overlap
    """
    paths = ContentArtifact.objects.filter(content__pk__in=version.content).values_list(
        "relative_path", flat=True
    )

    try:
        validate_file_paths(paths)
    except ValueError as e:
        # Re-raise with repository-version context. Chain explicitly so the traceback reports the
        # path error as the direct cause instead of as an error raised while handling another.
        raise ValueError(_("Repository version errors : {err}").format(err=e)) from e
def validate_repo_version(version):
    """
    Run every repository version validator against ``version``.

    The duplicate-content check runs first, followed by the relative-path check. An error
    raised by a check propagates immediately, so later checks do not run.

    Raises:
        ValueError: If repo version is not valid.
    """
    for check in (validate_duplicate_content, validate_version_paths):
        check(version)