/
_basequery.py
235 lines (184 loc) · 7.15 KB
/
_basequery.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import logging
from peewee import fn, PeeweeException
from cachetools.func import lru_cache
from datetime import datetime, timedelta
from data.model import DataModelException, config
from data.readreplica import ReadOnlyModeException
from data.database import (
Repository,
RepositoryState,
User,
Team,
TeamMember,
RepositoryPermission,
TeamRole,
Namespace,
Visibility,
ImageStorage,
Image,
RepositoryKind,
db_for_update,
db_count_estimator,
db,
)
from functools import reduce
logger = logging.getLogger(__name__)
def reduce_as_tree(queries_to_reduce):
"""
This method will split a list of queries into halves recursively until we reach individual
queries, at which point it will start unioning the queries, or the already unioned subqueries.
This works around a bug in peewee SQL generation where reducing linearly generates a chain of
queries that will exceed the recursion depth limit when it has around 80 queries.
"""
mid = len(queries_to_reduce) // 2
left = queries_to_reduce[:mid]
right = queries_to_reduce[mid:]
to_reduce_right = right[0]
if len(right) > 1:
to_reduce_right = reduce_as_tree(right)
if len(left) > 1:
to_reduce_left = reduce_as_tree(left)
elif len(left) == 1:
to_reduce_left = left[0]
else:
return to_reduce_right
return to_reduce_left.union_all(to_reduce_right)
def get_existing_repository(namespace_name, repository_name, for_update=False, kind_filter=None):
query = (
Repository.select(Repository, Namespace)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Namespace.username == namespace_name, Repository.name == repository_name)
.where(Repository.state != RepositoryState.MARKED_FOR_DELETION)
)
if kind_filter:
query = (
query.switch(Repository).join(RepositoryKind).where(RepositoryKind.name == kind_filter)
)
if for_update:
query = db_for_update(query)
return query.get()
@lru_cache(maxsize=1)
def get_public_repo_visibility():
return Visibility.get(name="public")
def _lookup_team_role(name):
return _lookup_team_roles()[name]
@lru_cache(maxsize=1)
def _lookup_team_roles():
return {role.name: role for role in TeamRole.select()}
def filter_to_repos_for_user(
query, user_id=None, namespace=None, repo_kind="image", include_public=True, start_id=None
):
if not include_public and not user_id:
return Repository.select().where(Repository.id == "-1")
# Filter on the type of repository.
if repo_kind is not None:
try:
query = query.where(Repository.kind == Repository.kind.get_id(repo_kind))
except RepositoryKind.DoesNotExist:
raise DataModelException("Unknown repository kind")
# Add the start ID if necessary.
if start_id is not None:
query = query.where(Repository.id >= start_id)
# Add a namespace filter if necessary.
if namespace:
query = query.where(Namespace.username == namespace)
# Build a set of queries that, when unioned together, return the full set of visible repositories
# for the filters specified.
queries = []
if include_public:
queries.append(query.where(Repository.visibility == get_public_repo_visibility()))
if user_id is not None:
AdminTeam = Team.alias()
AdminTeamMember = TeamMember.alias()
# Add repositories in which the user has permission.
queries.append(
query.switch(RepositoryPermission).where(RepositoryPermission.user == user_id)
)
# Add repositories in which the user is a member of a team that has permission.
queries.append(
query.switch(RepositoryPermission)
.join(Team)
.join(TeamMember)
.where(TeamMember.user == user_id)
)
# Add repositories under namespaces in which the user is the org admin.
queries.append(
query.switch(Repository)
.join(AdminTeam, on=(Repository.namespace_user == AdminTeam.organization))
.join(AdminTeamMember, on=(AdminTeam.id == AdminTeamMember.team))
.where(AdminTeam.role == _lookup_team_role("admin"))
.where(AdminTeamMember.user == user_id)
)
return reduce(lambda l, r: l | r, queries)
def get_user_organizations(username):
UserAlias = User.alias()
return (
User.select()
.distinct()
.join(Team)
.join(TeamMember)
.join(UserAlias, on=(UserAlias.id == TeamMember.user))
.where(User.organization == True, UserAlias.username == username)
)
def calculate_image_aggregate_size(ancestors_str, image_size, parent_image):
ancestors = ancestors_str.split("/")[1:-1]
if not ancestors:
return image_size
if parent_image is None:
raise DataModelException("Could not load parent image")
ancestor_size = parent_image.aggregate_size
if ancestor_size is not None:
return ancestor_size + image_size
# Fallback to a slower path if the parent doesn't have an aggregate size saved.
# TODO: remove this code if/when we do a full backfill.
ancestor_size = (
ImageStorage.select(fn.Sum(ImageStorage.image_size))
.join(Image)
.where(Image.id << ancestors)
.scalar()
)
if ancestor_size is None:
return None
return ancestor_size + image_size
def update_last_accessed(token_or_user):
"""
Updates the `last_accessed` field on the given token or user.
If the existing field's value is within the configured threshold, the update is skipped.
"""
if not config.app_config.get("FEATURE_USER_LAST_ACCESSED"):
return
threshold = timedelta(seconds=config.app_config.get("LAST_ACCESSED_UPDATE_THRESHOLD_S", 120))
if (
token_or_user.last_accessed is not None
and datetime.utcnow() - token_or_user.last_accessed < threshold
):
# Skip updating, as we don't want to put undue pressure on the database.
return
model_class = token_or_user.__class__
last_accessed = datetime.utcnow()
try:
(
model_class.update(last_accessed=last_accessed)
.where(model_class.id == token_or_user.id)
.execute()
)
token_or_user.last_accessed = last_accessed
except ReadOnlyModeException:
pass
except PeeweeException as ex:
# If there is any form of DB exception, only fail if strict logging is enabled.
strict_logging_disabled = config.app_config.get("ALLOW_PULLS_WITHOUT_STRICT_LOGGING")
if strict_logging_disabled:
data = {
"exception": ex,
"token_or_user": token_or_user.id,
"class": str(model_class),
}
logger.exception("update last_accessed for token/user failed", extra=data)
else:
raise
def estimated_row_count(model_cls):
""" Returns the estimated number of rows in the given model. If available, uses engine-specific
estimation (which is very fast) and otherwise falls back to .count()
"""
return db_count_estimator(model_cls, db)