This repository has been archived by the owner on Dec 7, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 170
/
applicability.py
709 lines (606 loc) · 34.5 KB
/
applicability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
"""
Contains content applicability management classes
"""
from gettext import gettext as _
from logging import getLogger
from uuid import uuid4
from celery import task
from pymongo.errors import DuplicateKeyError
from pulp.plugins.conduits.profiler import ProfilerConduit
from pulp.plugins.config import PluginCallConfiguration
from pulp.plugins.loader import api as plugin_api, exceptions as plugin_exceptions
from pulp.plugins.profiler import Profiler
from pulp.server.async.tasks import Task
from pulp.server.db import model
from pulp.server.db.model.consumer import Bind, RepoProfileApplicability, UnitProfile
from pulp.server.db.model.criteria import Criteria
from pulp.server.managers import factory as managers
from pulp.server.managers.consumer.query import ConsumerQueryManager
from pulp.plugins.util.misc import paginate
_logger = getLogger(__name__)
class ApplicabilityRegenerationManager(object):
@staticmethod
def regenerate_applicability_for_consumers(consumer_criteria):
"""
Regenerate and save applicability data for given updated consumers.
:param consumer_criteria: The consumer selection criteria
:type consumer_criteria: dict
"""
consumer_criteria = Criteria.from_dict(consumer_criteria)
consumer_query_manager = managers.consumer_query_manager()
bind_manager = managers.consumer_bind_manager()
consumer_profile_manager = managers.consumer_profile_manager()
# Process consumer_criteria and get all the consumer ids satisfied by the criteria
consumer_criteria.fields = ['id']
consumer_ids = [c['id'] for c in consumer_query_manager.find_by_criteria(consumer_criteria)]
# Following logic of checking existing applicability and getting required data
# to generate applicability is a bit more complicated than what it could be 'by design'.
# It is to optimize the number of db queries and improving applicability generation
# performance. Please consider the implications for applicability generation time
# when making any modifications to this code.
# Get all unit profiles associated with given consumers
unit_profile_criteria = Criteria(
filters={'consumer_id': {'$in': consumer_ids}},
fields=['consumer_id', 'profile_hash', 'content_type', 'id'])
all_unit_profiles = consumer_profile_manager.find_by_criteria(unit_profile_criteria)
# Create a consumer-profile map with consumer id as the key and list of tuples
# with profile details as the value
consumer_unit_profiles_map = {}
# Also create a map of profile_id keyed by profile_hash for profile lookup.
profile_hash_profile_id_map = {}
for unit_profile in all_unit_profiles:
profile_hash = unit_profile['profile_hash']
content_type = unit_profile['content_type']
consumer_id = unit_profile['consumer_id']
profile_id = unit_profile['id']
profile_tuple = (profile_hash, content_type)
# Add this tuple to the list of profile tuples for a consumer
consumer_unit_profiles_map.setdefault(consumer_id, []).append(profile_tuple)
# We need just one profile_id per profile_hash to be used in regenerate_applicability
# method to get the actual profile corresponding to given profile_hash.
if profile_hash not in profile_hash_profile_id_map:
profile_hash_profile_id_map[profile_hash] = profile_id
# Get all repos bound to given consumers
bind_criteria = Criteria(filters={'consumer_id': {'$in': consumer_ids}},
fields=['repo_id', 'consumer_id'])
all_repo_bindings = bind_manager.find_by_criteria(bind_criteria)
# Create a repo-consumer map with repo_id as the key and consumer_id list as the value
repo_consumers_map = {}
for binding in all_repo_bindings:
repo_consumers_map.setdefault(binding['repo_id'], []).append(binding['consumer_id'])
# Create a set of (repo_id, (profile_hash, content_type))
repo_profile_hashes = set()
for repo_id, consumer_id_list in repo_consumers_map.items():
for consumer_id in consumer_id_list:
if consumer_id in consumer_unit_profiles_map:
for unit_profile_tuple in consumer_unit_profiles_map[consumer_id]:
repo_profile_hashes.add((repo_id, unit_profile_tuple))
# Iterate through each tuple in repo_profile_hashes set and regenerate applicability,
# if it doesn't exist. These are all guaranteed to be unique tuples because of the logic
# used to create maps and sets above, eliminating multiple unnecessary queries
# to check for existing applicability for same profiles.
manager = managers.applicability_regeneration_manager()
for repo_id, (profile_hash, content_type) in repo_profile_hashes:
# Check if applicability for given profile_hash and repo_id already exists
if ApplicabilityRegenerationManager._is_existing_applicability(repo_id, profile_hash):
continue
# If applicability does not exist, generate applicability data for given profile
# and repo id.
profile_id = profile_hash_profile_id_map[profile_hash]
manager.regenerate_applicability(profile_hash, content_type, profile_id, repo_id)
@staticmethod
def regenerate_applicability_for_repos(repo_criteria):
"""
Regenerate and save applicability data affected by given updated repositories.
:param repo_criteria: The repo selection criteria
:type repo_criteria: dict
"""
repo_criteria = Criteria.from_dict(repo_criteria)
# Process repo criteria
repo_criteria.fields = ['id']
repo_ids = [r.repo_id for r in model.Repository.objects.find_by_criteria(repo_criteria)]
for repo_id in repo_ids:
# Find all existing applicabilities for given repo_id. Setting batch size of 5 ensures
# the MongoDB cursor does not time out. See https://pulp.plan.io/issues/998#note-6 for
# more details.
existing_applicabilities = RepoProfileApplicability.get_collection().find(
{'repo_id': repo_id}).batch_size(5)
for existing_applicability in existing_applicabilities:
existing_applicability = RepoProfileApplicability(**dict(existing_applicability))
profile_hash = existing_applicability['profile_hash']
unit_profile = UnitProfile.get_collection().find_one({'profile_hash': profile_hash},
projection=['id',
'content_type'])
if unit_profile is None:
# Unit profiles change whenever packages are installed or removed on consumers,
# and it is possible that existing_applicability references a UnitProfile
# that no longer exists. This is harmless, as Pulp has a monthly cleanup task
# that will identify these dangling references and remove them.
continue
# Regenerate applicability data for given unit_profile and repo id
ApplicabilityRegenerationManager.regenerate_applicability(
profile_hash, unit_profile['content_type'], unit_profile['id'], repo_id,
existing_applicability)
@staticmethod
def queue_regenerate_applicability_for_repos(repo_criteria):
"""
Queue a group of tasks to generate and save applicability data affected by given updated
repositories.
:param repo_criteria: The repo selection criteria
:type repo_criteria: dict
"""
repo_criteria = Criteria.from_dict(repo_criteria)
# Process repo criteria
repo_criteria.fields = ['id']
repo_ids = [r.repo_id for r in model.Repository.objects.find_by_criteria(repo_criteria)]
task_group_id = uuid4()
for repo_id in repo_ids:
profile_hashes = RepoProfileApplicability.get_collection().find(
{'repo_id': repo_id}, {'profile_hash': 1})
for batch in paginate(profile_hashes, 10):
batch_regenerate_applicability_task.apply_async((repo_id, batch),
**{'group_id': task_group_id})
return task_group_id
@staticmethod
def batch_regenerate_applicability(repo_id, profile_hashes):
"""
Regenerate and save applicability data for a batch of existing applicabilities
:param repo_id: Repository id for which applicability is being calculated
:type repo_id: str
:param profile_hashes: Tuple of consumer profile hashes for applicability profiles.
Don't pass too much of these, all the profile data
associated with these hashes is loaded into the memory.
:type profile_hashes: tuple of dicts in form of {'profile_hash': str}
"""
profile_hash_list = [phash['profile_hash'] for phash in profile_hashes]
existing_applicabilities = RepoProfileApplicability.get_collection().find(
{"repo_id": repo_id, "profile_hash": {"$in": profile_hash_list}})
for existing_applicability in list(existing_applicabilities):
# Convert cursor to RepoProfileApplicability object
existing_applicability = RepoProfileApplicability(**dict(existing_applicability))
profile_hash = existing_applicability['profile_hash']
unit_profile = UnitProfile.get_collection().find_one({'profile_hash': profile_hash},
projection=['id', 'content_type'])
if unit_profile is None:
# Unit profiles change whenever packages are installed or removed on consumers,
# and it is possible that existing_applicability references a UnitProfile
# that no longer exists. This is harmless, as Pulp has a monthly cleanup task
# that will identify these dangling references and remove them.
continue
# Regenerate applicability data for given unit_profile and repo id
ApplicabilityRegenerationManager.regenerate_applicability(
profile_hash, unit_profile['content_type'], unit_profile['id'], repo_id,
existing_applicability)
@staticmethod
def regenerate_applicability(profile_hash, content_type, profile_id,
bound_repo_id, existing_applicability=None):
"""
Regenerate and save applicability data for given profile and bound repo id.
If existing_applicability is not None, replace it with the new applicability data.
:param profile_hash: hash of the unit profile
:type profile_hash: basestring
:param content_type: profile (unit) type ID
:type content_type: str
:param profile_id: unique id of the unit profile
:type profile_id: str
:param bound_repo_id: repo id to be used to calculate applicability
against the given unit profile
:type bound_repo_id: str
:param existing_applicability: existing RepoProfileApplicability object to be replaced
:type existing_applicability: pulp.server.db.model.consumer.RepoProfileApplicability
"""
profiler_conduit = ProfilerConduit()
# Get the profiler for content_type of given unit_profile
profiler, profiler_cfg = ApplicabilityRegenerationManager._profiler(content_type)
# Check if the profiler supports applicability, else return
if profiler.calculate_applicable_units == Profiler.calculate_applicable_units:
# If base class calculate_applicable_units method is called,
# skip applicability regeneration
return
# Find out which content types have unit counts greater than zero in the bound repo
repo_content_types = ApplicabilityRegenerationManager._get_existing_repo_content_types(
bound_repo_id)
# Get the intersection of existing types in the repo and the types that the profiler
# handles. If the intersection is not empty, regenerate applicability
if (set(repo_content_types) & set(profiler.metadata()['types'])):
# Get the actual profile for existing_applicability or lookup using profile_id
if existing_applicability:
profile = existing_applicability.profile
else:
unit_profile = UnitProfile.get_collection().find_one({'id': profile_id},
projection=['profile'])
try:
profile = unit_profile['profile']
except TypeError:
# It means that unit_profile = None.
# Consumer can be removed during applicability regeneration,
# so it is possible that its profile no longer exists. It is harmless.
return
call_config = PluginCallConfiguration(plugin_config=profiler_cfg,
repo_plugin_config=None)
try:
applicability = profiler.calculate_applicable_units(profile,
bound_repo_id,
call_config,
profiler_conduit)
except NotImplementedError:
msg = "Profiler for content type [%s] does not support applicability" % content_type
_logger.debug(msg)
return
try:
# Create a new RepoProfileApplicability object and save it in the db
RepoProfileApplicability.objects.create(profile_hash,
bound_repo_id,
profile,
applicability)
except DuplicateKeyError:
# Update existing applicability
if not existing_applicability:
applicability_dict = RepoProfileApplicability.get_collection().find_one(
{'repo_id': bound_repo_id, 'profile_hash': profile_hash})
existing_applicability = RepoProfileApplicability(**applicability_dict)
existing_applicability.applicability = applicability
existing_applicability.save()
@staticmethod
def _get_existing_repo_content_types(repo_id):
"""
For the given repo_id, return a list of content_type_ids that have content units counts
greater than 0.
:param repo_id: The repo_id for the repository that we wish to know the unit types contained
therein
:type repo_id: basestring
:return: A list of content type ids that have unit counts greater than 0
:rtype: list
"""
repo_obj = model.Repository.objects.get(repo_id=repo_id)
if not repo_obj:
return []
repo_content_types_with_non_zero_unit_count = []
for content_type, count in repo_obj.content_unit_counts.items():
if count > 0:
repo_content_types_with_non_zero_unit_count.append(content_type)
return repo_content_types_with_non_zero_unit_count
@staticmethod
def _is_existing_applicability(repo_id, profile_hash):
"""
Check if applicability for given repo and profle hash is already calculated.
:param repo_id: repo id
:type repo_id: basestring
:param profile_hash: unit profile hash
:type profile_hash: basestring
:return: true if applicability exists, false otherwise
:type: boolean
"""
query_params = {'repo_id': repo_id, 'profile_hash': profile_hash}
if RepoProfileApplicability.get_collection().find_one(query_params, projection=['_id']):
return True
return False
@staticmethod
def _profiler(type_id):
"""
Find the profiler.
Returns the Profiler base class when not matched.
:param type_id: The content type ID.
:type type_id: str
:return: (profiler, cfg)
:rtype: tuple
"""
try:
plugin, cfg = plugin_api.get_profiler_by_type(type_id)
except plugin_exceptions.PluginNotFound:
plugin = Profiler()
cfg = {}
return plugin, cfg
regenerate_applicability_for_consumers = task(
ApplicabilityRegenerationManager.regenerate_applicability_for_consumers, base=Task,
ignore_result=True)
regenerate_applicability_for_repos = task(
ApplicabilityRegenerationManager.regenerate_applicability_for_repos, base=Task,
ignore_result=True)
batch_regenerate_applicability_task = task(
ApplicabilityRegenerationManager.batch_regenerate_applicability, base=Task,
ignore_results=True)
class DoesNotExist(Exception):
"""
An Exception to be raised when a get() is called on a manager with query parameters that do not
match an object in the database.
"""
pass
class MultipleObjectsReturned(Exception):
"""
An Exception to be raised when a get() is called on a manager that results in more than one
object being returned.
"""
pass
class RepoProfileApplicabilityManager(object):
"""
This class is useful for querying for RepoProfileApplicability objects in the database.
"""
def create(self, profile_hash, repo_id, profile, applicability):
"""
Create and return a RepoProfileApplicability object.
:param profile_hash: The hash of the profile that this object contains applicability data
for
:type profile_hash: basestring
:param repo_id: The repo ID that this applicability data is for
:type repo_id: basestring
:param profile: The entire profile that resulted in the profile_hash
:type profile: object
:param applicability: A dictionary structure mapping unit type IDs to lists of applicable
Unit IDs.
:type applicability: dict
:return: A new RepoProfileApplicability object
:rtype: pulp.server.db.model.consumer.RepoProfileApplicability
"""
applicability = RepoProfileApplicability(
profile_hash=profile_hash, repo_id=repo_id, profile=profile,
applicability=applicability)
applicability.save()
return applicability
def filter(self, query_params):
"""
Get a list of RepoProfileApplicability objects with the given MongoDB query dict.
:param query_params: A MongoDB query dictionary that selects RepoProfileApplicability
documents
:type query_params: dict
:return: A list of RepoProfileApplicability objects that match the given query
:rtype: list
"""
collection = RepoProfileApplicability.get_collection()
mongo_applicabilities = collection.find(query_params)
applicabilities = [RepoProfileApplicability(**dict(applicability))
for applicability in mongo_applicabilities]
return applicabilities
def get(self, query_params):
"""
Get a single RepoProfileApplicability object with the given MongoDB query dict. This
will raise a DoesNotExist if no such object exists. It will also raise
MultipleObjectsReturned if the query_dict was not specific enough to match just one
RepoProfileApplicability object.
:param query_params: A MongoDB query dictionary that selects a single
RepoProfileApplicability document
:type query_params: dict
:return: A RepoProfileApplicability object that matches the given query
:rtype: pulp.server.db.model.consumer.RepoProfileApplicability
"""
applicability = self.filter(query_params)
if not applicability:
raise DoesNotExist(_('The RepoProfileApplicability object does not exist.'))
if len(applicability) > 1:
error_message = _('The given query matched %(num)s documents.')
error_message = error_message % {'num': len(applicability)}
raise MultipleObjectsReturned(error_message)
return applicability[0]
@staticmethod
def remove_orphans():
"""
The RepoProfileApplicability objects can become orphaned over time, as repositories are
deleted, or as consumer profiles change. This method searches for RepoProfileApplicability
objects that reference either repositories or profile hashes that no longer exist in Pulp.
"""
# Find all of the repo_ids that are referenced by RepoProfileApplicability objects
rpa_collection = RepoProfileApplicability.get_collection()
rpa_repo_ids = rpa_collection.distinct('repo_id')
# Find all of the repo_ids that exist in Pulp
repo_ids = model.Repository.objects.distinct('repo_id')
# Find rpa_repo_ids that aren't part of repo_ids
missing_repo_ids = list(set(rpa_repo_ids) - set(repo_ids))
# Remove all RepoProfileApplicability objects that reference these repo_ids
if missing_repo_ids:
rpa_collection.remove({'repo_id': {'$in': missing_repo_ids}})
# Next, we need to find profile_hashes that don't exist in the UnitProfile collection
rpa_profile_hashes = rpa_collection.distinct('profile_hash')
# Find the profile hashes that exist in current UnitProfiles
profile_hashes = UnitProfile.get_collection().distinct('profile_hash')
# Find profile hashes that we have RepoProfileApplicability objects for, but no real
# UnitProfiles
missing_profile_hashes = list(set(rpa_profile_hashes) - set(profile_hashes))
# Remove all RepoProfileApplicability objects that reference these profile hashes
if missing_profile_hashes:
rpa_collection.remove({'profile_hash': {'$in': missing_profile_hashes}})
# Instantiate one of the managers on the object it manages for convenience
RepoProfileApplicability.objects = RepoProfileApplicabilityManager()
def retrieve_consumer_applicability(consumer_criteria, content_types=None):
"""
Query content applicability for consumers matched by a given consumer_criteria, optionally
limiting by content type.
This method returns a list of dictionaries that each have two
keys: 'consumers', and 'applicability'. 'consumers' will index a list of consumer_ids,
for consumers that have the same repository bindings and profiles. 'applicability' will
index a dictionary that will have keys for each content type that is applicable, and the
content type ids will index the applicability data for those content types. For example,
[{'consumers': ['consumer_1', 'consumer_2'],
'applicability': {'content_type_1': ['unit_1', 'unit_3']}},
{'consumers': ['consumer_2', 'consumer_3'],
'applicability': {'content_type_1': ['unit_1', 'unit_2']}}]
:param consumer_ids: A list of consumer ids that the applicability data should be retrieved
against
:type consumer_ids: list
:param content_types: An optional list of content types that the caller wishes to limit the
results to. Defaults to None, which will return data for all types
:type content_types: list
:return: applicability data matching the consumer criteria query
:rtype: list
"""
# We only need the consumer ids
consumer_criteria['fields'] = ['id']
consumer_ids = [c['id'] for c in ConsumerQueryManager.find_by_criteria(consumer_criteria)]
consumer_map = dict([(c, {'profiles': [], 'repo_ids': []}) for c in consumer_ids])
# Fill out the mapping of consumer_ids to profiles, and store the list of profile_hashes
profile_hashes = _add_profiles_to_consumer_map_and_get_hashes(consumer_ids, consumer_map)
# Now add in repo_ids that the consumers are bound to
_add_repo_ids_to_consumer_map(consumer_ids, consumer_map)
# We don't need the list of consumer_ids anymore, so let's free a little RAM
del consumer_ids
# Now lets get all RepoProfileApplicability objects that have the profile hashes for our
# consumers
applicability_map = _get_applicability_map(profile_hashes, content_types)
# We don't need the profile_hashes anymore, so let's free some RAM
del profile_hashes
# Now we need to add consumers who match the applicability data to the applicability_map
_add_consumers_to_applicability_map(consumer_map, applicability_map)
# We don't need the consumer_map anymore, so let's free it up
del consumer_map
# Collate all the entries for the same sets of consumers together
consumer_applicability_map = _get_consumer_applicability_map(applicability_map)
# Free the applicability_map, we don't need it anymore
del applicability_map
# Form the data into the expected output format and return
return _format_report(consumer_applicability_map)
def _add_consumers_to_applicability_map(consumer_map, applicability_map):
"""
For all consumers in the consumer_map, look for their profiles and repos in the
applicability_map, and if found, add the consumer_ids to the applicability_map.
:param consumer_map: A dictionary mapping consumer_ids to dictionaries with keys
'profiles' and 'repo_ids'. 'profiles' indexes a list of profiles
for each consumer_id, and 'repo_ids' indexes a list of repo_ids
that the consumer is bound to.
:type consumer_map: dict
:param applicability_map: The mapping of (profile_hash, repo_id) to applicability_data and
consumer_ids the data applies to. This method appends
consumer_ids to the appropriate lists of consumer_ids
:type applicability_map: dict
"""
for consumer_id, repo_profile_data in consumer_map.items():
for profile in repo_profile_data['profiles']:
for repo_id in repo_profile_data['repo_ids']:
repo_profile = (profile['profile_hash'], repo_id)
# Only add the consumer to the applicability map if there is applicability_data
# for this combination of repository and profile
if repo_profile in applicability_map:
applicability_map[repo_profile]['consumers'].append(consumer_id)
def _add_profiles_to_consumer_map_and_get_hashes(consumer_ids, consumer_map):
"""
Query for all the profiles associated with the given list of consumer_ids, add those
profiles to the consumer_map, and then return a list of all profile_hashes.
:param consumer_ids: A list of consumer_ids that we want to map the profiles to
:type consumer_ids: list
:param consumer_map: A dictionary mapping consumer_ids to a dictionary with key 'profiles',
which indexes a list that this method will append the found profiles
to.
:type consumer_map: dict
:return: A list of the profile_hashes that were associated with the given
consumers
:rtype: list
"""
profiles = UnitProfile.get_collection().find(
{'consumer_id': {'$in': consumer_ids}},
projection=['consumer_id', 'profile_hash'])
profile_hashes = set()
for p in profiles:
consumer_map[p['consumer_id']]['profiles'].append(p)
profile_hashes.add(p['profile_hash'])
# Let's return a list of all the unique profile_hashes for the query we will do a
# bit later for applicability data
return list(profile_hashes)
def _add_repo_ids_to_consumer_map(consumer_ids, consumer_map):
"""
Query for all bindings for the given list of consumer_ids, and for each one add the bound
repo_ids to the consumer_map's entry for the consumer.
:param consumer_ids: The list of consumer_ids. We could pull this from the consumer_map,
but since we already have this list it's probably more performant to
use it as is.
:type consumer_ids: list
:param consumer_map: A dictionary mapping consumer_ids to a dictionary with key 'profiles',
which indexes a list that this method will append the found profiles
to.
:type consumer_map: dict
"""
bindings = Bind.get_collection().find(
{'consumer_id': {'$in': consumer_ids}},
projection=['consumer_id', 'repo_id'])
for b in bindings:
consumer_map[b['consumer_id']]['repo_ids'].append(b['repo_id'])
def _format_report(consumer_applicability_map):
"""
Turn the consumer_applicability_map into the expected response format for this API call.
:param consumer_applicability_map: A mapping of frozensets of consumers to their
applicability data
:type consumer_applicability_map: dict
:return: A list of dictionaries that have two keys, consumers
and applicability. consumers indexes a list of
consumer_ids, and applicability indexes the
applicability data for those consumer_ids.
:rtype: list
"""
report = []
for consumers, applicability in consumer_applicability_map.iteritems():
# If there are no consumers for this applicability data, there is no need to include
# it in the report
if consumers:
applicability_data = {'consumers': list(consumers),
'applicability': applicability}
report.append(applicability_data)
return report
def _get_applicability_map(profile_hashes, content_types):
"""
Build an "applicability_map", which is a dictionary that maps tuples of
(profile_hash, repo_id) to a dictionary of applicability data and consumer_ids. The
consumer_ids are just initialized to an empty list, so that a later method can add
consumers to it. For example, it might look like:
{('profile_hash_1', 'repo_1'): {'applicability': {<applicability_data>}, 'consumers': []}}
:param profile_hashes: A list of profile hashes that the applicabilities should be queried
with. The applicability map is initialized with all applicability
data for all the given profile_hashes.
:type profile_hashes: list
:param content_types: If not None, content_types is a list of content_types to
be included in the applicability data within the
applicability_map
:type content_types: list or None
:return: The applicability map
:rtype: dict
"""
applicabilities = RepoProfileApplicability.get_collection().find(
{'profile_hash': {'$in': profile_hashes}},
projection=['profile_hash', 'repo_id', 'applicability'])
return_value = {}
for a in applicabilities:
if content_types is not None:
# The caller has requested us to filter by content_type, so we need to look through
# the applicability data and filter out the unwanted content types. Some
# applicabilities may end up being empty if they don't have any data for the
# requested types, so we'll build a list of those to remove
for key in a['applicability'].keys():
if key not in content_types:
del a['applicability'][key]
# If a doesn't have anything worth reporting, move on to the next applicability
if not a['applicability']:
continue
return_value[(a['profile_hash'], a['repo_id'])] = {'applicability': a['applicability'],
'consumers': []}
return return_value
def _get_consumer_applicability_map(applicability_map):
"""
Massage the applicability_map into a form that will help us to collate applicability
groups that contain the same data together.
:param applicability_map: The mapping of (profile_hash, repo_id) to applicability_data and
consumer_ids it applies to. This method appends consumer_ids to
the appropriate lists of consumer_ids
:type applicability_map: dict
:return: The consumer_applicability_map, which maps frozensets of
consumer_ids to their collective applicability data.
:rtype: dict
"""
consumer_applicability_map = {}
for repo_profile, data in applicability_map.iteritems():
# This will be the key for our map, a set of the consumers that share data
consumers = frozenset(data['consumers'])
if consumers in consumer_applicability_map:
for content_type, applicability in data['applicability'].iteritems():
if content_type in consumer_applicability_map[consumers]:
# There is already applicability data for this consumer set and
# content type. We will convert the existing data and the new data to
# sets, generate the union of those sets, and turn it back into a list
# so that we can report unique units.
consumer_applicability_map[consumers][content_type] = list(
set(consumer_applicability_map[consumers][content_type]) |
set(applicability))
else:
# This consumer set does not already have applicability data for this type, so
# let's set applicability as the data for it
consumer_applicability_map[consumers][content_type] = applicability
else:
# This consumer set is not already part of the consumer_applicability_map, so we can
# set all the applicability data we have to this consumer set
consumer_applicability_map[consumers] = data['applicability']
return consumer_applicability_map