Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PR #1466/f6c9d1e7 backport][0.17] optimization for x-repo search index builds #1469

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/1467.bugfix
@@ -0,0 +1 @@
Increase collectionversion search index build speeds.
61 changes: 53 additions & 8 deletions pulp_ansible/app/tasks/collectionversion_index.py
Expand Up @@ -13,6 +13,8 @@
CrossRepositoryCollectionVersionIndex as CVIndex,
)

from pulpcore.plugin.models import RepositoryVersion


log = logging.getLogger(__name__)

Expand All @@ -35,6 +37,43 @@ def get_highest_version_string_from_cv_objects(cv_objects):
return str(versions[0])


def compute_repository_changes(repository_version):
    """Collect the (namespace, name) pairs touched since the preceding repository version.

    The preceding version is looked up as the version of the same repository
    whose number is exactly one less than ``repository_version.number``.

    Returns:
        ``None`` when no such preceding version exists, meaning everything
        should be treated as changed. Otherwise a set of ``(namespace, name)``
        tuples for every collection that had a collection version, a
        deprecation, or a signature added or removed relative to the
        preceding version.
    """
    prior = RepositoryVersion.objects.filter(
        repository=repository_version.repository,
        number=repository_version.number - 1,
    ).first()

    # Without a preceding version there is nothing to diff against.
    if prior is None:
        return None

    cv_type = CollectionVersion.get_pulp_type()
    deprecation_type = AnsibleCollectionDeprecated.get_pulp_type()
    signature_type = CollectionVersionSignature.get_pulp_type()

    touched = set()

    # Both additions and removals count as a change to the owning collection.
    for diff_against in (repository_version.added, repository_version.removed):
        for content in diff_against(base_version=prior):
            kind = content.pulp_type

            # Resolve the object that carries the namespace/name for this content type.
            if kind == cv_type:
                owner = content.ansible_collectionversion
            elif kind == deprecation_type:
                owner = content.ansible_ansiblecollectiondeprecated
            elif kind == signature_type:
                # Signatures are attributed to the collection version they sign.
                owner = content.ansible_collectionversionsignature.signed_collection
            else:
                # Other content types do not affect the index.
                continue

            touched.add((owner.namespace, owner.name))

    return touched


def update_index(distribution=None, repository=None, repository_version=None, is_latest=False):
"""Rebuild index by distribtion|repository|repositoryversion."""

Expand Down Expand Up @@ -93,17 +132,14 @@ def update_index(distribution=None, repository=None, repository_version=None, is
if CVIndex.objects.filter(repository_version=repository_version).exists():
return

# What has changed between this version and the last?
changed_collections = compute_repository_changes(repository_version)

# get all CVs in this repository version
cvs = repository_version.content.filter(pulp_type="ansible.collection_version").values_list(
cvs_pks = repository_version.content.filter(pulp_type="ansible.collection_version").values_list(
"pk", flat=True
)
cvs = CollectionVersion.objects.filter(pk__in=cvs)

# clean out cvs no longer in the repo when a distro w/ a repo
if not use_repository_version:
CVIndex.objects.filter(repository=repository, repository_version=None).exclude(
collection_version__pk__in=cvs
).delete()
cvs = CollectionVersion.objects.filter(pk__in=cvs_pks)

# get the set of signatures in this repo version
repo_signatures_pks = repository_version.content.filter(
Expand All @@ -129,10 +165,19 @@ def update_index(distribution=None, repository=None, repository_version=None, is
if use_repository_version:
repo_v = repository_version

# clean out cvs no longer in the repo when a distro w/ a repo
if not use_repository_version:
CVIndex.objects.filter(repository=repository, repository_version=None).exclude(
collection_version__pk__in=cvs
).delete()

# iterate through each collection in the repository
for colkey in colset:
namespace, name = colkey

if changed_collections is not None and (namespace, name) not in changed_collections:
continue

# get all the versions for this collection
related_cvs = cvs.filter(namespace=namespace, name=name).only("version")

Expand Down
Expand Up @@ -762,8 +762,6 @@ def _run_search(self, search_client, specs, specs_filter, search_filters):

comparison = compare_keys(skeys, rkeys)

# import epdb; epdb.st()

assert len(skeys) == len(rkeys), comparison

@pytest.mark.pulp_on_localhost
Expand Down Expand Up @@ -1389,3 +1387,50 @@ def test_cross_repo_search_index_on_deleted_distro_with_another_still_remaining(
limit=1000, repository_name=[pulp_repo.name], repository_version="latest"
)
assert resp.meta.count == 1


def test_cross_repo_search_index_on_distribution_with_repository_and_deprecation(
    ansible_collection_deprecations_api_client,
    ansible_distro_api_client,
    ansible_repo_api_client,
    ansible_repo_version_api_client,
    build_and_upload_collection,
    galaxy_v3_collections_api_client,
    galaxy_v3_default_search_api_client,
    gen_object_with_cleanup,
    monitor_task,
):
    """Check that deprecating a collection shows up in the cross-repo search index.

    Steps: create a repository, upload one collection into it, create a
    distribution that references the repository, mark the collection as
    deprecated through the v3 collections API, then search by distribution id
    and expect exactly one result flagged as deprecated.
    """

    # A fresh, uniquely named repository holding a single uploaded collection.
    pulp_repo = gen_object_with_cleanup(ansible_repo_api_client, {"name": str(uuid.uuid4())})
    uploaded = build_and_upload_collection(ansible_repo=pulp_repo)

    # The distribution references the repository itself and reuses its name.
    distro_body = {
        "name": pulp_repo.name,
        "base_path": pulp_repo.name,
        "repository": pulp_repo.pulp_href,
    }
    distro = gen_object_with_cleanup(ansible_distro_api_client, distro_body)

    # Deprecate the uploaded collection and wait for the task to finish.
    collection = uploaded[0]
    namespace, name = collection.namespace, collection.name
    deprecate_response = galaxy_v3_collections_api_client.update(
        name,
        namespace,
        pulp_repo.name,
        {"deprecated": True},
    )
    monitor_task(deprecate_response.task)

    # The distribution id is the second-to-last path segment of its href.
    href_segments = distro.pulp_href.split("/")
    dist_id = href_segments[-2]

    # Exactly one collection version should be indexed for this distribution ...
    resp = galaxy_v3_default_search_api_client.list(limit=1000, distribution=[dist_id])
    assert resp.meta.count == 1, resp

    # ... and that entry must carry the deprecation flag.
    assert resp.data[0].is_deprecated, resp