Skip to content

Commit

Permalink
optimization for x-repo search index builds (#1466)
Browse files Browse the repository at this point in the history
* Bypass re-indexing collections which have not changed in the new repo version.

fixes #1467

Signed-off-by: James Tanner <tanner.jc@gmail.com>
  • Loading branch information
jctanner committed May 25, 2023
1 parent e6610b2 commit f6c9d1e
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGES/1467.bugfix
@@ -0,0 +1 @@
Increase collectionversion search index build speeds.
61 changes: 53 additions & 8 deletions pulp_ansible/app/tasks/collectionversion_index.py
Expand Up @@ -13,6 +13,8 @@
CrossRepositoryCollectionVersionIndex as CVIndex,
)

from pulpcore.plugin.models import RepositoryVersion


log = logging.getLogger(__name__)

Expand All @@ -35,6 +37,43 @@ def get_highest_version_string_from_cv_objects(cv_objects):
return str(versions[0])


def compute_repository_changes(repository_version):
    """Collect the (namespace, name) pairs that differ from the previous repo version.

    The previous version is looked up as the one numbered exactly one below
    ``repository_version`` within the same repository.

    Returns:
        ``None`` if no such previous version is found, meaning everything is
        considered changed; otherwise a set of ``(namespace, name)`` tuples for
        every collection version, deprecation or signature that was added or
        removed relative to the previous version.
    """

    # Locate the version that directly precedes this one in the same repository.
    prior = RepositoryVersion.objects.filter(
        repository=repository_version.repository,
        number=repository_version.number - 1,
    ).first()

    # Nothing to diff against, so signal "everything changed".
    if prior is None:
        return None

    touched = set()

    cv_type = CollectionVersion.get_pulp_type()
    deprecation_type = AnsibleCollectionDeprecated.get_pulp_type()
    signature_type = CollectionVersionSignature.get_pulp_type()

    def collection_key(content):
        """Return the (namespace, name) a content unit belongs to, or None if irrelevant."""
        kind = content.pulp_type
        if kind == cv_type:
            cv = content.ansible_collectionversion
            return (cv.namespace, cv.name)
        if kind == deprecation_type:
            deprecation = content.ansible_ansiblecollectiondeprecated
            return (deprecation.namespace, deprecation.name)
        if kind == signature_type:
            # Signatures are attributed to the collection version they sign.
            signed = content.ansible_collectionversionsignature.signed_collection
            return (signed.namespace, signed.name)
        return None

    # Both additions and removals count as a change to the owning collection.
    for diff_method in (repository_version.added, repository_version.removed):
        for content in diff_method(base_version=prior):
            key = collection_key(content)
            if key is not None:
                touched.add(key)

    return touched


def update_index(distribution=None, repository=None, repository_version=None, is_latest=False):
"""Rebuild index by distribtion|repository|repositoryversion."""

Expand Down Expand Up @@ -93,17 +132,14 @@ def update_index(distribution=None, repository=None, repository_version=None, is
if CVIndex.objects.filter(repository_version=repository_version).exists():
return

# What has changed between this version and the last?
changed_collections = compute_repository_changes(repository_version)

# get all CVs in this repository version
cvs = repository_version.content.filter(pulp_type="ansible.collection_version").values_list(
cvs_pks = repository_version.content.filter(pulp_type="ansible.collection_version").values_list(
"pk", flat=True
)
cvs = CollectionVersion.objects.filter(pk__in=cvs)

# clean out cvs no longer in the repo when a distro w/ a repo
if not use_repository_version:
CVIndex.objects.filter(repository=repository, repository_version=None).exclude(
collection_version__pk__in=cvs
).delete()
cvs = CollectionVersion.objects.filter(pk__in=cvs_pks)

# get the set of signatures in this repo version
repo_signatures_pks = repository_version.content.filter(
Expand All @@ -129,10 +165,19 @@ def update_index(distribution=None, repository=None, repository_version=None, is
if use_repository_version:
repo_v = repository_version

# clean out cvs no longer in the repo when a distro w/ a repo
if not use_repository_version:
CVIndex.objects.filter(repository=repository, repository_version=None).exclude(
collection_version__pk__in=cvs
).delete()

# iterate through each collection in the repository
for colkey in colset:
namespace, name = colkey

if changed_collections is not None and (namespace, name) not in changed_collections:
continue

# get all the versions for this collection
related_cvs = cvs.filter(namespace=namespace, name=name).only("version")

Expand Down
Expand Up @@ -762,8 +762,6 @@ def _run_search(self, search_client, specs, specs_filter, search_filters):

comparison = compare_keys(skeys, rkeys)

# import epdb; epdb.st()

assert len(skeys) == len(rkeys), comparison

@pytest.mark.pulp_on_localhost
Expand Down Expand Up @@ -1389,3 +1387,50 @@ def test_cross_repo_search_index_on_deleted_distro_with_another_still_remaining(
limit=1000, repository_name=[pulp_repo.name], repository_version="latest"
)
assert resp.meta.count == 1


def test_cross_repo_search_index_on_distribution_with_repository_and_deprecation(
    ansible_collection_deprecations_api_client,
    ansible_distro_api_client,
    ansible_repo_api_client,
    ansible_repo_version_api_client,
    build_and_upload_collection,
    galaxy_v3_collections_api_client,
    galaxy_v3_default_search_api_client,
    gen_object_with_cleanup,
    monitor_task,
):
    """Verify that the search index reflects a deprecation made through a distribution."""

    # Fresh repository with a single uploaded collection.
    repo_body = {"name": str(uuid.uuid4())}
    pulp_repo = gen_object_with_cleanup(ansible_repo_api_client, repo_body)
    col = build_and_upload_collection(ansible_repo=pulp_repo)

    # Distribution bound to the repository itself (so it follows the latest version).
    distro_body = {
        "name": pulp_repo.name,
        "base_path": pulp_repo.name,
        "repository": pulp_repo.pulp_href,
    }
    distro = gen_object_with_cleanup(ansible_distro_api_client, distro_body)

    # Deprecate the uploaded collection and wait for the task to finish.
    uploaded = col[0]
    namespace = uploaded.namespace
    name = uploaded.name
    deprecate_response = galaxy_v3_collections_api_client.update(
        name,
        namespace,
        pulp_repo.name,
        {"deprecated": True},
    )
    monitor_task(deprecate_response.task)

    # The distribution id is the second-to-last segment of its href.
    href_parts = distro.pulp_href.split("/")
    dist_id = href_parts[-2]
    resp = galaxy_v3_default_search_api_client.list(limit=1000, distribution=[dist_id])

    # Exactly one collection version must be indexed ...
    assert resp.meta.count == 1, resp

    # ... and it must carry the deprecation flag.
    assert resp.data[0].is_deprecated, resp

0 comments on commit f6c9d1e

Please sign in to comment.