Skip to content

Commit

Permalink
Improve publish speed by 2x to 20x
Browse files Browse the repository at this point in the history
Use a more complex single query rather than N small queries to iterate
content artifacts.

Using a repository containing 20,000 content units, the publish time
is improved from ~23 seconds to 1 second if the content is immediately
synced, and to 14 seconds (vs. the same ~23 seconds) if content is on_demand.

closes: #8508
https://pulp.plan.io/issues/8508
  • Loading branch information
dralley committed Apr 6, 2021
1 parent be1c401 commit 69182ee
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGES/8508.misc
@@ -0,0 +1 @@
Substantially improved speed of publishing repositories, especially large ones.
44 changes: 24 additions & 20 deletions pulp_file/app/tasks/publishing.py
Expand Up @@ -5,9 +5,14 @@

from django.core.files import File

from pulpcore.plugin.models import RepositoryVersion, PublishedMetadata, RemoteArtifact

from pulp_file.app.models import FileContent, FilePublication
from pulpcore.plugin.models import (
ContentArtifact,
RepositoryVersion,
PublishedMetadata,
RemoteArtifact,
)

from pulp_file.app.models import FilePublication
from pulp_file.manifest import Entry, Manifest


Expand Down Expand Up @@ -57,20 +62,19 @@ def populate(publication):
"""

def find_artifact():
_artifact = content_artifact.artifact
if not _artifact:
_artifact = RemoteArtifact.objects.filter(content_artifact=content_artifact).first()
return _artifact

for content in FileContent.objects.filter(
pk__in=publication.repository_version.content
).order_by("-pulp_created"):
for content_artifact in content.contentartifact_set.all():
artifact = find_artifact()
entry = Entry(
relative_path=content_artifact.relative_path,
digest=artifact.sha256,
size=artifact.size,
)
yield entry
content_artifacts = ContentArtifact.objects.filter(
content__in=publication.repository_version.content
).order_by("-content__pulp_created")

for content_artifact in content_artifacts.select_related("artifact").iterator():
if content_artifact.artifact:
artifact = content_artifact.artifact
else:
# TODO: this scales poorly, one query per on_demand content being published.
artifact = RemoteArtifact.objects.filter(content_artifact=content_artifact).first()
entry = Entry(
relative_path=content_artifact.relative_path,
digest=artifact.sha256,
size=artifact.size,
)
yield entry

0 comments on commit 69182ee

Please sign in to comment.