Skip to content

Commit

Permalink
Add cleanup tasks to remove stale objects
Browse files Browse the repository at this point in the history
Uploads and PulpTemporaryFiles are cleaned up. Set the *_PROTECTION_TIME
settings to None in order to disable this behaviour.

fixes pulp#3949
  • Loading branch information
mdellweg committed Jun 23, 2023
1 parent d1de78b commit 4ae0638
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 46 deletions.
1 change: 1 addition & 0 deletions CHANGES/3949.feature
@@ -0,0 +1 @@
Added periodically executed cleanup tasks for uploads and temporary files.
4 changes: 3 additions & 1 deletion pulpcore/app/settings.py
Expand Up @@ -240,8 +240,10 @@

WORKER_TTL = 30

# How long to protect ephemeral items from cleanup, in minutes. Each default is one day.
# ORPHAN_PROTECTION_TIME applies to orphaned content and artifacts.
ORPHAN_PROTECTION_TIME = 24 * 60
# Uploads created longer ago than this are deleted by the periodic upload cleanup task.
# A falsy value (e.g. None) removes the periodic schedule for that task.
UPLOAD_PROTECTION_TIME = 24 * 60
# Shared temporary files (PulpTemporaryFile) created longer ago than this are deleted by the
# periodic temporary file cleanup task. A falsy value (e.g. None) removes its schedule.
TMPFILE_PROTECTION_TIME = 24 * 60

REMOTE_USER_ENVIRON_NAME = "REMOTE_USER"

Expand Down
93 changes: 48 additions & 45 deletions pulpcore/app/tasks/orphan.py
@@ -1,12 +1,15 @@
import gc

from django.conf import settings
from django.utils import timezone

from pulpcore.app.models import (
Artifact,
Content,
ProgressReport,
PublishedMetadata,
PulpTemporaryFile,
Upload,
)


Expand Down Expand Up @@ -41,56 +44,56 @@ def orphan_cleanup(content_pks=None, orphan_protection_time=settings.ORPHAN_PROT
content_pks (list): A list of content pks. If specified, only remove these orphans.
"""
progress_bar = ProgressReport(
with ProgressReport(
message="Clean up orphan Content",
total=0,
total=None,
code="clean-up.content",
done=0,
state="running",
)

while True:
content = Content.objects.orphaned(orphan_protection_time, content_pks).exclude(
pulp_type=PublishedMetadata.get_pulp_type()
)
content_count = content.count()
if not content_count:
break

progress_bar.total += content_count
progress_bar.save()

# delete the content
for c in queryset_iterator(content):
progress_bar.increase_by(c.count())
c.delete()

progress_bar.state = "completed"
progress_bar.save()
) as progress_bar:
while True:
content = Content.objects.orphaned(orphan_protection_time, content_pks).exclude(
pulp_type=PublishedMetadata.get_pulp_type()
)
content_count = content.count()
if not content_count:
break

# delete the content
for c in queryset_iterator(content):
progress_bar.increase_by(c.count())
c.delete()

# delete the artifacts that don't belong to any content
artifacts = Artifact.objects.orphaned(orphan_protection_time)

progress_bar = ProgressReport(
with ProgressReport(
message="Clean up orphan Artifacts",
total=artifacts.count(),
code="clean-up.content",
done=0,
state="running",
)
progress_bar.save()

counter = 0
interval = 100
for artifact in artifacts.iterator():
# we need to manually call delete() because it cleans up the file on the filesystem
artifact.delete()
progress_bar.done += 1
counter += 1

if counter >= interval:
progress_bar.save()
counter = 0

progress_bar.state = "completed"
progress_bar.save()
code="clean-up.artifacts",
) as progress_bar:
for artifact in progress_bar.iter(artifacts.iterator()):
# we need to manually call delete() because it cleans up the file on the filesystem
artifact.delete()


def upload_cleanup():
    """
    Delete uploads that are older than ``settings.UPLOAD_PROTECTION_TIME`` minutes.

    Every ``Upload`` whose ``pulp_created`` timestamp lies before the protection window is
    deleted individually, and the progress is recorded in a ``ProgressReport`` with the code
    ``clean-up.uploads``.

    If the protection time is ``None`` or not positive, the cleanup is considered disabled and
    the task returns without deleting anything.
    """
    # Imported locally so that this function does not rely on ``django.utils.timezone``
    # happening to re-export ``timedelta``.
    from datetime import timedelta

    protection_time = settings.UPLOAD_PROTECTION_TIME
    if protection_time is None or protection_time <= 0:
        # Without this guard ``None`` raises a TypeError in timedelta(), and zero or a negative
        # value moves the expiration to now (or the future), which would match every upload.
        return

    expiration = timezone.now() - timedelta(minutes=protection_time)
    qs = Upload.objects.filter(pulp_created__lt=expiration)
    with ProgressReport(
        message="Clean up uploads",
        total=qs.count(),
        code="clean-up.uploads",
    ) as pr:
        for upload in pr.iter(qs):
            upload.delete()


def tmpfile_cleanup():
    """
    Delete shared temporary files older than ``settings.TMPFILE_PROTECTION_TIME`` minutes.

    Every ``PulpTemporaryFile`` whose ``pulp_created`` timestamp lies before the protection
    window is deleted individually, and the progress is recorded in a ``ProgressReport`` with
    the code ``clean-up.tmpfiles``.

    If the protection time is ``None`` or not positive, the cleanup is considered disabled and
    the task returns without deleting anything.
    """
    # Imported locally so that this function does not rely on ``django.utils.timezone``
    # happening to re-export ``timedelta``.
    from datetime import timedelta

    protection_time = settings.TMPFILE_PROTECTION_TIME
    if protection_time is None or protection_time <= 0:
        # Without this guard ``None`` raises a TypeError in timedelta(), and zero or a negative
        # value moves the expiration to now (or the future), which would match every file.
        return

    expiration = timezone.now() - timedelta(minutes=protection_time)
    qs = PulpTemporaryFile.objects.filter(pulp_created__lt=expiration)
    with ProgressReport(
        message="Clean up shared temporary files",
        total=qs.count(),
        code="clean-up.tmpfiles",
    ) as pr:
        for tmpfile in pr.iter(qs):
            tmpfile.delete()
19 changes: 19 additions & 0 deletions pulpcore/app/util.py
Expand Up @@ -311,6 +311,25 @@ def configure_analytics():
models.TaskSchedule.objects.filter(task_name=task_name).delete()


def configure_cleanup():
    """
    Create, update or remove the periodic schedules of the cleanup tasks.

    For uploads and for shared temporary files, the matching ``*_PROTECTION_TIME`` setting is
    inspected. A truthy value creates or updates the ``TaskSchedule`` named
    "Clean up stale <items> periodically" so that it dispatches the cleanup task with an
    interval of that many minutes. A falsy value deletes every schedule of that task instead.
    """
    cleanup_specs = (
        ("uploads", "pulpcore.app.tasks.orphan.upload_cleanup", settings.UPLOAD_PROTECTION_TIME),
        (
            "shared temporary files",
            "pulpcore.app.tasks.orphan.tmpfile_cleanup",
            settings.TMPFILE_PROTECTION_TIME,
        ),
    )
    for name, task_name, protection_time in cleanup_specs:
        if not protection_time:
            # Cleanup is disabled: drop any schedule that dispatches this task.
            models.TaskSchedule.objects.filter(task_name=task_name).delete()
            continue

        schedule_name = f"Clean up stale {name} periodically"
        schedule_defaults = {
            "task_name": task_name,
            "dispatch_interval": timedelta(minutes=protection_time),
        }
        models.TaskSchedule.objects.update_or_create(
            name=schedule_name, defaults=schedule_defaults
        )


@lru_cache(maxsize=1)
def _artifact_serving_distribution():
    """
    Return the ``ArtifactDistribution`` fetched by a parameterless ``objects.get()``.

    The result is memoized by ``lru_cache``, so repeated calls reuse the instance that was
    fetched first instead of querying again.
    """
    distribution = models.ArtifactDistribution.objects.get()
    return distribution
Expand Down
2 changes: 2 additions & 0 deletions pulpcore/tasking/pulpcore_worker.py
Expand Up @@ -24,6 +24,7 @@

from pulpcore.app.util import (
configure_analytics,
configure_cleanup,
set_domain,
set_current_user,
)
Expand Down Expand Up @@ -57,6 +58,7 @@

def startup_hook():
    """Run the configuration routines in order: analytics first, then cleanup."""
    for configure in (configure_analytics, configure_cleanup):
        configure()


class PGAdvisoryLock:
Expand Down

0 comments on commit 4ae0638

Please sign in to comment.