From 5795c57bc347365e169b55891b93a08ce8e647f4 Mon Sep 17 00:00:00 2001 From: Matthias Dellweg Date: Fri, 23 Jun 2023 14:21:41 +0200 Subject: [PATCH] Add cleanup tasks to remove stale objects Uploads and PulpTemporaryFiles are cleaned up. Set the *_PROTECTION_TIME settings to 0 in order to disable this behaviour. fixes #3949 --- CHANGES/3949.feature | 2 + docs/configuration/settings.rst | 11 ++++ pulpcore/app/settings.py | 4 +- pulpcore/app/tasks/orphan.py | 95 +++++++++++++++-------------- pulpcore/app/util.py | 19 ++++++ pulpcore/tasking/pulpcore_worker.py | 2 + 6 files changed, 87 insertions(+), 46 deletions(-) create mode 100644 CHANGES/3949.feature diff --git a/CHANGES/3949.feature b/CHANGES/3949.feature new file mode 100644 index 0000000000..c50a905760 --- /dev/null +++ b/CHANGES/3949.feature @@ -0,0 +1,2 @@ +Added periodically executed cleanup tasks for uploads and temporary files. Configure a time +interval in ``UPLOAD_PROTECTION_TIME`` or ``TMPFILE_PROTECTION_TIME`` to activate. diff --git a/docs/configuration/settings.rst b/docs/configuration/settings.rst index 4b9628b66a..3946b59b3f 100644 --- a/docs/configuration/settings.rst +++ b/docs/configuration/settings.rst @@ -442,6 +442,17 @@ ORPHAN_PROTECTION_TIME up before the task finishes. Default is 1440 minutes (24 hours). +.. _upload_protection_time: +.. _tmpfile_protection_time: + +UPLOAD_PROTECTION_TIME and TMPFILE_PROTECTION_TIME +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + Pulp uses ``uploads`` and ``pulp temporary files`` to pass data from the api to worker tasks. + These options allow specifying a time interval in minutes used for cleaning up stale entries. If + set to 0, automatic cleanup is disabled, which is the default. + + .. 
_task_diagnostics: TASK_DIAGNOSTICS diff --git a/pulpcore/app/settings.py b/pulpcore/app/settings.py index fcc5968ccc..8544cb56b8 100644 --- a/pulpcore/app/settings.py +++ b/pulpcore/app/settings.py @@ -240,8 +240,10 @@ WORKER_TTL = 30 -# how long to protect orphan content in minutes +# how long to protect ephemeral items in minutes ORPHAN_PROTECTION_TIME = 24 * 60 +UPLOAD_PROTECTION_TIME = 0 +TMPFILE_PROTECTION_TIME = 0 REMOTE_USER_ENVIRON_NAME = "REMOTE_USER" diff --git a/pulpcore/app/tasks/orphan.py b/pulpcore/app/tasks/orphan.py index 471ed4f082..212b420512 100644 --- a/pulpcore/app/tasks/orphan.py +++ b/pulpcore/app/tasks/orphan.py @@ -1,12 +1,15 @@ import gc from django.conf import settings +from django.utils import timezone from pulpcore.app.models import ( Artifact, Content, ProgressReport, PublishedMetadata, + PulpTemporaryFile, + Upload, ) @@ -41,56 +44,58 @@ def orphan_cleanup(content_pks=None, orphan_protection_time=settings.ORPHAN_PROT content_pks (list): A list of content pks. If specified, only remove these orphans. 
""" - progress_bar = ProgressReport( + with ProgressReport( message="Clean up orphan Content", - total=0, + total=None, code="clean-up.content", - done=0, - state="running", - ) - - while True: - content = Content.objects.orphaned(orphan_protection_time, content_pks).exclude( - pulp_type=PublishedMetadata.get_pulp_type() - ) - content_count = content.count() - if not content_count: - break - - progress_bar.total += content_count - progress_bar.save() - - # delete the content - for c in queryset_iterator(content): - progress_bar.increase_by(c.count()) - c.delete() - - progress_bar.state = "completed" - progress_bar.save() + ) as progress_bar: + while True: + content = Content.objects.orphaned(orphan_protection_time, content_pks).exclude( + pulp_type=PublishedMetadata.get_pulp_type() + ) + content_count = content.count() + if not content_count: + break + + # delete the content + for c in queryset_iterator(content): + progress_bar.increase_by(c.count()) + c.delete() # delete the artifacts that don't belong to any content artifacts = Artifact.objects.orphaned(orphan_protection_time) - progress_bar = ProgressReport( + with ProgressReport( message="Clean up orphan Artifacts", total=artifacts.count(), - code="clean-up.content", - done=0, - state="running", - ) - progress_bar.save() - - counter = 0 - interval = 100 - for artifact in artifacts.iterator(): - # we need to manually call delete() because it cleans up the file on the filesystem - artifact.delete() - progress_bar.done += 1 - counter += 1 - - if counter >= interval: - progress_bar.save() - counter = 0 - - progress_bar.state = "completed" - progress_bar.save() + code="clean-up.artifacts", + ) as progress_bar: + for artifact in progress_bar.iter(artifacts.iterator()): + # we need to manually call delete() because it cleans up the file on the filesystem + artifact.delete() + + +def upload_cleanup(): + assert settings.UPLOAD_PROTECTION_TIME > 0 + expiration = timezone.now() - 
timezone.timedelta(minutes=settings.UPLOAD_PROTECTION_TIME) + qs = Upload.objects.filter(pulp_created__lt=expiration) + with ProgressReport( + message="Clean up uploads", + total=qs.count(), + code="clean-up.uploads", + ) as pr: + for upload in pr.iter(qs): + upload.delete() + + +def tmpfile_cleanup(): + assert settings.TMPFILE_PROTECTION_TIME > 0 + expiration = timezone.now() - timezone.timedelta(minutes=settings.TMPFILE_PROTECTION_TIME) + qs = PulpTemporaryFile.objects.filter(pulp_created__lt=expiration) + with ProgressReport( + message="Clean up shared temporary files", + total=qs.count(), + code="clean-up.tmpfiles", + ) as pr: + for tmpfile in pr.iter(qs): + tmpfile.delete() diff --git a/pulpcore/app/util.py b/pulpcore/app/util.py index 6d52b3f246..5211190f79 100644 --- a/pulpcore/app/util.py +++ b/pulpcore/app/util.py @@ -311,6 +311,25 @@ def configure_analytics(): models.TaskSchedule.objects.filter(task_name=task_name).delete() +def configure_cleanup(): + for name, task_name, protection_time in [ + ("uploads", "pulpcore.app.tasks.orphan.upload_cleanup", settings.UPLOAD_PROTECTION_TIME), + ( + "shared temporary files", + "pulpcore.app.tasks.orphan.tmpfile_cleanup", + settings.TMPFILE_PROTECTION_TIME, + ), + ]: + if protection_time > 0: + dispatch_interval = timedelta(minutes=protection_time) + name = f"Clean up stale {name} periodically" + models.TaskSchedule.objects.update_or_create( + name=name, defaults={"task_name": task_name, "dispatch_interval": dispatch_interval} + ) + else: + models.TaskSchedule.objects.filter(task_name=task_name).delete() + + @lru_cache(maxsize=1) def _artifact_serving_distribution(): return models.ArtifactDistribution.objects.get() diff --git a/pulpcore/tasking/pulpcore_worker.py b/pulpcore/tasking/pulpcore_worker.py index d1437fd5e6..af3e245b6a 100644 --- a/pulpcore/tasking/pulpcore_worker.py +++ b/pulpcore/tasking/pulpcore_worker.py @@ -24,6 +24,7 @@ from pulpcore.app.util import ( configure_analytics, + configure_cleanup, 
set_domain, set_current_user, ) @@ -57,6 +58,7 @@ def startup_hook(): configure_analytics() + configure_cleanup() class PGAdvisoryLock: