Skip to content

Commit

Permalink
Add cleanup tasks to remove stale objects
Browse files Browse the repository at this point in the history
Uploads and PulpTemporaryFiles are cleaned up. Set the *_PROTECTION_TIME
settings to None in order to disable this behaviour.

fixes pulp#3949
  • Loading branch information
mdellweg committed Jun 29, 2023
1 parent d1de78b commit 5795c57
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 46 deletions.
2 changes: 2 additions & 0 deletions CHANGES/3949.feature
@@ -0,0 +1,2 @@
Added periodically executed cleanup tasks for uploads and temporary files. Configure a time
interval in ``UPLOAD_PROTECTION_TIME`` or ``TMPFILE_PROTECTION_TIME`` to activate.
11 changes: 11 additions & 0 deletions docs/configuration/settings.rst
Expand Up @@ -442,6 +442,17 @@ ORPHAN_PROTECTION_TIME
up before the task finishes. Default is 1440 minutes (24 hours).


.. _upload_protection_time:
.. _tmpfile_protection_time:

UPLOAD_PROTECTION_TIME and TMPFILE_PROTECTION_TIME
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Pulp uses ``uploads`` and ``pulp temporary files`` to pass data from the api to worker tasks.
These options allow to specify a timeinterval in minutes used for cleaning up stale entries. If
set to 0, automatic cleanup is disabled, which is the default.


.. _task_diagnostics:

TASK_DIAGNOSTICS
Expand Down
4 changes: 3 additions & 1 deletion pulpcore/app/settings.py
Expand Up @@ -240,8 +240,10 @@

WORKER_TTL = 30

# how long to protect orphan content in minutes
# how long to protect ephemeral items in minutes
ORPHAN_PROTECTION_TIME = 24 * 60
UPLOAD_PROTECTION_TIME = 0
TMPFILE_PROTECTION_TIME = 0

REMOTE_USER_ENVIRON_NAME = "REMOTE_USER"

Expand Down
95 changes: 50 additions & 45 deletions pulpcore/app/tasks/orphan.py
@@ -1,12 +1,15 @@
import gc

from django.conf import settings
from django.utils import timezone

from pulpcore.app.models import (
Artifact,
Content,
ProgressReport,
PublishedMetadata,
PulpTemporaryFile,
Upload,
)


Expand Down Expand Up @@ -41,56 +44,58 @@ def orphan_cleanup(content_pks=None, orphan_protection_time=settings.ORPHAN_PROT
content_pks (list): A list of content pks. If specified, only remove these orphans.
"""
progress_bar = ProgressReport(
with ProgressReport(
message="Clean up orphan Content",
total=0,
total=None,
code="clean-up.content",
done=0,
state="running",
)

while True:
content = Content.objects.orphaned(orphan_protection_time, content_pks).exclude(
pulp_type=PublishedMetadata.get_pulp_type()
)
content_count = content.count()
if not content_count:
break

progress_bar.total += content_count
progress_bar.save()

# delete the content
for c in queryset_iterator(content):
progress_bar.increase_by(c.count())
c.delete()

progress_bar.state = "completed"
progress_bar.save()
) as progress_bar:
while True:
content = Content.objects.orphaned(orphan_protection_time, content_pks).exclude(
pulp_type=PublishedMetadata.get_pulp_type()
)
content_count = content.count()
if not content_count:
break

# delete the content
for c in queryset_iterator(content):
progress_bar.increase_by(c.count())
c.delete()

# delete the artifacts that don't belong to any content
artifacts = Artifact.objects.orphaned(orphan_protection_time)

progress_bar = ProgressReport(
with ProgressReport(
message="Clean up orphan Artifacts",
total=artifacts.count(),
code="clean-up.content",
done=0,
state="running",
)
progress_bar.save()

counter = 0
interval = 100
for artifact in artifacts.iterator():
# we need to manually call delete() because it cleans up the file on the filesystem
artifact.delete()
progress_bar.done += 1
counter += 1

if counter >= interval:
progress_bar.save()
counter = 0

progress_bar.state = "completed"
progress_bar.save()
code="clean-up.artifacts",
) as progress_bar:
for artifact in progress_bar.iter(artifacts.iterator()):
# we need to manually call delete() because it cleans up the file on the filesystem
artifact.delete()


def upload_cleanup():
assert settings.UPLOAD_PROTECTION_TIME > 0
expiration = timezone.now() - timezone.timedelta(minutes=settings.UPLOAD_PROTECTION_TIME)
qs = Upload.objects.filter(pulp_created__lt=expiration)
with ProgressReport(
message="Clean up uploads",
total=qs.count(),
code="clean-up.uploads",
) as pr:
for upload in pr.iter(qs):
upload.delete()


def tmpfile_cleanup():
assert settings.TMPFILE_PROTECTION_TIME > 0
expiration = timezone.now() - timezone.timedelta(minutes=settings.TMPFILE_PROTECTION_TIME)
qs = PulpTemporaryFile.objects.filter(pulp_created__lt=expiration)
with ProgressReport(
message="Clean up shared temporary files",
total=qs.count(),
code="clean-up.tmpfiles",
) as pr:
for tmpfile in pr.iter(qs):
tmpfile.delete()
19 changes: 19 additions & 0 deletions pulpcore/app/util.py
Expand Up @@ -311,6 +311,25 @@ def configure_analytics():
models.TaskSchedule.objects.filter(task_name=task_name).delete()


def configure_cleanup():
for name, task_name, protection_time in [
("uploads", "pulpcore.app.tasks.orphan.upload_cleanup", settings.UPLOAD_PROTECTION_TIME),
(
"shared temporary files",
"pulpcore.app.tasks.orphan.tmpfile_cleanup",
settings.TMPFILE_PROTECTION_TIME,
),
]:
if protection_time > 0:
dispatch_interval = timedelta(minutes=protection_time)
name = f"Clean up stale {name} periodically"
models.TaskSchedule.objects.update_or_create(
name=name, defaults={"task_name": task_name, "dispatch_interval": dispatch_interval}
)
else:
models.TaskSchedule.objects.filter(task_name=task_name).delete()


@lru_cache(maxsize=1)
def _artifact_serving_distribution():
return models.ArtifactDistribution.objects.get()
Expand Down
2 changes: 2 additions & 0 deletions pulpcore/tasking/pulpcore_worker.py
Expand Up @@ -24,6 +24,7 @@

from pulpcore.app.util import (
configure_analytics,
configure_cleanup,
set_domain,
set_current_user,
)
Expand Down Expand Up @@ -57,6 +58,7 @@

def startup_hook():
configure_analytics()
configure_cleanup()


class PGAdvisoryLock:
Expand Down

0 comments on commit 5795c57

Please sign in to comment.