Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added bulk-task-purge feature. #1721

Merged
merged 1 commit into from
Dec 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGES/8554.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Added a ``/tasks/purge/`` API to do bulk-deletion of old tasks.

Over time, the database can fill with task-records. This API allows
an installation to bulk-remove records based on their completion
timestamps.

NOTE: this endpoint is in tech-preview and may change in
backwards-incompatible ways in the future.
ggainey marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions pulpcore/app/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
RBACContentGuardSerializer,
RBACContentGuardPermissionSerializer,
)
from .purge import PurgeSerializer # noqa
from .repository import ( # noqa
RemoteSerializer,
RepositorySerializer,
Expand Down
21 changes: 21 additions & 0 deletions pulpcore/app/serializers/purge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from datetime import datetime, timedelta, timezone
from gettext import gettext as _

from rest_framework import serializers

from pulpcore.app.serializers import ValidateFieldsMixin # noqa
from pulpcore.constants import TASK_FINAL_STATES


def _default_finished_before():
    """
    Return the default ``finished_before`` cutoff: the UTC date 30 days ago as '%Y-%m-%d'.

    This is handed to the field as a callable so that it is evaluated every time the default
    is needed. Computing the value inline in the field definition would freeze it at import
    time, leaving a long-running process with an ever-staler cutoff.
    """
    return (datetime.now(timezone.utc) - timedelta(days=30)).strftime("%Y-%m-%d")


# Validates the parameters accepted by the task-purge API.
#
# NOTE(review): ValidateFieldsMixin is listed *after* serializers.Serializer, so any method the
# mixin shares with Serializer resolves to Serializer's version first — confirm this order is
# intended.
class PurgeSerializer(serializers.Serializer, ValidateFieldsMixin):
    # Cutoff timestamp; tasks that finished earlier than this are candidates for purging.
    # The default is a callable, so "30 days ago" is relative to the time of the request.
    # NOTE(review): with a callable default the generated API schema may no longer advertise
    # a literal default value — confirm this is acceptable.
    finished_before = serializers.DateTimeField(
        help_text=_(
            "Purge tasks completed earlier than this timestamp. Format '%Y-%m-%d[T%H:%M:%S]'"
        ),
        default=_default_finished_before,
    )
    # Task states eligible for purging; choices are limited to TASK_FINAL_STATES.
    states = serializers.MultipleChoiceField(
        choices=TASK_FINAL_STATES,
        default=["completed"],
        help_text=_("List of task-states to be purged. Only 'final' states are allowed."),
    )
2 changes: 2 additions & 0 deletions pulpcore/app/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

from .orphan import orphan_cleanup # noqa

from .purge import purge # noqa

from .reclaim_space import reclaim_space # noqa

from .repository import repair_all_artifacts # noqa
50 changes: 50 additions & 0 deletions pulpcore/app/tasks/purge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from gettext import gettext as _
from django_currentuser.middleware import get_current_authenticated_user
from pulpcore.app.models import (
ProgressReport,
Task,
)
from pulpcore.app.role_util import get_objects_for_user


def purge(finished_before, states):
    """
    This task purges from the database records of tasks which finished prior to the specified time.

    It will remove only tasks that are 'owned' by the current-user (admin-users own All The Things,
    so admins can delete all tasks).

    Only tasks whose state is in ``states`` are removed. This function does not itself check
    that those states are 'final'; callers are expected to pass final states only, so that
    incomplete tasks (i.e., in states running|waiting|cancelling) are never purged.

    It reports (using ProgressReport) the total entities deleted, as well as individual counts
    for each class of entity. This shows the results of cascading-deletes that are triggered
    by deleting a Task.

    Args:
        finished_before (DateTime): Earliest finished-time to **NOT** purge.
        states (List[str]): List of task-states we want to purge.

    """
    current_user = get_current_authenticated_user()
    candidates = Task.objects.filter(finished_at__lt=finished_before, state__in=states)
    # Restrict the candidates to tasks the current user is allowed to delete, then delete them.
    # delete() yields the total number of rows removed plus a per-type breakdown.
    deletable = get_objects_for_user(current_user, "core.delete_task", qs=candidates)
    units_deleted, details = deletable.delete()

    # Progress bar reporting total-units
    ProgressReport(
        message=_("Purged task-objects total"),
        total=units_deleted,
        code="purge.tasks.total",
        done=units_deleted,
        state="completed",
    ).save()

    # This loop reports back the specific entities deleted and the number removed
    for key, count in details.items():
        ProgressReport(
            # Translate the template first, then interpolate; formatting before the gettext
            # lookup would produce a message id that can never match a translation.
            message=_("Purged task-objects of type {}").format(key),
            total=count,
            code="purge.tasks.key.{}".format(key),
            done=count,
            state="completed",
        ).save()
ggainey marked this conversation as resolved.
Show resolved Hide resolved
37 changes: 36 additions & 1 deletion pulpcore/app/viewsets/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,23 @@
from django_filters.rest_framework import DjangoFilterBackend, filters
from drf_spectacular.utils import extend_schema
from rest_framework import mixins, status
from rest_framework.decorators import action
from rest_framework.filters import OrderingFilter
from rest_framework.response import Response
from rest_framework.serializers import ValidationError

from pulpcore.app.models import Task, TaskGroup, Worker
from pulpcore.app.response import OperationPostponedResponse
from pulpcore.app.serializers import (
AsyncOperationResponseSerializer,
MinimalTaskSerializer,
TaskCancelSerializer,
TaskGroupSerializer,
PurgeSerializer,
TaskSerializer,
WorkerSerializer,
TaskGroupSerializer,
)
from pulpcore.app.tasks import purge
from pulpcore.app.viewsets import BaseFilterSet, NamedModelViewSet
from pulpcore.app.viewsets.base import DATETIME_FILTER_OPTIONS, NAME_FILTER_OPTIONS
from pulpcore.app.viewsets.custom_filters import (
Expand All @@ -24,6 +29,7 @@
CreatedResourcesFilter,
)
from pulpcore.constants import TASK_INCOMPLETE_STATES, TASK_STATES, TASK_CHOICES
from pulpcore.tasking.tasks import dispatch
from pulpcore.tasking.util import cancel as cancel_task


Expand Down Expand Up @@ -90,6 +96,13 @@ class TaskViewSet(
"effect": "allow",
"condition": "has_model_or_obj_perms:core.change_task",
},
# 'purge' is filtered by current-user and core.delete_task permissions at the queryset
# level, and needs no extra protections here
{
"action": ["purge"],
"principal": "authenticated",
"effect": "allow",
},
],
"creation_hooks": [
{
Expand Down Expand Up @@ -139,6 +152,28 @@ def get_serializer_class(self):
return TaskCancelSerializer
return super().get_serializer_class()

@extend_schema(
    operation_id="tasks_purge",
    summary="Purge Completed Tasks",
    description=(
        "Trigger an asynchronous task that deletes completed tasks that finished prior"
        " to a specified timestamp (tech-preview, may change in the future)."
    ),
    request=PurgeSerializer,
    responses={202: AsyncOperationResponseSerializer},
)
@action(detail=False, methods=["post"])
def purge(self, request):
    """
    Purge task-records for tasks in 'final' states.

    Validates the request body with PurgeSerializer, dispatches the ``purge`` task with the
    resulting cutoff and states, and answers with an OperationPostponedResponse for that task.
    Invalid input raises a validation error before anything is dispatched.
    """
    purge_params = PurgeSerializer(data=request.data)
    purge_params.is_valid(raise_exception=True)

    # The task arguments come from the serializer's rendered ``data`` (not ``validated_data``);
    # presumably so they are plain values that can be stored with the task — confirm.
    rendered = purge_params.data
    cutoff = rendered["finished_before"]
    wanted_states = list(rendered["states"])

    async_task = dispatch(purge, args=[cutoff, wanted_states])
    return OperationPostponedResponse(async_task, request)


class TaskGroupFilter(BaseFilterSet):
class Meta:
Expand Down
Loading