From d5bc207b9e338dafd541d7d82e3d5138b73cf763 Mon Sep 17 00:00:00 2001 From: David Davis Date: Fri, 7 May 2021 14:12:44 -0400 Subject: [PATCH] Add timestamp_of_interest to Content and Artifact fixes #8823 --- CHANGES/plugin_api/8823.feature | 5 ++++ .../0068_add_timestamp_of_interest.py | 23 +++++++++++++++++++ pulpcore/app/models/content.py | 12 ++++++++++ pulpcore/content/handler.py | 12 +++++++++- pulpcore/plugin/actions.py | 6 +++++ pulpcore/plugin/stages/artifact_stages.py | 9 +++++++- pulpcore/plugin/stages/content_stages.py | 10 ++++++-- pulpcore/plugin/viewsets/content.py | 8 ++++++- 8 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 CHANGES/plugin_api/8823.feature create mode 100644 pulpcore/app/migrations/0068_add_timestamp_of_interest.py diff --git a/CHANGES/plugin_api/8823.feature b/CHANGES/plugin_api/8823.feature new file mode 100644 index 0000000000..e6c089dbbc --- /dev/null +++ b/CHANGES/plugin_api/8823.feature @@ -0,0 +1,5 @@ +Added a ``timestamp_of_interest`` field to Content and Artifacts. This field can be updated by +calling a new method ``touch()`` on Artifacts and Content. Plugin writers should call this method +whenever they deal with Content or Artifacts. For example, this includes places where Content is +uploaded or added to Repository Versions. This will prevent Content and Artifacts from being cleaned +up when orphan cleanup becomes a non-blocking task in pulpcore 3.15. diff --git a/pulpcore/app/migrations/0068_add_timestamp_of_interest.py b/pulpcore/app/migrations/0068_add_timestamp_of_interest.py new file mode 100644 index 0000000000..d3e3ae935f --- /dev/null +++ b/pulpcore/app/migrations/0068_add_timestamp_of_interest.py @@ -0,0 +1,23 @@ +# Generated by Django 2.2.20 on 2021-05-07 18:01 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0067_add_protect_to_task_reservation'), + ] + + operations = [ + migrations.AddField( + model_name='artifact', + name='timestamp_of_interest', + field=models.DateTimeField(auto_now=True), + ), + migrations.AddField( + model_name='content', + name='timestamp_of_interest', + field=models.DateTimeField(auto_now=True), + ), + ] diff --git a/pulpcore/app/models/content.py b/pulpcore/app/models/content.py index 6d6eb015e1..a0fdd317ad 100644 --- a/pulpcore/app/models/content.py +++ b/pulpcore/app/models/content.py @@ -160,6 +160,7 @@ class Artifact(HandleTempFilesMixin, BaseModel): sha256 (models.CharField): The SHA-256 checksum of the file (REQUIRED). sha384 (models.CharField): The SHA-384 checksum of the file. sha512 (models.CharField): The SHA-512 checksum of the file. + timestamp_of_interest (models.DateTimeField): timestamp that prevents orphan cleanup """ def storage_path(self, name): @@ -180,6 +181,7 @@ def storage_path(self, name): sha256 = models.CharField(max_length=64, null=False, unique=True, db_index=True) sha384 = models.CharField(max_length=96, null=True, unique=True, db_index=True) sha512 = models.CharField(max_length=128, null=True, unique=True, db_index=True) + timestamp_of_interest = models.DateTimeField(auto_now=True) objects = BulkCreateManager() @@ -331,6 +333,10 @@ def from_pulp_temporary_file(cls, temp_file): temp_file.delete() return artifact + def touch(self): + """Update timestamp_of_interest.""" + self.save(update_fields=["timestamp_of_interest"]) + class PulpTemporaryFile(HandleTempFilesMixin, BaseModel): """ @@ -426,6 +432,7 @@ class Content(MasterModel, QueryMixin): Fields: upstream_id (models.UUIDField) : identifier of content imported from an 'upstream' Pulp + timestamp_of_interest (models.DateTimeField): timestamp that prevents orphan cleanup Relations: @@ -437,6 +444,7 @@ class Content(MasterModel, QueryMixin): upstream_id = models.UUIDField(null=True) # Used by PulpImport/Export processing _artifacts = models.ManyToManyField(Artifact, through="ContentArtifact") + timestamp_of_interest = models.DateTimeField(auto_now=True) objects = BulkCreateManager() @@ -495,6 +503,10 @@ def init_from_artifact_and_relative_path(artifact, relative_path): """ raise NotImplementedError() + def touch(self): + """Update timestamp_of_interest.""" + self.save(update_fields=["timestamp_of_interest"]) + class ContentArtifact(BaseModel, QueryMixin): """ diff --git a/pulpcore/content/handler.py b/pulpcore/content/handler.py index 54438a4ee8..f016b77729 100644 --- a/pulpcore/content/handler.py +++ b/pulpcore/content/handler.py @@ -23,6 +23,7 @@ ) from django.db import ( # noqa: E402: module level not at top of file connection, + DatabaseError, IntegrityError, transaction, ) @@ -648,7 +649,16 @@ def _save_artifact(self, download_result, remote_artifact): with transaction.atomic(): artifact.save() except IntegrityError: - artifact = Artifact.objects.get(artifact.q()) + try: + artifact = Artifact.objects.get(artifact.q()) + artifact.touch() + except (Artifact.DoesNotExist, DatabaseError): + # it's possible that orphan cleanup deleted the artifact + # so fall back to creating a new artifact again + artifact = Artifact( + **download_result.artifact_attributes, file=download_result.path + ) + artifact.save() update_content_artifact = True if content_artifact._state.adding: # This is the first time pull-through content was requested. diff --git a/pulpcore/plugin/actions.py b/pulpcore/plugin/actions.py index 0e75d42108..25a7299111 100644 --- a/pulpcore/plugin/actions.py +++ b/pulpcore/plugin/actions.py @@ -1,3 +1,4 @@ +from django.db import DatabaseError from drf_spectacular.utils import extend_schema from rest_framework.decorators import action @@ -39,6 +40,11 @@ def modify(self, request, pk): if "add_content_units" in request.data: for url in request.data["add_content_units"]: content = self.get_resource(url, Content) + try: + content.touch() + except DatabaseError: + # content has since been removed. call get_url to raise an exception. + content = self.get_resource(url, Content) add_content_units.append(content.pk) if "remove_content_units" in request.data: diff --git a/pulpcore/plugin/stages/artifact_stages.py b/pulpcore/plugin/stages/artifact_stages.py index 60d50d14ed..c79f5a1d78 100644 --- a/pulpcore/plugin/stages/artifact_stages.py +++ b/pulpcore/plugin/stages/artifact_stages.py @@ -3,6 +3,7 @@ from gettext import gettext as _ import logging +from django.db import DatabaseError from django.db.models import Prefetch, prefetch_related_objects from pulpcore.plugin.exceptions import UnsupportedDigestValidationError @@ -93,7 +94,13 @@ async def run(self): for result in existing_artifacts: result_digest = getattr(result, digest_type) if result_digest == artifact_digest: - d_artifact.artifact = result + try: + result.touch() + except DatabaseError: + # update failed so leave artifact empty to create it later + pass + else: + d_artifact.artifact = result break for d_content in batch: diff --git a/pulpcore/plugin/stages/content_stages.py b/pulpcore/plugin/stages/content_stages.py index f21cb923b7..cf79f717ef 100644 --- a/pulpcore/plugin/stages/content_stages.py +++ b/pulpcore/plugin/stages/content_stages.py @@ -1,7 +1,7 @@ from collections import defaultdict from django.core.exceptions import ObjectDoesNotExist -from django.db import IntegrityError, transaction +from django.db import DatabaseError, IntegrityError, transaction from django.db.models import Q from pulpcore.plugin.models import Content, ContentArtifact, ProgressReport @@ -50,7 +50,13 @@ async def run(self): for model_type in content_q_by_type.keys(): for result in model_type.objects.filter(content_q_by_type[model_type]).iterator(): for d_content in d_content_by_nat_key[result.natural_key()]: - d_content.content = result + try: + result.touch() + except DatabaseError: + # update failed so leave content empty to create it later + pass + else: + d_content.content = result for d_content in batch: await self.put(d_content) diff --git a/pulpcore/plugin/viewsets/content.py b/pulpcore/plugin/viewsets/content.py index b2dc505c36..5744272e49 100644 --- a/pulpcore/plugin/viewsets/content.py +++ b/pulpcore/plugin/viewsets/content.py @@ -2,6 +2,7 @@ from drf_spectacular.utils import extend_schema +from django.db import DatabaseError from django.db.utils import IntegrityError from pulpcore.app import tasks @@ -109,7 +110,12 @@ def init_content_data(self, serializer, request): artifact.save() except IntegrityError: # if artifact already exists, let's use it - artifact = Artifact.objects.get(sha256=artifact.sha256) + try: + artifact = Artifact.objects.get(sha256=artifact.sha256) + artifact.touch() + except (Artifact.DoesNotExist, DatabaseError): + # the artifact has since been removed from when we first attempted to save it + artifact.save() task_payload["artifact"] = ArtifactSerializer( artifact, context={"request": request}