Skip to content

Commit

Permalink
Add a retention policy feature for purging older packages
Browse files Browse the repository at this point in the history
  • Loading branch information
dralley committed Jul 2, 2020
1 parent ce6c364 commit 4d0247c
Show file tree
Hide file tree
Showing 11 changed files with 291 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGES/5367.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a retention policy feature - when specified, the latest N versions of each package will be kept and older versions will be purged.
5 changes: 4 additions & 1 deletion coverage.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Manual Coverage
| As a user, I can sync all yum content types ( NO drpm) in additive mode (default) | PART | checking counts of packages and advisories |
| As a user, I can sync and skip specific type (srpm) | NO | |
| As a user, I can sync opensuse repository | NO | |
| As a user, I can sync from a mirror list | YES | |
| As a user, I can sync from a mirror list | YES | |
| **Duplicates** | | |
| As a user, I have only one advisory with the same id in a repo version | YES | |
| As a user, I have only one module with the same NSVCA in a repo version | NO | |
Expand Down Expand Up @@ -41,3 +41,6 @@ Manual Coverage
| As a user, when a module is removed, its packages are removed as well (if not referenced by other modules) | NO | |
| **Consumer cases** | | |
| As a user, I can use dnf to install all the content served by Pulp | PART | only covers rpm installation |
| **Retention** | | |
| As a user, I can set a repository option that retains only the latest N versions of each package (grouped by name and architecture) | PART | No coverage of packages with differing arch in the same repo (src, i686, x86_64), no coverage of non-sync repo modifications, no coverage of modular RPMs being exempted. |

2 changes: 1 addition & 1 deletion docs/_static/api.json

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions docs/workflows/create_sync_publish.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ Repository GET response:
"versions_href": "/pulp/api/v3/repositories/rpm/rpm/a02ace53-d490-458d-8b93-604fbcd23a9c/versions/"
}
RPM Repositories support several additional options.

- metadata_signing_service:
See :ref:`metadata_signing`.
- retain_package_versions:
The maximum number of versions of each package to keep; as new versions of packages are added by upload, sync, or copy, older versions of the same packages are automatically removed. A value of 0 means "unlimited".

.. _create-remote:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@
class Migration(migrations.Migration):

dependencies = [
('rpm', '0011_rpmremote_sles_auth_token'),
('rpm', '0012_remove_pkg_group_env_cat_related_pkgs'),
]

operations = [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 2.2.13 on 2020-06-24 19:32

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``retain_package_versions`` column to ``rpmrepository``.

    The new field defaults to 0, so existing rows receive 0 when the column is added.
    """

    # Must run after the migration that precedes it in the 'rpm' app.
    dependencies = [
        ("rpm", "0013_RAW_rpm_evr_extension"),
    ]

    operations = [
        migrations.AddField(
            model_name="rpmrepository",
            name="retain_package_versions",
            field=models.PositiveIntegerField(default=0),
        ),
    ]
30 changes: 30 additions & 0 deletions pulp_rpm/app/models/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from django.contrib.postgres.fields import JSONField
from django.db import models
from django.db.models import Window, F
from django.db.models.functions import RowNumber

from pulpcore.plugin.models import Content

Expand All @@ -26,6 +28,31 @@ def db_type(self, connection):
return 'evr_t'


class PackageManager(models.Manager):
    """Object manager for Package that adds an "age" annotation helper."""

    def with_age(self):
        """Annotate each Package in the queryset with its relative "age".

        The age is computed by a postgresql window function: Packages are partitioned by
        name and architecture, every partition is ordered by 'evr' (descending), and the
        row number of a package inside its partition becomes its "age". The newest package
        of a group therefore gets age=1, the second newest age=2, and so on.

        Partitioning by architecture as well as by name matters because packages can share
        a name and version numbers without being interchangeable when their arch differs,
        for example 'x86_64' and 'i686', or 'src' (SRPM) and any other arch.

        Returns:
            The result of ``annotate()`` with an additional ``age`` annotation.
        """
        # Row number within each (name, arch) group, newest 'evr' first.
        newest_first_rank = Window(
            expression=RowNumber(),
            partition_by=[F('name'), F('arch')],
            order_by=F('evr').desc(),
        )
        return self.annotate(age=newest_first_rank)


class Package(Content):
"""
The "Package" content type. Formerly "rpm" in Pulp 2.
Expand Down Expand Up @@ -122,6 +149,8 @@ class Package(Content):
attribute in the primary XML.
"""

objects = PackageManager()

TYPE = 'package'

# Required metadata
Expand All @@ -131,6 +160,7 @@ class Package(Content):
release = models.CharField(max_length=255)
arch = models.CharField(max_length=20)

# Currently filled by a database trigger - consider eventually switching to generated column
evr = RpmVersionField()

pkgId = models.CharField(unique=True, max_length=128) # formerly "checksum" in Pulp 2
Expand Down
43 changes: 40 additions & 3 deletions pulp_rpm/app/models/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
models,
transaction,
)

from pulpcore.plugin.download import DownloaderFactory
from pulpcore.plugin.models import (
AsciiArmoredDetachedSigningService,
Content,
CreatedResource,
Remote,
Repository,
Expand Down Expand Up @@ -77,6 +77,7 @@ class RpmRepository(Repository):
last_sync_remote = models.ForeignKey(Remote, null=True, on_delete=models.SET_NULL)
last_sync_repo_version = models.PositiveIntegerField(default=0)
original_checksum_types = JSONField(default=dict)
retain_package_versions = models.PositiveIntegerField(default=0)

def new_version(self, base_version=None):
"""
Expand Down Expand Up @@ -124,8 +125,8 @@ def finalize_new_version(self, new_version):
Resolve advisory conflicts when there is more than one advisory with the same id.
Args:
new_version (pulpcore.app.models.RepositoryVersion): The incomplete RepositoryVersion to
finalize.
new_version (pulpcore.app.models.RepositoryVersion): The incomplete RepositoryVersion
to finalize.
"""
if new_version.base_version:
previous_version = new_version.base_version
Expand All @@ -140,10 +141,46 @@ def finalize_new_version(self, new_version):
from pulp_rpm.app.modulemd import resolve_module_packages # avoid circular import
resolve_module_packages(new_version, previous_version)

self._apply_retention_policy(new_version)

from pulp_rpm.app.advisory import resolve_advisories # avoid circular import
resolve_advisories(new_version, previous_version)
validate_repo_version(new_version)

def _apply_retention_policy(self, new_version):
    """Enforce the repository's "retain_package_versions" setting on a new version.

    Non-modular packages whose "age" exceeds the retention setting are removed from
    the version. A setting of 0 disables the policy. A setting of 3 keeps the 3 most
    recent versions of each package and discards the older ones.

    Args:
        new_version (models.RepositoryVersion): Repository version to filter; it must
            not be complete yet.
    """
    assert not new_version.complete, \
        "Cannot apply retention policy to completed repository versions"

    keep_count = self.retain_package_versions
    if keep_count <= 0:
        # Retention is disabled; leave the version untouched.
        return

    # Ideally the database would select the packages to drop directly, e.g. with
    # something like ".filter(F('age') > keep_count)", but Django cannot currently
    # filter on this annotation. Raw SQL could, but the repository version content
    # membership subquery is django-managed and would be difficult to share.
    #
    # So the packages are annotated with their age and compared in Python instead.
    package_content = new_version.content.filter(pulp_type=Package.get_pulp_type())
    candidates = Package.objects.with_age().filter(
        pk__in=package_content,
        is_modular=False,  # modular RPMs are exempt from the retention policy
    ).only('pk')

    expired_pks = [pkg.pk for pkg in candidates if pkg.age > keep_count]

    new_version.remove_content(Content.objects.filter(pk__in=expired_pks))


class RpmRemote(Remote):
"""
Expand Down
19 changes: 14 additions & 5 deletions pulp_rpm/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,16 +305,25 @@ class RpmRepositorySerializer(RepositorySerializer):
"""

metadata_signing_service = serializers.HyperlinkedRelatedField(
help_text="A reference to an associated signing service.",
help_text=_("A reference to an associated signing service."),
view_name='signing-services-detail',
queryset=AsciiArmoredDetachedSigningService.objects.all(),
many=False,
required=False,
allow_null=True
)
retain_package_versions = serializers.IntegerField(
help_text=_("The number of versions of each package to keep in the repository; "
"older versions will be purged. The default is '0', which will disable "
"this feature and keep all versions of each package."),
min_value=0,
required=False,
)

class Meta:
fields = RepositorySerializer.Meta.fields + ('metadata_signing_service',)
fields = RepositorySerializer.Meta.fields + (
'metadata_signing_service', 'retain_package_versions'
)
model = RpmRepository


Expand All @@ -324,13 +333,13 @@ class RpmRemoteSerializer(RemoteSerializer):
"""

sles_auth_token = serializers.CharField(
help_text="Authentication token for SLES repositories.",
help_text=_("Authentication token for SLES repositories."),
required=False, allow_null=True
)

policy = serializers.ChoiceField(
help_text="The policy to use when downloading content. The possible values include: "
"'immediate', 'on_demand', and 'streamed'. 'immediate' is the default.",
help_text=_("The policy to use when downloading content. The possible values include: "
"'immediate', 'on_demand', and 'streamed'. 'immediate' is the default."),
choices=Remote.POLICY_CHOICES,
default=Remote.IMMEDIATE
)
Expand Down
3 changes: 3 additions & 0 deletions pulp_rpm/app/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ def sync(self, request, pk):
skip_types = serializer.validated_data.get('skip_types')
optimize = serializer.validated_data.get('optimize')

if repository.retain_package_versions > 0 and mirror:
raise DRFValidationError("Cannot use 'retain_package_versions' with mirror-mode sync")

result = enqueue_with_reservation(
tasks.synchronize,
[repository, remote],
Expand Down
Loading

0 comments on commit 4d0247c

Please sign in to comment.