Skip to content

Commit

Permalink
WIP: Actually merge colliding content during migration
Browse files Browse the repository at this point in the history
  • Loading branch information
quba42 committed May 23, 2023
1 parent 89cbc2d commit df8d06f
Showing 1 changed file with 107 additions and 2 deletions.
109 changes: 107 additions & 2 deletions pulp_deb/app/migrations/0023_merge_colliding_structure_content.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,120 @@
# Generated by Django 3.2.19 on 2023-05-09 12:35
# Generated by Django 3.2.19 on 2023-05-09 12:35, extended manually;

from django.db import migrations
from django.core.exceptions import ObjectDoesNotExist

BATCH_SIZE = 1000

class Migration(migrations.Migration):

# TODO: Process the queries, updates and deletes of this migration in batches of BATCH_SIZE!




def merge_colliding_structure_content(apps, schema_editor):
    """Merge structure content that shares the same identifying fields.

    For every group of ReleaseArchitecture rows with the same
    (distribution, architecture) and every group of ReleaseComponent rows with
    the same (distribution, component), one row is kept. All RepositoryContent
    rows pointing at the other rows of the group are re-pointed at the kept
    row, and the other rows are deleted. PackageReleaseComponent rows hanging
    off a removed ReleaseComponent are merged into (or recreated for) the kept
    component in the same way.

    Intended to be used as the forward function of a ``migrations.RunPython``
    operation.

    Args:
        apps: The historical app registry handed in by ``RunPython``; used to
            look up the historical versions of the models.
        schema_editor: Supplied by ``RunPython``; not used by this function.
    """
    ReleaseArchitecture = apps.get_model('deb', 'ReleaseArchitecture')
    ReleaseComponent = apps.get_model('deb', 'ReleaseComponent')
    PackageReleaseComponent = apps.get_model('deb', 'PackageReleaseComponent')
    RepositoryContent = apps.get_model('core', 'RepositoryContent')
    Content = apps.get_model('core', 'Content')

    def _get_or_create_prc_to_keep(duplicate_prc_package, component_to_keep):
        """Return the PackageReleaseComponent for the package in the kept component.

        ``duplicate_prc_package`` is the package of the duplicate
        PackageReleaseComponent, ``component_to_keep`` is the primary key of
        the surviving ReleaseComponent. An existing row is reused; otherwise a
        new one is created.
        """
        try:
            prc_to_keep = PackageReleaseComponent.objects.get(
                release_component=component_to_keep, package=duplicate_prc_package
            )
        except ObjectDoesNotExist:
            component = ReleaseComponent.objects.get(pk=component_to_keep)
            # create() already persists the new row; no additional save() needed.
            prc_to_keep = PackageReleaseComponent.objects.create(
                pulp_type='deb.package_release_component',
                release_component=component,
                package=duplicate_prc_package,
            )
        return prc_to_keep

    def _update_repo_content(duplicate_content_ids, content_to_keep):
        """Re-point RepositoryContent rows from the duplicates to the kept content.

        ``duplicate_content_ids`` is an iterable of content primary keys,
        ``content_to_keep`` is the primary key (not a model instance) of the
        content that replaces them.
        """
        # NOTE(review): if a repository already references content_to_keep as
        # well as one of the duplicates, this may collide with uniqueness
        # constraints on RepositoryContent -- confirm against the core model.
        for repo_content in RepositoryContent.objects.filter(
            content_id__in=duplicate_content_ids
        ):
            repo_content.content_id = content_to_keep
            repo_content.save()

    # Deduplicate ReleaseArchitecture:
    distributions = (
        ReleaseArchitecture.objects.all()
        .distinct('distribution')
        .values_list('distribution', flat=True)
    )

    for distribution in distributions:
        architectures = (
            ReleaseArchitecture.objects.filter(distribution=distribution)
            .distinct('architecture')
            .values_list('architecture', flat=True)
        )
        for architecture in architectures:
            duplicate_architecture_ids = list(
                ReleaseArchitecture.objects.filter(
                    distribution=distribution, architecture=architecture
                ).values_list('pk', flat=True)
            )
            if len(duplicate_architecture_ids) > 1:
                # Keep one entry; everything left in the list gets merged into it.
                architecture_to_keep = duplicate_architecture_ids.pop()
                _update_repo_content(duplicate_architecture_ids, architecture_to_keep)
                ReleaseArchitecture.objects.filter(pk__in=duplicate_architecture_ids).delete()
                Content.objects.filter(pk__in=duplicate_architecture_ids).delete()

    # Deduplicate ReleaseComponent:
    distributions = (
        ReleaseComponent.objects.all()
        .distinct('distribution')
        .values_list('distribution', flat=True)
    )
    for distribution in distributions:
        components = (
            ReleaseComponent.objects.filter(distribution=distribution)
            .distinct('component')
            .values_list('component', flat=True)
        )
        for component in components:
            duplicate_component_ids = list(
                ReleaseComponent.objects.filter(
                    distribution=distribution, component=component
                ).values_list('pk', flat=True)
            )
            if len(duplicate_component_ids) > 1:
                # Keep one entry; everything left in the list gets merged into it.
                component_to_keep = duplicate_component_ids.pop()
                _update_repo_content(duplicate_component_ids, component_to_keep)

                # Deduplicate PackageReleaseComponents
                global_duplicate_prcs = set()
                for duplicate_component in duplicate_component_ids:
                    duplicate_prcs = PackageReleaseComponent.objects.filter(
                        release_component=duplicate_component
                    )
                    for duplicate_prc in duplicate_prcs.iterator():
                        prc_to_keep = _get_or_create_prc_to_keep(
                            duplicate_prc.package, component_to_keep
                        )
                        # Pass the primary key, not the instance: the helper
                        # assigns the value directly to ``content_id``.
                        _update_repo_content([duplicate_prc.pk], prc_to_keep.pk)
                        global_duplicate_prcs.add(duplicate_prc.pk)

                ReleaseComponent.objects.filter(pk__in=duplicate_component_ids).delete()
                Content.objects.filter(pk__in=duplicate_component_ids).delete()
                PackageReleaseComponent.objects.filter(pk__in=global_duplicate_prcs).delete()
                Content.objects.filter(pk__in=global_duplicate_prcs).delete()


class Migration(migrations.Migration):
dependencies = [
('deb', '0022_alter_aptdistribution_distribution_ptr_and_more'),
]

operations = [
migrations.RunPython(
merge_colliding_structure_content, reverse_code=migrations.RunPython.noop, elidable=True
),
migrations.RunSQL(
sql="SET CONSTRAINTS ALL IMMEDIATE;",
reverse_sql="",
),
migrations.AlterUniqueTogether(
name='releasearchitecture',
unique_together={('distribution', 'architecture')},
Expand Down

0 comments on commit df8d06f

Please sign in to comment.