Skip to content

Commit

Permalink
feat: Add deleteorphan command, closes #340
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed May 3, 2024
1 parent c2da83e commit 741ae5c
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 1 deletion.
13 changes: 12 additions & 1 deletion docs/cli.rst
Expand Up @@ -66,7 +66,7 @@ deletecollection

Delete a collection and its ancestors.

Rows in the ``package_data`` and ``data`` tables are not deleted.
Rows in the ``package_data`` and ``data`` tables are not deleted. Use :ref:`cli-deleteorphan` instead.

.. code-block:: bash
Expand All @@ -81,6 +81,17 @@ Get the status of a root collection and its children.
./manage.py collectionstatus collection_id
.. _cli-deleteorphan:
deleteorphan
~~~~~~~~~~~~

Delete rows from the data and package_data tables that relate to no collections.

.. code-block:: bash
./manage.py deleteorphan
.. _cli-workers:

Workers
Expand Down
54 changes: 54 additions & 0 deletions process/management/deleteorphan.py
@@ -0,0 +1,54 @@
from django.core.management.base import BaseCommand
from django.db import connection, transaction
from django.utils.translation import gettext as t
from django.utils.translation import gettext_lazy as _

from process.util import wrap as w


class Command(BaseCommand):
    """Delete rows from ``data`` and ``package_data`` that nothing references.

    A ``data`` row is selected when no ``record``, ``release`` or
    ``compiled_release`` row has its ``id`` as ``data_id``. A ``package_data``
    row is selected when no ``record`` or ``release`` row has its ``id`` as
    ``package_data_id``. Rows are removed in batches of at most 100000, each
    batch in its own transaction.
    """

    help = w(t("Delete rows from the data and package_data tables that relate to no collections"))

    def add_arguments(self, parser):
        """Register the ``-f``/``--force`` flag, which skips the confirmation prompt."""
        parser.add_argument("-f", "--force", action="store_true", help=_("delete the rows without prompting"))

    def handle(self, *args, **options):
        """Prompt for confirmation (unless forced), then delete orphaned rows in batches.

        Progress is reported on stderr: one ``.`` per deleted batch, then ``done``.
        """
        if not options["force"]:
            confirm = input("Orphaned rows will be deleted. Do you want to continue? [y/N] ")
            if confirm.lower() != "y":
                return

        self.stderr.write("Working... ", ending="")

        # Each entry pairs a SELECT that finds one batch of orphaned ids with
        # the DELETE that removes that batch.
        data = (
            """
            SELECT id FROM data WHERE
                NOT EXISTS (SELECT FROM record WHERE data_id = data.id)
                AND NOT EXISTS (SELECT FROM release WHERE data_id = data.id)
                AND NOT EXISTS (SELECT FROM compiled_release WHERE data_id = data.id)
            LIMIT 100000
            """,
            "DELETE FROM data WHERE id IN %(ids)s",
        )
        package_data = (
            """
            SELECT id FROM package_data WHERE
                NOT EXISTS (SELECT FROM record WHERE package_data_id = package_data.id)
                AND NOT EXISTS (SELECT FROM release WHERE package_data_id = package_data.id)
            LIMIT 100000
            """,
            "DELETE FROM package_data WHERE id IN %(ids)s",
        )

        with connection.cursor() as cursor:
            for select, delete in (data, package_data):
                # Repeat until the SELECT finds no more orphans for this table.
                while True:
                    with transaction.atomic():
                        cursor.execute(select)
                        ids = tuple(row[0] for row in cursor.fetchall())
                        if not ids:
                            break
                        # cursor.execute() accepts (sql, params); named
                        # placeholders such as %(ids)s need a mapping, not a
                        # keyword argument (which raises TypeError).
                        # NOTE(review): relies on the database driver adapting
                        # a Python tuple for IN (psycopg2 does) - confirm.
                        cursor.execute(delete, {"ids": ids})
                    self.stderr.write(".", ending="")

        self.stderr.write(self.style.SUCCESS("done"))

0 comments on commit 741ae5c

Please sign in to comment.