Skip to content

Commit

Permalink
Repair damage from kobotoolbox/formpack#322
Browse files Browse the repository at this point in the history
  • Loading branch information
jnm committed Jun 17, 2024
1 parent 2d03413 commit a4dfe91
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 1 deletion.
5 changes: 5 additions & 0 deletions kobo/apps/reports/report_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from rest_framework import serializers
from formpack import FormPack

from kpi.utils.bugfix import repair_file_column_content_and_save
from kpi.utils.log import logging
from .constants import (
FUZZY_VERSION_ID_KEY,
Expand All @@ -21,6 +22,10 @@ def build_formpack(asset, submission_stream=None, use_all_form_versions=True):
are assumed to have been collected with that version of the form.
"""

# Cope with kobotoolbox/formpack#322, which wrote invalid content into the
# database
repair_file_column_content_and_save(asset)

if asset.has_deployment:
if use_all_form_versions:
_versions = asset.deployed_versions
Expand Down
2 changes: 2 additions & 0 deletions kpi/models/asset_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class AssetVersion(models.Model):
)
version_content = models.JSONField()
uid_aliases = models.JSONField(null=True)
# Tee hee, `deployed_content` is never written to the database!
# TODO: It should be changed to a property instead, no?
deployed_content = models.JSONField(null=True)
_deployment_data = models.JSONField(default=dict)
deployed = models.BooleanField(default=False)
Expand Down
1 change: 1 addition & 0 deletions kpi/serializers/v2/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,7 @@ def validate_settings(self, settings: dict) -> dict:
return {**self.instance.settings, **settings}

def _content(self, obj):
# FIXME: Is this dead code?
return json.dumps(obj.content)

def _get_status(self, perm_assignments):
Expand Down
67 changes: 67 additions & 0 deletions kpi/utils/bugfix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# coding: utf-8

import datetime

from django.db import transaction
from formpack.utils.bugfix import repair_file_column_content_in_place

from kpi.models import Asset, AssetVersion


def repair_file_column_content_and_save(asset, include_versions=True) -> bool:
"""
Given an `Asset`, repair any damage caused to its `content` and the
`version_content` of all related `AssetVersion`s by
kobotoolbox/formpack#322, which wrongly transformed the `file` column into
`media::file` and included it in the list of translated columns.
Writes only to the `content` (or `version_content`) field for each modified
model instance.
Returns `True` if any change was made
"""

# When was this bug introduced? See formpack commit
# 443a8e940756976a9f88fb577dbbc53510726536
BAD_TIME = datetime.datetime(
2024, 4, 30, 15, 27, 2, tzinfo=datetime.timezone.utc
)

any_change = False
with transaction.atomic():
# Lock and refetch the latest content to guard against clobbering
# concurrent updates
content = (
Asset.objects.filter(pk=asset.pk, date_modified__gte=BAD_TIME)
.select_for_update()
.values('content')
.first()
)
if not content:
# This asset was not modified recently enough to be affected by
# this bug
return False

if repair_file_column_content_in_place(content):
Asset.objects.filter(pk=asset.pk).update(content=content)
any_change = True
# Also update the in-memory asset instance passed to this function
asset.content = content

if not include_versions:
return any_change

# Previous versions of the content may need repair, regardless of
# whether or not the current content did
for pk, version_content in (
asset.asset_versions.filter(date_modified__gte=BAD_TIME)
.select_for_update()
.values_list('pk', 'version_content')
):
if repair_file_column_content_in_place(version_content):
AssetVersion.objects.filter(pk=pk).update(
version_content=version_content
)
any_change = True

return any_change
13 changes: 12 additions & 1 deletion kpi/views/v2/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@
AssetListSerializer,
AssetSerializer,
)
from kpi.utils.hash import calculate_hash
from kpi.serializers.v2.reports import ReportsDetailSerializer
from kpi.utils.bugfix import repair_file_column_content_and_save
from kpi.utils.hash import calculate_hash
from kpi.utils.kobo_to_xlsform import to_xlsform_structure
from kpi.utils.ss_structure_to_mdtable import ss_structure_to_mdtable
from kpi.utils.object_permission import (
Expand Down Expand Up @@ -403,6 +404,16 @@ def get_object(self):
raise Http404

self.check_object_permissions(self.request, asset)

# Cope with kobotoolbox/formpack#322, which wrote invalid content
# into the database. For performance, consider only the current
# content, not previous versions. Previous versions are handled in
# `kobo.apps.reports.report_data.build_formpack()`
if self.request.method == 'GET':
repair_file_column_content_and_save(
asset, include_versions=False
)

return asset

return super().get_object()
Expand Down

0 comments on commit a4dfe91

Please sign in to comment.