Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle submission XML with namespace or encoding declaration #4832

Merged
merged 5 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
31 changes: 17 additions & 14 deletions kpi/deployment_backends/base_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from django.utils import timezone
from django.utils.translation import gettext_lazy as t
from django.core.exceptions import PermissionDenied
from lxml import etree
from rest_framework import serializers
from rest_framework.pagination import _positive_int as positive_int
from shortuuid import ShortUUID
Expand All @@ -29,7 +28,12 @@
from kpi.models.asset_file import AssetFile
from kpi.models.paired_data import PairedData
from kpi.utils.django_orm_helper import UpdateJSONFieldAttributes
from kpi.utils.xml import edit_submission_xml
from kpi.utils.xml import (
edit_submission_xml,
fromstring_preserve_root_xmlns,
get_or_create_element,
xml_tostring,
)


class BaseDeploymentBackend(abc.ABC):
Expand All @@ -44,9 +48,9 @@ class BaseDeploymentBackend(abc.ABC):
]

# XPaths are relative to the root node
SUBMISSION_CURRENT_UUID_XPATH = './meta/instanceID'
SUBMISSION_DEPRECATED_UUID_XPATH = './meta/deprecatedID'
FORM_UUID_XPATH = './formhub/uuid'
SUBMISSION_CURRENT_UUID_XPATH = 'meta/instanceID'
SUBMISSION_DEPRECATED_UUID_XPATH = 'meta/deprecatedID'
FORM_UUID_XPATH = 'formhub/uuid'

def __init__(self, asset):
self.asset = asset
Expand Down Expand Up @@ -136,23 +140,22 @@ def bulk_update_submissions(

kc_responses = []
for submission in submissions:
xml_parsed = etree.fromstring(submission)
xml_parsed = fromstring_preserve_root_xmlns(submission)

_uuid, uuid_formatted = self.generate_new_instance_id()

# Update the XML fields for the submission. To edit an existing
# submission, its current `instanceID` value must be moved into
# `deprecatedID` before a new `instanceID` is assigned.
instance_id = xml_parsed.find('meta/instanceID')
instance_id = get_or_create_element(
xml_parsed, self.SUBMISSION_CURRENT_UUID_XPATH
)
# If the submission has been edited before, it will already contain
# a deprecatedID element - otherwise create a new element
deprecated_id = xml_parsed.find('meta/deprecatedID')
deprecated_id_or_new = (
deprecated_id
if deprecated_id is not None
else etree.SubElement(xml_parsed.find('meta'), 'deprecatedID')
deprecated_id = get_or_create_element(
xml_parsed, self.SUBMISSION_DEPRECATED_UUID_XPATH
)
deprecated_id_or_new.text = instance_id.text
deprecated_id.text = instance_id.text
instance_id.text = uuid_formatted

# If the form has been updated with new fields and earlier
Expand All @@ -164,7 +167,7 @@ def bulk_update_submissions(
edit_submission_xml(xml_parsed, path, value)

kc_response = self.store_submission(
user, etree.tostring(xml_parsed), _uuid
user, xml_tostring(xml_parsed), _uuid
)
kc_responses.append(
{
Expand Down
50 changes: 25 additions & 25 deletions kpi/deployment_backends/kobocat_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
from datetime import date, datetime
from typing import Generator, Optional, Union
from urllib.parse import urlparse
from xml.etree import ElementTree as ET
try:
from zoneinfo import ZoneInfo
except ImportError:
from backports.zoneinfo import ZoneInfo

import requests
from defusedxml import ElementTree as DET
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured, SuspiciousFileOperation
from django.core.files import File
Expand All @@ -29,7 +29,6 @@
from django.utils.translation import gettext_lazy as t
from django_redis import get_redis_connection
from kobo_service_account.utils import get_request_headers
from lxml import etree
from rest_framework import status
from rest_framework.reverse import reverse

Expand Down Expand Up @@ -59,6 +58,7 @@
from kpi.utils.mongo_helper import MongoHelper
from kpi.utils.object_permission import get_database_user
from kpi.utils.permissions import is_user_anonymous
from kpi.utils.xml import fromstring_preserve_root_xmlns, xml_tostring
from .base_backend import BaseDeploymentBackend
from .kc_access.shadow_models import (
KobocatXForm,
Expand Down Expand Up @@ -384,7 +384,7 @@ def duplicate_submission(
)

# parse XML string to ET object
xml_parsed = ET.fromstring(submission)
xml_parsed = fromstring_preserve_root_xmlns(submission)

# attempt to update XML fields for duplicate submission. Note that
# `start` and `end` are not guaranteed to be included in the XML object
Expand All @@ -399,10 +399,12 @@ def duplicate_submission(
# Rely on `meta/instanceID` being present. If it's absent, something is
# fishy enough to warrant raising an exception instead of continuing
# silently
xml_parsed.find('meta/instanceID').text = uuid_formatted
xml_parsed.find(self.SUBMISSION_CURRENT_UUID_XPATH).text = (
uuid_formatted
)

kc_response = self.store_submission(
user, ET.tostring(xml_parsed), _uuid, attachments
user, xml_tostring(xml_parsed), _uuid, attachments
)
if kc_response.status_code == status.HTTP_201_CREATED:
return next(self.get_submissions(user, query={'_uuid': _uuid}))
Expand All @@ -420,8 +422,8 @@ def edit_submission(
"""
submission_xml = xml_submission_file.read()
try:
xml_root = ET.fromstring(submission_xml)
except ET.ParseError:
xml_root = fromstring_preserve_root_xmlns(submission_xml)
except DET.ParseError:
raise SubmissionIntegrityError(
t('Your submission XML is malformed.')
)
Expand Down Expand Up @@ -551,18 +553,11 @@ def get_attachment(
raise SubmissionNotFoundException

if xpath:
submission_tree = ET.ElementTree(ET.fromstring(submission_xml))

try:
element = submission_tree.find(xpath)
except KeyError:
raise InvalidXPathException

try:
attachment_filename = element.text
except AttributeError:
submission_root = fromstring_preserve_root_xmlns(submission_xml)
element = submission_root.find(xpath)
if element is None:
raise XPathNotFoundException

attachment_filename = element.text
filters = {
'media_file_basename': attachment_filename,
}
Expand Down Expand Up @@ -1125,7 +1120,7 @@ def set_validation_statuses(self, user: 'auth.User', data: dict) -> dict:
def store_submission(
self, user, xml_submission, submission_uuid, attachments=None
):
file_tuple = (submission_uuid, io.BytesIO(xml_submission))
file_tuple = (submission_uuid, io.StringIO(xml_submission))
files = {'xml_submission_file': file_tuple}
if attachments:
files.update(attachments)
Expand Down Expand Up @@ -1557,14 +1552,17 @@ def prepare_bulk_update_response(kc_responses: list) -> dict:
# so it needs to be parsed before extracting the text
results = []
for response in kc_responses:
message = t('Something went wrong')
try:
message = (
ET.fromstring(response['response'].content)
.find(OPEN_ROSA_XML_MESSAGE)
.text
xml_parsed = fromstring_preserve_root_xmlns(
response['response'].content
)
except ET.ParseError:
message = t('Something went wrong')
except DET.ParseError:
pass
else:
message_el = xml_parsed.find(OPEN_ROSA_XML_MESSAGE)
if message_el is not None and message_el.text.strip():
message = message_el.text

results.append(
{
Expand All @@ -1582,6 +1580,8 @@ def prepare_bulk_update_response(kc_responses: list) -> dict:
return {
'status': status.HTTP_200_OK
if total_successes > 0
# FIXME: If KoboCAT returns something unexpected, like a 404 or a
# 500, then 400 is not the right response to send to the client
else status.HTTP_400_BAD_REQUEST,
'data': {
'count': total_update_attempts,
Expand Down
38 changes: 19 additions & 19 deletions kpi/deployment_backends/mock_backend.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
# coding: utf-8
from __future__ import annotations

import copy
import os
import time
import uuid
from collections import defaultdict
from datetime import date, datetime
from typing import Optional, Union
from xml.etree import ElementTree as ET

from django.db.models import Sum
from django.db.models.functions import Coalesce
from lxml import etree

try:
from zoneinfo import ZoneInfo
except ImportError:
Expand All @@ -21,8 +16,9 @@
from deepmerge import always_merger
from dict2xml import dict2xml as dict2xml_real
from django.conf import settings
from django.db.models import Sum
from django.db.models.functions import Coalesce
from django.urls import reverse
from lxml import etree
from rest_framework import status

from kobo.apps.trackers.models import NLPUsageCounter
Expand All @@ -43,6 +39,7 @@
from kpi.models.asset_file import AssetFile
from kpi.tests.utils.mock import MockAttachment
from kpi.utils.mongo_helper import MongoHelper, drop_mock_only
from kpi.utils.xml import fromstring_preserve_root_xmlns
from .base_backend import BaseDeploymentBackend


Expand Down Expand Up @@ -233,14 +230,18 @@ def duplicate_submission(
sub['_id']
for sub in self.get_submissions(self.asset.owner, fields=['_id'])
)) + 1
duplicated_submission.update({
'_id': next_id,
'start': updated_time,
'end': updated_time,
'meta/instanceID': f'uuid:{uuid.uuid4()}',
'meta/deprecatedID': submission['meta/instanceID'],
'_attachments': dup_att,
})
duplicated_submission.update(
{
'_id': next_id,
'start': updated_time,
'end': updated_time,
self.SUBMISSION_CURRENT_UUID_XPATH: f'uuid:{uuid.uuid4()}',
self.SUBMISSION_DEPRECATED_UUID_XPATH: submission[
self.SUBMISSION_CURRENT_UUID_XPATH
],
'_attachments': dup_att,
}
)

self.asset.deployment.mock_submissions([duplicated_submission])
return duplicated_submission
Expand Down Expand Up @@ -283,11 +284,9 @@ def get_attachment(
)

if xpath:
submission_tree = ET.ElementTree(
ET.fromstring(submission_xml)
)
submission_root = fromstring_preserve_root_xmlns(submission_xml)
try:
element = submission_tree.find(xpath)
element = submission_root.find(xpath)
except KeyError:
raise InvalidXPathException

Expand Down Expand Up @@ -589,6 +588,7 @@ def store_submission(
"""
Return a mock response without actually storing anything
"""

return {
'uuid': submission_uuid,
'status_code': status.HTTP_201_CREATED,
Expand Down