Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix filtering repeat groups in export #3575

Merged
merged 4 commits into from Dec 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 29 additions & 7 deletions kpi/models/import_export_task.py
Expand Up @@ -6,7 +6,8 @@
import tempfile
from collections import defaultdict
from io import BytesIO
from os.path import splitext
from os.path import splitext, split
from typing import List
from urllib.parse import urlparse

import dateutil.parser
Expand Down Expand Up @@ -626,12 +627,9 @@ def _run_task(self, messages):
# Take this opportunity to do some housekeeping
self.log_and_mark_stuck_as_errored(self.user, source_url)

# Include the group name in `fields` for Mongo to correctly filter
# for repeat groups
if fields:
field_groups = set(f.split('/')[0] for f in fields if '/' in f)
fields += list(field_groups)

# Include the group name in `fields` for Mongo to correctly filter for
# repeat groups
fields = _get_fields_and_groups(fields)
submission_stream = source.deployment.get_submissions(
user=self.user,
fields=fields,
Expand Down Expand Up @@ -794,3 +792,27 @@ def _strip_header_keys(survey_dict):
if re.search(r'_header$', sheet_name):
del survey_dict[sheet_name]
return survey_dict


def _get_fields_and_groups(fields: List[str]) -> List[str]:
"""
Ensure repeat groups are included when filtering for specific fields by
appending the path items. For example, a field with path of
`group1/group2/field` will be added to the list as:
['group1/group2/field', 'group1/group2', 'group1']
"""
if not fields:
return []
field_groups = set()
for field in fields:
if '/' not in field:
continue
items = []
while field:
_path = split(field)[0]
if _path:
items.append(_path)
field = _path
field_groups.update(items)
fields += list(field_groups)
return fields
33 changes: 18 additions & 15 deletions kpi/tests/test_mock_data_exports.py
Expand Up @@ -247,28 +247,25 @@ class MockDataExportsBase(TestCase):
'schema': '1',
'survey': [
{
'name': 'person',
'type': 'begin_group',
'name': 'people',
'label': ['People'],
},
{
joshuaberetta marked this conversation as resolved.
Show resolved Hide resolved
'type': 'begin_repeat',
'$kuid': 'yl4hr30',
'name': 'person',
'label': ['person'],
'required': False,
'$autoname': 'person',
},
{
'type': 'text',
'$kuid': 'ij1cs76',
'label': ['name'],
'required': False,
'$autoname': 'name',
},
{
'type': 'integer',
'$kuid': 'xj9fr84',
'label': ['age'],
'required': False,
'$autoname': 'age',
},
{'type': 'end_repeat', '$kuid': '/yl4hr30'},
{'type': 'end_repeat'},
{'type': 'end_group'},
],
'settings': {},
'translated': ['label'],
Expand All @@ -278,9 +275,15 @@ class MockDataExportsBase(TestCase):
{
'_id': 9999,
'formhub/uuid': 'cfb562511e8e44d1998de69002b492d9',
'person': [
{'person/name': 'Julius Caesar', 'person/age': '55'},
{'person/name': 'Augustus', 'person/age': '75'},
'people/person': [
{
'people/person/name': 'Julius Caesar',
'people/person/age': '55',
},
{
'people/person/name': 'Augustus',
'people/person/age': '75',
},
],
'__version__': 'vbKavWWCpgBCZms6hQX4FN',
'meta/instanceID': 'uuid:f80be949-89b5-4af1-a29d-7d292b2bc0cd',
Expand Down Expand Up @@ -609,7 +612,7 @@ def test_xls_export_filter_fields_repeat_groups(self):
'fields': [
'_uuid',
'_submission_time',
'person/name',
'people/person/name',
'_index'
]
}
Expand Down