Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

import/export Taxonomy API functions #58

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 192 additions & 1 deletion openedx_tagging/core/tagging/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,28 @@
Please look at the models.py file for more information about the kinds of data
that are stored in this app.
"""
import csv
import json
from enum import Enum
from io import StringIO, BytesIO, TextIOWrapper
from typing import List, Type

from django.db import transaction
from django.db.models import QuerySet
from django.core.exceptions import ObjectDoesNotExist
from django.utils.translation import gettext_lazy as _

from .models import ObjectTag, Tag, Taxonomy

# Column names, in order: import_tags requires the CSV header to equal this list exactly,
# and export_tags uses it as the field names of the CSV it writes.
csv_fields = ['id', 'name', 'parent_id', 'parent_name']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: would like to make it clear that this is a constant, and it's not part of the externally-exportable python api:

Suggested change
csv_fields = ['id', 'name', 'parent_id', 'parent_name']
_CSV_FIELDS = ['id', 'name', 'parent_id', 'parent_name']


class TaxonomyDataFormat(Enum):
    """
    Serialization formats supported when importing or exporting a Taxonomy.
    """

    # Comma-separated rows.
    CSV = 'CSV'
    # A single JSON document.
    JSON = 'JSON'


def create_taxonomy(
name,
Expand All @@ -29,6 +44,7 @@ def create_taxonomy(
"""
Creates, saves, and returns a new Taxonomy with the given attributes.
"""

return Taxonomy.objects.create(
name=name,
description=description,
Expand Down Expand Up @@ -105,5 +121,180 @@ def tag_object(
Raised ValueError if the proposed tags are invalid for this taxonomy.
Preserves existing (valid) tags, adds new (valid) tags, and removes omitted (or invalid) tags.
"""

return taxonomy.tag_object(tags, object_id, object_type)


def import_tags(taxonomy: Taxonomy, tags: BytesIO, format: TaxonomyDataFormat, replace=False):
    """
    Imports the hierarchical tags from the given blob into the Taxonomy.

    The blob can be CSV or JSON format:

    * CSV: the header row must be exactly `csv_fields`.
    * JSON: an object holding a 'tags' list; every tag needs 'id' and 'name'
      and may carry 'parent_id' and 'parent_name'.

    Tags are matched against the taxonomy's existing tags by `external_id`:
    a match is updated, anything else is created.

    If replace, then every Tag of this taxonomy that is not present in the blob
    is deleted once the import has been applied.

    The `tags` stream is always closed, whether or not the import succeeds.

    Raises ValueError if the taxonomy is free-text, the format is unknown, or
    the blob is malformed.
    """
    # Validations
    if taxonomy.allow_free_text:
        # Interpolate after the catalog lookup: an f-string would be formatted
        # first and the resulting text could never match a translation entry.
        raise ValueError(
            _(
                "Invalid taxonomy ({id}): You cannot import into a free-form taxonomy."
            ).format(id=taxonomy.id)
        )

    tags_data = _read_tags_data(tags, format)

    # external_id -> Tag for everything created/updated during this import
    updated_tags = {}

    # Create and update tags
    with transaction.atomic():
        for tag in tags_data:
            try:
                _create_update_tag(taxonomy, tag, updated_tags)
            except KeyError as e:
                raise ValueError(
                    _(
                        "Invalid JSON format: Missing '{key}' on a tag ({tag})"
                    ).format(key=e.args[0], tag=tag)
                ) from e

        # If replace, delete all not updated tags (Not present in the file)
        if replace:
            taxonomy.tag_set.exclude(external_id__in=list(updated_tags)).delete()

    # Resyncing may touch many rows, so it runs after the tag changes are
    # committed instead of holding the import transaction open.
    resync_object_tags(ObjectTag.objects.filter(taxonomy=taxonomy))


def _read_tags_data(tags: BytesIO, data_format: TaxonomyDataFormat) -> list:
    """
    Parses the uploaded blob into a list of tag dictionaries.

    Closes `tags` before returning or raising.
    Raises ValueError on an unknown format or a malformed blob (decoding and
    JSON syntax errors are ValueError subclasses, so they surface the same way).
    """
    try:
        tags.seek(0)
        if data_format == TaxonomyDataFormat.CSV:
            # 'utf-8-sig' also accepts plain UTF-8 and strips a leading BOM,
            # which would otherwise corrupt the first header name.
            # newline='' is what the csv module requires so that quoted fields
            # containing line breaks are parsed correctly.
            text_tags = TextIOWrapper(tags, encoding='utf-8-sig', newline='')
            csv_reader = csv.DictReader(text_tags)
            header_fields = csv_reader.fieldnames
            if csv_fields != header_fields:
                raise ValueError(
                    _(
                        "Invalid CSV header: {header}. Must be: {expected}."
                    ).format(header=header_fields, expected=csv_fields)
                )
            return list(csv_reader)

        if data_format == TaxonomyDataFormat.JSON:
            payload = json.load(tags)
            # Reject anything that is not {"tags": [...]} up front, so a JSON
            # list or scalar fails with ValueError like other bad input.
            if not isinstance(payload, dict) or not isinstance(payload.get('tags'), list):
                raise ValueError(
                    _(
                        "Invalid JSON format: Missing 'tags' list."
                    )
                )
            return payload['tags']

        raise ValueError(
            _(
                "Invalid format: {format}"
            ).format(format=data_format)
        )
    finally:
        tags.close()


def _create_update_tag(taxonomy: Taxonomy, tag: dict, updated_tags: dict) -> Tag:
    """
    Creates a new Tag or updates the existing one with the same `external_id`.

    `updated_tags` is the import history (external id -> Tag): a tag is only
    created/updated once per import, and later occurrences of the same id
    return the Tag recorded the first time.
    When both 'parent_id' and 'parent_name' are given, the parent is
    created/updated first through a recursive call.

    Returns the created/updated Tag.
    Raises KeyError if 'id' or 'name' don't exist on `tag`.
    Raises ValueError if the tag names itself as its parent.
    """
    tag_id = tag['id']
    tag_name = tag['name']
    tag_parent_id = tag.get('parent_id')
    tag_parent_name = tag.get('parent_name')

    # Already created or updated during this import
    if tag_id in updated_tags:
        return updated_tags[tag_id]

    has_parent = bool(tag_parent_id and tag_parent_name)
    if has_parent and tag_parent_id == tag_id:
        raise ValueError(
            _(
                "Invalid tag ({id}): A tag cannot be its own parent."
            ).format(id=tag_id)
        )

    try:
        tag_instance = taxonomy.tag_set.get(external_id=tag_id)
    except Tag.DoesNotExist:
        # Create tag
        tag_instance = Tag(
            taxonomy=taxonomy,
            value=tag_name,
            external_id=tag_id,
        )
    else:
        # Update tag
        tag_instance.value = tag_name
        if not has_parent:
            # There is no parent in the data import
            tag_instance.parent = None

    # Record the tag before handling its parent so the history is complete
    # by the time the recursive call runs.
    updated_tags[tag_id] = tag_instance

    if has_parent:
        # NOTE(review): the parent is handled from a stub without its own
        # parent info. If a child row precedes its parent's row, an existing
        # parent is detached from its own parent here and the later, complete
        # row is skipped as already processed. Input listing parents before
        # children (the order a tree export would be expected to give) is not
        # affected.
        tag_instance.parent = _create_update_tag(
            taxonomy,
            {'id': tag_parent_id, 'name': tag_parent_name},
            updated_tags,
        )

    tag_instance.save()
    return tag_instance
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be a large operation.. so I don't think it should sit under the same atomic operation. Can you bump it out a level?

Suggested change
resync_object_tags(ObjectTag.objects.filter(taxonomy=taxonomy))
resync_object_tags(ObjectTag.objects.filter(taxonomy=taxonomy))


def export_tags(taxonomy: Taxonomy, format: TaxonomyDataFormat) -> str:
    """
    Creates a blob string describing all the tags in the given Taxonomy.
    The output format can be CSV or JSON.

    Every tag is exported with 'id' (its external_id, or its database id when
    no external_id is set) and 'name'; tags with a parent also carry
    'parent_id' and 'parent_name'. The JSON output additionally includes the
    taxonomy's name and description.

    Raises ValueError if the taxonomy is free-text or `format` is not a
    TaxonomyDataFormat member.
    """
    # Validations
    if taxonomy.allow_free_text:
        # Interpolate after the catalog lookup so the message stays translatable.
        raise ValueError(
            _(
                "Invalid taxonomy ({id}): You cannot export a free-form taxonomy."
            ).format(id=taxonomy.id)
        )
    if not isinstance(format, TaxonomyDataFormat):
        raise ValueError(
            _(
                "Invalid format: {format}"
            ).format(format=format)
        )

    # Build tags in a dictionary format
    # NOTE(review): `tag.parent` may cost one query per tag unless get_tags
    # already loads parents - confirm against get_tags.
    result = []
    for tag in get_tags(taxonomy):
        result_tag = {
            'id': tag.external_id or tag.id,
            'name': tag.value,
        }
        parent = tag.parent
        if parent:
            result_tag['parent_id'] = parent.external_id or parent.id
            result_tag['parent_name'] = parent.value
        result.append(result_tag)

    # Convert dictionary into the output format
    if format == TaxonomyDataFormat.CSV:
        with StringIO() as csv_buffer:
            csv_writer = csv.DictWriter(csv_buffer, fieldnames=csv_fields)
            csv_writer.writeheader()
            # Root tags have no parent keys; DictWriter fills those columns
            # with its default empty value.
            csv_writer.writerows(result)
            return csv_buffer.getvalue()

    # TaxonomyDataFormat.JSON
    # The format was verified at the beginning, before assembling the tags data.
    json_result = {
        'name': taxonomy.name,
        'description': taxonomy.description,
        'tags': result,
    }
    return json.dumps(json_result)
44 changes: 22 additions & 22 deletions tests/openedx_tagging/core/fixtures/tagging.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,152 +4,152 @@
taxonomy: 1
parent: null
value: Bacteria
external_id: null
external_id: tag_1
- model: oel_tagging.tag
pk: 2
fields:
taxonomy: 1
parent: null
value: Archaea
external_id: null
external_id: tag_2
- model: oel_tagging.tag
pk: 3
fields:
taxonomy: 1
parent: null
value: Eukaryota
external_id: null
external_id: tag_3
- model: oel_tagging.tag
pk: 4
fields:
taxonomy: 1
parent: 1
value: Eubacteria
external_id: null
external_id: tag_4
- model: oel_tagging.tag
pk: 5
fields:
taxonomy: 1
parent: 1
value: Archaebacteria
external_id: null
external_id: tag_5
- model: oel_tagging.tag
pk: 6
fields:
taxonomy: 1
parent: 2
value: DPANN
external_id: null
external_id: tag_6
- model: oel_tagging.tag
pk: 7
fields:
taxonomy: 1
parent: 2
value: Euryarchaeida
external_id: null
external_id: tag_7
- model: oel_tagging.tag
pk: 8
fields:
taxonomy: 1
parent: 2
value: Proteoarchaeota
external_id: null
external_id: tag_8
- model: oel_tagging.tag
pk: 9
fields:
taxonomy: 1
parent: 3
value: Animalia
external_id: null
external_id: tag_9
- model: oel_tagging.tag
pk: 10
fields:
taxonomy: 1
parent: 3
value: Plantae
external_id: null
external_id: tag_10
- model: oel_tagging.tag
pk: 11
fields:
taxonomy: 1
parent: 3
value: Fungi
external_id: null
external_id: tag_11
- model: oel_tagging.tag
pk: 12
fields:
taxonomy: 1
parent: 3
value: Protista
external_id: null
external_id: tag_12
- model: oel_tagging.tag
pk: 13
fields:
taxonomy: 1
parent: 3
value: Monera
external_id: null
external_id: tag_13
- model: oel_tagging.tag
pk: 14
fields:
taxonomy: 1
parent: 9
value: Arthropoda
external_id: null
external_id: tag_14
- model: oel_tagging.tag
pk: 15
fields:
taxonomy: 1
parent: 9
value: Chordata
external_id: null
external_id: tag_15
- model: oel_tagging.tag
pk: 16
fields:
taxonomy: 1
parent: 9
value: Gastrotrich
external_id: null
external_id: tag_16
- model: oel_tagging.tag
pk: 17
fields:
taxonomy: 1
parent: 9
value: Cnidaria
external_id: null
external_id: tag_17
- model: oel_tagging.tag
pk: 18
fields:
taxonomy: 1
parent: 9
value: Ctenophora
external_id: null
external_id: tag_18
- model: oel_tagging.tag
pk: 19
fields:
taxonomy: 1
parent: 9
value: Placozoa
external_id: null
external_id: tag_19
- model: oel_tagging.tag
pk: 20
fields:
taxonomy: 1
parent: 9
value: Porifera
external_id: null
external_id: tag_20
- model: oel_tagging.tag
pk: 21
fields:
taxonomy: 1
parent: 15
value: Mammalia
external_id: null
external_id: tag_21
- model: oel_tagging.taxonomy
pk: 1
fields:
name: Life on Earth
description: null
description: This taxonomy contains the Kingdoms of the Earth
enabled: true
required: false
allow_multiple: false
Expand Down
Loading