-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
import/export Taxonomy API functions #58
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -10,13 +10,28 @@ | |||||
Please look at the models.py file for more information about the kinds of data | ||||||
that are stored in this app. | ||||||
""" | ||||||
import csv | ||||||
import json | ||||||
from enum import Enum | ||||||
from io import StringIO, BytesIO, TextIOWrapper | ||||||
from typing import List, Type | ||||||
|
||||||
from django.db import transaction | ||||||
from django.db.models import QuerySet | ||||||
from django.core.exceptions import ObjectDoesNotExist | ||||||
from django.utils.translation import gettext_lazy as _ | ||||||
|
||||||
from .models import ObjectTag, Tag, Taxonomy | ||||||
|
||||||
# Column order shared by the CSV importer (header validation) and the CSV
# exporter (DictWriter field names). Kept as a list because the importer
# compares it directly against the list returned by csv.DictReader.fieldnames.
csv_fields = [
    'id',
    'name',
    'parent_id',
    'parent_name',
]
|
||||||
class TaxonomyDataFormat(Enum):
    """
    Serialization formats accepted when importing or exporting a Taxonomy.
    """
    # Each member's value is the same string as its name.
    CSV = 'CSV'
    JSON = 'JSON'
|
||||||
|
||||||
def create_taxonomy( | ||||||
name, | ||||||
|
@@ -29,6 +44,7 @@ def create_taxonomy( | |||||
""" | ||||||
Creates, saves, and returns a new Taxonomy with the given attributes. | ||||||
""" | ||||||
|
||||||
return Taxonomy.objects.create( | ||||||
name=name, | ||||||
description=description, | ||||||
|
@@ -105,5 +121,180 @@ def tag_object( | |||||
Raised ValueError if the proposed tags are invalid for this taxonomy. | ||||||
Preserves existing (valid) tags, adds new (valid) tags, and removes omitted (or invalid) tags. | ||||||
""" | ||||||
|
||||||
return taxonomy.tag_object(tags, object_id, object_type) | ||||||
|
||||||
|
||||||
def import_tags(taxonomy: Taxonomy, tags: BytesIO, format: TaxonomyDataFormat, replace=False):
    """
    Imports the hierarchical tags from the given blob into the Taxonomy.
    The blob can be CSV or JSON format.

    Each tag row must provide 'id' and 'name', and may provide 'parent_id' and
    'parent_name'. Existing tags are matched on `external_id` and updated;
    unknown ids are created. The result does not depend on the order of the
    rows in the blob.

    If replace, then removes any existing Tags linked to this taxonomy that
    are not present in the blob.

    The `tags` stream is always closed before this function returns.

    Raises ValueError if the taxonomy is free-form, the format is unknown, the
    blob is malformed, a tag is missing a required key, or the tag hierarchy
    contains a cycle.
    """
    if taxonomy.allow_free_text:
        raise ValueError(
            _("Invalid taxonomy (%(id)s): You cannot import into a free-form taxonomy.")
            % {'id': taxonomy.id}
        )

    try:
        tags_data = _parse_tags_blob(tags, format)
    finally:
        tags.close()

    rows_by_id = _index_tag_rows(tags_data)
    # Maps tag id -> saved Tag; a None value marks a tag that is still being
    # processed, which is how hierarchy cycles are detected.
    resolved = {}

    with transaction.atomic():
        for row in tags_data:
            _create_update_tag(taxonomy, row, resolved, rows_by_id)

        # If replace, delete all tags that were not present in the blob.
        if replace:
            taxonomy.tag_set.exclude(external_id__in=list(resolved)).delete()

    # Re-syncing may touch a large number of ObjectTags, so it runs after the
    # import transaction has been committed instead of keeping it open.
    resync_object_tags(ObjectTag.objects.filter(taxonomy=taxonomy))


def _parse_tags_blob(tags, format):
    """
    Decodes the blob into a list of tag rows (dicts).

    Raises ValueError if the format is unknown or the blob is malformed.
    """
    tags.seek(0)

    if format == TaxonomyDataFormat.CSV:
        # 'utf-8-sig' also decodes plain UTF-8, and strips the byte-order mark
        # some spreadsheet tools prepend (which would corrupt the first header).
        csv_reader = csv.DictReader(TextIOWrapper(tags, encoding='utf-8-sig'))
        header_fields = csv_reader.fieldnames
        if csv_fields != header_fields:
            raise ValueError(
                _("Invalid CSV header: %(header)s. Must be: %(expected)s.")
                % {'header': header_fields, 'expected': csv_fields}
            )
        return list(csv_reader)

    if format == TaxonomyDataFormat.JSON:
        payload = json.load(tags)
        rows = payload.get('tags') if isinstance(payload, dict) else None
        if not isinstance(rows, list):
            raise ValueError(_("Invalid JSON format: Missing 'tags' list."))
        return rows

    raise ValueError(_("Invalid format: %(format)s") % {'format': format})


def _index_tag_rows(tags_data):
    """
    Maps each tag id to the first row that declares it.

    Malformed rows are left out here; they are reported when they are processed.
    """
    rows_by_id = {}
    for row in tags_data:
        if isinstance(row, dict) and 'id' in row:
            rows_by_id.setdefault(row['id'], row)
    return rows_by_id


def _create_update_tag(taxonomy, tag, resolved, rows_by_id):
    """
    Creates a new Tag or updates an existing one, creating/updating its
    ancestors first.

    `resolved` is the history of this import: a tag is created/updated at most
    once, and later references to it reuse the saved instance.

    Returns the created/updated Tag.
    Raises ValueError if 'id' or 'name' don't exist on `tag`, or if the tag is
    its own ancestor.
    """
    try:
        tag_id = tag['id']
        tag_name = tag['name']
    except KeyError as e:
        raise ValueError(
            _("Invalid JSON format: Missing '%(key)s' on a tag (%(tag)s)")
            % {'key': e.args[0], 'tag': tag}
        ) from e
    tag_parent_id = tag.get('parent_id')
    tag_parent_name = tag.get('parent_name')
    has_parent = bool(tag_parent_id and tag_parent_name)

    if tag_id in resolved:
        tag_instance = resolved[tag_id]
        if tag_instance is None:
            raise ValueError(
                _("Invalid tag hierarchy: tag '%(id)s' is its own ancestor.")
                % {'id': tag_id}
            )
        return tag_instance

    # Mark as in progress before descending into the parent chain.
    resolved[tag_id] = None

    try:
        tag_instance = taxonomy.tag_set.get(external_id=tag_id)
    except Tag.DoesNotExist:
        tag_instance = Tag(
            taxonomy=taxonomy,
            value=tag_name,
            external_id=tag_id,
        )
    else:
        tag_instance.value = tag_name
        if not has_parent:
            # There is no parent in the imported data: detach the tag.
            tag_instance.parent = None

    if has_parent:
        # Prefer the parent's own row, which carries its own parent and
        # authoritative name; fall back to the id/name given by the child.
        parent_row = rows_by_id.get(tag_parent_id)
        if parent_row is None:
            parent_row = {'id': tag_parent_id, 'name': tag_parent_name}
        tag_instance.parent = _create_update_tag(taxonomy, parent_row, resolved, rows_by_id)

    tag_instance.save()
    resolved[tag_id] = tag_instance
    return tag_instance
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This might be a large operation, so I don't think it should sit under the same atomic operation. Can you bump it out a level?
Suggested change
|
||||||
|
||||||
def export_tags(taxonomy: Taxonomy, format: TaxonomyDataFormat) -> str:
    """
    Creates a blob string describing all the tags in the given Taxonomy.
    The output format can be CSV or JSON.

    Each tag is exported with its 'id' (the external id, or the database id
    when no external id is set) and 'name'; tags that have a parent also carry
    'parent_id' and 'parent_name'. The JSON output additionally includes the
    taxonomy 'name' and 'description'.

    Raises ValueError if the taxonomy is free-form or the format is unknown.
    """
    # Validations are made up front, before fetching and assembling the tags.
    if taxonomy.allow_free_text:
        raise ValueError(
            _("Invalid taxonomy (%(id)s): You cannot export a free-form taxonomy.")
            % {'id': taxonomy.id}
        )
    if not isinstance(format, TaxonomyDataFormat):
        raise ValueError(_("Invalid format: %(format)s") % {'format': format})

    # Build tags in a dictionary format
    result = []
    for tag in get_tags(taxonomy):
        result_tag = {
            'id': tag.external_id or tag.id,
            'name': tag.value,
        }
        parent = tag.parent
        if parent:
            result_tag['parent_id'] = parent.external_id or parent.id
            result_tag['parent_name'] = parent.value
        result.append(result_tag)

    # Convert the dictionaries into the output format
    if format == TaxonomyDataFormat.CSV:
        with StringIO() as csv_buffer:
            csv_writer = csv.DictWriter(csv_buffer, fieldnames=csv_fields)
            csv_writer.writeheader()
            # Rows without parent keys get empty parent columns (DictWriter restval).
            csv_writer.writerows(result)
            return csv_buffer.getvalue()

    # TaxonomyDataFormat.JSON: the only other member accepted by the validation above.
    json_result = {
        'name': taxonomy.name,
        'description': taxonomy.description,
        'tags': result,
    }
    return json.dumps(json_result)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: would like to make it clear that this is a constant, and it's not part of the externally-exportable python api: