feat: handle tags when importing/exporting courses (openedx#34356)

raccoongang · Apr 24, 2024 · 7426ab6 · 7426ab6
1 parent 4ba50ae
commit 7426ab6
Show file tree

Hide file tree

Showing 18 changed files with 581 additions and 283 deletions.
diff --git a/cms/djangoapps/contentstore/management/commands/tests/test_export_olx.py b/cms/djangoapps/contentstore/management/commands/tests/test_export_olx.py
@@ -17,6 +17,8 @@
 from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
 from xmodule.modulestore.tests.factories import CourseFactory
 
+from openedx.core.djangoapps.content_tagging.tests.test_objecttag_export_helpers import TaggedCourseMixin
+
 
 class TestArgParsingCourseExportOlx(unittest.TestCase):
     """
@@ -31,7 +33,7 @@ def test_no_args(self):
             call_command('export_olx')
 
 
-class TestCourseExportOlx(ModuleStoreTestCase):
+class TestCourseExportOlx(TaggedCourseMixin, ModuleStoreTestCase):
     """
     Test exporting OLX content from a course or library.
     """
@@ -61,7 +63,7 @@ def create_dummy_course(self, store_type):
         )
         return course.id
 
-    def check_export_file(self, tar_file, course_key):
+    def check_export_file(self, tar_file, course_key, with_tags=False):
         """Check content of export file."""
         names = tar_file.getnames()
         dirname = "{0.org}-{0.course}-{0.run}".format(course_key)
@@ -71,6 +73,10 @@ def check_export_file(self, tar_file, course_key):
         self.assertIn(f"{dirname}/about/overview.html", names)
         self.assertIn(f"{dirname}/assets/assets.xml", names)
         self.assertIn(f"{dirname}/policies", names)
+        if with_tags:
+            self.assertIn(f"{dirname}/tags.csv", names)
+        else:
+            self.assertNotIn(f"{dirname}/tags.csv", names)
 
     def test_export_course(self):
         test_course_key = self.create_dummy_course(ModuleStoreEnum.Type.split)
@@ -98,3 +104,11 @@ def __init__(self, bytes_io):
         output = output_wrapper.bytes_io.read()
         with tarfile.open(fileobj=BytesIO(output), mode="r:gz") as tar_file:
             self.check_export_file(tar_file, test_course_key)
+
+    def test_export_course_with_tags(self):
+        tmp_dir = path(mkdtemp())
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        filename = tmp_dir / 'test.tar.gz'
+        call_command('export_olx', '--output', filename, str(self.course.id))
+        with tarfile.open(filename) as tar_file:
+            self.check_export_file(tar_file, self.course.id, with_tags=True)
diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py
@@ -170,10 +170,10 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
     }
     for obj_tag in all_tags:
         # Add the taxonomy name:
-        if obj_tag.name not in result[Fields.tags_taxonomy]:
-            result[Fields.tags_taxonomy].append(obj_tag.name)
-        # Taxonomy name plus each level of tags, in a list:
-        parts = [obj_tag.name] + obj_tag.get_lineage()  # e.g. ["Location", "North America", "Canada", "Vancouver"]
+        if obj_tag.taxonomy.name not in result[Fields.tags_taxonomy]:
+            result[Fields.tags_taxonomy].append(obj_tag.taxonomy.name)
+        # Taxonomy name plus each level of tags, in a list: # e.g. ["Location", "North America", "Canada", "Vancouver"]
+        parts = [obj_tag.taxonomy.name] + obj_tag.get_lineage()
         parts = [part.replace(" > ", " _ ") for part in parts]  # Escape our separator.
         # Now we build each level (tags.level0, tags.level1, etc.) as applicable.
         # We have a hard-coded limit of 4 levels of tags for now (see Fields.tags above).

diff --git a/openedx/core/djangoapps/content_tagging/api.py b/openedx/core/djangoapps/content_tagging/api.py
@@ -2,15 +2,21 @@
 Content Tagging APIs
 """
 from __future__ import annotations
+import io
 
 from itertools import groupby
+import csv
+from typing import Iterator
+from opaque_keys.edx.keys import UsageKey
 
 import openedx_tagging.core.tagging.api as oel_tagging
 from django.db.models import Exists, OuterRef, Q, QuerySet
 from opaque_keys.edx.keys import CourseKey
 from opaque_keys.edx.locator import LibraryLocatorV2
 from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy
+from openedx_tagging.core.tagging.models.utils import TAGS_CSV_SEPARATOR
 from organizations.models import Organization
+from .helpers.objecttag_export_helpers import build_object_tree_with_objecttags, iterate_with_level
 
 from .models import TaxonomyOrg
 from .types import ContentKey, TagValuesByObjectIdDict, TagValuesByTaxonomyIdDict, TaxonomyDict
@@ -164,7 +170,7 @@ def get_all_object_tags(
     all_object_tags = ObjectTag.objects.filter(
         Q(tag__isnull=False, tag__taxonomy__isnull=False),
         object_id_clause,
-    ).select_related("tag__taxonomy")
+    ).select_related("tag__taxonomy").order_by("object_id")
 
     if prefetch_orgs:
         all_object_tags = all_object_tags.prefetch_related("tag__taxonomy__taxonomyorg_set")
@@ -174,7 +180,8 @@ def get_all_object_tags(
 
     for object_id, block_tags in groupby(all_object_tags, lambda x: x.object_id):
         grouped_object_tags[object_id] = {}
-        for taxonomy_id, taxonomy_tags in groupby(block_tags, lambda x: x.tag.taxonomy_id if x.tag else 0):
+        block_tags_sorted = sorted(block_tags, key=lambda x: x.tag.taxonomy_id if x.tag else 0)  # type: ignore
+        for taxonomy_id, taxonomy_tags in groupby(block_tags_sorted, lambda x: x.tag.taxonomy_id if x.tag else 0):
             object_tags_list = list(taxonomy_tags)
             grouped_object_tags[object_id][taxonomy_id] = [
                 tag.value for tag in object_tags_list
@@ -185,7 +192,7 @@ def get_all_object_tags(
                 assert object_tags_list[0].tag.taxonomy
                 taxonomies[taxonomy_id] = object_tags_list[0].tag.taxonomy
 
-    return grouped_object_tags, taxonomies
+    return grouped_object_tags, dict(sorted(taxonomies.items()))
 
 
 def set_all_object_tags(
@@ -211,6 +218,125 @@ def set_all_object_tags(
         )
 
 
+def generate_csv_rows(object_id, buffer) -> Iterator[str]:
+    """
+    Returns a CSV string with tags and taxonomies of all blocks of `object_id`
+    """
+    content_key = get_content_key_from_string(object_id)
+
+    if isinstance(content_key, UsageKey):
+        raise ValueError("The object_id must be a CourseKey or a LibraryLocatorV2.")
+
+    all_object_tags, taxonomies = get_all_object_tags(content_key)
+    tagged_content = build_object_tree_with_objecttags(content_key, all_object_tags)
+
+    header = {"name": "Name", "type": "Type", "id": "ID"}
+
+    # Prepare the header for the taxonomies
+    for taxonomy_id, taxonomy in taxonomies.items():
+        header[f"taxonomy_{taxonomy_id}"] = taxonomy.export_id
+
+    csv_writer = csv.DictWriter(buffer, fieldnames=header.keys(), quoting=csv.QUOTE_NONNUMERIC)
+    yield csv_writer.writerow(header)
+
+    # Iterate over the blocks and yield the rows
+    for item, level in iterate_with_level(tagged_content):
+        block_key = get_content_key_from_string(item.block_id)
+
+        block_data = {
+            "name": level * "  " + item.display_name,
+            "type": item.category,
+            "id": getattr(block_key, 'block_id', item.block_id),
+        }
+
+        # Add the tags for each taxonomy
+        for taxonomy_id in taxonomies:
+            if taxonomy_id in item.object_tags:
+                block_data[f"taxonomy_{taxonomy_id}"] = f"{TAGS_CSV_SEPARATOR} ".join(
+                    list(item.object_tags[taxonomy_id])
+                )
+
+        yield csv_writer.writerow(block_data)
+
+
+def export_tags_in_csv_file(object_id, file_dir, file_name) -> None:
+    """
+    Writes a CSV file with tags and taxonomies of all blocks of `object_id`
+    """
+    buffer = io.StringIO()
+    for _ in generate_csv_rows(object_id, buffer):
+        # The generate_csv_rows function is a generator,
+        # we don't need to do anything with the result here
+        pass
+
+    with file_dir.open(file_name, 'w') as csv_file:
+        buffer.seek(0)
+        csv_file.write(buffer.read())
+
+
+def set_exported_object_tags(
+    content_key: ContentKey,
+    exported_tags: TagValuesByTaxonomyIdDict,
+) -> None:
+    """
+    Sets the tags for the given exported content object.
+    """
+    content_key_str = str(content_key)
+
+    # Clear all tags related with the content.
+    oel_tagging.delete_object_tags(content_key_str)
+
+    for taxonomy_export_id, tags_values in exported_tags.items():
+        if not tags_values:
+            continue
+
+        taxonomy = oel_tagging.get_taxonomy_by_export_id(str(taxonomy_export_id))
+        oel_tagging.tag_object(
+            object_id=content_key_str,
+            taxonomy=taxonomy,
+            tags=tags_values,
+            create_invalid=True,
+            taxonomy_export_id=str(taxonomy_export_id),
+        )
+
+
+def import_course_tags_from_csv(csv_path, course_id) -> None:
+    """
+    Import tags from a csv file generated on export.
+    """
+    # Open csv file and extract the tags
+    with open(csv_path, 'r') as csv_file:
+        csv_reader = csv.DictReader(csv_file)
+        tags_in_blocks = list(csv_reader)
+
+    def get_exported_tags(block) -> TagValuesByTaxonomyIdDict:
+        """
+        Returns a map with taxonomy export_id and tags for this block.
+        """
+        result = {}
+        for key, value in block.items():
+            if key in ['Type', 'Name', 'ID'] or not value:
+                continue
+            result[key] = value.split(TAGS_CSV_SEPARATOR)
+        return result
+
+    course_key = CourseKey.from_string(str(course_id))
+
+    for block in tags_in_blocks:
+        exported_tags = get_exported_tags(block)
+        block_type = block.get('Type', '')
+        block_id = block.get('ID', '')
+
+        if not block_type or not block_id:
+            raise ValueError(f"Invalid format of csv in: '{block}'.")
+
+        if block_type == 'course':
+            set_exported_object_tags(course_key, exported_tags)
+        else:
+            block_key = course_key.make_usage_key(block_type, block_id)
+            set_exported_object_tags(block_key, exported_tags)
+
+
 def copy_object_tags(
     source_content_key: ContentKey,
     dest_content_key: ContentKey,

diff --git a/openedx/core/djangoapps/content_tagging/auth.py b/openedx/core/djangoapps/content_tagging/auth.py
@@ -0,0 +1,14 @@
+"""
+Functions to validate the access in content tagging actions
+"""
+
+
+from openedx_tagging.core.tagging import rules as oel_tagging_rules
+
+
+def has_view_object_tags_access(user, object_id):
+    return user.has_perm(
+        "oel_tagging.view_objecttag",
+        # The obj arg expects a model, but we are passing an object
+        oel_tagging_rules.ObjectTagPermissionItem(taxonomy=None, object_id=object_id),  # type: ignore[arg-type]
+    )
diff --git a/openedx/core/djangoapps/content_tagging/helpers/__init__.py b/openedx/core/djangoapps/content_tagging/helpers/__init__.py
diff --git a/...g/rest_api/v1/objecttag_export_helpers.py → ...gging/helpers/objecttag_export_helpers.py b/...g/rest_api/v1/objecttag_export_helpers.py → ...gging/helpers/objecttag_export_helpers.py
@@ -12,10 +12,9 @@
 from xblock.core import XBlock
 
 import openedx.core.djangoapps.content_libraries.api as library_api
-from openedx.core.djangoapps.content_libraries.api import LibraryXBlockMetadata
 from xmodule.modulestore.django import modulestore
 
-from ...types import TagValuesByObjectIdDict, TagValuesByTaxonomyIdDict
+from ..types import TagValuesByObjectIdDict, TagValuesByTaxonomyIdDict
 
 
 @define
@@ -69,7 +68,7 @@ def _get_course_tagged_object_and_children(
 
 def _get_library_tagged_object_and_children(
     library_key: LibraryLocatorV2, object_tag_cache: TagValuesByObjectIdDict
-) -> tuple[TaggedContent, list[LibraryXBlockMetadata]]:
+) -> tuple[TaggedContent, list[library_api.LibraryXBlockMetadata]]:
     """
     Returns a TaggedContent with library metadata with its tags, and its children.
     """
@@ -89,7 +88,7 @@ def _get_library_tagged_object_and_children(
 
     library_components = library_api.get_library_components(library_key)
     children = [
-        LibraryXBlockMetadata.from_component(library_key, component)
+        library_api.LibraryXBlockMetadata.from_component(library_key, component)
         for component in library_components
     ]
 
@@ -117,7 +116,7 @@ def _get_xblock_tagged_object_and_children(
 
 
 def _get_library_block_tagged_object(
-    library_block: LibraryXBlockMetadata, object_tag_cache: TagValuesByObjectIdDict
+    library_block: library_api.LibraryXBlockMetadata, object_tag_cache: TagValuesByObjectIdDict
 ) -> tuple[TaggedContent, None]:
     """
     Returns a TaggedContent with library content block metadata and its tags,
@@ -144,7 +143,7 @@ def build_object_tree_with_objecttags(
     """
     get_tagged_children: Union[
         # _get_course_tagged_object_and_children type
-        Callable[[LibraryXBlockMetadata, dict[str, dict[int, list[Any]]]], tuple[TaggedContent, None]],
+        Callable[[library_api.LibraryXBlockMetadata, dict[str, dict[int, list[Any]]]], tuple[TaggedContent, None]],
         # _get_library_block_tagged_object type
         Callable[[UsageKey, dict[str, dict[int, list[Any]]]], tuple[TaggedContent, list[Any]]]
     ]

diff --git a/openedx/core/djangoapps/content_tagging/rest_api/v1/tests/test_views.py b/openedx/core/djangoapps/content_tagging/rest_api/v1/tests/test_views.py
@@ -37,8 +37,7 @@
 from openedx.core.djangoapps.content_tagging.utils import rules_cache
 from openedx.core.djangolib.testing.utils import skip_unless_cms
 
-
-from .test_objecttag_export_helpers import TaggedCourseMixin
+from ....tests.test_objecttag_export_helpers import TaggedCourseMixin
 
 User = get_user_model()
 
@@ -1759,6 +1758,7 @@ def test_get_tags(self):
         # Fetch this object's tags for a single taxonomy
         expected_tags = [{
             'name': 'Multiple Taxonomy',
+            'export_id': '13-multiple-taxonomy',
             'taxonomy_id': taxonomy.pk,
             'can_tag_object': True,
             'tags': [
@@ -1854,24 +1854,8 @@ def test_export_course(self, user_attr) -> None:
         assert response.status_code == status.HTTP_200_OK
         assert response.headers['Content-Type'] == 'text/csv'
 
-        expected_csv = (
-            '"Name","Type","ID","1-taxonomy-1","2-taxonomy-2"\r\n'
-            '"Test Course","course","course-v1:orgA+test_course+test_run","Tag 1.1",""\r\n'
-            '"  test sequential","sequential","block-v1:orgA+test_course+test_run+type@sequential+block@test_'
-            'sequential","Tag 1.1, Tag 1.2","Tag 2.1"\r\n'
-            '"    test vertical1","vertical","block-v1:orgA+test_course+test_run+type@vertical+block@test_'
-            'vertical1","","Tag 2.2"\r\n'
-            '"    test vertical2","vertical","block-v1:orgA+test_course+test_run+type@vertical+block@test_'
-            'vertical2","",""\r\n'
-            '"      test html","html","block-v1:orgA+test_course+test_run+type@html+block@test_html","","Tag 2.1"\r\n'
-            '"  untagged sequential","sequential","block-v1:orgA+test_course+test_run+type@sequential+block@untagged_'
-            'sequential","",""\r\n'
-            '"    untagged vertical","vertical","block-v1:orgA+test_course+test_run+type@vertical+block@untagged_'
-            'vertical","",""\r\n'
-        )
-
         zip_content = BytesIO(b"".join(response.streaming_content)).getvalue()  # type: ignore[attr-defined]
-        assert zip_content == expected_csv.encode()
+        assert zip_content == self.expected_csv.encode()
 
     def test_export_course_anoymous_forbidden(self) -> None:
         url = OBJECT_TAGS_EXPORT_URL.format(object_id=str(self.course.id))
@@ -1888,7 +1872,7 @@ def test_export_course_invalid_id(self) -> None:
         url = OBJECT_TAGS_EXPORT_URL.format(object_id="invalid")
         self.client.force_authenticate(user=self.staff)
         response = self.client.get(url)
-        assert response.status_code == status.HTTP_400_BAD_REQUEST
+        assert response.status_code == status.HTTP_403_FORBIDDEN
 
 
 @skip_unless_cms