42 changes: 42 additions & 0 deletions openedx_learning/apps/authoring/backup_restore/serializers.py
@@ -0,0 +1,42 @@
"""
The serializers module for restoration of authoring data.
"""
from rest_framework import serializers

from openedx_learning.apps.authoring.components import api as components_api


class ComponentSerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer for components.
Contains logic to convert entity_key to component_type and local_key.
"""
can_stand_alone = serializers.BooleanField(required=True)
key = serializers.CharField(required=True)
created = serializers.DateTimeField(required=True)
created_by = serializers.CharField(required=True, allow_null=True)

def validate(self, attrs):
"""
Custom validation logic:
parse the entity_key into (component_type, local_key).
"""
entity_key = attrs["key"]
try:
component_type_obj, local_key = components_api.get_or_create_component_type_by_entity_key(entity_key)
attrs["component_type"] = component_type_obj
attrs["local_key"] = local_key
except ValueError as exc:
raise serializers.ValidationError({"key": str(exc)})
return attrs


class ComponentVersionSerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer for component versions.
"""
title = serializers.CharField(required=True)
entity_key = serializers.CharField(required=True)
created = serializers.DateTimeField(required=True)
created_by = serializers.CharField(required=True, allow_null=True)
content_to_replace = serializers.DictField(child=serializers.CharField(), required=True)
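A minimal usage sketch for these serializers, assuming a configured Django/DRF environment (the entity key below is hypothetical; ComponentSerializer.validate() touches the database via get_or_create_component_type_by_entity_key):

from datetime import datetime, timezone

from openedx_learning.apps.authoring.backup_restore.serializers import ComponentSerializer

serializer = ComponentSerializer(data={
    "can_stand_alone": True,
    "key": "xblock.v1:html:intro_text",  # hypothetical entity key
    "created": datetime.now(tz=timezone.utc),
    "created_by": None,
})
if serializer.is_valid():
    data = serializer.validated_data
    # validate() has split the key into a ComponentType plus the local key.
    component_type = data["component_type"]  # namespace "xblock.v1", name "html"
    local_key = data["local_key"]            # "intro_text"
else:
    # e.g. {"key": ["Invalid entity_key format: ..."]}
    print(serializer.errors)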
19 changes: 18 additions & 1 deletion openedx_learning/apps/authoring/backup_restore/toml.py
@@ -117,7 +117,6 @@ def toml_publishable_entity(
children = []

[version.container.unit]
graded = true
"""
entity_table = _get_toml_publishable_entity_table(entity, draft_version, published_version)
doc = tomlkit.document()
@@ -219,3 +218,21 @@ def parse_learning_package_toml(content: str) -> dict:
if "key" not in lp_data["learning_package"]:
raise ValueError("Invalid learning package TOML: missing 'key' in 'learning_package' section")
return lp_data["learning_package"]


def parse_publishable_entity_toml(content: str) -> tuple[Dict[str, Any], list]:
"""
Parse the publishable entity TOML file and return its entity fields and its list of version entries.
"""
pe_data: Dict[str, Any] = tomlkit.parse(content)

# Validate the minimum required fields
if "entity" not in pe_data:
raise ValueError("Invalid publishable entity TOML: missing 'entity' section")
if "version" not in pe_data:
raise ValueError("Invalid publishable entity TOML: missing 'version' section")
if "key" not in pe_data["entity"]:
raise ValueError("Invalid publishable entity TOML: missing 'key' field")
if "can_stand_alone" not in pe_data["entity"]:
raise ValueError("Invalid publishable entity TOML: missing 'can_stand_alone' field")
Comment on lines +230 to +237

Contributor:
Assume that these error messages will go into a log file, and try to surface everything that's wrong with the publishable entity at once, not just one at a time. Also include identifying information for which publishable entity is missing this information if possible, so there's some way to identify it.

Contributor Author:
I implemented serializers to collect those errors for a future log file. Thank you.

return pe_data["entity"], pe_data.get("version", [])
159 changes: 149 additions & 10 deletions openedx_learning/apps/authoring/backup_restore/zipper.py
@@ -6,33 +6,43 @@
import zipfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, List, Optional, Tuple
from typing import Any, List, Optional, Tuple, TypedDict

from django.db import transaction
from django.db.models import Prefetch, QuerySet
from django.utils.text import slugify

from openedx_learning.api.authoring_models import (
Collection,
ComponentType,
ComponentVersion,
ComponentVersionContent,
Content,
LearningPackage,
PublishableEntity,
PublishableEntityVersion,
)
from openedx_learning.apps.authoring.backup_restore.serializers import ComponentSerializer, ComponentVersionSerializer
from openedx_learning.apps.authoring.backup_restore.toml import (
parse_learning_package_toml,
parse_publishable_entity_toml,
toml_collection,
toml_learning_package,
toml_publishable_entity,
)
from openedx_learning.apps.authoring.collections import api as collections_api
from openedx_learning.apps.authoring.components import api as components_api
from openedx_learning.apps.authoring.publishing import api as publishing_api

TOML_PACKAGE_NAME = "package.toml"


class ComponentDefaults(TypedDict):
content_to_replace: dict[str, int | bytes | None]
created: datetime
created_by: Optional[int]


def slugify_hashed_filename(identifier: str) -> str:
"""
Generate a filesystem-safe filename from an identifier.
@@ -386,6 +396,8 @@ class LearningPackageUnzipper:

def __init__(self) -> None:
self.utc_now: datetime = datetime.now(tz=timezone.utc)
self.component_types_cache: dict[Tuple[str, str], ComponentType] = {}
self.errors: list[dict[str, Any]] = []

# --------------------------
# Public API
@@ -451,9 +463,98 @@ def _restore_containers(
def _restore_components(
self, zipf: zipfile.ZipFile, component_files: List[str], learning_package: LearningPackage
) -> None:
"""Restore components from the zip archive."""
for component_file in component_files:
self._load_component(zipf, component_file, learning_package)
"""
Restore components and their versions from the zip archive.
This method validates all components and their versions before persisting any data.
If any validation errors occur, no data is persisted and errors are collected.
"""

validated_components = []
validated_drafts = []
validated_published = []

for file in component_files:
if not file.endswith(".toml"):
# Skip non-TOML files
continue
Comment on lines +477 to +479

Contributor:
How do non-TOML files end up here in the first place?

Contributor Author:
The non-TOML files are static files. Here's an example list:

file entities/xblock.v1/drag-and-drop-v2/4d1b2fac-8b30-42fb-872d-6b10ab580b27/component_versions/v2/block.xml
file entities/xblock.v1/html/e32d5479-9492-41f6-9222-550a7346bc37/component_versions/v5/static/me.png
file entities/xblock.v1/html/e32d5479-9492-41f6-9222-550a7346bc37/component_versions/v5/block.xml
file entities/xblock.v1/html/e32d5479-9492-41f6-9222-550a7346bc37/component_versions/v4/block.xml
file entities/xblock.v1/html/e32d5479-9492-41f6-9222-550a7346bc37/component_versions/v4/static/me.png
file entities/xblock.v1/openassessment/1ee38208-a585-4455-a27e-4930aa541f53/component_versions/v2/block.xml
file entities/xblock.v1/problem/256739e8-c2df-4ced-bd10-8156f6cfa90b/component_versions/v2/block.xml
file entities/xblock.v1/survey/6681da3f-b056-4c6e-a8f9-040967907471/component_versions/v1/block.xml
file entities/xblock.v1/video/22601ebd-9da8-430b-9778-cfe059a98568/component_versions/v3/block.xml

This has not been implemented yet, but it will be included in the next steps.


# Load component data from the TOML file
component_data, draft_version, published_version = self._load_component_data(zipf, file)

# Validate component data
component_serializer = ComponentSerializer(data={
"created": self.utc_now,
"created_by": None,
**component_data,
})
if not component_serializer.is_valid():
# Collect errors and continue
self.errors.append({"file": file, "errors": component_serializer.errors})
continue
# Collect component validated data
validated_components.append(component_serializer.validated_data)

# Load and validate versions
valid_versions = self._validate_versions(
component_serializer.validated_data,
draft_version,
published_version
)
if valid_versions["draft"]:
validated_drafts.append(valid_versions["draft"])
if valid_versions["published"]:
validated_published.append(valid_versions["published"])

if self.errors:
return

# Persist all validated components and their versions if there are no errors
self._persist_components(learning_package, validated_components, validated_drafts, validated_published)

def _persist_components(
self,
learning_package: LearningPackage,
validated_components: List[dict[str, Any]],
validated_drafts: List[dict[str, Any]],
validated_published: List[dict[str, Any]],
) -> None:
"""
Persist validated components and their versions to the database.

The operation is performed within a bulk draft changes context so that only
a single transaction is recorded in the Draft Change Log.
"""
components_by_key = {} # Map entity_key to Component instance
# Step 1:
# Create components and their publishable entities
# Create all published versions as a draft first
# Publish all drafts
with publishing_api.bulk_draft_changes_for(learning_package.id):
for valid_component in validated_components:
entity_key = valid_component.pop("key")
component = components_api.create_component(
learning_package.id,
**valid_component,
)
components_by_key[entity_key] = component

for valid_draft in validated_published:
entity_key = valid_draft.pop("entity_key")
components_api.create_next_component_version(
components_by_key[entity_key].publishable_entity.id,
**valid_draft
)

publishing_api.publish_all_drafts(learning_package.id)

# Step 2: Create all draft versions
with publishing_api.bulk_draft_changes_for(learning_package.id):
for valid_draft in validated_drafts:
entity_key = valid_draft.pop("entity_key")
components_api.create_next_component_version(
components_by_key[entity_key].publishable_entity.id,
**valid_draft
)

def _restore_collections(
self, zipf: zipfile.ZipFile, collection_files: List[str], learning_package: LearningPackage
@@ -488,12 +589,12 @@ def _load_container(
)
"""

def _load_component(
self, zipf: zipfile.ZipFile, component_file: str, learning_package: LearningPackage
): # pylint: disable=W0613
"""Load and persist a component (placeholder)."""
# TODO: implement actual parsing
return None
def _load_component_data(self, zipf, component_file):
"""Load component data and its versions from a TOML file."""
content = self._read_file_from_zip(zipf, component_file)
component_data, component_version_data = parse_publishable_entity_toml(content)
draft_version, published_version = self._get_versions_to_write(component_version_data, component_data)
return component_data, draft_version, published_version

# --------------------------
# Utilities
@@ -529,3 +630,41 @@ def _get_organized_file_list(self, file_paths: List[str]) -> dict[str, Any]:
organized["collections"].append(path)

return organized

def _get_versions_to_write(
self,
component_version_data: List[dict[str, Any]],
component_data: dict[str, Any]
) -> Tuple[Optional[dict[str, Any]], Optional[dict[str, Any]]]:
"""Return the draft and published versions to write, based on component data."""

draft_version_num = component_data.get("draft", {}).get("version_num")
published_version_num = component_data.get("published", {}).get("version_num")

# Build lookup by version_num
version_lookup = {v.get("version_num"): v for v in component_version_data}

return (
version_lookup.get(draft_version_num) if draft_version_num else None,
version_lookup.get(published_version_num) if published_version_num else None,
)

def _validate_versions(self, component_validated_data, draft_version, published_version):
""" Validate draft and published versions using ComponentVersionSerializer."""
valid_versions = {"draft": None, "published": None}
for label, version in [("draft", draft_version), ("published", published_version)]:
if version is None:
continue
entity_key = component_validated_data["key"]
version_data = {
"entity_key": entity_key,
"content_to_replace": {},
"created": self.utc_now,
"created_by": None,
**version,
}
serializer = ComponentVersionSerializer(data=version_data)
if not serializer.is_valid():
self.errors.append(f"Errors in {label} version for {entity_key}: {serializer.errors}")
valid_versions[label] = serializer.validated_data
return valid_versions
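
The validation errors accumulated on self.errors are, per the review discussion above, intended for a log file. A minimal sketch of how a caller might surface them, assuming Python's standard logging (the helper name is hypothetical and not part of this diff):

import logging

logger = logging.getLogger(__name__)


def log_restore_errors(errors: list) -> None:
    # Emit every collected validation error so the whole failed restore
    # is visible in a single pass through the log.
    for entry in errors:
        logger.error("Backup restore validation error: %s", entry)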
22 changes: 22 additions & 0 deletions openedx_learning/apps/authoring/components/api.py
@@ -34,6 +34,7 @@
# to be callable only by other apps in the authoring package.
__all__ = [
"get_or_create_component_type",
"get_or_create_component_type_by_entity_key",
"create_component",
"create_component_version",
"create_next_component_version",
@@ -73,6 +74,27 @@ def get_or_create_component_type(namespace: str, name: str) -> ComponentType:
return component_type


def get_or_create_component_type_by_entity_key(entity_key: str) -> tuple[ComponentType, str]:
"""
Get or create a ComponentType based on a full entity key string.

The entity key is expected to be in the format
``"{namespace}:{type_name}:{local_key}"``. This function will parse out the
``namespace`` and ``type_name`` parts and use those to get or create the
ComponentType.

Raises ValueError if the entity_key is not in the expected format.
"""
try:
namespace, type_name, local_key = entity_key.split(':', 2)
except ValueError as exc:
raise ValueError(
f"Invalid entity_key format: {entity_key!r}. "
"Expected format: '{namespace}:{type_name}:{local_key}'"
) from exc
return get_or_create_component_type(namespace, type_name), local_key
Contributor:
Please write a test for this.

Contributor Author:
Applied. Thanks



def create_component(
learning_package_id: int,
/,
35 changes: 35 additions & 0 deletions tests/openedx_learning/apps/authoring/components/test_api.py
@@ -605,3 +605,38 @@ def setUpTestData(cls) -> None:
username="user",
email="user@example.com",
)


class TestComponentTypeUtils(TestCase):
"""
Test the component type utility functions.
"""

def test_get_or_create_component_type_by_entity_key_creates_new(self):
comp_type, local_key = components_api.get_or_create_component_type_by_entity_key(
"video:youtube:abcd1234"
)

assert isinstance(comp_type, ComponentType)
assert comp_type.namespace == "video"
assert comp_type.name == "youtube"
assert local_key == "abcd1234"
assert ComponentType.objects.count() == 1

def test_get_or_create_component_type_by_entity_key_existing(self):
ComponentType.objects.create(namespace="video", name="youtube")

comp_type, local_key = components_api.get_or_create_component_type_by_entity_key(
"video:youtube:efgh5678"
)

assert comp_type.namespace == "video"
assert comp_type.name == "youtube"
assert local_key == "efgh5678"
assert ComponentType.objects.count() == 1

def test_get_or_create_component_type_by_entity_key_invalid_format(self):
with self.assertRaises(ValueError) as ctx:
components_api.get_or_create_component_type_by_entity_key("not-enough-parts")

self.assertIn("Invalid entity_key format", str(ctx.exception))