42 changes: 42 additions & 0 deletions openedx_learning/apps/authoring/backup_restore/serializers.py
@@ -0,0 +1,42 @@
"""
The serializers module for restoration of authoring data.
"""
from rest_framework import serializers

from openedx_learning.apps.authoring.components import api as components_api


class ComponentSerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer for components.
Contains logic to convert entity_key to component_type and local_key.
"""
can_stand_alone = serializers.BooleanField(required=True)
key = serializers.CharField(required=True)
created = serializers.DateTimeField(required=True)
created_by = serializers.CharField(required=True, allow_null=True)

def validate(self, attrs):
"""
Custom validation logic:
parse the entity_key into (component_type, local_key).
"""
entity_key = attrs["key"]
try:
component_type_obj, local_key = components_api.get_or_create_component_type_by_entity_key(entity_key)
attrs["component_type"] = component_type_obj
attrs["local_key"] = local_key
except ValueError as exc:
raise serializers.ValidationError({"key": str(exc)})
return attrs


class ComponentVersionSerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer for component versions.
"""
title = serializers.CharField(required=True)
entity_key = serializers.CharField(required=True)
created = serializers.DateTimeField(required=True)
created_by = serializers.CharField(required=True, allow_null=True)
content_to_replace = serializers.DictField(child=serializers.CharField(), required=True)
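A minimal usage sketch for these serializers, assuming a configured Django/DRF environment (the entity key below is hypothetical; ComponentSerializer.validate() touches the database via get_or_create_component_type_by_entity_key):

from datetime import datetime, timezone

from openedx_learning.apps.authoring.backup_restore.serializers import ComponentSerializer

serializer = ComponentSerializer(data={
    "can_stand_alone": True,
    "key": "xblock.v1:html:intro_text",  # hypothetical entity key
    "created": datetime.now(tz=timezone.utc),
    "created_by": None,
})
if serializer.is_valid():
    data = serializer.validated_data
    # validate() has split the key into a ComponentType plus the local key.
    component_type = data["component_type"]  # namespace "xblock.v1", name "html"
    local_key = data["local_key"]            # "intro_text"
else:
    # e.g. {"key": ["Invalid entity_key format: ..."]}
    print(serializer.errors)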
19 changes: 18 additions & 1 deletion openedx_learning/apps/authoring/backup_restore/toml.py
@@ -117,7 +117,6 @@ def toml_publishable_entity(
children = []

[version.container.unit]
graded = true
"""
entity_table = _get_toml_publishable_entity_table(entity, draft_version, published_version)
doc = tomlkit.document()
@@ -219,3 +218,21 @@ def parse_learning_package_toml(content: str) -> dict:
if "key" not in lp_data["learning_package"]:
raise ValueError("Invalid learning package TOML: missing 'key' in 'learning_package' section")
return lp_data["learning_package"]


def parse_publishable_entity_toml(content: str) -> tuple[Dict[str, Any], list]:
"""
Parse the publishable entity TOML file and return its entity fields and its list of version entries.
"""
pe_data: Dict[str, Any] = tomlkit.parse(content)

# Validate the minimum required fields
if "entity" not in pe_data:
raise ValueError("Invalid publishable entity TOML: missing 'entity' section")
if "version" not in pe_data:
raise ValueError("Invalid publishable entity TOML: missing 'version' section")
if "key" not in pe_data["entity"]:
raise ValueError("Invalid publishable entity TOML: missing 'key' field")
if "can_stand_alone" not in pe_data["entity"]:
raise ValueError("Invalid publishable entity TOML: missing 'can_stand_alone' field")
Comment on lines +230 to +237

Contributor:
Assume that these error messages will go into a log file, and try to surface everything that's wrong with the publishable entity at once, not just one at a time. Also include identifying information for which publishable entity is missing this information if possible, so there's some way to identify it.

Contributor Author:
I implemented serializers to collect those errors for a future log file. Thank you.

return pe_data["entity"], pe_data.get("version", [])
159 changes: 149 additions & 10 deletions openedx_learning/apps/authoring/backup_restore/zipper.py
@@ -6,33 +6,43 @@
import zipfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, List, Optional, Tuple
from typing import Any, List, Optional, Tuple, TypedDict

from django.db import transaction
from django.db.models import Prefetch, QuerySet
from django.utils.text import slugify

from openedx_learning.api.authoring_models import (
Collection,
ComponentType,
ComponentVersion,
ComponentVersionContent,
Content,
LearningPackage,
PublishableEntity,
PublishableEntityVersion,
)
from openedx_learning.apps.authoring.backup_restore.serializers import ComponentSerializer, ComponentVersionSerializer
from openedx_learning.apps.authoring.backup_restore.toml import (
parse_learning_package_toml,
parse_publishable_entity_toml,
toml_collection,
toml_learning_package,
toml_publishable_entity,
)
from openedx_learning.apps.authoring.collections import api as collections_api
from openedx_learning.apps.authoring.components import api as components_api
from openedx_learning.apps.authoring.publishing import api as publishing_api

TOML_PACKAGE_NAME = "package.toml"


class ComponentDefaults(TypedDict):
content_to_replace: dict[str, int | bytes | None]
created: datetime
created_by: Optional[int]


def slugify_hashed_filename(identifier: str) -> str:
"""
Generate a filesystem-safe filename from an identifier.
@@ -386,6 +396,8 @@ class LearningPackageUnzipper:

def __init__(self) -> None:
self.utc_now: datetime = datetime.now(tz=timezone.utc)
self.component_types_cache: dict[Tuple[str, str], ComponentType] = {}
self.errors: list[dict[str, Any]] = []

# --------------------------
# Public API
@@ -451,9 +463,98 @@ def _restore_containers(
def _restore_components(
self, zipf: zipfile.ZipFile, component_files: List[str], learning_package: LearningPackage
) -> None:
"""Restore components from the zip archive."""
for component_file in component_files:
self._load_component(zipf, component_file, learning_package)
"""
Restore components and their versions from the zip archive.
This method validates all components and their versions before persisting any data.
If any validation errors occur, no data is persisted and errors are collected.
"""

validated_components = []
validated_drafts = []
validated_published = []

for file in component_files:
if not file.endswith(".toml"):
# Skip non-TOML files
continue
Comment on lines +477 to +479

Contributor:
How do non-TOML files end up here in the first place?

Contributor Author:
The non-TOML files are static files. Here's an example list:

file entities/xblock.v1/drag-and-drop-v2/4d1b2fac-8b30-42fb-872d-6b10ab580b27/component_versions/v2/block.xml
file entities/xblock.v1/html/e32d5479-9492-41f6-9222-550a7346bc37/component_versions/v5/static/me.png
file entities/xblock.v1/html/e32d5479-9492-41f6-9222-550a7346bc37/component_versions/v5/block.xml
file entities/xblock.v1/html/e32d5479-9492-41f6-9222-550a7346bc37/component_versions/v4/block.xml
file entities/xblock.v1/html/e32d5479-9492-41f6-9222-550a7346bc37/component_versions/v4/static/me.png
file entities/xblock.v1/openassessment/1ee38208-a585-4455-a27e-4930aa541f53/component_versions/v2/block.xml
file entities/xblock.v1/problem/256739e8-c2df-4ced-bd10-8156f6cfa90b/component_versions/v2/block.xml
file entities/xblock.v1/survey/6681da3f-b056-4c6e-a8f9-040967907471/component_versions/v1/block.xml
file entities/xblock.v1/video/22601ebd-9da8-430b-9778-cfe059a98568/component_versions/v3/block.xml

This has not been implemented yet, but it will be included in the next steps.


# Load component data from the TOML file
component_data, draft_version, published_version = self._load_component_data(zipf, file)

# Validate component data
component_serializer = ComponentSerializer(data={
"created": self.utc_now,
"created_by": None,
**component_data,
})
if not component_serializer.is_valid():
# Collect errors and continue
self.errors.append({"file": file, "errors": component_serializer.errors})
continue
# Collect component validated data
validated_components.append(component_serializer.validated_data)

# Load and validate versions
valid_versions = self._validate_versions(
component_serializer.validated_data,
draft_version,
published_version
)
if valid_versions["draft"]:
validated_drafts.append(valid_versions["draft"])
if valid_versions["published"]:
validated_published.append(valid_versions["published"])

if self.errors:
return

# Persist all validated components and their versions if there are no errors
self._persist_components(learning_package, validated_components, validated_drafts, validated_published)

def _persist_components(
self,
learning_package: LearningPackage,
validated_components: List[dict[str, Any]],
validated_drafts: List[dict[str, Any]],
validated_published: List[dict[str, Any]],
) -> None:
"""
Persist validated components and their versions to the database.

The operation is performed within a bulk draft changes context so that only
a single transaction is recorded in the Draft Change Log.
"""
components_by_key = {} # Map entity_key to Component instance
# Step 1:
# Create components and their publishable entities
# Create all published versions as a draft first
# Publish all drafts
with publishing_api.bulk_draft_changes_for(learning_package.id):
for valid_component in validated_components:
entity_key = valid_component.pop("key")
component = components_api.create_component(
learning_package.id,
**valid_component,
)
components_by_key[entity_key] = component

for valid_draft in validated_published:
entity_key = valid_draft.pop("entity_key")
components_api.create_next_component_version(
components_by_key[entity_key].publishable_entity.id,
**valid_draft
)

publishing_api.publish_all_drafts(learning_package.id)

# Step 2: Create all draft versions
with publishing_api.bulk_draft_changes_for(learning_package.id):
for valid_draft in validated_drafts:
entity_key = valid_draft.pop("entity_key")
components_api.create_next_component_version(
components_by_key[entity_key].publishable_entity.id,
**valid_draft
)

def _restore_collections(
self, zipf: zipfile.ZipFile, collection_files: List[str], learning_package: LearningPackage
@@ -488,12 +589,12 @@ def _load_container(
)
"""

def _load_component(
self, zipf: zipfile.ZipFile, component_file: str, learning_package: LearningPackage
): # pylint: disable=W0613
"""Load and persist a component (placeholder)."""
# TODO: implement actual parsing
return None
def _load_component_data(self, zipf, component_file):
"""Load component data and its versions from a TOML file."""
content = self._read_file_from_zip(zipf, component_file)
component_data, component_version_data = parse_publishable_entity_toml(content)
draft_version, published_version = self._get_versions_to_write(component_version_data, component_data)
return component_data, draft_version, published_version

# --------------------------
# Utilities
@@ -529,3 +630,41 @@ def _get_organized_file_list(self, file_paths: List[str]) -> dict[str, Any]:
organized["collections"].append(path)

return organized

def _get_versions_to_write(
self,
component_version_data: List[dict[str, Any]],
component_data: dict[str, Any]
) -> Tuple[Optional[dict[str, Any]], Optional[dict[str, Any]]]:
"""Return the draft and published versions to write, based on component data."""

draft_version_num = component_data.get("draft", {}).get("version_num")
published_version_num = component_data.get("published", {}).get("version_num")

# Build lookup by version_num
version_lookup = {v.get("version_num"): v for v in component_version_data}

return (
version_lookup.get(draft_version_num) if draft_version_num else None,
version_lookup.get(published_version_num) if published_version_num else None,
)

def _validate_versions(self, component_validated_data, draft_version, published_version):
""" Validate draft and published versions using ComponentVersionSerializer."""
valid_versions = {"draft": None, "published": None}
for label, version in [("draft", draft_version), ("published", published_version)]:
if version is None:
continue
entity_key = component_validated_data["key"]
version_data = {
"entity_key": entity_key,
"content_to_replace": {},
"created": self.utc_now,
"created_by": None,
**version,
}
serializer = ComponentVersionSerializer(data=version_data)
if not serializer.is_valid():
self.errors.append(f"Errors in {label} version for {entity_key}: {serializer.errors}")
valid_versions[label] = serializer.validated_data
return valid_versions
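
The validation errors accumulated on self.errors are, per the review discussion above, intended for a log file. A minimal sketch of how a caller might surface them, assuming Python's standard logging (the helper name is hypothetical and not part of this diff):

import logging

logger = logging.getLogger(__name__)


def log_restore_errors(errors: list) -> None:
    # Emit every collected validation error so the whole failed restore
    # is visible in a single pass through the log.
    for entry in errors:
        logger.error("Backup restore validation error: %s", entry)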
22 changes: 22 additions & 0 deletions openedx_learning/apps/authoring/components/api.py
@@ -34,6 +34,7 @@
# to be callable only by other apps in the authoring package.
__all__ = [
"get_or_create_component_type",
"get_or_create_component_type_by_entity_key",
"create_component",
"create_component_version",
"create_next_component_version",
@@ -73,6 +74,27 @@ def get_or_create_component_type(namespace: str, name: str) -> ComponentType:
return component_type


def get_or_create_component_type_by_entity_key(entity_key: str) -> tuple[ComponentType, str]:
"""
Get or create a ComponentType based on a full entity key string.

The entity key is expected to be in the format
``"{namespace}:{type_name}:{local_key}"``. This function will parse out the
``namespace`` and ``type_name`` parts and use those to get or create the
ComponentType.

Raises ValueError if the entity_key is not in the expected format.
"""
try:
namespace, type_name, local_key = entity_key.split(':', 2)
except ValueError as exc:
raise ValueError(
f"Invalid entity_key format: {entity_key!r}. "
"Expected format: '{namespace}:{type_name}:{local_key}'"
) from exc
return get_or_create_component_type(namespace, type_name), local_key
Contributor:
Please write a test for this.

Contributor Author:
Applied. Thanks



def create_component(
learning_package_id: int,
/,
35 changes: 35 additions & 0 deletions tests/openedx_learning/apps/authoring/components/test_api.py
@@ -605,3 +605,38 @@ def setUpTestData(cls) -> None:
username="user",
email="user@example.com",
)


class TestComponentTypeUtils(TestCase):
"""
Test the component type utility functions.
"""

def test_get_or_create_component_type_by_entity_key_creates_new(self):
comp_type, local_key = components_api.get_or_create_component_type_by_entity_key(
"video:youtube:abcd1234"
)

assert isinstance(comp_type, ComponentType)
assert comp_type.namespace == "video"
assert comp_type.name == "youtube"
assert local_key == "abcd1234"
assert ComponentType.objects.count() == 1

def test_get_or_create_component_type_by_entity_key_existing(self):
ComponentType.objects.create(namespace="video", name="youtube")

comp_type, local_key = components_api.get_or_create_component_type_by_entity_key(
"video:youtube:efgh5678"
)

assert comp_type.namespace == "video"
assert comp_type.name == "youtube"
assert local_key == "efgh5678"
assert ComponentType.objects.count() == 1

def test_get_or_create_component_type_by_entity_key_invalid_format(self):
with self.assertRaises(ValueError) as ctx:
components_api.get_or_create_component_type_by_entity_key("not-enough-parts")

self.assertIn("Invalid entity_key format", str(ctx.exception))