Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion learning_resources/etl/canvas.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def run_for_canvas_archive(course_archive_path, course_folder, overwrite):
"""
checksum = calc_checksum(course_archive_path)
course_info = parse_canvas_settings(course_archive_path)
course_title = course_info.get("title")
course_title = course_info.get("title", f"canvas course {course_folder}")
url = canvas_course_url(course_archive_path)
start_at = course_info.get("start_at")
end_at = course_info.get("conclude_at")
Expand Down
73 changes: 70 additions & 3 deletions learning_resources/etl/canvas_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,12 @@ def make_canvas_zip(
files = files or []
zip_path = tmp_path / "canvas_course.zip"
with zipfile.ZipFile(zip_path, "w") as zf:
zf.writestr("course_settings/course_settings.xml", settings_xml)
zf.writestr("course_settings/module_meta.xml", module_xml)
zf.writestr("imsmanifest.xml", manifest_xml)
if settings_xml:
zf.writestr("course_settings/course_settings.xml", settings_xml)
if module_xml:
zf.writestr("course_settings/module_meta.xml", module_xml)
if manifest_xml:
zf.writestr("imsmanifest.xml", manifest_xml)
for filename, content in files:
zf.writestr(filename, content)
return zip_path
Expand Down Expand Up @@ -1737,3 +1740,67 @@ def test_get_published_items_for_attachment_module(mocker, tmp_path):
}
published = get_published_items(zip_path, url_config)
assert Path("web_resources/visible_attachment_module.txt").resolve() in published


def test_ingestion_finishes_with_missing_xml_files(tmp_path, mocker):
    """
    Test that canvas course ingestion succeeds even if some config XML files are missing

    The archive is built with only imsmanifest.xml, course_settings/files_meta.xml
    and three web resources; no settings_xml or module_xml is passed to
    make_canvas_zip.
    """
    # Stub the context lookup so the archive does not need a context.xml.
    mocker.patch(
        "learning_resources.etl.canvas_utils.parse_context_xml",
        return_value={"course_id": "123", "canvas_domain": "mit.edu"},
    )
    manifest_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
<manifest xmlns="http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1">
<resources>
<resource identifier="RES1" type="webcontent" href="web_resources/file1.pdf">
<file href="web_resources/file1.pdf"/>
</resource>
<resource identifier="RES2" type="webcontent" href="web_resources/file2.html">
<file href="web_resources/file2.html"/>
</resource>
<resource identifier="RES3" type="webcontent" href="web_resources/html_page.html">
<file href="web_resources/html_page.html"/>
</resource>
</resources>
</manifest>
"""
    files_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
<fileMeta xmlns="http://canvas.instructure.com/xsd/cccv1p0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://canvas.instructure.com/xsd/cccv1p0 https://canvas.instructure.com/xsd/cccv1p0.xsd">
<files>
<file identifier="RES1">
<category>uncategorized</category>
</file>
<file identifier="RES2">
<category>uncategorized</category>
</file>
<file identifier="RES3">
<category>uncategorized</category>
</file>
</files>
</fileMeta>
"""
    # Only the manifest is supplied as a config XML; the settings and module
    # XML arguments are deliberately left out.
    zip_path = make_canvas_zip(
        tmp_path,
        manifest_xml=manifest_xml,
        files=[
            ("course_settings/files_meta.xml", files_xml),
            ("web_resources/file1.pdf", "content of file1"),
            ("web_resources/file2.html", "content of file2"),
            ("web_resources/html_page.html", ""),
        ],
    )
    # Replace text extraction with a fixed payload.
    mocker.patch(
        "learning_resources.etl.utils.extract_text_metadata",
        return_value={"content": "test"},
    )
    _, run = run_for_canvas_archive(zip_path, tmp_path, overwrite=True)
    # Check the run before it is used, so a missing run fails here rather
    # than as an unrelated error inside the content-file transform.
    assert run is not None
    content_results = list(
        transform_canvas_content_files(
            Path(zip_path), run, url_config={}, overwrite=True
        )
    )
    assert len(content_results) > 0
5 changes: 5 additions & 0 deletions learning_resources/etl/canvas_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ def parse_module_meta(course_archive_path: str) -> dict:
"""
Parse module_meta.xml and return publish/active status of resources.
"""

with zipfile.ZipFile(course_archive_path, "r") as course_archive:
if "course_settings/module_meta.xml" not in course_archive.namelist():
return {"active": [], "unpublished": []}
module_xml = course_archive.read("course_settings/module_meta.xml")
manifest_xml = course_archive.read("imsmanifest.xml")
resource_map = extract_resources_by_identifierref(manifest_xml)
Expand Down Expand Up @@ -412,6 +415,8 @@ def parse_context_xml(course_archive_path: str) -> dict:
Parse course_settings/context.xml and return context info
"""
with zipfile.ZipFile(course_archive_path, "r") as course_archive:
if "course_settings/context.xml" not in course_archive.namelist():
return {}
context = course_archive.read("course_settings/context.xml")
root = ElementTree.fromstring(context)
context_info = {}
Expand Down
Loading