diff --git a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py
index e1fb19e511..277d27d540 100644
--- a/learning_resources/etl/canvas.py
+++ b/learning_resources/etl/canvas.py
@@ -85,7 +85,7 @@ def run_for_canvas_archive(course_archive_path, course_folder, overwrite):
"""
checksum = calc_checksum(course_archive_path)
course_info = parse_canvas_settings(course_archive_path)
- course_title = course_info.get("title")
+ course_title = course_info.get("title", f"canvas course {course_folder}")
url = canvas_course_url(course_archive_path)
start_at = course_info.get("start_at")
end_at = course_info.get("conclude_at")
diff --git a/learning_resources/etl/canvas_test.py b/learning_resources/etl/canvas_test.py
index 0fb111e068..710f80c853 100644
--- a/learning_resources/etl/canvas_test.py
+++ b/learning_resources/etl/canvas_test.py
@@ -241,9 +241,12 @@ def make_canvas_zip(
files = files or []
zip_path = tmp_path / "canvas_course.zip"
with zipfile.ZipFile(zip_path, "w") as zf:
- zf.writestr("course_settings/course_settings.xml", settings_xml)
- zf.writestr("course_settings/module_meta.xml", module_xml)
- zf.writestr("imsmanifest.xml", manifest_xml)
+ if settings_xml:
+ zf.writestr("course_settings/course_settings.xml", settings_xml)
+ if module_xml:
+ zf.writestr("course_settings/module_meta.xml", module_xml)
+ if manifest_xml:
+ zf.writestr("imsmanifest.xml", manifest_xml)
for filename, content in files:
zf.writestr(filename, content)
return zip_path
@@ -1737,3 +1740,67 @@ def test_get_published_items_for_attachment_module(mocker, tmp_path):
}
published = get_published_items(zip_path, url_config)
assert Path("web_resources/visible_attachment_module.txt").resolve() in published
+
+
+def test_ingestion_finishes_with_missing_xml_files(tmp_path, mocker):
+ """
+ Test that canvas course ingestion succeeds even if some config XML files are missing
+ """
+ mocker.patch(
+ "learning_resources.etl.canvas_utils.parse_context_xml",
+ return_value={"course_id": "123", "canvas_domain": "mit.edu"},
+ )
+ manifest_xml = b"""
+
+
+
+
+
+
+
+
+
+
+
+
+
+ """
+ files_xml = b"""
+
+
+
+ uncategorized
+
+
+ uncategorized
+
+
+ uncategorized
+
+
+
+ """
+ zip_path = make_canvas_zip(
+ tmp_path,
+ manifest_xml=manifest_xml,
+ files=[
+ ("course_settings/files_meta.xml", files_xml),
+ ("web_resources/file1.pdf", "content of file1"),
+ ("web_resources/file2.html", "content of file2"),
+ ("web_resources/html_page.html", ""),
+ ],
+ )
+ mocker.patch(
+ "learning_resources.etl.utils.extract_text_metadata",
+ return_value={"content": "test"},
+ )
+ _, run = run_for_canvas_archive(zip_path, tmp_path, overwrite=True)
+ content_results = list(
+ transform_canvas_content_files(
+ Path(zip_path), run, url_config={}, overwrite=True
+ )
+ )
+ assert run is not None
+ assert len(content_results) > 0
diff --git a/learning_resources/etl/canvas_utils.py b/learning_resources/etl/canvas_utils.py
index e915bf4d0a..516f82214d 100644
--- a/learning_resources/etl/canvas_utils.py
+++ b/learning_resources/etl/canvas_utils.py
@@ -114,7 +114,10 @@ def parse_module_meta(course_archive_path: str) -> dict:
"""
Parse module_meta.xml and return publish/active status of resources.
"""
+
with zipfile.ZipFile(course_archive_path, "r") as course_archive:
+ if "course_settings/module_meta.xml" not in course_archive.namelist():
+ return {"active": [], "unpublished": []}
module_xml = course_archive.read("course_settings/module_meta.xml")
manifest_xml = course_archive.read("imsmanifest.xml")
resource_map = extract_resources_by_identifierref(manifest_xml)
@@ -412,6 +415,8 @@ def parse_context_xml(course_archive_path: str) -> dict:
Parse course_settings/context.xml and return context info
"""
with zipfile.ZipFile(course_archive_path, "r") as course_archive:
+ if "course_settings/context.xml" not in course_archive.namelist():
+ return {}
context = course_archive.read("course_settings/context.xml")
root = ElementTree.fromstring(context)
context_info = {}