mitodl · shanbady · Nov 17, 2025 · Nov 14, 2025
diff --git a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py
@@ -85,7 +85,7 @@ def run_for_canvas_archive(course_archive_path, course_folder, overwrite):
     """
     checksum = calc_checksum(course_archive_path)
     course_info = parse_canvas_settings(course_archive_path)
-    course_title = course_info.get("title")
+    course_title = course_info.get("title", f"canvas course {course_folder}")
     url = canvas_course_url(course_archive_path)
     start_at = course_info.get("start_at")
     end_at = course_info.get("conclude_at")

diff --git a/learning_resources/etl/canvas_test.py b/learning_resources/etl/canvas_test.py
@@ -241,9 +241,12 @@ def make_canvas_zip(
     files = files or []
     zip_path = tmp_path / "canvas_course.zip"
     with zipfile.ZipFile(zip_path, "w") as zf:
-        zf.writestr("course_settings/course_settings.xml", settings_xml)
-        zf.writestr("course_settings/module_meta.xml", module_xml)
-        zf.writestr("imsmanifest.xml", manifest_xml)
+        if settings_xml:
+            zf.writestr("course_settings/course_settings.xml", settings_xml)
+        if module_xml:
+            zf.writestr("course_settings/module_meta.xml", module_xml)
+        if manifest_xml:
+            zf.writestr("imsmanifest.xml", manifest_xml)
         for filename, content in files:
             zf.writestr(filename, content)
     return zip_path
@@ -1737,3 +1740,67 @@ def test_get_published_items_for_attachment_module(mocker, tmp_path):
     }
     published = get_published_items(zip_path, url_config)
     assert Path("web_resources/visible_attachment_module.txt").resolve() in published
+
+
+def test_ingestion_finishes_with_missing_xml_files(tmp_path, mocker):
+    """
+    Test that ingestion completes when settings_xml and module_xml are not supplied
+    """
+    mocker.patch(  # stub the context lookup with a fixed course id and domain
+        "learning_resources.etl.canvas_utils.parse_context_xml",
+        return_value={"course_id": "123", "canvas_domain": "mit.edu"},
+    )
+    manifest_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <manifest xmlns="http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1">
+      <resources>
+        <resource identifier="RES1" type="webcontent"  href="web_resources/file1.pdf">
+          <file href="web_resources/file1.pdf"/>
+        </resource>
+        <resource identifier="RES2" type="webcontent" href="web_resources/file2.html">
+          <file href="web_resources/file2.html"/>
+        </resource>
+        <resource identifier="RES3" type="webcontent" href="web_resources/html_page.html">
+          <file href="web_resources/html_page.html"/>
+        </resource>
+      </resources>
+    </manifest>
+    """
+    files_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
+        <fileMeta xmlns="http://canvas.instructure.com/xsd/cccv1p0"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://canvas.instructure.com/xsd/cccv1p0 https://canvas.instructure.com/xsd/cccv1p0.xsd">
+        <files>
+        <file identifier="RES1">
+          <category>uncategorized</category>
+        </file>
+        <file identifier="RES2">
+          <category>uncategorized</category>
+        </file>
+        <file identifier="RES3">
+          <category>uncategorized</category>
+        </file>
+        </files>
+        </fileMeta>
+    """
+    zip_path = make_canvas_zip(  # no settings_xml or module_xml argument is passed
+        tmp_path,
+        manifest_xml=manifest_xml,
+        files=[
+            ("course_settings/files_meta.xml", files_xml),
+            ("web_resources/file1.pdf", "content of file1"),
+            ("web_resources/file2.html", "content of file2"),
+            ("web_resources/html_page.html", ""),
+        ],
+    )
+    mocker.patch(  # replace text extraction with a canned result
+        "learning_resources.etl.utils.extract_text_metadata",
+        return_value={"content": "test"},
+    )
+    _, run = run_for_canvas_archive(zip_path, tmp_path, overwrite=True)
+    content_results = list(
+        transform_canvas_content_files(
+            Path(zip_path), run, url_config={}, overwrite=True
+        )
+    )
+    assert run is not None  # NOTE(review): run is already used above
+    assert len(content_results) > 0
diff --git a/learning_resources/etl/canvas_utils.py b/learning_resources/etl/canvas_utils.py
@@ -114,7 +114,10 @@ def parse_module_meta(course_archive_path: str) -> dict:
     """
     Parse module_meta.xml and return publish/active status of resources.
     """
+
     with zipfile.ZipFile(course_archive_path, "r") as course_archive:
+        if "course_settings/module_meta.xml" not in course_archive.namelist():
+            return {"active": [], "unpublished": []}
         module_xml = course_archive.read("course_settings/module_meta.xml")
         manifest_xml = course_archive.read("imsmanifest.xml")
     resource_map = extract_resources_by_identifierref(manifest_xml)
@@ -412,6 +415,8 @@ def parse_context_xml(course_archive_path: str) -> dict:
     Parse course_settings/context.xml and return context info
     """
     with zipfile.ZipFile(course_archive_path, "r") as course_archive:
+        if "course_settings/context.xml" not in course_archive.namelist():
+            return {}
         context = course_archive.read("course_settings/context.xml")
     root = ElementTree.fromstring(context)
     context_info = {}