From e7438c576014281a757143e3ea43cdab9d6bcec6 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Thu, 18 Sep 2025 15:11:45 -0400 Subject: [PATCH 1/6] Fix flaky test (#2519) --- learning_resources/tasks.py | 10 +++++++--- learning_resources/tasks_test.py | 11 +++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/learning_resources/tasks.py b/learning_resources/tasks.py index c897ab7086..7e937b4be8 100644 --- a/learning_resources/tasks.py +++ b/learning_resources/tasks.py @@ -207,9 +207,13 @@ def get_content_tasks( # noqa: PLR0913 ) if learning_resource_ids: - learning_resources = LearningResource.objects.filter( - id__in=learning_resource_ids, etl_source=etl_source - ).values_list("id", flat=True) + learning_resources = ( + LearningResource.objects.filter( + id__in=learning_resource_ids, etl_source=etl_source + ) + .order_by("-id") + .values_list("id", flat=True) + ) else: learning_resources = ( LearningResource.objects.filter(Q(published=True) | Q(test_mode=True)) diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py index e489138e7b..143b5ecfd2 100644 --- a/learning_resources/tasks_test.py +++ b/learning_resources/tasks_test.py @@ -238,10 +238,13 @@ def test_get_content_tasks( 3, etl_source=etl_source, platform=platform ) if with_learning_resource_ids: - learning_resource_ids = [ - courses[0].learning_resource_id, - courses[1].learning_resource_id, - ] + learning_resource_ids = sorted( + [ + courses[0].learning_resource_id, + courses[1].learning_resource_id, + ], + reverse=True, + ) else: learning_resource_ids = None s3_prefix = "course-prefix" From 4891d1c69066fd0a67bb1064868f1ac44377290a Mon Sep 17 00:00:00 2001 From: Dan Subak Date: Fri, 19 Sep 2025 08:47:59 -0400 Subject: [PATCH 2/6] Add browser header for podcast extraction (#2514) * Add browser header for podcast extraction * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * User agent line too long * Line too long * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- learning_resources/etl/podcast.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/learning_resources/etl/podcast.py b/learning_resources/etl/podcast.py index 0b07b84a7b..c1256003ca 100644 --- a/learning_resources/etl/podcast.py +++ b/learning_resources/etl/podcast.py @@ -19,6 +19,11 @@ CONFIG_FILE_REPO = "mitodl/open-podcast-data" CONFIG_FILE_FOLDER = "podcasts" TIMESTAMP_FORMAT = "%a, %d %b %Y %H:%M:%S %z" +BROWSER_UA_HEADERS = { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/39.0.2171.95 Safari/537.36" +} log = logging.getLogger() @@ -128,7 +133,7 @@ def extract(): for playlist_config in configs: rss_url = playlist_config["rss_url"] try: - response = requests.get(rss_url) # noqa: S113 + response = requests.get(rss_url, headers=BROWSER_UA_HEADERS) # noqa: S113 response.raise_for_status() feed = bs(response.content, "xml") From ca21c895dfac829c8d8a78ccf10dfab2874c3a64 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 19 Sep 2025 08:48:37 -0400 Subject: [PATCH 3/6] chore(deps): update redis docker tag to v8 (#2397) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- docker-compose.services.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4340078b60..3545a83571 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: - 5432:5432 redis: - image: redis:7.4.5 + image: redis:8.2.1 ports: - 6379:6379 diff --git a/docker-compose.services.yml b/docker-compose.services.yml index 18cb9e7016..30e3b24d02 100644 --- a/docker-compose.services.yml +++ b/docker-compose.services.yml @@ -25,7 +25,7 @@ services: redis: profiles: - backend - image: redis:7.4.5 + image: redis:8.2.1 healthcheck: test: ["CMD", "redis-cli", "ping", "|", "grep", "PONG"] interval: 3s From 7f72a8d45454b20303f15c479414181a2f600504 Mon Sep 17 00:00:00 2001 From: Shankar Ambady Date: Fri, 19 Sep 2025 12:44:55 -0400 Subject: [PATCH 4/6] canvas: citation urls for html content (#2521) * updates to support urls for non-file items * adding canvas type * working urls for assignments and pages * adding title to contentfile and test for urls * update test * titles for embedded pages --- learning_resources/etl/canvas.py | 73 +++++++++++++------ learning_resources/etl/canvas_test.py | 100 +++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 22 deletions(-) diff --git a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py index dae0615681..e8b5049204 100644 --- a/learning_resources/etl/canvas.py +++ b/learning_resources/etl/canvas.py @@ -110,10 +110,14 @@ def _get_url_config(bucket, export_tempdir: str, url_config_file: str) -> dict: bucket.download_file(url_config_file, url_config_path) url_config = {} with Path.open(url_config_path, "rb") as f: - for url_item in json.loads(f.read().decode("utf-8")).get("course_files", []): + url_json = json.loads(f.read().decode("utf-8")) + for url_item in url_json.get("course_files", []): url_key = url_item["file_path"] url_key = unquote_plus(url_key.lstrip(url_key.split("/")[0])) url_config[url_key] = url_item["url"] + for url_item in url_json.get("assignments", []) + url_json.get("pages", []): + url_key = url_item.get("name", url_item.get("title")) + url_config[url_key] = url_item.get("html_url") return url_config @@ -199,22 +203,23 @@ def transform_canvas_content_files( """ basedir = course_zipfile.name.split(".")[0] zipfile_path = course_zipfile.absolute() - # grab published module and file items - published_items = [ - Path(item["path"]).resolve() - for item in parse_module_meta(zipfile_path)["active"] - ] + [ - Path(item["path"]).resolve() - for item in parse_files_meta(zipfile_path)["active"] - ] - for item in parse_web_content(zipfile_path)["active"]: - published_items.append(Path(item["path"]).resolve()) - published_items.extend( - [ - Path(embedded_file).resolve() - for embedded_file in item.get("embedded_files", []) - ] - ) + all_published_items = ( + parse_module_meta(zipfile_path)["active"] + + parse_files_meta(zipfile_path)["active"] + + parse_web_content(zipfile_path)["active"] + ) + published_items = {} + for item in all_published_items: + path = Path(item["path"]).resolve() + published_items[path] = item + for embedded_file in item.get("embedded_files", []): + embedded_path = Path(embedded_file).resolve() + if embedded_path in all_published_items: + continue + published_items[embedded_path] = { + "path": embedded_path, + "title": "", + } def _generate_content(): """Inner generator for yielding content data""" @@ -223,7 +228,8 @@ def _generate_content(): zipfile.ZipFile(zipfile_path, "r") as course_archive, ): for member in course_archive.infolist(): - if Path(member.filename).resolve() in published_items: + member_path = Path(member.filename).resolve() + if member_path in published_items: course_archive.extract(member, path=olx_path) log.debug("processing active file %s", member.filename) else: @@ -233,7 +239,14 @@ def _generate_content(): url_path = content_data["source_path"].lstrip( content_data["source_path"].split("/")[0] ) - content_url = url_config.get(url_path, "") + item_meta = published_items.get( + Path(content_data["source_path"]).resolve(), {} + ) + + content_url = url_config.get(url_path) or url_config.get( + item_meta.get("title") + ) + content_data["content_title"] = item_meta.get("title") if content_url: content_data["url"] = content_url yield content_data @@ -561,6 +574,19 @@ def _title_from_html(html: str) -> str: return title.get_text().strip() if title else "" +def _title_from_assignment_settings(xml_string: str) -> str: + """ + Extract the title from assignment_settings.xml + """ + try: + root = ElementTree.fromstring(xml_string) + except Exception: + log.exception("Error parsing XML: %s", sys.stderr) + return "" + title_elem = root.find("cccv1p0:title", NAMESPACES) + return title_elem.text.strip() if title_elem is not None and title_elem.text else "" + + def parse_web_content(course_archive_path: str) -> dict: """ Parse html pages and assignments and return publish/active status of resources @@ -588,9 +614,12 @@ def parse_web_content(course_archive_path: str) -> dict: if assignment_settings: xml_content = course_archive.read(assignment_settings) workflow_state = _workflow_state_from_xml(xml_content) + title = _title_from_assignment_settings(xml_content) + canvas_type = "assignment" else: workflow_state = _workflow_state_from_html(html_content) - title = _title_from_html(html_content) + title = _title_from_html(html_content) + canvas_type = "page" lom_elem = ( resource_map_item.get("metadata", {}) @@ -606,6 +635,7 @@ def parse_web_content(course_archive_path: str) -> dict: { "title": title, "path": file_path, + "canvas_type": canvas_type, "embedded_files": embedded_files, } ) @@ -614,6 +644,7 @@ def parse_web_content(course_archive_path: str) -> dict: { "title": title, "path": file_path, + "canvas_type": canvas_type, "embedded_files": embedded_files, } ) @@ -635,7 +666,7 @@ def extract_resources_by_identifierref(manifest_xml: str) -> dict: title = ( item.find("imscp:title", NAMESPACES).text if item.find("imscp:title", NAMESPACES) is not None - else "No Title" + else "" ) resource = root.find( f'.//imscp:resource[@identifier="{identifierref}"]', NAMESPACES diff --git a/learning_resources/etl/canvas_test.py b/learning_resources/etl/canvas_test.py index c0d10420d5..2c1c244022 100644 --- a/learning_resources/etl/canvas_test.py +++ b/learning_resources/etl/canvas_test.py @@ -195,7 +195,7 @@ def test_parse_module_meta_returns_active_and_unpublished(tmp_path): """ module_xml = b""" - + Module 1 @@ -981,3 +981,101 @@ def test_embedded_files_from_html(tmp_path, mocker): ) assert any(file["source_path"] == "web_resources/html_page.html" for file in files) + + +def test_get_url_config_assignments_and_pages(mocker, tmp_path): + """ + Test that _get_url_config correctly maps assignments and pages to URLs using their titles. + """ + hmtl_page_title = "html page" + + url_config = { + hmtl_page_title: "https://example.com/htmlpage", + "/file1.html": "https://example.com/file1", + } + + run = LearningResourceRunFactory.create() + + html_content = f""" + + {hmtl_page_title} + + + + + """ + module_xml = b""" + + + Module 1 + + + active + Item 1 + false + false + RES1 + resource + + + + + """ + manifest_xml = b""" + + + + + + + + + + + + + + + + module 1 + + + + + + """ + zip_path = make_canvas_zip_with_module_meta(tmp_path, module_xml, manifest_xml) + with zipfile.ZipFile(zip_path, "a") as zf: + zf.writestr("web_resources/file1.html", "content of file1") + zf.writestr("web_resources/file2.html", "content of file2") + zf.writestr("web_resources/html_page.html", html_content) + mocker.patch( + "learning_resources.etl.utils.extract_text_metadata", + return_value={"content": "test"}, + ) + mocker.patch("learning_resources.etl.canvas.bulk_resources_unpublished_actions") + results = list( + transform_canvas_content_files( + Path(zip_path), + run=run, + url_config=url_config, + overwrite=False, + ) + ) + results = list(results) + list( + zip( + [result["content_title"] for result in results], + [result["url"] for result in results], + ) + ) + results = dict( + zip( + [result["content_title"] for result in results], + [result["url"] for result in results], + ) + ) + assert results["html page"] == "https://example.com/htmlpage" + assert results["Item 1"] == "https://example.com/file1" From c9fa6b2856dac3096aecd1b01c198e9ef92ded48 Mon Sep 17 00:00:00 2001 From: Carey P Gumaer Date: Fri, 19 Sep 2025 14:52:43 -0400 Subject: [PATCH 5/6] fix non-lexicographical ordering in org dashboard programs / program collections (#2523) * Sort program / program collection courses by the order of id's on the program, because the API does not respect this order * remove unnecessary spread * remove unnecessary expansion --- .../OrganizationContent.test.tsx | 119 +++++++++++++----- .../DashboardPage/OrganizationContent.tsx | 117 +++++++---------- 2 files changed, 135 insertions(+), 101 deletions(-) diff --git a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx index d41a6aff88..4ab71c50db 100644 --- a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx +++ b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx @@ -66,6 +66,31 @@ describe("OrganizationContent", () => { }) }) + it("displays courses in the correct order based on program.courseIds, regardless of API response order", async () => { + const { orgX, programA, coursesA } = setupProgramsAndCourses() + + // Mock API to return courses in reverse order from program.courseIds + const reversedCoursesA = [...coursesA].reverse() + setMockResponse.get( + expect.stringContaining( + `/api/v2/courses/?id=${programA.courses.join("%2C")}`, + ), + { results: reversedCoursesA }, + ) + + renderWithProviders() + + const programElement = await screen.findByTestId("org-program-root") + const cards = await within(programElement).findAllByTestId( + "enrollment-card-desktop", + ) + + // Verify courses appear in program.courseIds order, not API response order + coursesA.forEach((course, i) => { + expect(cards[i]).toHaveTextContent(course.title) + }) + }) + test("Shows correct enrollment status", async () => { const { orgX, programA, coursesA } = setupProgramsAndCourses() const enrollments = [ @@ -132,19 +157,16 @@ describe("OrganizationContent", () => { { results: [programB] }, ) - // Mock the courses API calls for programs in the collection - // Use dynamic matching since course IDs are randomly generated - setMockResponse.get( - expect.stringContaining( - `/api/v2/courses/?id=${programA.courses.join("%2C")}`, - ), - { results: coursesA }, - ) + // Mock the bulk course API call with first course from each program + const firstCourseA = coursesA.find((c) => c.id === programA.courses[0]) + const firstCourseB = coursesB.find((c) => c.id === programB.courses[0]) + const firstCourseIds = [programB.courses[0], programA.courses[0]] // B first, then A to match collection order + setMockResponse.get( expect.stringContaining( - `/api/v2/courses/?id=${programB.courses.join("%2C")}`, + `/api/v2/courses/?id=${firstCourseIds.join("%2C")}`, ), - { results: coursesB }, + { results: [firstCourseB, firstCourseA] }, // Response order should match request order ) renderWithProviders() @@ -168,8 +190,47 @@ describe("OrganizationContent", () => { // Verify the order matches the programCollection.programs array [programB.id, programA.id] const programCards = collection.getAllByTestId("enrollment-card-desktop") - expect(programCards[0]).toHaveTextContent(coursesB[0].title) - expect(programCards[1]).toHaveTextContent(coursesA[0].title) + expect(programCards[0]).toHaveTextContent(firstCourseB!.title) + expect(programCards[1]).toHaveTextContent(firstCourseA!.title) + }) + + test("Program collection displays the first course from each program", async () => { + const { orgX, programA, programCollection, coursesA } = + setupProgramsAndCourses() + + programCollection.programs = [programA.id] + setMockResponse.get(urls.programCollections.programCollectionsList(), { + results: [programCollection], + }) + + setMockResponse.get( + expect.stringContaining(`/api/v2/programs/?id=${programA.id}`), + { results: [programA] }, + ) + + // Mock bulk API call for the first course + const firstCourseId = programA.courses[0] + const firstCourse = coursesA.find((c) => c.id === firstCourseId) + setMockResponse.get( + expect.stringContaining(`/api/v2/courses/?id=${firstCourseId}`), + { results: [firstCourse] }, + ) + + renderWithProviders() + + const collection = await screen.findByTestId("org-program-collection-root") + + // Wait for program cards to be rendered + const programCards = await waitFor(() => { + const programCards = within(collection).getAllByTestId( + "enrollment-card-desktop", + ) + expect(programCards.length).toBeGreaterThan(0) + return programCards + }) + + // Should display the first course by program.courseIds order + expect(programCards[0]).toHaveTextContent(firstCourse!.title) }) test("Does not render a program separately if it is part of a collection", async () => { @@ -265,34 +326,32 @@ describe("OrganizationContent", () => { const { orgX, programA, programB, programCollection, coursesB } = setupProgramsAndCourses() + // Modify programA to have no courses to test "at least one program has courses" + const programANoCourses = { ...programA, courses: [] } + // Set up the collection to include both programs - programCollection.programs = [programA.id, programB.id] + programCollection.programs = [programANoCourses.id, programB.id] setMockResponse.get(urls.programCollections.programCollectionsList(), { results: [programCollection], }) // Mock individual program API calls for the collection setMockResponse.get( - expect.stringContaining(`/api/v2/programs/?id=${programA.id}`), - { results: [programA] }, + expect.stringContaining(`/api/v2/programs/?id=${programANoCourses.id}`), + { results: [programANoCourses] }, ) setMockResponse.get( expect.stringContaining(`/api/v2/programs/?id=${programB.id}`), { results: [programB] }, ) - // Mock programA to have no courses, programB to have courses - setMockResponse.get( - expect.stringContaining( - `/api/v2/courses/?id=${programA.courses.join("%2C")}`, - ), - { results: [] }, - ) + // Mock bulk course API call - only programB has courses, so only its first course should be included + const firstCourseBId = programB.courses[0] + const firstCourseB = coursesB.find((c) => c.id === firstCourseBId) + setMockResponse.get( - expect.stringContaining( - `/api/v2/courses/?id=${programB.courses.join("%2C")}`, - ), - { results: coursesB }, + expect.stringContaining(`/api/v2/courses/?id=${firstCourseBId}`), + { results: [firstCourseB] }, ) renderWithProviders() @@ -307,11 +366,11 @@ describe("OrganizationContent", () => { // Should see the collection header expect(collection.getByText(programCollection.title)).toBeInTheDocument() - // Should see programB's courses + // Should see programB's course await waitFor(() => { - expect(collection.getAllByText(coursesB[0].title).length).toBeGreaterThan( - 0, - ) + expect( + collection.getAllByText(firstCourseB!.title).length, + ).toBeGreaterThan(0) }) }) diff --git a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx index 2612383a9f..ac9950533e 100644 --- a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx +++ b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx @@ -131,13 +131,9 @@ const ProgramDescription = styled(Typography)({ // Custom hook to handle multiple program queries and check if any have courses const useProgramCollectionCourses = (programIds: number[], orgId: number) => { const programQueries = useQueries({ - queries: programIds.map((programId) => ({ - ...programsQueries.programsList({ id: programId, org_id: orgId }), - queryKey: [ - ...programsQueries.programsList({ id: programId, org_id: orgId }) - .queryKey, - ], - })), + queries: programIds.map((programId) => + programsQueries.programsList({ id: programId, org_id: orgId }), + ), }) const isLoading = programQueries.some((query) => query.isLoading) @@ -175,6 +171,25 @@ const OrgProgramCollectionDisplay: React.FC<{ const sanitizedDescription = DOMPurify.sanitize(collection.description ?? "") const { isLoading, programsWithCourses, hasAnyCourses } = useProgramCollectionCourses(collection.programIds, orgId) + const firstCourseIds = programsWithCourses + .map((p) => p?.program.courseIds[0]) + .filter((id): id is number => id !== undefined) + const courses = useQuery({ + ...coursesQueries.coursesList({ + id: firstCourseIds, + org_id: orgId, + }), + enabled: firstCourseIds.length > 0, + }) + const rawCourses = + courses.data?.results.sort((a, b) => { + return firstCourseIds.indexOf(a.id) - firstCourseIds.indexOf(b.id) + }) ?? [] + const transformedCourses = transform.organizationCoursesWithContracts({ + courses: rawCourses, + contracts: contracts ?? [], + enrollments: enrollments ?? [], + }) const header = ( @@ -214,17 +229,26 @@ const OrgProgramCollectionDisplay: React.FC<{ {header} - {programsWithCourses.map((item) => - item ? ( - ( + - ) : null, - )} + ))} + {transformedCourses.map((course) => ( + + ))} ) @@ -260,8 +284,12 @@ const OrgProgramDisplay: React.FC<{ ) if (programLoading || courses.isLoading) return skeleton + const rawCourses = + courses.data?.results.sort((a, b) => { + return program.courseIds.indexOf(a.id) - program.courseIds.indexOf(b.id) + }) ?? [] const transformedCourses = transform.organizationCoursesWithContracts({ - courses: courses.data?.results ?? [], + courses: rawCourses, contracts: contracts ?? [], enrollments: courseRunEnrollments ?? [], }) @@ -307,59 +335,6 @@ const OrgProgramDisplay: React.FC<{ ) } -const ProgramCollectionItem: React.FC<{ - program: DashboardProgram - contracts?: ContractPage[] - enrollments?: CourseRunEnrollment[] - orgId: number -}> = ({ program, contracts, enrollments, orgId }) => { - return ( - - ) -} - -const ProgramCard: React.FC<{ - program: DashboardProgram - contracts?: ContractPage[] - enrollments?: CourseRunEnrollment[] - orgId: number -}> = ({ program, contracts, enrollments, orgId }) => { - const courses = useQuery( - coursesQueries.coursesList({ - id: program.courseIds, - org_id: orgId, - }), - ) - const skeleton = ( - - ) - if (courses.isLoading) return skeleton - const transformedCourses = transform.organizationCoursesWithContracts({ - courses: courses.data?.results ?? [], - contracts: contracts ?? [], - enrollments: enrollments ?? [], - }) - if (courses.isLoading || !transformedCourses.length) return skeleton - // For now we assume the first course is the main one for the program. - const course = transformedCourses[0] - return ( - - ) -} - const OrganizationRoot = styled.div({ display: "flex", flexDirection: "column", From 0f901eb20c8b70420c1aa89a4074851aa250afe9 Mon Sep 17 00:00:00 2001 From: Doof Date: Mon, 22 Sep 2025 13:54:22 +0000 Subject: [PATCH 6/6] Release 0.44.0 --- RELEASE.rst | 9 +++++++++ main/settings.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/RELEASE.rst b/RELEASE.rst index 15d167b6c8..09b3914db9 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -1,6 +1,15 @@ Release Notes ============= +Version 0.44.0 +-------------- + +- fix non-lexicographical ordering in org dashboard programs / program collections (#2523) +- canvas: citation urls for html content (#2521) +- chore(deps): update redis docker tag to v8 (#2397) +- Add browser header for podcast extraction (#2514) +- Fix flaky test (#2519) + Version 0.43.1 (Released September 18, 2025) -------------- diff --git a/main/settings.py b/main/settings.py index 63cb4f5585..fd8b8850b2 100644 --- a/main/settings.py +++ b/main/settings.py @@ -34,7 +34,7 @@ from main.settings_pluggy import * # noqa: F403 from openapi.settings_spectacular import open_spectacular_settings -VERSION = "0.43.1" +VERSION = "0.44.0" log = logging.getLogger()