diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4340078b60..3545a83571 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,7 +23,7 @@ jobs:
- 5432:5432
redis:
- image: redis:7.4.5
+ image: redis:8.2.1
ports:
- 6379:6379
diff --git a/RELEASE.rst b/RELEASE.rst
index 15d167b6c8..09b3914db9 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -1,6 +1,15 @@
Release Notes
=============
+Version 0.44.0
+--------------
+
+- fix non-lexicographical ordering in org dashboard programs / program collections (#2523)
+- canvas: citation urls for html content (#2521)
+- chore(deps): update redis docker tag to v8 (#2397)
+- Add browser header for podcast extraction (#2514)
+- Fix flaky test (#2519)
+
Version 0.43.1 (Released September 18, 2025)
--------------
diff --git a/docker-compose.services.yml b/docker-compose.services.yml
index 18cb9e7016..30e3b24d02 100644
--- a/docker-compose.services.yml
+++ b/docker-compose.services.yml
@@ -25,7 +25,7 @@ services:
redis:
profiles:
- backend
- image: redis:7.4.5
+ image: redis:8.2.1
healthcheck:
test: ["CMD", "redis-cli", "ping", "|", "grep", "PONG"]
interval: 3s
diff --git a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx
index d41a6aff88..4ab71c50db 100644
--- a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx
+++ b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.test.tsx
@@ -66,6 +66,31 @@ describe("OrganizationContent", () => {
})
})
+ it("displays courses in the correct order based on program.courseIds, regardless of API response order", async () => {
+ const { orgX, programA, coursesA } = setupProgramsAndCourses()
+
+ // Mock API to return courses in reverse order from program.courseIds
+ const reversedCoursesA = [...coursesA].reverse()
+ setMockResponse.get(
+ expect.stringContaining(
+ `/api/v2/courses/?id=${programA.courses.join("%2C")}`,
+ ),
+ { results: reversedCoursesA },
+ )
+
+ renderWithProviders()
+
+ const programElement = await screen.findByTestId("org-program-root")
+ const cards = await within(programElement).findAllByTestId(
+ "enrollment-card-desktop",
+ )
+
+ // Verify courses appear in program.courseIds order, not API response order
+ coursesA.forEach((course, i) => {
+ expect(cards[i]).toHaveTextContent(course.title)
+ })
+ })
+
test("Shows correct enrollment status", async () => {
const { orgX, programA, coursesA } = setupProgramsAndCourses()
const enrollments = [
@@ -132,19 +157,16 @@ describe("OrganizationContent", () => {
{ results: [programB] },
)
- // Mock the courses API calls for programs in the collection
- // Use dynamic matching since course IDs are randomly generated
- setMockResponse.get(
- expect.stringContaining(
- `/api/v2/courses/?id=${programA.courses.join("%2C")}`,
- ),
- { results: coursesA },
- )
+ // Mock the bulk course API call with first course from each program
+ const firstCourseA = coursesA.find((c) => c.id === programA.courses[0])
+ const firstCourseB = coursesB.find((c) => c.id === programB.courses[0])
+ const firstCourseIds = [programB.courses[0], programA.courses[0]] // B first, then A to match collection order
+
setMockResponse.get(
expect.stringContaining(
- `/api/v2/courses/?id=${programB.courses.join("%2C")}`,
+ `/api/v2/courses/?id=${firstCourseIds.join("%2C")}`,
),
- { results: coursesB },
+ { results: [firstCourseB, firstCourseA] }, // Response order should match request order
)
renderWithProviders()
@@ -168,8 +190,47 @@ describe("OrganizationContent", () => {
// Verify the order matches the programCollection.programs array [programB.id, programA.id]
const programCards = collection.getAllByTestId("enrollment-card-desktop")
- expect(programCards[0]).toHaveTextContent(coursesB[0].title)
- expect(programCards[1]).toHaveTextContent(coursesA[0].title)
+ expect(programCards[0]).toHaveTextContent(firstCourseB!.title)
+ expect(programCards[1]).toHaveTextContent(firstCourseA!.title)
+ })
+
+ test("Program collection displays the first course from each program", async () => {
+ const { orgX, programA, programCollection, coursesA } =
+ setupProgramsAndCourses()
+
+ programCollection.programs = [programA.id]
+ setMockResponse.get(urls.programCollections.programCollectionsList(), {
+ results: [programCollection],
+ })
+
+ setMockResponse.get(
+ expect.stringContaining(`/api/v2/programs/?id=${programA.id}`),
+ { results: [programA] },
+ )
+
+ // Mock bulk API call for the first course
+ const firstCourseId = programA.courses[0]
+ const firstCourse = coursesA.find((c) => c.id === firstCourseId)
+ setMockResponse.get(
+ expect.stringContaining(`/api/v2/courses/?id=${firstCourseId}`),
+ { results: [firstCourse] },
+ )
+
+ renderWithProviders()
+
+ const collection = await screen.findByTestId("org-program-collection-root")
+
+ // Wait for program cards to be rendered
+ const programCards = await waitFor(() => {
+ const programCards = within(collection).getAllByTestId(
+ "enrollment-card-desktop",
+ )
+ expect(programCards.length).toBeGreaterThan(0)
+ return programCards
+ })
+
+ // Should display the first course by program.courseIds order
+ expect(programCards[0]).toHaveTextContent(firstCourse!.title)
})
test("Does not render a program separately if it is part of a collection", async () => {
@@ -265,34 +326,32 @@ describe("OrganizationContent", () => {
const { orgX, programA, programB, programCollection, coursesB } =
setupProgramsAndCourses()
+ // Modify programA to have no courses to test "at least one program has courses"
+ const programANoCourses = { ...programA, courses: [] }
+
// Set up the collection to include both programs
- programCollection.programs = [programA.id, programB.id]
+ programCollection.programs = [programANoCourses.id, programB.id]
setMockResponse.get(urls.programCollections.programCollectionsList(), {
results: [programCollection],
})
// Mock individual program API calls for the collection
setMockResponse.get(
- expect.stringContaining(`/api/v2/programs/?id=${programA.id}`),
- { results: [programA] },
+ expect.stringContaining(`/api/v2/programs/?id=${programANoCourses.id}`),
+ { results: [programANoCourses] },
)
setMockResponse.get(
expect.stringContaining(`/api/v2/programs/?id=${programB.id}`),
{ results: [programB] },
)
- // Mock programA to have no courses, programB to have courses
- setMockResponse.get(
- expect.stringContaining(
- `/api/v2/courses/?id=${programA.courses.join("%2C")}`,
- ),
- { results: [] },
- )
+ // Mock bulk course API call - only programB has courses, so only its first course should be included
+ const firstCourseBId = programB.courses[0]
+ const firstCourseB = coursesB.find((c) => c.id === firstCourseBId)
+
setMockResponse.get(
- expect.stringContaining(
- `/api/v2/courses/?id=${programB.courses.join("%2C")}`,
- ),
- { results: coursesB },
+ expect.stringContaining(`/api/v2/courses/?id=${firstCourseBId}`),
+ { results: [firstCourseB] },
)
renderWithProviders()
@@ -307,11 +366,11 @@ describe("OrganizationContent", () => {
// Should see the collection header
expect(collection.getByText(programCollection.title)).toBeInTheDocument()
- // Should see programB's courses
+ // Should see programB's course
await waitFor(() => {
- expect(collection.getAllByText(coursesB[0].title).length).toBeGreaterThan(
- 0,
- )
+ expect(
+ collection.getAllByText(firstCourseB!.title).length,
+ ).toBeGreaterThan(0)
})
})
diff --git a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx
index 2612383a9f..ac9950533e 100644
--- a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx
+++ b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx
@@ -131,13 +131,9 @@ const ProgramDescription = styled(Typography)({
// Custom hook to handle multiple program queries and check if any have courses
const useProgramCollectionCourses = (programIds: number[], orgId: number) => {
const programQueries = useQueries({
- queries: programIds.map((programId) => ({
- ...programsQueries.programsList({ id: programId, org_id: orgId }),
- queryKey: [
- ...programsQueries.programsList({ id: programId, org_id: orgId })
- .queryKey,
- ],
- })),
+ queries: programIds.map((programId) =>
+ programsQueries.programsList({ id: programId, org_id: orgId }),
+ ),
})
const isLoading = programQueries.some((query) => query.isLoading)
@@ -175,6 +171,25 @@ const OrgProgramCollectionDisplay: React.FC<{
const sanitizedDescription = DOMPurify.sanitize(collection.description ?? "")
const { isLoading, programsWithCourses, hasAnyCourses } =
useProgramCollectionCourses(collection.programIds, orgId)
+ const firstCourseIds = programsWithCourses
+ .map((p) => p?.program.courseIds[0])
+ .filter((id): id is number => id !== undefined)
+ const courses = useQuery({
+ ...coursesQueries.coursesList({
+ id: firstCourseIds,
+ org_id: orgId,
+ }),
+ enabled: firstCourseIds.length > 0,
+ })
+ // Sort a copy so we don't mutate the array cached by React Query in place.
+ const rawCourses = [...(courses.data?.results ?? [])].sort((a, b) => {
+ return firstCourseIds.indexOf(a.id) - firstCourseIds.indexOf(b.id)
+ })
+ const transformedCourses = transform.organizationCoursesWithContracts({
+ courses: rawCourses,
+ contracts: contracts ?? [],
+ enrollments: enrollments ?? [],
+ })
const header = (
@@ -214,17 +229,26 @@ const OrgProgramCollectionDisplay: React.FC<{
{header}
- {programsWithCourses.map((item) =>
- item ? (
- (
+
- ) : null,
- )}
+ ))}
+ {transformedCourses.map((course) => (
+
+ ))}
)
@@ -260,8 +284,12 @@ const OrgProgramDisplay: React.FC<{
)
if (programLoading || courses.isLoading) return skeleton
+ // Sort a copy so we don't mutate the array cached by React Query in place.
+ const rawCourses = [...(courses.data?.results ?? [])].sort((a, b) => {
+ return program.courseIds.indexOf(a.id) - program.courseIds.indexOf(b.id)
+ })
const transformedCourses = transform.organizationCoursesWithContracts({
- courses: courses.data?.results ?? [],
+ courses: rawCourses,
contracts: contracts ?? [],
enrollments: courseRunEnrollments ?? [],
})
@@ -307,59 +335,6 @@ const OrgProgramDisplay: React.FC<{
)
}
-const ProgramCollectionItem: React.FC<{
- program: DashboardProgram
- contracts?: ContractPage[]
- enrollments?: CourseRunEnrollment[]
- orgId: number
-}> = ({ program, contracts, enrollments, orgId }) => {
- return (
-
- )
-}
-
-const ProgramCard: React.FC<{
- program: DashboardProgram
- contracts?: ContractPage[]
- enrollments?: CourseRunEnrollment[]
- orgId: number
-}> = ({ program, contracts, enrollments, orgId }) => {
- const courses = useQuery(
- coursesQueries.coursesList({
- id: program.courseIds,
- org_id: orgId,
- }),
- )
- const skeleton = (
-
- )
- if (courses.isLoading) return skeleton
- const transformedCourses = transform.organizationCoursesWithContracts({
- courses: courses.data?.results ?? [],
- contracts: contracts ?? [],
- enrollments: enrollments ?? [],
- })
- if (courses.isLoading || !transformedCourses.length) return skeleton
- // For now we assume the first course is the main one for the program.
- const course = transformedCourses[0]
- return (
-
- )
-}
-
const OrganizationRoot = styled.div({
display: "flex",
flexDirection: "column",
diff --git a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py
index dae0615681..e8b5049204 100644
--- a/learning_resources/etl/canvas.py
+++ b/learning_resources/etl/canvas.py
@@ -110,10 +110,14 @@ def _get_url_config(bucket, export_tempdir: str, url_config_file: str) -> dict:
bucket.download_file(url_config_file, url_config_path)
url_config = {}
with Path.open(url_config_path, "rb") as f:
- for url_item in json.loads(f.read().decode("utf-8")).get("course_files", []):
+ url_json = json.loads(f.read().decode("utf-8"))
+ for url_item in url_json.get("course_files", []):
url_key = url_item["file_path"]
url_key = unquote_plus(url_key.lstrip(url_key.split("/")[0]))
url_config[url_key] = url_item["url"]
+ for url_item in url_json.get("assignments", []) + url_json.get("pages", []):
+ url_key = url_item.get("name", url_item.get("title"))
+ url_config[url_key] = url_item.get("html_url")
return url_config
@@ -199,22 +203,23 @@ def transform_canvas_content_files(
"""
basedir = course_zipfile.name.split(".")[0]
zipfile_path = course_zipfile.absolute()
- # grab published module and file items
- published_items = [
- Path(item["path"]).resolve()
- for item in parse_module_meta(zipfile_path)["active"]
- ] + [
- Path(item["path"]).resolve()
- for item in parse_files_meta(zipfile_path)["active"]
- ]
- for item in parse_web_content(zipfile_path)["active"]:
- published_items.append(Path(item["path"]).resolve())
- published_items.extend(
- [
- Path(embedded_file).resolve()
- for embedded_file in item.get("embedded_files", [])
- ]
- )
+ all_published_items = (
+ parse_module_meta(zipfile_path)["active"]
+ + parse_files_meta(zipfile_path)["active"]
+ + parse_web_content(zipfile_path)["active"]
+ )
+ published_items = {}
+ for item in all_published_items:
+ path = Path(item["path"]).resolve()
+ published_items[path] = item
+ for embedded_file in item.get("embedded_files", []):
+ embedded_path = Path(embedded_file).resolve()
+ if embedded_path in published_items:
+ continue
+ published_items[embedded_path] = {
+ "path": embedded_path,
+ "title": "",
+ }
def _generate_content():
"""Inner generator for yielding content data"""
@@ -223,7 +228,8 @@ def _generate_content():
zipfile.ZipFile(zipfile_path, "r") as course_archive,
):
for member in course_archive.infolist():
- if Path(member.filename).resolve() in published_items:
+ member_path = Path(member.filename).resolve()
+ if member_path in published_items:
course_archive.extract(member, path=olx_path)
log.debug("processing active file %s", member.filename)
else:
@@ -233,7 +239,14 @@ def _generate_content():
url_path = content_data["source_path"].lstrip(
content_data["source_path"].split("/")[0]
)
- content_url = url_config.get(url_path, "")
+ item_meta = published_items.get(
+ Path(content_data["source_path"]).resolve(), {}
+ )
+
+ content_url = url_config.get(url_path) or url_config.get(
+ item_meta.get("title")
+ )
+ content_data["content_title"] = item_meta.get("title")
if content_url:
content_data["url"] = content_url
yield content_data
@@ -561,6 +574,19 @@ def _title_from_html(html: str) -> str:
return title.get_text().strip() if title else ""
+def _title_from_assignment_settings(xml_string: str) -> str:
+ """
+ Extract the title from assignment_settings.xml
+ """
+ try:
+ root = ElementTree.fromstring(xml_string)
+ except Exception:
+ log.exception("Error parsing XML")
+ return ""
+ title_elem = root.find("cccv1p0:title", NAMESPACES)
+ return title_elem.text.strip() if title_elem is not None and title_elem.text else ""
+
+
def parse_web_content(course_archive_path: str) -> dict:
"""
Parse html pages and assignments and return publish/active status of resources
@@ -588,9 +614,12 @@ def parse_web_content(course_archive_path: str) -> dict:
if assignment_settings:
xml_content = course_archive.read(assignment_settings)
workflow_state = _workflow_state_from_xml(xml_content)
+ title = _title_from_assignment_settings(xml_content)
+ canvas_type = "assignment"
else:
workflow_state = _workflow_state_from_html(html_content)
- title = _title_from_html(html_content)
+ title = _title_from_html(html_content)
+ canvas_type = "page"
lom_elem = (
resource_map_item.get("metadata", {})
@@ -606,6 +635,7 @@ def parse_web_content(course_archive_path: str) -> dict:
{
"title": title,
"path": file_path,
+ "canvas_type": canvas_type,
"embedded_files": embedded_files,
}
)
@@ -614,6 +644,7 @@ def parse_web_content(course_archive_path: str) -> dict:
{
"title": title,
"path": file_path,
+ "canvas_type": canvas_type,
"embedded_files": embedded_files,
}
)
@@ -635,7 +666,7 @@ def extract_resources_by_identifierref(manifest_xml: str) -> dict:
title = (
item.find("imscp:title", NAMESPACES).text
if item.find("imscp:title", NAMESPACES) is not None
- else "No Title"
+ else ""
)
resource = root.find(
f'.//imscp:resource[@identifier="{identifierref}"]', NAMESPACES
diff --git a/learning_resources/etl/canvas_test.py b/learning_resources/etl/canvas_test.py
index c0d10420d5..2c1c244022 100644
--- a/learning_resources/etl/canvas_test.py
+++ b/learning_resources/etl/canvas_test.py
@@ -195,7 +195,7 @@ def test_parse_module_meta_returns_active_and_unpublished(tmp_path):
"""
module_xml = b"""
-
+
Module 1
-
@@ -981,3 +981,101 @@ def test_embedded_files_from_html(tmp_path, mocker):
)
assert any(file["source_path"] == "web_resources/html_page.html" for file in files)
+
+
+def test_get_url_config_assignments_and_pages(mocker, tmp_path):
+ """
+ Test that _get_url_config correctly maps assignments and pages to URLs using their titles.
+ """
+ html_page_title = "html page"
+
+ url_config = {
+ html_page_title: "https://example.com/htmlpage",
+ "/file1.html": "https://example.com/file1",
+ }
+
+ run = LearningResourceRunFactory.create()
+
+ html_content = f"""
+
+ {html_page_title}
+
+
+
+
+ """
+ module_xml = b"""
+
+
+ Module 1
+
+
-
+ active
+ Item 1
+ false
+ false
+ RES1
+ resource
+
+
+
+
+ """
+ manifest_xml = b"""
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+ module 1
+
+
+
+
+
+ """
+ zip_path = make_canvas_zip_with_module_meta(tmp_path, module_xml, manifest_xml)
+ with zipfile.ZipFile(zip_path, "a") as zf:
+ zf.writestr("web_resources/file1.html", "content of file1")
+ zf.writestr("web_resources/file2.html", "content of file2")
+ zf.writestr("web_resources/html_page.html", html_content)
+ mocker.patch(
+ "learning_resources.etl.utils.extract_text_metadata",
+ return_value={"content": "test"},
+ )
+ mocker.patch("learning_resources.etl.canvas.bulk_resources_unpublished_actions")
+ results = list(
+ transform_canvas_content_files(
+ Path(zip_path),
+ run=run,
+ url_config=url_config,
+ overwrite=False,
+ )
+ )
+ results = list(results)
+ list(
+ zip(
+ [result["content_title"] for result in results],
+ [result["url"] for result in results],
+ )
+ )
+ results = dict(
+ zip(
+ [result["content_title"] for result in results],
+ [result["url"] for result in results],
+ )
+ )
+ assert results["html page"] == "https://example.com/htmlpage"
+ assert results["Item 1"] == "https://example.com/file1"
diff --git a/learning_resources/etl/podcast.py b/learning_resources/etl/podcast.py
index 0b07b84a7b..c1256003ca 100644
--- a/learning_resources/etl/podcast.py
+++ b/learning_resources/etl/podcast.py
@@ -19,6 +19,11 @@
CONFIG_FILE_REPO = "mitodl/open-podcast-data"
CONFIG_FILE_FOLDER = "podcasts"
TIMESTAMP_FORMAT = "%a, %d %b %Y %H:%M:%S %z"
+BROWSER_UA_HEADERS = {
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/39.0.2171.95 Safari/537.36"
+}
log = logging.getLogger()
@@ -128,7 +133,7 @@ def extract():
for playlist_config in configs:
rss_url = playlist_config["rss_url"]
try:
- response = requests.get(rss_url) # noqa: S113
+ response = requests.get(rss_url, headers=BROWSER_UA_HEADERS) # noqa: S113
response.raise_for_status()
feed = bs(response.content, "xml")
diff --git a/learning_resources/tasks.py b/learning_resources/tasks.py
index c897ab7086..7e937b4be8 100644
--- a/learning_resources/tasks.py
+++ b/learning_resources/tasks.py
@@ -207,9 +207,13 @@ def get_content_tasks( # noqa: PLR0913
)
if learning_resource_ids:
- learning_resources = LearningResource.objects.filter(
- id__in=learning_resource_ids, etl_source=etl_source
- ).values_list("id", flat=True)
+ learning_resources = (
+ LearningResource.objects.filter(
+ id__in=learning_resource_ids, etl_source=etl_source
+ )
+ .order_by("-id")
+ .values_list("id", flat=True)
+ )
else:
learning_resources = (
LearningResource.objects.filter(Q(published=True) | Q(test_mode=True))
diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py
index e489138e7b..143b5ecfd2 100644
--- a/learning_resources/tasks_test.py
+++ b/learning_resources/tasks_test.py
@@ -238,10 +238,13 @@ def test_get_content_tasks(
3, etl_source=etl_source, platform=platform
)
if with_learning_resource_ids:
- learning_resource_ids = [
- courses[0].learning_resource_id,
- courses[1].learning_resource_id,
- ]
+ learning_resource_ids = sorted(
+ [
+ courses[0].learning_resource_id,
+ courses[1].learning_resource_id,
+ ],
+ reverse=True,
+ )
else:
learning_resource_ids = None
s3_prefix = "course-prefix"
diff --git a/main/settings.py b/main/settings.py
index 63cb4f5585..fd8b8850b2 100644
--- a/main/settings.py
+++ b/main/settings.py
@@ -34,7 +34,7 @@
from main.settings_pluggy import * # noqa: F403
from openapi.settings_spectacular import open_spectacular_settings
-VERSION = "0.43.1"
+VERSION = "0.44.0"
log = logging.getLogger()