From b0509eba77b6dbc7582760c8882b7016f1abb719 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Fri, 5 Sep 2025 12:38:33 -0400
Subject: [PATCH 01/10] Update actions/checkout digest to 08eba0b (#2454)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 .github/workflows/ci.yml            | 12 ++++++------
 .github/workflows/openapi-diff.yml  |  4 ++--
 .github/workflows/publish-pages.yml |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 121cffd28f..4340078b60 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,7 +28,7 @@ jobs:
           - 6379:6379
 
     steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
 
       - name: update apt
         run: sudo apt-get update -y
@@ -92,7 +92,7 @@ jobs:
   javascript-tests:
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
       - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
         with:
           node-version: "^22.0.0"
@@ -151,7 +151,7 @@ jobs:
     runs-on: ubuntu-22.04
     steps:
       - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
 
       - name: Build the Docker image
         env:
@@ -199,7 +199,7 @@ jobs:
     runs-on: ubuntu-22.04
     steps:
       - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
 
       - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
         with:
@@ -223,7 +223,7 @@ jobs:
       GENERATOR_OUTPUT_DIR_VC: ./frontends/api/src/generated/v0
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
       - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
         with:
           node-version: "^22.0.0"
@@ -262,7 +262,7 @@ jobs:
       GENERATOR_OUTPUT_DIR_VC: ./frontends/api/src/generated/v1
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
       - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
         with:
           node-version: "^22.0.0"
diff --git a/.github/workflows/openapi-diff.yml b/.github/workflows/openapi-diff.yml
index 7611fe521b..2e963acf57 100644
--- a/.github/workflows/openapi-diff.yml
+++ b/.github/workflows/openapi-diff.yml
@@ -5,12 +5,12 @@ jobs:
     runs-on: ubuntu-22.04
     steps:
       - name: Checkout HEAD
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
         with:
           ref: ${{ github.head_ref }}
           path: head
       - name: Checkout BASE
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
         with:
           ref: ${{ github.base_ref }}
           path: base
diff --git a/.github/workflows/publish-pages.yml b/.github/workflows/publish-pages.yml
index 8be9f1897d..90271f976d 100644
--- a/.github/workflows/publish-pages.yml
+++ b/.github/workflows/publish-pages.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-22.04
     steps:
       - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
 
       - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
         with:

From ec1b8707def5e0a72a72b0c60ecd8ffad24f8582 Mon Sep 17 00:00:00 2001
From: Matt Bertrand <mrbertrand@gmail.com>
Date: Fri, 5 Sep 2025 14:57:01 -0400
Subject: [PATCH 02/10] Stop running Sloan news/event tasks (#2493)

---
 main/settings_celery.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/main/settings_celery.py b/main/settings_celery.py
index 41995aa665..eb6c0e265f 100644
--- a/main/settings_celery.py
+++ b/main/settings_celery.py
@@ -108,18 +108,6 @@
             "NEWS_EVENTS_MITPE_NEWS_SCHEDULE_SECONDS", 60 * 60 * 3
         ),  # default is every 3 hours
     },
-    "update_sloan_news": {
-        "task": "news_events.tasks.get_sloan_exec_news",
-        "schedule": get_int(
-            "NEWS_EVENTS_SLOAN_EXEC_NEWS_SCHEDULE_SECONDS", 60 * 60 * 3
-        ),  # default is every 3 hours
-    },
-    "update_sloan_webinars": {
-        "task": "news_events.tasks.get_sloan_exec_webinars",
-        "schedule": get_int(
-            "NEWS_EVENTS_SLOAN_EXEC_WEBINAR_SCHEDULE_SECONDS", 60 * 60 * 12
-        ),  # default is every 12 hours
-    },
     "update_sloan_courses": {
         "task": "learning_resources.tasks.get_sloan_data",
         "schedule": crontab(minute=30, hour=4),  # 12:30am EST

From 0a746ae810b9a099784ebcdbb085017d794a3730 Mon Sep 17 00:00:00 2001
From: Chris Chudzicki <christopher.chudzicki@gmail.com>
Date: Fri, 5 Sep 2025 15:31:25 -0400
Subject: [PATCH 03/10] Toggle NextJS image optimization with an env var
 (#2385)

* turn off image optimization

* add flag to toggle image optimization
---
 env/frontend.env              | 1 +
 frontends/main/next.config.js | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/env/frontend.env b/env/frontend.env
index 0434f13695..0225b9a761 100644
--- a/env/frontend.env
+++ b/env/frontend.env
@@ -1,6 +1,7 @@
 NODE_ENV=development
 PORT=8062
 SENTRY_ENV=dev # Re-enable sentry
+OPTIMIZE_IMAGES="true"
 
 # Environment variables with `NEXT_PUBLIC_` prefix are exposed to the client side
 NEXT_PUBLIC_ORIGIN=${MITOL_APP_BASE_URL}
diff --git a/frontends/main/next.config.js b/frontends/main/next.config.js
index f2f968dbb7..23a66e4ddb 100644
--- a/frontends/main/next.config.js
+++ b/frontends/main/next.config.js
@@ -3,6 +3,10 @@ const { validateEnv } = require("./validateEnv")
 
 validateEnv()
 
+const OPTIMIZE_IMAGES = Boolean(
+  (process.env.OPTIMIZE_IMAGES ?? "true") === "true",
+)
+
 const processFeatureFlags = () => {
   const featureFlagPrefix =
     process.env.NEXT_PUBLIC_POSTHOG_FEATURE_PREFIX || "FEATURE_"
@@ -90,6 +94,7 @@ const nextConfig = {
   transpilePackages: ["@mitodl/smoot-design/ai"],
 
   images: {
+    unoptimized: !OPTIMIZE_IMAGES,
     remotePatterns: [
       {
         hostname: "**",

From 1a8a975d720374f4431da9c48d03d51106ee5964 Mon Sep 17 00:00:00 2001
From: Shankar Ambady <shanbady@gmail.com>
Date: Fri, 5 Sep 2025 16:37:49 -0400
Subject: [PATCH 04/10] adding fix for flaky test (#2491)

---
 learning_resources/tasks_test.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py
index 152c26590d..26e060e4e3 100644
--- a/learning_resources/tasks_test.py
+++ b/learning_resources/tasks_test.py
@@ -16,6 +16,7 @@
 from learning_resources.etl.constants import MARKETING_PAGE_FILE_TYPE, ETLSource
 from learning_resources.factories import (
     LearningResourceFactory,
+    LearningResourcePlatformFactory,
 )
 from learning_resources.models import LearningResource
 from learning_resources.tasks import (
@@ -686,8 +687,19 @@ def test_remove_duplicate_resources(mocker, mocked_celery):
     """
     duplicate_id = "duplicate_id"
 
-    LearningResourceFactory.create_batch(3, readable_id=duplicate_id, published=False)
-    LearningResourceFactory.create(readable_id=duplicate_id)
+    for platform_type in [PlatformType.edx, PlatformType.xpro, PlatformType.youtube]:
+        LearningResourceFactory.create(
+            readable_id=duplicate_id,
+            published=False,
+            platform=LearningResourcePlatformFactory.create(code=platform_type.name),
+        )
+
+    LearningResourceFactory.create(
+        readable_id=duplicate_id,
+        platform=LearningResourcePlatformFactory.create(
+            code=platform_type.mitxonline.name
+        ),
+    )
     assert LearningResource.objects.filter(readable_id=duplicate_id).count() == 4
     with pytest.raises(mocked_celery.replace_exception_class):
         remove_duplicate_resources()

From b8b30cccc32084ad1e6b3e6f6ca688062e0e1b9a Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Mon, 8 Sep 2025 10:41:14 -0400
Subject: [PATCH 05/10] Update dependency Django to v4.2.24 (#2496)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 poetry.lock    | 8 ++++----
 pyproject.toml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index d535c8d88d..115dc17662 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1431,14 +1431,14 @@ static3 = "*"
 
 [[package]]
 name = "django"
-version = "4.2.23"
+version = "4.2.24"
 description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design."
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "django-4.2.23-py3-none-any.whl", hash = "sha256:dafbfaf52c2f289bd65f4ab935791cb4fb9a198f2a5ba9faf35d7338a77e9803"},
-    {file = "django-4.2.23.tar.gz", hash = "sha256:42fdeaba6e6449d88d4f66de47871015097dc6f1b87910db00a91946295cfae4"},
+    {file = "django-4.2.24-py3-none-any.whl", hash = "sha256:a6527112c58821a0dfc5ab73013f0bdd906539790a17196658e36e66af43c350"},
+    {file = "django-4.2.24.tar.gz", hash = "sha256:40cd7d3f53bc6cd1902eadce23c337e97200888df41e4a73b42d682f23e71d80"},
 ]
 
 [package.dependencies]
@@ -9063,4 +9063,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = "~3.12"
-content-hash = "aaf3bc99e70404eca795dc3c392b8730509d674637e69836e51ac25aaca0b5ce"
+content-hash = "9b822fba05eb04db225295485b3c412f1dc240bd8a34270ff24bf0980026098b"
diff --git a/pyproject.toml b/pyproject.toml
index ac8873b96c..e5314fbebe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ requires-poetry = ">2.1,<3"
 
 [tool.poetry.dependencies]
 python = "~3.12"
-Django = "4.2.23"
+Django = "4.2.24"
 attrs = "^25.0.0"
 base36 = "^0.1.1"
 beautifulsoup4 = "^4.8.2"

From 084e253b2839a6c3d407ad444ed85ab0de2a3737 Mon Sep 17 00:00:00 2001
From: James Kachel <james@jkachel.com>
Date: Mon, 8 Sep 2025 11:01:54 -0500
Subject: [PATCH 06/10] Set page size to 30 for the course query for programs
 in the org dash (#2494)

---
 .../DashboardPage/CoursewareDisplay/test-utils.ts   | 13 +++++++++++--
 .../app-pages/DashboardPage/OrganizationContent.tsx |  6 +++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts
index ae3ca0c42a..e97fe24e60 100644
--- a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts
+++ b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts
@@ -154,13 +154,21 @@ const setupProgramsAndCourses = () => {
     { results: [programB] },
   )
   setMockResponse.get(
-    urls.courses.coursesList({ id: programA.courses, org_id: orgX.id }),
+    urls.courses.coursesList({
+      id: programA.courses,
+      org_id: orgX.id,
+      page_size: 30,
+    }),
     {
       results: coursesA.results,
     },
   )
   setMockResponse.get(
-    urls.courses.coursesList({ id: programB.courses, org_id: orgX.id }),
+    urls.courses.coursesList({
+      id: programB.courses,
+      org_id: orgX.id,
+      page_size: 30,
+    }),
     {
       results: coursesB.results,
     },
@@ -262,6 +270,7 @@ function setupOrgDashboardMocks(
       mitxonline.urls.courses.coursesList({
         id: program.courses,
         org_id: org.id,
+        page_size: 30,
       }),
       { results: courses },
     )
diff --git a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx
index 27b91f0861..f3dacd8356 100644
--- a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx
+++ b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx
@@ -250,7 +250,11 @@ const OrgProgramDisplay: React.FC<{
   )
   const hasValidCertificate = !!programEnrollment?.certificate
   const courses = useQuery(
-    coursesQueries.coursesList({ id: program.courseIds, org_id: orgId }),
+    coursesQueries.coursesList({
+      id: program.courseIds,
+      org_id: orgId,
+      page_size: 30,
+    }),
   )
   const skeleton = (
     <Skeleton width="100%" height="65px" style={{ marginBottom: "16px" }} />

From 61c92bc93acc711b35ec6c6f94ef1deb702f876e Mon Sep 17 00:00:00 2001
From: Carey P Gumaer <gumaerc@mit.edu>
Date: Mon, 8 Sep 2025 13:50:24 -0400
Subject: [PATCH 07/10] redirect org users to org dashboard (#2482)

* redirect user to org dashboard on login based on the first org they are determined to be a part of, if they are a part of one

* fix test

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* only redirect to org dashboard on first login, and always skip onboarding for org users

* redirect using APP_BASE_URL

* remove org keycloak scope from default env and add a note about enabling it

* fix tests, use settings fixture in authentication views tests

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 authentication/views.py       | 41 +++++++++++++++---
 authentication/views_test.py  | 79 +++++++++++++++++++++++++++++++----
 env/backend.env               |  1 +
 env/backend.local.example.env |  1 +
 main/settings.py              |  1 +
 5 files changed, 111 insertions(+), 12 deletions(-)

diff --git a/authentication/views.py b/authentication/views.py
index 992988186a..6ab3592a3c 100644
--- a/authentication/views.py
+++ b/authentication/views.py
@@ -1,14 +1,16 @@
 """Authentication views"""
 
 import logging
+from urllib.parse import urljoin
 
 from django.contrib.auth import logout
 from django.shortcuts import redirect
 from django.utils.http import url_has_allowed_host_and_scheme, urlencode
+from django.utils.text import slugify
 from django.views import View
 
 from main import settings
-from main.middleware.apisix_user import ApisixUserMiddleware
+from main.middleware.apisix_user import ApisixUserMiddleware, decode_apisix_headers
 
 log = logging.getLogger(__name__)
 
@@ -64,6 +66,8 @@ class CustomLoginView(View):
     Redirect the user to the appropriate url after login
     """
 
+    header = "HTTP_X_USERINFO"
+
     def get(
         self,
         request,
@@ -76,10 +80,32 @@ def get(
         redirect_url = get_redirect_url(request)
         if not request.user.is_anonymous:
             profile = request.user.profile
-            if not profile.has_logged_in:
-                profile.has_logged_in = True
-                profile.save()
-            if (
+
+            apisix_header = decode_apisix_headers(request, self.header)
+
+            # Check if user belongs to any organizations
+            user_organizations = (
+                apisix_header.get("organizations", {}) if apisix_header else {}
+            )
+
+            if user_organizations:
+                # First-time login for org user: redirect to org dashboard
+                if not profile.has_logged_in:
+                    first_org_name = next(iter(user_organizations.keys()))
+                    org_slug = slugify(first_org_name)
+
+                    log.info(
+                        "User %s belongs to organization: %s (slug: %s)",
+                        request.user.email,
+                        first_org_name,
+                        org_slug,
+                    )
+
+                    redirect_url = urljoin(
+                        settings.APP_BASE_URL, f"/dashboard/organization/{org_slug}"
+                    )
+            # Non-organization users: apply onboarding logic
+            elif (
                 not profile.completed_onboarding
                 and request.GET.get("skip_onboarding", "0") == "0"
             ):
@@ -87,4 +113,9 @@ def get(
                 redirect_url = f"{settings.MITOL_NEW_USER_LOGIN_URL}?{params}"
                 profile.completed_onboarding = True
                 profile.save()
+
+            if not profile.has_logged_in:
+                profile.has_logged_in = True
+                profile.save()
+
         return redirect(redirect_url)
diff --git a/authentication/views_test.py b/authentication/views_test.py
index 637af019ba..5f8da73272 100644
--- a/authentication/views_test.py
+++ b/authentication/views_test.py
@@ -3,9 +3,9 @@
 import json
 from base64 import b64encode
 from unittest.mock import MagicMock
+from urllib.parse import urljoin
 
 import pytest
-from django.conf import settings
 from django.test import RequestFactory
 from django.urls import reverse
 
@@ -28,10 +28,18 @@ def test_custom_login(mocker, next_url, allowed):
     assert get_redirect_url(mock_request) == (next_url if allowed else "/app")
 
 
-@pytest.mark.parametrize("has_apisix_header", [True, False])
-@pytest.mark.parametrize("next_url", ["/search", None])
-def test_logout(mocker, next_url, client, user, has_apisix_header):
+@pytest.mark.parametrize(
+    "test_params",
+    [
+        (True, "/search"),
+        (True, None),
+        (False, "/search"),
+        (False, None),
+    ],
+)
+def test_logout(mocker, client, user, test_params, settings):
     """User should be properly redirected and logged out"""
+    has_apisix_header, next_url = test_params
     header_str = b64encode(
         json.dumps(
             {
@@ -55,10 +63,10 @@ def test_logout(mocker, next_url, client, user, has_apisix_header):
     mock_logout.assert_called_once()
 
 
-@pytest.mark.parametrize("is_authenticated", [True])
-@pytest.mark.parametrize("has_next", [False])
-def test_next_logout(mocker, client, user, is_authenticated, has_next):
+@pytest.mark.parametrize("test_params", [(True, False)])
+def test_next_logout(mocker, client, user, test_params, settings):
     """Test logout redirect cache assignment"""
+    is_authenticated, has_next = test_params
     next_url = "https://ocw.mit.edu"
     mock_request = mocker.MagicMock(
         GET={"next": next_url if has_next else None},
@@ -211,3 +219,60 @@ def test_custom_login_view_first_time_login_sets_has_logged_in(mocker):
 
     # Verify redirect was called with the correct URL
     mock_redirect.assert_called_once_with("/dashboard")
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        (
+            (False, False),
+            "/dashboard/organization/test-organization",
+        ),  # First-time login → org dashboard
+        (
+            (False, True),
+            "/dashboard/organization/test-organization",
+        ),  # First-time login → org dashboard
+        ((True, False), "/app"),  # Subsequent login → normal app (not onboarding!)
+        ((True, True), "/app"),  # Subsequent login → normal app
+    ],
+)
+def test_login_org_user_redirect(mocker, client, user, test_case, settings):
+    """Test organization user redirect behavior - org users skip onboarding regardless of onboarding status"""
+    # Unpack test case
+    profile_state, expected_url = test_case
+    has_logged_in, completed_onboarding = profile_state
+
+    # Set up user profile based on test scenario
+    user.profile.has_logged_in = has_logged_in
+    user.profile.completed_onboarding = completed_onboarding
+    user.profile.save()
+
+    header_str = b64encode(
+        json.dumps(
+            {
+                "preferred_username": user.username,
+                "email": user.email,
+                "sub": user.global_id,
+                "organization": {
+                    "Test Organization": {
+                        "role": "member",
+                        "id": "org-123",
+                    }
+                },
+            }
+        ).encode()
+    )
+    client.force_login(user)
+    response = client.get(
+        "/login/",
+        follow=False,
+        HTTP_X_USERINFO=header_str,
+    )
+    assert response.status_code == 302
+    # Handle environment differences - in some envs it returns full URL, in others just path
+    expected_full_url = urljoin(settings.APP_BASE_URL, expected_url)
+    assert response.url in [expected_url, expected_full_url]
+
+    # Verify that org users are never sent to onboarding
+    # (onboarding URL would contain settings.MITOL_NEW_USER_LOGIN_URL)
+    assert settings.MITOL_NEW_USER_LOGIN_URL not in response.url
diff --git a/env/backend.env b/env/backend.env
index acb1a8eecd..ce267160e2 100644
--- a/env/backend.env
+++ b/env/backend.env
@@ -48,6 +48,7 @@ KEYCLOAK_CLIENT_ID=apisix
 KEYCLOAK_CLIENT_SECRET=HckCZXToXfaetbBx0Fo3xbjnC468oMi4 # pragma: allowlist-secret
 KEYCLOAK_DISCOVERY_URL=http://kc.ol.local:8066/realms/ol-local/.well-known/openid-configuration
 KEYCLOAK_REALM_NAME=ol-local
+# For some organization related functionality, add organization:* if you have orgs enabled
 KEYCLOAK_SCOPES="openid profile ol-profile"
 KEYCLOAK_SVC_KEYSTORE_PASSWORD=supertopsecret1234
 KEYCLOAK_SVC_HOSTNAME=kc.ol.local
diff --git a/env/backend.local.example.env b/env/backend.local.example.env
index ef9edf1fba..89ad1cf4e3 100644
--- a/env/backend.local.example.env
+++ b/env/backend.local.example.env
@@ -31,6 +31,7 @@ KEYCLOAK_CLIENT_ID=apisix
 KEYCLOAK_CLIENT_SECRET=HckCZXToXfaetbBx0Fo3xbjnC468oMi4 # pragma: allowlist-secret
 KEYCLOAK_DISCOVERY_URL=http://kc.ol.local:8066/realms/ol-local/.well-known/openid-configuration
 KEYCLOAK_REALM_NAME=ol-local
+# For some organization related functionality, add organization:* if you have orgs enabled
 KEYCLOAK_SCOPES="openid profile ol-profile"
 KEYCLOAK_SVC_KEYSTORE_PASSWORD=supertopsecret1234
 KEYCLOAK_SVC_HOSTNAME=kc.ol.local
diff --git a/main/settings.py b/main/settings.py
index 71f5793700..d9e0e5acc7 100644
--- a/main/settings.py
+++ b/main/settings.py
@@ -356,6 +356,7 @@
         "first_name": "given_name",
         "last_name": "family_name",
         "name": "name",
+        "organizations": "organization",
     },
     "profiles.Profile": {
         "name": "name",

From b26d34f042cf9cf32e5b60a2acc46e1c886f4488 Mon Sep 17 00:00:00 2001
From: Chris Chudzicki <christopher.chudzicki@gmail.com>
Date: Tue, 9 Sep 2025 16:24:02 -0400
Subject: [PATCH 08/10] Fix userlist stale data (#2501)

* fix type mismatch

* oops

* invalidate more things
---
 .../api/src/hooks/learningResources/index.ts  | 24 +++++++++++++++----
 .../src/app/dashboard/my-lists/[id]/page.tsx  | 11 +++++----
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/frontends/api/src/hooks/learningResources/index.ts b/frontends/api/src/hooks/learningResources/index.ts
index b8dad9135a..34774d47c6 100644
--- a/frontends/api/src/hooks/learningResources/index.ts
+++ b/frontends/api/src/hooks/learningResources/index.ts
@@ -97,7 +97,16 @@ const useLearningResourceSetUserListRelationships = () => {
       params: LearningResourcesApiLearningResourcesUserlistsPartialUpdateRequest,
     ) => learningResourcesApi.learningResourcesUserlistsPartialUpdate(params),
     onSettled: () => {
-      queryClient.invalidateQueries({ queryKey: userlistKeys.membershipList() })
+      /**
+       * We need to invalidate:
+       * - membership check
+       * - list of user lists (count has changed)
+       * - userlist detail annd listing for any lists we modified
+       *
+       * That's a lot. Let's just invalidate root.
+       * Additionally, the lists we've removed from the resource are not easily available.
+       */
+      queryClient.invalidateQueries({ queryKey: userlistKeys.root })
     },
   })
 }
@@ -110,9 +119,16 @@ const useLearningResourceSetLearningPathRelationships = () => {
     ) =>
       learningResourcesApi.learningResourcesLearningPathsPartialUpdate(params),
     onSettled: () => {
-      queryClient.invalidateQueries({
-        queryKey: learningPathKeys.membershipList(),
-      })
+      /**
+       * We need to invalidate:
+       * - membership check
+       * - list of user lists (count has changed)
+       * - userlist detail annd listing for any lists we modified
+       *
+       * That's a lot. Let's just invalidate root.
+       * Additionally, the lists we've removed from the resource are not easily available.
+       */
+      queryClient.invalidateQueries({ queryKey: learningPathKeys.root })
     },
   })
 }
diff --git a/frontends/main/src/app/dashboard/my-lists/[id]/page.tsx b/frontends/main/src/app/dashboard/my-lists/[id]/page.tsx
index ce9897ceee..4567085512 100644
--- a/frontends/main/src/app/dashboard/my-lists/[id]/page.tsx
+++ b/frontends/main/src/app/dashboard/my-lists/[id]/page.tsx
@@ -1,14 +1,15 @@
 import React from "react"
 import { UserListDetailsContent } from "@/app-pages/DashboardPage/UserListDetailsContent"
-import { PageParams } from "@/app/types"
 import invariant from "tiny-invariant"
+import { PageParams } from "@/app/types"
 
-const Page: React.FC<PageParams<object, { id: number }>> = async ({
+const Page: React.FC<PageParams<never, { id: string }>> = async ({
   params,
 }) => {
-  const resolved = await params
-  invariant(resolved?.id, "id is required")
-  return <UserListDetailsContent userListId={resolved.id} />
+  const resolved = await params!
+  const id = Number(resolved.id)
+  invariant(id, "id is required")
+  return <UserListDetailsContent userListId={id} />
 }
 
 export default Page

From d281eadfc5a4848fd1f0a8f74f2daa32ce1925fc Mon Sep 17 00:00:00 2001
From: Shankar Ambady <shanbady@gmail.com>
Date: Wed, 10 Sep 2025 09:43:40 -0400
Subject: [PATCH 09/10] ingest canvas html content (#2502)

* initial working version of gathering assignments and pages

* test fix

* check metadata for visibility to students vs authors

* type hints

* remove typehint

* consolidation of namespaces

* explicitely exclude tutor folder no matter what and added tests

* fix test

* more conditionals for assignment workflow state
---
 learning_resources/etl/canvas.py      | 275 +++++++++++++++++++-------
 learning_resources/etl/canvas_test.py | 216 +++++++++++++++++++-
 2 files changed, 423 insertions(+), 68 deletions(-)

diff --git a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py
index a6646042d8..f9fa6c7aff 100644
--- a/learning_resources/etl/canvas.py
+++ b/learning_resources/etl/canvas.py
@@ -12,6 +12,7 @@
 from urllib.parse import unquote_plus
 
 import pypdfium2 as pdfium
+from bs4 import BeautifulSoup
 from defusedxml import ElementTree
 from django.conf import settings
 from litellm import completion
@@ -42,6 +43,14 @@
 log = logging.getLogger(__name__)
 
 
+NAMESPACES = {
+    "cccv1p0": "http://canvas.instructure.com/xsd/cccv1p0",
+    "imscp": "http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1",
+    "lom": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/resource",
+    "lomimscc": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/manifest",
+}
+
+
 def sync_canvas_archive(bucket, key: str, overwrite):
     """
     Sync a Canvas course archive from S3
@@ -180,13 +189,20 @@ def transform_canvas_content_files(
     basedir = course_zipfile.name.split(".")[0]
     zipfile_path = course_zipfile.absolute()
     # grab published module and file items
-    published_items = [
-        Path(item["path"]).resolve()
-        for item in parse_module_meta(zipfile_path)["active"]
-    ] + [
-        Path(item["path"]).resolve()
-        for item in parse_files_meta(zipfile_path)["active"]
-    ]
+    published_items = (
+        [
+            Path(item["path"]).resolve()
+            for item in parse_module_meta(zipfile_path)["active"]
+        ]
+        + [
+            Path(item["path"]).resolve()
+            for item in parse_files_meta(zipfile_path)["active"]
+        ]
+        + [
+            Path(item["path"]).resolve()
+            for item in parse_web_content(zipfile_path)["active"]
+        ]
+    )
 
     def _generate_content():
         """Inner generator for yielding content data"""
@@ -272,20 +288,51 @@ def transform_canvas_problem_files(
 
 
 def parse_context_xml(course_archive_path: str) -> dict:
+    """
+    Parse course_settings/context.xml and return context info
+    """
     with zipfile.ZipFile(course_archive_path, "r") as course_archive:
         context = course_archive.read("course_settings/context.xml")
     root = ElementTree.fromstring(context)
-    namespaces = {"ns": "http://canvas.instructure.com/xsd/cccv1p0"}
     context_info = {}
     item_keys = ["course_id", "root_account_id", "canvas_domain", "root_account_name"]
     for key in item_keys:
-        element = root.find(f"ns:{key}", namespaces)
+        element = root.find(f"cccv1p0:{key}", NAMESPACES)
         if element is not None:
             context_info[key] = element.text
 
     return context_info
 
 
+def is_date_locked(lock_at: str, unlock_at: str) -> bool:
+    """
+    Determine if a resource is currently date-locked based
+    on lock_at and unlock_at strings.
+    Args:
+        lock_at (str): ISO 8601 date string when the resource locks
+        unlock_at (str): ISO 8601 date string when the resource unlocks
+    Returns:
+        bool: True if the resource is currently locked, False otherwise
+    """
+    now = now_in_utc()
+    if unlock_at and unlock_at.lower() != "nil":
+        try:
+            unlock_dt = datetime.fromisoformat(unlock_at.replace("Z", "+00:00"))
+            if now < unlock_dt:
+                return True
+        except Exception:
+            log.exception("Error parsing unlock_at date: %s", unlock_at)
+
+    if lock_at and lock_at.lower() != "nil":
+        try:
+            lock_dt = datetime.fromisoformat(lock_at.replace("Z", "+00:00"))
+            if now > lock_dt:
+                return True
+        except Exception:
+            log.exception("Error parsing lock_at date: %s", lock_at)
+    return False
+
+
 def is_file_published(file_meta: dict) -> bool:
     """
     Determine if a Canvas file (from files_meta.xml) is published/visible to students.
@@ -295,35 +342,18 @@ def is_file_published(file_meta: dict) -> bool:
     Returns:
         bool: True if file is published/visible, False otherwise.
     """
-    now = now_in_utc()
 
     hidden = str(file_meta.get("hidden", "false")).lower() == "true"
     locked = str(file_meta.get("locked", "false")).lower() == "true"
-
     unlock_at = file_meta.get("unlock_at")
     lock_at = file_meta.get("lock_at")
-
     visibility = file_meta.get("visibility", "inherit")
-
     # If explicitly hidden or locked → unpublished
     if hidden or locked:
         return False
 
-    if unlock_at and unlock_at.lower() != "nil":
-        try:
-            unlock_dt = datetime.fromisoformat(unlock_at.replace("Z", "+00:00"))
-            if now < unlock_dt:
-                return False
-        except Exception:
-            log.exception("Error parsing date: %s", unlock_at)
-
-    if lock_at and lock_at.lower() != "nil":
-        try:
-            lock_dt = datetime.fromisoformat(lock_at.replace("Z", "+00:00"))
-            if now > lock_dt:
-                return False
-        except Exception:
-            log.exception("Error parsing date: %s", lock_at)
+    if is_date_locked(lock_at, unlock_at):
+        return False
     # Visibility rules
     if visibility in ("course", "inherit"):
         return True
@@ -344,11 +374,9 @@ def parse_files_meta(course_archive_path: str) -> dict:
         files_xml = course_archive.read(files_meta_path)
         manifest_xml = course_archive.read("imsmanifest.xml")
     resource_map = extract_resources_by_identifier(manifest_xml)
-
     root = ElementTree.fromstring(files_xml)
-    namespaces = {"c": "http://canvas.instructure.com/xsd/cccv1p0"}
     try:
-        for file_elem in root.findall(".//c:file", namespaces):
+        for file_elem in root.findall(".//cccv1p0:file", NAMESPACES):
             meta = dict(file_elem.attrib)
             for child in file_elem:
                 tag = child.tag
@@ -367,7 +395,10 @@ def parse_files_meta(course_archive_path: str) -> dict:
                 file_path = Path(file)
                 file_data["path"] = file_path
                 file_data["title"] = file_data.get("display_name")
-                if file_data["published"]:
+                # explicitly exclude files in web_resources/ai/tutor
+                if file_data["published"] and not file.startswith(
+                    settings.CANVAS_TUTORBOT_FOLDER
+                ):
                     publish_status["active"].append(file_data)
                 else:
                     publish_status["unpublished"].append(file_data)
@@ -387,19 +418,18 @@ def parse_module_meta(course_archive_path: str) -> dict:
     resource_map = extract_resources_by_identifierref(manifest_xml)
     publish_status = {"active": [], "unpublished": []}
     try:
-        namespaces = {"ns": "http://canvas.instructure.com/xsd/cccv1p0"}
         root = ElementTree.fromstring(module_xml)
-        for module in root.findall(".//ns:module", namespaces):
-            module_title = module.find("ns:title", namespaces).text
-            for item in module.findall("ns:items/ns:item", namespaces):
-                item_state = item.find("ns:workflow_state", namespaces).text
-                item_title = item.find("ns:title", namespaces).text
+        for module in root.findall(".//cccv1p0:module", NAMESPACES):
+            module_title = module.find("cccv1p0:title", NAMESPACES).text
+            for item in module.findall("cccv1p0:items/cccv1p0:item", NAMESPACES):
+                item_state = item.find("cccv1p0:workflow_state", NAMESPACES).text
+                item_title = item.find("cccv1p0:title", NAMESPACES).text
                 identifierref = (
-                    item.find("ns:identifierref", namespaces).text
-                    if item.find("ns:identifierref", namespaces) is not None
+                    item.find("cccv1p0:identifierref", NAMESPACES).text
+                    if item.find("cccv1p0:identifierref", NAMESPACES) is not None
                     else None
                 )
-                content_type = item.find("ns:content_type", namespaces).text
+                content_type = item.find("cccv1p0:content_type", NAMESPACES).text
                 items = resource_map.get(identifierref, {})
                 for item_info in items:
                     for file in item_info.get("files", []):
@@ -419,6 +449,131 @@ def parse_module_meta(course_archive_path: str) -> dict:
     return publish_status
 
 
+def _compact_element(element) -> dict | str | None:
+    """Recursively compact an element into a nested dictionary"""
+    if len(element) == 0:  # No children, return text
+        return element.text.strip() if element.text else None
+    return {
+        child.tag.split("}")[-1] if "}" in child.tag else child.tag: _compact_element(
+            child
+        )
+        for child in element
+    }
+
+
+def _workflow_state_from_html(html: str) -> str:
+    """
+    Extract the workflow_state meta tag from html
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    meta = soup.find("meta", attrs={"name": "workflow_state"})
+    return meta.get("content") if meta else None
+
+
+def _workflow_state_from_xml(xml_string: str) -> bool:
+    """
+    Determine the workflow_state (published/unpublished) from assignment_settings.xml
+    """
+
+    def _get_text(tag):
+        el = root.find(f"cccv1p0:{tag}", NAMESPACES)
+        return el.text.strip() if el is not None and el.text else ""
+
+    try:
+        root = ElementTree.fromstring(xml_string)
+    except Exception:
+        log.exception("Error parsing XML: %s", sys.stderr)
+        return "unpublished"
+
+    if (
+        (
+            # workflow_state must be published
+            _get_text("workflow_state") != "published"
+        )
+        or (
+            # only_visible_to_overrides must not be true
+            _get_text("only_visible_to_overrides") == "true"
+        )
+        or (
+            # hide_in_gradebook must not be true (hidden from gradebook)
+            _get_text("hide_in_gradebook") == "true"
+        )
+    ):
+        return "unpublished"
+
+    lock_at = _get_text("lock_at")
+    unlock_at = _get_text("unlock_at")
+    if _get_text("module_locked") == "true" or is_date_locked(lock_at, unlock_at):
+        return "unpublished"
+
+    return "published"
+
+
+def _title_from_html(html: str) -> str:
+    """
+    Extract the title element from HTML content
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    title = soup.find("title")
+    return title.get_text().strip() if title else ""
+
+
+def parse_web_content(course_archive_path: str) -> dict:
+    """
+    Parse html pages and assignments and return publish/active status of resources
+    """
+
+    publish_status = {"active": [], "unpublished": []}
+
+    with zipfile.ZipFile(course_archive_path, "r") as course_archive:
+        manifest_path = "imsmanifest.xml"
+        if manifest_path not in course_archive.namelist():
+            return publish_status
+        manifest_xml = course_archive.read(manifest_path)
+        resource_map = extract_resources_by_identifier(manifest_xml)
+        for item in resource_map:
+            resource_map_item = resource_map[item]
+            item_link = resource_map_item.get("href")
+            assignment_settings = None
+            for file in resource_map_item.get("files", []):
+                if file.endswith("assignment_settings.xml"):
+                    assignment_settings = file
+            if item_link and item_link.endswith(".html"):
+                file_path = resource_map_item["href"]
+                html_content = course_archive.read(file_path)
+                if assignment_settings:
+                    xml_content = course_archive.read(assignment_settings)
+                    workflow_state = _workflow_state_from_xml(xml_content)
+                else:
+                    workflow_state = _workflow_state_from_html(html_content)
+                title = _title_from_html(html_content)
+
+                lom_elem = (
+                    resource_map_item.get("metadata", {})
+                    .get("lom", {})
+                    .get("educational", {})
+                )
+                # Determine if the content is intended for authors or instructors only
+                intended_role = lom_elem.get("intendedEndUserRole", {}).get("value")
+                authors_only = intended_role and intended_role.lower() != "student"
+
+                if workflow_state in ["active", "published"] and not authors_only:
+                    publish_status["active"].append(
+                        {
+                            "title": title,
+                            "path": file_path,
+                        }
+                    )
+                else:
+                    publish_status["unpublished"].append(
+                        {
+                            "title": title,
+                            "path": file_path,
+                        }
+                    )
+    return publish_status
+
+
 def extract_resources_by_identifierref(manifest_xml: str) -> dict:
     """
     Extract resources from an IMS manifest file and
@@ -426,29 +581,24 @@ def extract_resources_by_identifierref(manifest_xml: str) -> dict:
     """
     root = ElementTree.fromstring(manifest_xml)
 
-    namespaces = {
-        "imscp": "http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1",
-        "lom": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/resource",
-        "lomimscc": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/manifest",
-    }
     # Dictionary to hold resources keyed by identifierref
     resources_dict = defaultdict(list)
     # Find all item elements with identifierref attributes
-    for item in root.findall(".//imscp:item[@identifierref]", namespaces):
+    for item in root.findall(".//imscp:item[@identifierref]", NAMESPACES):
         identifierref = item.get("identifierref")
         title = (
-            item.find("imscp:title", namespaces).text
-            if item.find("imscp:title", namespaces) is not None
+            item.find("imscp:title", NAMESPACES).text
+            if item.find("imscp:title", NAMESPACES) is not None
             else "No Title"
         )
         resource = root.find(
-            f'.//imscp:resource[@identifier="{identifierref}"]', namespaces
+            f'.//imscp:resource[@identifier="{identifierref}"]', NAMESPACES
         )
         if resource is not None:
             # Get all file elements within the resource
             files = [
                 file_elem.get("href")
-                for file_elem in resource.findall("imscp:file", namespaces)
+                for file_elem in resource.findall("imscp:file", NAMESPACES)
             ]
 
             resources_dict[identifierref].append(
@@ -463,17 +613,9 @@ def extract_resources_by_identifier(manifest_xml: str) -> dict:
     file and return a map keyed by identifier.
     """
     root = ElementTree.fromstring(manifest_xml)
-
-    namespaces = {
-        "imscp": "http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1",
-        "lom": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/resource",
-        "lomimscc": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/manifest",
-    }
-
     resources_dict = {}
-
     # Find all resource elements
-    for resource in root.findall(".//imscp:resource[@identifier]", namespaces):
+    for resource in root.findall(".//imscp:resource[@identifier]", NAMESPACES):
         identifier = resource.get("identifier")
         resource_type = resource.get("type")
         href = resource.get("href")
@@ -481,16 +623,13 @@ def extract_resources_by_identifier(manifest_xml: str) -> dict:
         # Get all file elements within the resource
         files = [
             file_elem.get("href")
-            for file_elem in resource.findall("imscp:file", namespaces)
+            for file_elem in resource.findall("imscp:file", NAMESPACES)
         ]
-
         # Extract metadata if present
         metadata = {}
-        metadata_elem = resource.find("imscp:metadata", namespaces)
+        metadata_elem = resource.find("imscp:metadata", NAMESPACES)
         if metadata_elem is not None:
-            # You can expand this to extract specific metadata fields as needed
-            metadata["has_metadata"] = True
-
+            metadata.update(_compact_element(metadata_elem))
         resources_dict[identifier] = {
             "identifier": identifier,
             "type": resource_type,
@@ -498,7 +637,6 @@ def extract_resources_by_identifier(manifest_xml: str) -> dict:
             "files": files,
             "metadata": metadata,
         }
-
     return resources_dict
 
 
@@ -538,6 +676,9 @@ def pdf_to_base64_images(pdf_path, fmt="JPEG", max_size=2000, quality=85):
 
 
 def _pdf_to_markdown(pdf_path):
+    """
+    Convert a PDF file to markdown using an llm
+    """
     markdown = ""
     for im in pdf_to_base64_images(pdf_path):
         response = completion(
diff --git a/learning_resources/etl/canvas_test.py b/learning_resources/etl/canvas_test.py
index 0f9928e2ae..5f9e5536e8 100644
--- a/learning_resources/etl/canvas_test.py
+++ b/learning_resources/etl/canvas_test.py
@@ -6,12 +6,16 @@
 from unittest.mock import MagicMock
 
 import pytest
+from defusedxml import ElementTree
 
 from learning_resources.constants import LearningResourceType, PlatformType
 from learning_resources.etl.canvas import (
+    _compact_element,
     is_file_published,
     parse_canvas_settings,
+    parse_files_meta,
     parse_module_meta,
+    parse_web_content,
     run_for_canvas_archive,
     transform_canvas_content_files,
     transform_canvas_problem_files,
@@ -591,7 +595,7 @@ def test_is_file_published(file_meta, expected):
 
 def test_published_module_and_files_meta_content_ingestion(mocker, tmp_path):
     """
-    Test published files from files_meta and module_meta are retained,
+    Test published files from files_meta and module_meta are retained
     """
 
     module_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
@@ -692,3 +696,213 @@ def test_published_module_and_files_meta_content_ingestion(mocker, tmp_path):
     assert "/file1.html" in result_paths
     assert "/file3.html" in result_paths
     assert bulk_unpub.mock_calls[0].args[0] == [stale_contentfile.id]
+
+
+@pytest.mark.parametrize(
+    ("element", "expected"),
+    [
+        # Test case: Element with no children, only text
+        (
+            ElementTree.fromstring("""<tag>Sample Text</tag>"""),
+            "Sample Text",
+        ),
+        # Test case: Element with children, nested structure
+        (
+            ElementTree.fromstring(
+                """<parent>
+                <child1>Child 1 Text</child1>
+                <child2>
+                    <subchild>Subchild Text</subchild>
+                </child2>
+            </parent>
+        """
+            ),
+            {
+                "child1": "Child 1 Text",
+                "child2": {"subchild": "Subchild Text"},
+            },
+        ),
+        # Test case: Element with mixed text and children
+        (
+            ElementTree.fromstring(
+                """<mixed>Parent Text<child>Child Text</child></mixed>"""
+            ),
+            {
+                "child": "Child Text",
+            },
+        ),
+    ],
+)
+def test_compact_element(element, expected):
+    """
+    Test _compact_element function with nested tags.
+    """
+    result = _compact_element(element)
+    assert result == expected
+
+
+def test_parse_web_content_returns_active_and_unpublished(tmp_path):
+    """
+    Test that parse_web_content returns active and unpublished items
+    """
+    manifest_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <manifest xmlns="http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1">
+      <resources>
+        <resource identifier="RES1" type="webcontent" href="file1.html">
+          <file href="file1.html"/>
+        </resource>
+        <resource identifier="RES2" type="webcontent" href="file2.html">
+          <file href="file2.html"/>
+        </resource>
+      </resources>
+    </manifest>
+    """
+    html_content_active = b"""<html>
+    <head><title>Active Content</title><meta name="workflow_state" content="active"></head>
+    <body>Content</body>
+    </html>
+    """
+    html_content_unpublished = b"""<html>
+    <head><title>Unpublished Content</title><meta name="workflow_state" content="unpublished"></head>
+    <body>Content</body>
+    </html>
+    """
+    zip_path = tmp_path / "canvas_course.zip"
+    with zipfile.ZipFile(zip_path, "w") as zf:
+        zf.writestr("imsmanifest.xml", manifest_xml)
+        zf.writestr("file1.html", html_content_active)
+        zf.writestr("file2.html", html_content_unpublished)
+
+    result = parse_web_content(zip_path)
+    assert "active" in result
+    assert "unpublished" in result
+
+    assert any(
+        item["title"] == "Active Content" and item["path"] == "file1.html"
+        for item in result["active"]
+    )
+    assert any(
+        item["title"] == "Unpublished Content" and item["path"] == "file2.html"
+        for item in result["unpublished"]
+    )
+
+
+def test_parse_web_content_handles_missing_workflow_state(tmp_path):
+    """
+    Test that parse_web_content handles missing workflow_state gracefully
+    """
+    manifest_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <manifest xmlns="http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1">
+      <resources>
+        <resource identifier="RES1" type="webcontent" href="file1.html">
+          <file href="file1.html"/>
+        </resource>
+      </resources>
+    </manifest>
+    """
+    html_content = b"""<html>
+    <head><title>No Workflow State</title></head>
+    <body>Content</body>
+    </html>
+    """
+    zip_path = tmp_path / "canvas_course.zip"
+    with zipfile.ZipFile(zip_path, "w") as zf:
+        zf.writestr("imsmanifest.xml", manifest_xml)
+        zf.writestr("file1.html", html_content)
+
+    result = parse_web_content(zip_path)
+    assert "active" in result
+    assert "unpublished" in result
+    assert len(result["active"]) == 0
+    assert len(result["unpublished"]) == 1
+    assert result["unpublished"][0]["title"] == "No Workflow State"
+    assert result["unpublished"][0]["path"] == "file1.html"
+
+
+def test_parse_web_content_excludes_instructor_only_content(tmp_path):
+    """
+    Test that parse_web_content excludes content intended for Instructors only
+    """
+    manifest_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <manifest xmlns="http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1">
+      <resources>
+        <resource identifier="RES1" type="webcontent" href="file1.html">
+          <file href="file1.html"/>
+          <metadata>
+            <lom>
+              <educational>
+                <intendedEndUserRole>
+                  <value>Instructor</value>
+                </intendedEndUserRole>
+              </educational>
+            </lom>
+          </metadata>
+        </resource>
+      </resources>
+    </manifest>
+    """
+    html_content = b"""<html>
+    <head><title>Instructor Only Content</title><meta name="workflow_state" content="active"></head>
+    <body>Content</body>
+    </html>
+    """
+    zip_path = tmp_path / "canvas_course.zip"
+    with zipfile.ZipFile(zip_path, "w") as zf:
+        zf.writestr("imsmanifest.xml", manifest_xml)
+        zf.writestr("file1.html", html_content)
+
+    result = parse_web_content(zip_path)
+    assert "active" in result
+    assert "unpublished" in result
+    assert len(result["active"]) == 0
+    assert len(result["unpublished"]) == 1
+    assert result["unpublished"][0]["title"] == "Instructor Only Content"
+    assert result["unpublished"][0]["path"] == "file1.html"
+
+
+def test_parse_files_meta_excludes_tutorbot_folder(tmp_path, settings):
+    """
+    Test that parse_files_meta explicitly excludes files in the tutorbot folder
+    even if they are marked as published in the files_meta.xml.
+    """
+    settings.CANVAS_TUTORBOT_FOLDER = "web_resources/ai/tutor/"
+    files_meta_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <files xmlns="http://canvas.instructure.com/xsd/cccv1p0">
+      <file identifier="file1">
+        <display_name>Published File</display_name>
+        <hidden>false</hidden>
+        <locked>false</locked>
+        <path>web_resources/ai/tutor/tutorfile.html</path>
+      </file>
+      <file identifier="file2">
+        <display_name>Regular File</display_name>
+        <hidden>false</hidden>
+        <locked>false</locked>
+        <path>regular_folder/file2.html</path>
+      </file>
+    </files>
+    """
+    manifest_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <manifest xmlns="http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1">
+      <resources>
+        <resource identifier="file1" type="webcontent">
+          <file href="web_resources/ai/tutor/tutorfile.html"/>
+        </resource>
+        <resource identifier="file2" type="webcontent">
+          <file href="regular_folder/file2.html"/>
+        </resource>
+      </resources>
+    </manifest>
+    """
+    zip_path = tmp_path / "canvas_course.zip"
+    with zipfile.ZipFile(zip_path, "w") as zf:
+        zf.writestr("course_settings/files_meta.xml", files_meta_xml)
+        zf.writestr("imsmanifest.xml", manifest_xml)
+
+    result = parse_files_meta(zip_path)
+
+    # Ensure the file in the tutorbot folder is excluded
+    assert len(result["active"]) == 1
+    assert result["active"][0]["path"].name == "file2.html"
+    assert len(result["unpublished"]) == 1
+    assert result["unpublished"][0]["path"].name == "tutorfile.html"

From c896a339ac00b3514e97c3e975ac3946d287fad5 Mon Sep 17 00:00:00 2001
From: Doof <mitx-devops@mit.edu>
Date: Wed, 10 Sep 2025 13:45:18 +0000
Subject: [PATCH 10/10] Release 0.42.3

---
 RELEASE.rst      | 13 +++++++++++++
 main/settings.py |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index 5de3bd8312..389c1bfe6c 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -1,6 +1,19 @@
 Release Notes
 =============
 
+Version 0.42.3
+--------------
+
+- ingest canvas html content (#2502)
+- Fix userlist stale data (#2501)
+- redirect org users to org dashboard (#2482)
+- Set page size to 30 for the course query for programs in the org dash (#2494)
+- Update dependency Django to v4.2.24 (#2496)
+- adding fix for flaky test (#2491)
+- Toggle NextJS image optimization with an env var (#2385)
+- Stop running Sloan news/event tasks (#2493)
+- Update actions/checkout digest to 08eba0b (#2454)
+
 Version 0.42.2 (Released September 08, 2025)
 --------------
 
diff --git a/main/settings.py b/main/settings.py
index d9e0e5acc7..3db1565ee2 100644
--- a/main/settings.py
+++ b/main/settings.py
@@ -34,7 +34,7 @@
 from main.settings_pluggy import *  # noqa: F403
 from openapi.settings_spectacular import open_spectacular_settings
 
-VERSION = "0.42.2"
+VERSION = "0.42.3"
 
 log = logging.getLogger()