From b0509eba77b6dbc7582760c8882b7016f1abb719 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 5 Sep 2025 12:38:33 -0400 Subject: [PATCH 01/10] Update actions/checkout digest to 08eba0b (#2454) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/ci.yml | 12 ++++++------ .github/workflows/openapi-diff.yml | 4 ++-- .github/workflows/publish-pages.yml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 121cffd28f..4340078b60 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - 6379:6379 steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4 - name: update apt run: sudo apt-get update -y @@ -92,7 +92,7 @@ jobs: javascript-tests: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4 - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: "^22.0.0" @@ -151,7 +151,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4 - name: Build the Docker image env: @@ -199,7 +199,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4 - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: @@ -223,7 +223,7 @@ jobs: GENERATOR_OUTPUT_DIR_VC: ./frontends/api/src/generated/v0 runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4 - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: "^22.0.0" @@ -262,7 +262,7 @@ jobs: GENERATOR_OUTPUT_DIR_VC: ./frontends/api/src/generated/v1 runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4 - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: "^22.0.0" diff --git a/.github/workflows/openapi-diff.yml b/.github/workflows/openapi-diff.yml index 7611fe521b..2e963acf57 100644 --- a/.github/workflows/openapi-diff.yml +++ b/.github/workflows/openapi-diff.yml @@ -5,12 +5,12 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout HEAD - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4 with: ref: ${{ github.head_ref }} path: head - name: Checkout BASE - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4 with: ref: ${{ github.base_ref }} path: base diff --git a/.github/workflows/publish-pages.yml b/.github/workflows/publish-pages.yml index 8be9f1897d..90271f976d 100644 --- a/.github/workflows/publish-pages.yml +++ b/.github/workflows/publish-pages.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4 - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: From ec1b8707def5e0a72a72b0c60ecd8ffad24f8582 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Fri, 5 Sep 2025 14:57:01 -0400 Subject: [PATCH 02/10] Stop running Sloan news/event tasks (#2493) --- main/settings_celery.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/main/settings_celery.py b/main/settings_celery.py index 41995aa665..eb6c0e265f 100644 --- a/main/settings_celery.py +++ b/main/settings_celery.py @@ -108,18 +108,6 @@ "NEWS_EVENTS_MITPE_NEWS_SCHEDULE_SECONDS", 60 * 60 * 3 ), # default is every 3 hours }, - "update_sloan_news": { - "task": "news_events.tasks.get_sloan_exec_news", - "schedule": get_int( - "NEWS_EVENTS_SLOAN_EXEC_NEWS_SCHEDULE_SECONDS", 60 * 60 * 3 - ), # default is every 3 hours - }, - "update_sloan_webinars": { - "task": "news_events.tasks.get_sloan_exec_webinars", - "schedule": get_int( - "NEWS_EVENTS_SLOAN_EXEC_WEBINAR_SCHEDULE_SECONDS", 60 * 60 * 12 - ), # default is every 12 hours - }, "update_sloan_courses": { "task": "learning_resources.tasks.get_sloan_data", "schedule": crontab(minute=30, hour=4), # 12:30am EST From 0a746ae810b9a099784ebcdbb085017d794a3730 Mon Sep 17 00:00:00 2001 From: Chris Chudzicki Date: Fri, 5 Sep 2025 15:31:25 -0400 Subject: [PATCH 03/10] Toggle NextJS image optimization with an env var (#2385) * turn off image optimization * add flag to toggle image optimization --- env/frontend.env | 1 + frontends/main/next.config.js | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/env/frontend.env b/env/frontend.env index 0434f13695..0225b9a761 100644 --- a/env/frontend.env +++ b/env/frontend.env @@ -1,6 +1,7 @@ NODE_ENV=development PORT=8062 SENTRY_ENV=dev # Re-enable sentry +OPTIMIZE_IMAGES="true" # Environment variables with `NEXT_PUBLIC_` prefix are exposed to the client side NEXT_PUBLIC_ORIGIN=${MITOL_APP_BASE_URL} diff --git a/frontends/main/next.config.js b/frontends/main/next.config.js index f2f968dbb7..23a66e4ddb 100644 --- a/frontends/main/next.config.js +++ b/frontends/main/next.config.js @@ -3,6 +3,10 @@ const { validateEnv } = require("./validateEnv") validateEnv() +const OPTIMIZE_IMAGES = Boolean( + (process.env.OPTIMIZE_IMAGES ?? "true") === "true", +) + const processFeatureFlags = () => { const featureFlagPrefix = process.env.NEXT_PUBLIC_POSTHOG_FEATURE_PREFIX || "FEATURE_" @@ -90,6 +94,7 @@ const nextConfig = { transpilePackages: ["@mitodl/smoot-design/ai"], images: { + unoptimized: !OPTIMIZE_IMAGES, remotePatterns: [ { hostname: "**", From 1a8a975d720374f4431da9c48d03d51106ee5964 Mon Sep 17 00:00:00 2001 From: Shankar Ambady Date: Fri, 5 Sep 2025 16:37:49 -0400 Subject: [PATCH 04/10] adding fix for flaky test (#2491) --- learning_resources/tasks_test.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py index 152c26590d..26e060e4e3 100644 --- a/learning_resources/tasks_test.py +++ b/learning_resources/tasks_test.py @@ -16,6 +16,7 @@ from learning_resources.etl.constants import MARKETING_PAGE_FILE_TYPE, ETLSource from learning_resources.factories import ( LearningResourceFactory, + LearningResourcePlatformFactory, ) from learning_resources.models import LearningResource from learning_resources.tasks import ( @@ -686,8 +687,19 @@ def test_remove_duplicate_resources(mocker, mocked_celery): """ duplicate_id = "duplicate_id" - LearningResourceFactory.create_batch(3, readable_id=duplicate_id, published=False) - LearningResourceFactory.create(readable_id=duplicate_id) + for platform_type in [PlatformType.edx, PlatformType.xpro, PlatformType.youtube]: + LearningResourceFactory.create( + readable_id=duplicate_id, + published=False, + platform=LearningResourcePlatformFactory.create(code=platform_type.name), + ) + + LearningResourceFactory.create( + readable_id=duplicate_id, + platform=LearningResourcePlatformFactory.create( + code=platform_type.mitxonline.name + ), + ) assert LearningResource.objects.filter(readable_id=duplicate_id).count() == 4 with pytest.raises(mocked_celery.replace_exception_class): remove_duplicate_resources() From b8b30cccc32084ad1e6b3e6f6ca688062e0e1b9a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 8 Sep 2025 10:41:14 -0400 Subject: [PATCH 05/10] Update dependency Django to v4.2.24 (#2496) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index d535c8d88d..115dc17662 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1431,14 +1431,14 @@ static3 = "*" [[package]] name = "django" -version = "4.2.23" +version = "4.2.24" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "django-4.2.23-py3-none-any.whl", hash = "sha256:dafbfaf52c2f289bd65f4ab935791cb4fb9a198f2a5ba9faf35d7338a77e9803"}, - {file = "django-4.2.23.tar.gz", hash = "sha256:42fdeaba6e6449d88d4f66de47871015097dc6f1b87910db00a91946295cfae4"}, + {file = "django-4.2.24-py3-none-any.whl", hash = "sha256:a6527112c58821a0dfc5ab73013f0bdd906539790a17196658e36e66af43c350"}, + {file = "django-4.2.24.tar.gz", hash = "sha256:40cd7d3f53bc6cd1902eadce23c337e97200888df41e4a73b42d682f23e71d80"}, ] [package.dependencies] @@ -9063,4 +9063,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "~3.12" -content-hash = "aaf3bc99e70404eca795dc3c392b8730509d674637e69836e51ac25aaca0b5ce" +content-hash = "9b822fba05eb04db225295485b3c412f1dc240bd8a34270ff24bf0980026098b" diff --git a/pyproject.toml b/pyproject.toml index ac8873b96c..e5314fbebe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ requires-poetry = ">2.1,<3" [tool.poetry.dependencies] python = "~3.12" -Django = "4.2.23" +Django = "4.2.24" attrs = "^25.0.0" base36 = "^0.1.1" beautifulsoup4 = "^4.8.2" From 084e253b2839a6c3d407ad444ed85ab0de2a3737 Mon Sep 17 00:00:00 2001 From: James Kachel Date: Mon, 8 Sep 2025 11:01:54 -0500 Subject: [PATCH 06/10] Set page size to 30 for the course query for programs in the org dash (#2494) --- .../DashboardPage/CoursewareDisplay/test-utils.ts | 13 +++++++++++-- .../app-pages/DashboardPage/OrganizationContent.tsx | 6 +++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts index ae3ca0c42a..e97fe24e60 100644 --- a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts +++ b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/test-utils.ts @@ -154,13 +154,21 @@ const setupProgramsAndCourses = () => { { results: [programB] }, ) setMockResponse.get( - urls.courses.coursesList({ id: programA.courses, org_id: orgX.id }), + urls.courses.coursesList({ + id: programA.courses, + org_id: orgX.id, + page_size: 30, + }), { results: coursesA.results, }, ) setMockResponse.get( - urls.courses.coursesList({ id: programB.courses, org_id: orgX.id }), + urls.courses.coursesList({ + id: programB.courses, + org_id: orgX.id, + page_size: 30, + }), { results: coursesB.results, }, @@ -262,6 +270,7 @@ function setupOrgDashboardMocks( mitxonline.urls.courses.coursesList({ id: program.courses, org_id: org.id, + page_size: 30, }), { results: courses }, ) diff --git a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx index 27b91f0861..f3dacd8356 100644 --- a/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx +++ b/frontends/main/src/app-pages/DashboardPage/OrganizationContent.tsx @@ -250,7 +250,11 @@ const OrgProgramDisplay: React.FC<{ ) const hasValidCertificate = !!programEnrollment?.certificate const courses = useQuery( - coursesQueries.coursesList({ id: program.courseIds, org_id: orgId }), + coursesQueries.coursesList({ + id: program.courseIds, + org_id: orgId, + page_size: 30, + }), ) const skeleton = ( From 61c92bc93acc711b35ec6c6f94ef1deb702f876e Mon Sep 17 00:00:00 2001 From: Carey P Gumaer Date: Mon, 8 Sep 2025 13:50:24 -0400 Subject: [PATCH 07/10] redirect org users to org dashboard (#2482) * redirect user to org dashboard on login based on the first org they are determined to be a part of, if they are a part of one * fix test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * only redirect to org dashboard on first login, and always skip onboarding for org users * redirect using APP_BASE_URL * remove org keycloak scope from default env and add a note about enabling it * fix tests, use settings fixture in authentication views tests --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- authentication/views.py | 41 +++++++++++++++--- authentication/views_test.py | 79 +++++++++++++++++++++++++++++++---- env/backend.env | 1 + env/backend.local.example.env | 1 + main/settings.py | 1 + 5 files changed, 111 insertions(+), 12 deletions(-) diff --git a/authentication/views.py b/authentication/views.py index 992988186a..6ab3592a3c 100644 --- a/authentication/views.py +++ b/authentication/views.py @@ -1,14 +1,16 @@ """Authentication views""" import logging +from urllib.parse import urljoin from django.contrib.auth import logout from django.shortcuts import redirect from django.utils.http import url_has_allowed_host_and_scheme, urlencode +from django.utils.text import slugify from django.views import View from main import settings -from main.middleware.apisix_user import ApisixUserMiddleware +from main.middleware.apisix_user import ApisixUserMiddleware, decode_apisix_headers log = logging.getLogger(__name__) @@ -64,6 +66,8 @@ class CustomLoginView(View): Redirect the user to the appropriate url after login """ + header = "HTTP_X_USERINFO" + def get( self, request, @@ -76,10 +80,32 @@ def get( redirect_url = get_redirect_url(request) if not request.user.is_anonymous: profile = request.user.profile - if not profile.has_logged_in: - profile.has_logged_in = True - profile.save() - if ( + + apisix_header = decode_apisix_headers(request, self.header) + + # Check if user belongs to any organizations + user_organizations = ( + apisix_header.get("organizations", {}) if apisix_header else {} + ) + + if user_organizations: + # First-time login for org user: redirect to org dashboard + if not profile.has_logged_in: + first_org_name = next(iter(user_organizations.keys())) + org_slug = slugify(first_org_name) + + log.info( + "User %s belongs to organization: %s (slug: %s)", + request.user.email, + first_org_name, + org_slug, + ) + + redirect_url = urljoin( + settings.APP_BASE_URL, f"/dashboard/organization/{org_slug}" + ) + # Non-organization users: apply onboarding logic + elif ( not profile.completed_onboarding and request.GET.get("skip_onboarding", "0") == "0" ): @@ -87,4 +113,9 @@ def get( redirect_url = f"{settings.MITOL_NEW_USER_LOGIN_URL}?{params}" profile.completed_onboarding = True profile.save() + + if not profile.has_logged_in: + profile.has_logged_in = True + profile.save() + return redirect(redirect_url) diff --git a/authentication/views_test.py b/authentication/views_test.py index 637af019ba..5f8da73272 100644 --- a/authentication/views_test.py +++ b/authentication/views_test.py @@ -3,9 +3,9 @@ import json from base64 import b64encode from unittest.mock import MagicMock +from urllib.parse import urljoin import pytest -from django.conf import settings from django.test import RequestFactory from django.urls import reverse @@ -28,10 +28,18 @@ def test_custom_login(mocker, next_url, allowed): assert get_redirect_url(mock_request) == (next_url if allowed else "/app") -@pytest.mark.parametrize("has_apisix_header", [True, False]) -@pytest.mark.parametrize("next_url", ["/search", None]) -def test_logout(mocker, next_url, client, user, has_apisix_header): +@pytest.mark.parametrize( + "test_params", + [ + (True, "/search"), + (True, None), + (False, "/search"), + (False, None), + ], +) +def test_logout(mocker, client, user, test_params, settings): """User should be properly redirected and logged out""" + has_apisix_header, next_url = test_params header_str = b64encode( json.dumps( { @@ -55,10 +63,10 @@ def test_logout(mocker, next_url, client, user, has_apisix_header): mock_logout.assert_called_once() -@pytest.mark.parametrize("is_authenticated", [True]) -@pytest.mark.parametrize("has_next", [False]) -def test_next_logout(mocker, client, user, is_authenticated, has_next): +@pytest.mark.parametrize("test_params", [(True, False)]) +def test_next_logout(mocker, client, user, test_params, settings): """Test logout redirect cache assignment""" + is_authenticated, has_next = test_params next_url = "https://ocw.mit.edu" mock_request = mocker.MagicMock( GET={"next": next_url if has_next else None}, @@ -211,3 +219,60 @@ def test_custom_login_view_first_time_login_sets_has_logged_in(mocker): # Verify redirect was called with the correct URL mock_redirect.assert_called_once_with("/dashboard") + + +@pytest.mark.parametrize( + "test_case", + [ + ( + (False, False), + "/dashboard/organization/test-organization", + ), # First-time login → org dashboard + ( + (False, True), + "/dashboard/organization/test-organization", + ), # First-time login → org dashboard + ((True, False), "/app"), # Subsequent login → normal app (not onboarding!) + ((True, True), "/app"), # Subsequent login → normal app + ], +) +def test_login_org_user_redirect(mocker, client, user, test_case, settings): + """Test organization user redirect behavior - org users skip onboarding regardless of onboarding status""" + # Unpack test case + profile_state, expected_url = test_case + has_logged_in, completed_onboarding = profile_state + + # Set up user profile based on test scenario + user.profile.has_logged_in = has_logged_in + user.profile.completed_onboarding = completed_onboarding + user.profile.save() + + header_str = b64encode( + json.dumps( + { + "preferred_username": user.username, + "email": user.email, + "sub": user.global_id, + "organization": { + "Test Organization": { + "role": "member", + "id": "org-123", + } + }, + } + ).encode() + ) + client.force_login(user) + response = client.get( + "/login/", + follow=False, + HTTP_X_USERINFO=header_str, + ) + assert response.status_code == 302 + # Handle environment differences - in some envs it returns full URL, in others just path + expected_full_url = urljoin(settings.APP_BASE_URL, expected_url) + assert response.url in [expected_url, expected_full_url] + + # Verify that org users are never sent to onboarding + # (onboarding URL would contain settings.MITOL_NEW_USER_LOGIN_URL) + assert settings.MITOL_NEW_USER_LOGIN_URL not in response.url diff --git a/env/backend.env b/env/backend.env index acb1a8eecd..ce267160e2 100644 --- a/env/backend.env +++ b/env/backend.env @@ -48,6 +48,7 @@ KEYCLOAK_CLIENT_ID=apisix KEYCLOAK_CLIENT_SECRET=HckCZXToXfaetbBx0Fo3xbjnC468oMi4 # pragma: allowlist-secret KEYCLOAK_DISCOVERY_URL=http://kc.ol.local:8066/realms/ol-local/.well-known/openid-configuration KEYCLOAK_REALM_NAME=ol-local +# For some organization related functionality, add organization:* if you have orgs enabled KEYCLOAK_SCOPES="openid profile ol-profile" KEYCLOAK_SVC_KEYSTORE_PASSWORD=supertopsecret1234 KEYCLOAK_SVC_HOSTNAME=kc.ol.local diff --git a/env/backend.local.example.env b/env/backend.local.example.env index ef9edf1fba..89ad1cf4e3 100644 --- a/env/backend.local.example.env +++ b/env/backend.local.example.env @@ -31,6 +31,7 @@ KEYCLOAK_CLIENT_ID=apisix KEYCLOAK_CLIENT_SECRET=HckCZXToXfaetbBx0Fo3xbjnC468oMi4 # pragma: allowlist-secret KEYCLOAK_DISCOVERY_URL=http://kc.ol.local:8066/realms/ol-local/.well-known/openid-configuration KEYCLOAK_REALM_NAME=ol-local +# For some organization related functionality, add organization:* if you have orgs enabled KEYCLOAK_SCOPES="openid profile ol-profile" KEYCLOAK_SVC_KEYSTORE_PASSWORD=supertopsecret1234 KEYCLOAK_SVC_HOSTNAME=kc.ol.local diff --git a/main/settings.py b/main/settings.py index 71f5793700..d9e0e5acc7 100644 --- a/main/settings.py +++ b/main/settings.py @@ -356,6 +356,7 @@ "first_name": "given_name", "last_name": "family_name", "name": "name", + "organizations": "organization", }, "profiles.Profile": { "name": "name", From b26d34f042cf9cf32e5b60a2acc46e1c886f4488 Mon Sep 17 00:00:00 2001 From: Chris Chudzicki Date: Tue, 9 Sep 2025 16:24:02 -0400 Subject: [PATCH 08/10] Fix userlist stale data (#2501) * fix type mismatch * oops * invalidate more things --- .../api/src/hooks/learningResources/index.ts | 24 +++++++++++++++---- .../src/app/dashboard/my-lists/[id]/page.tsx | 11 +++++---- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/frontends/api/src/hooks/learningResources/index.ts b/frontends/api/src/hooks/learningResources/index.ts index b8dad9135a..34774d47c6 100644 --- a/frontends/api/src/hooks/learningResources/index.ts +++ b/frontends/api/src/hooks/learningResources/index.ts @@ -97,7 +97,16 @@ const useLearningResourceSetUserListRelationships = () => { params: LearningResourcesApiLearningResourcesUserlistsPartialUpdateRequest, ) => learningResourcesApi.learningResourcesUserlistsPartialUpdate(params), onSettled: () => { - queryClient.invalidateQueries({ queryKey: userlistKeys.membershipList() }) + /** + * We need to invalidate: + * - membership check + * - list of user lists (count has changed) + * - userlist detail annd listing for any lists we modified + * + * That's a lot. Let's just invalidate root. + * Additionally, the lists we've removed from the resource are not easily available. + */ + queryClient.invalidateQueries({ queryKey: userlistKeys.root }) }, }) } @@ -110,9 +119,16 @@ const useLearningResourceSetLearningPathRelationships = () => { ) => learningResourcesApi.learningResourcesLearningPathsPartialUpdate(params), onSettled: () => { - queryClient.invalidateQueries({ - queryKey: learningPathKeys.membershipList(), - }) + /** + * We need to invalidate: + * - membership check + * - list of user lists (count has changed) + * - userlist detail annd listing for any lists we modified + * + * That's a lot. Let's just invalidate root. + * Additionally, the lists we've removed from the resource are not easily available. + */ + queryClient.invalidateQueries({ queryKey: learningPathKeys.root }) }, }) } diff --git a/frontends/main/src/app/dashboard/my-lists/[id]/page.tsx b/frontends/main/src/app/dashboard/my-lists/[id]/page.tsx index ce9897ceee..4567085512 100644 --- a/frontends/main/src/app/dashboard/my-lists/[id]/page.tsx +++ b/frontends/main/src/app/dashboard/my-lists/[id]/page.tsx @@ -1,14 +1,15 @@ import React from "react" import { UserListDetailsContent } from "@/app-pages/DashboardPage/UserListDetailsContent" -import { PageParams } from "@/app/types" import invariant from "tiny-invariant" +import { PageParams } from "@/app/types" -const Page: React.FC> = async ({ +const Page: React.FC> = async ({ params, }) => { - const resolved = await params - invariant(resolved?.id, "id is required") - return + const resolved = await params! + const id = Number(resolved.id) + invariant(id, "id is required") + return } export default Page From d281eadfc5a4848fd1f0a8f74f2daa32ce1925fc Mon Sep 17 00:00:00 2001 From: Shankar Ambady Date: Wed, 10 Sep 2025 09:43:40 -0400 Subject: [PATCH 09/10] ingest canvas html content (#2502) * initial working version of gathering assignments and pages * test fix * check metadata for visibility to students vs authors * type hints * remove typehint * consolidation of namespaces * explicitely exclude tutor folder no matter what and added tests * fix test * more conditionals for assignment workflow state --- learning_resources/etl/canvas.py | 275 +++++++++++++++++++------- learning_resources/etl/canvas_test.py | 216 +++++++++++++++++++- 2 files changed, 423 insertions(+), 68 deletions(-) diff --git a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py index a6646042d8..f9fa6c7aff 100644 --- a/learning_resources/etl/canvas.py +++ b/learning_resources/etl/canvas.py @@ -12,6 +12,7 @@ from urllib.parse import unquote_plus import pypdfium2 as pdfium +from bs4 import BeautifulSoup from defusedxml import ElementTree from django.conf import settings from litellm import completion @@ -42,6 +43,14 @@ log = logging.getLogger(__name__) +NAMESPACES = { + "cccv1p0": "http://canvas.instructure.com/xsd/cccv1p0", + "imscp": "http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1", + "lom": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/resource", + "lomimscc": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/manifest", +} + + def sync_canvas_archive(bucket, key: str, overwrite): """ Sync a Canvas course archive from S3 @@ -180,13 +189,20 @@ def transform_canvas_content_files( basedir = course_zipfile.name.split(".")[0] zipfile_path = course_zipfile.absolute() # grab published module and file items - published_items = [ - Path(item["path"]).resolve() - for item in parse_module_meta(zipfile_path)["active"] - ] + [ - Path(item["path"]).resolve() - for item in parse_files_meta(zipfile_path)["active"] - ] + published_items = ( + [ + Path(item["path"]).resolve() + for item in parse_module_meta(zipfile_path)["active"] + ] + + [ + Path(item["path"]).resolve() + for item in parse_files_meta(zipfile_path)["active"] + ] + + [ + Path(item["path"]).resolve() + for item in parse_web_content(zipfile_path)["active"] + ] + ) def _generate_content(): """Inner generator for yielding content data""" @@ -272,20 +288,51 @@ def transform_canvas_problem_files( def parse_context_xml(course_archive_path: str) -> dict: + """ + Parse course_settings/context.xml and return context info + """ with zipfile.ZipFile(course_archive_path, "r") as course_archive: context = course_archive.read("course_settings/context.xml") root = ElementTree.fromstring(context) - namespaces = {"ns": "http://canvas.instructure.com/xsd/cccv1p0"} context_info = {} item_keys = ["course_id", "root_account_id", "canvas_domain", "root_account_name"] for key in item_keys: - element = root.find(f"ns:{key}", namespaces) + element = root.find(f"cccv1p0:{key}", NAMESPACES) if element is not None: context_info[key] = element.text return context_info +def is_date_locked(lock_at: str, unlock_at: str) -> bool: + """ + Determine if a resource is currently date-locked based + on lock_at and unlock_at strings. + Args: + lock_at (str): ISO 8601 date string when the resource locks + unlock_at (str): ISO 8601 date string when the resource unlocks + Returns: + bool: True if the resource is currently locked, False otherwise + """ + now = now_in_utc() + if unlock_at and unlock_at.lower() != "nil": + try: + unlock_dt = datetime.fromisoformat(unlock_at.replace("Z", "+00:00")) + if now < unlock_dt: + return True + except Exception: + log.exception("Error parsing unlock_at date: %s", unlock_at) + + if lock_at and lock_at.lower() != "nil": + try: + lock_dt = datetime.fromisoformat(lock_at.replace("Z", "+00:00")) + if now > lock_dt: + return True + except Exception: + log.exception("Error parsing lock_at date: %s", lock_at) + return False + + def is_file_published(file_meta: dict) -> bool: """ Determine if a Canvas file (from files_meta.xml) is published/visible to students. @@ -295,35 +342,18 @@ def is_file_published(file_meta: dict) -> bool: Returns: bool: True if file is published/visible, False otherwise. """ - now = now_in_utc() hidden = str(file_meta.get("hidden", "false")).lower() == "true" locked = str(file_meta.get("locked", "false")).lower() == "true" - unlock_at = file_meta.get("unlock_at") lock_at = file_meta.get("lock_at") - visibility = file_meta.get("visibility", "inherit") - # If explicitly hidden or locked → unpublished if hidden or locked: return False - if unlock_at and unlock_at.lower() != "nil": - try: - unlock_dt = datetime.fromisoformat(unlock_at.replace("Z", "+00:00")) - if now < unlock_dt: - return False - except Exception: - log.exception("Error parsing date: %s", unlock_at) - - if lock_at and lock_at.lower() != "nil": - try: - lock_dt = datetime.fromisoformat(lock_at.replace("Z", "+00:00")) - if now > lock_dt: - return False - except Exception: - log.exception("Error parsing date: %s", lock_at) + if is_date_locked(lock_at, unlock_at): + return False # Visibility rules if visibility in ("course", "inherit"): return True @@ -344,11 +374,9 @@ def parse_files_meta(course_archive_path: str) -> dict: files_xml = course_archive.read(files_meta_path) manifest_xml = course_archive.read("imsmanifest.xml") resource_map = extract_resources_by_identifier(manifest_xml) - root = ElementTree.fromstring(files_xml) - namespaces = {"c": "http://canvas.instructure.com/xsd/cccv1p0"} try: - for file_elem in root.findall(".//c:file", namespaces): + for file_elem in root.findall(".//cccv1p0:file", NAMESPACES): meta = dict(file_elem.attrib) for child in file_elem: tag = child.tag @@ -367,7 +395,10 @@ def parse_files_meta(course_archive_path: str) -> dict: file_path = Path(file) file_data["path"] = file_path file_data["title"] = file_data.get("display_name") - if file_data["published"]: + # explicitly exclude files in web_resources/ai/tutor + if file_data["published"] and not file.startswith( + settings.CANVAS_TUTORBOT_FOLDER + ): publish_status["active"].append(file_data) else: publish_status["unpublished"].append(file_data) @@ -387,19 +418,18 @@ def parse_module_meta(course_archive_path: str) -> dict: resource_map = extract_resources_by_identifierref(manifest_xml) publish_status = {"active": [], "unpublished": []} try: - namespaces = {"ns": "http://canvas.instructure.com/xsd/cccv1p0"} root = ElementTree.fromstring(module_xml) - for module in root.findall(".//ns:module", namespaces): - module_title = module.find("ns:title", namespaces).text - for item in module.findall("ns:items/ns:item", namespaces): - item_state = item.find("ns:workflow_state", namespaces).text - item_title = item.find("ns:title", namespaces).text + for module in root.findall(".//cccv1p0:module", NAMESPACES): + module_title = module.find("cccv1p0:title", NAMESPACES).text + for item in module.findall("cccv1p0:items/cccv1p0:item", NAMESPACES): + item_state = item.find("cccv1p0:workflow_state", NAMESPACES).text + item_title = item.find("cccv1p0:title", NAMESPACES).text identifierref = ( - item.find("ns:identifierref", namespaces).text - if item.find("ns:identifierref", namespaces) is not None + item.find("cccv1p0:identifierref", NAMESPACES).text + if item.find("cccv1p0:identifierref", NAMESPACES) is not None else None ) - content_type = item.find("ns:content_type", namespaces).text + content_type = item.find("cccv1p0:content_type", NAMESPACES).text items = resource_map.get(identifierref, {}) for item_info in items: for file in item_info.get("files", []): @@ -419,6 +449,131 @@ def parse_module_meta(course_archive_path: str) -> dict: return publish_status +def _compact_element(element) -> dict | str | None: + """Recursively compact an element into a nested dictionary""" + if len(element) == 0: # No children, return text + return element.text.strip() if element.text else None + return { + child.tag.split("}")[-1] if "}" in child.tag else child.tag: _compact_element( + child + ) + for child in element + } + + +def _workflow_state_from_html(html: str) -> str: + """ + Extract the workflow_state meta tag from html + """ + soup = BeautifulSoup(html, "html.parser") + meta = soup.find("meta", attrs={"name": "workflow_state"}) + return meta.get("content") if meta else None + + +def _workflow_state_from_xml(xml_string: str) -> bool: + """ + Determine the workflow_state (published/unpublished) from assignment_settings.xml + """ + + def _get_text(tag): + el = root.find(f"cccv1p0:{tag}", NAMESPACES) + return el.text.strip() if el is not None and el.text else "" + + try: + root = ElementTree.fromstring(xml_string) + except Exception: + log.exception("Error parsing XML: %s", sys.stderr) + return "unpublished" + + if ( + ( + # workflow_state must be published + _get_text("workflow_state") != "published" + ) + or ( + # only_visible_to_overrides must not be true + _get_text("only_visible_to_overrides") == "true" + ) + or ( + # hide_in_gradebook must not be true (hidden from gradebook) + _get_text("hide_in_gradebook") == "true" + ) + ): + return "unpublished" + + lock_at = _get_text("lock_at") + unlock_at = _get_text("unlock_at") + if _get_text("module_locked") == "true" or is_date_locked(lock_at, unlock_at): + return "unpublished" + + return "published" + + +def _title_from_html(html: str) -> str: + """ + Extract the title element from HTML content + """ + soup = BeautifulSoup(html, "html.parser") + title = soup.find("title") + return title.get_text().strip() if title else "" + + +def parse_web_content(course_archive_path: str) -> dict: + """ + Parse html pages and assignments and return publish/active status of resources + """ + + publish_status = {"active": [], "unpublished": []} + + with zipfile.ZipFile(course_archive_path, "r") as course_archive: + manifest_path = "imsmanifest.xml" + if manifest_path not in course_archive.namelist(): + return publish_status + manifest_xml = course_archive.read(manifest_path) + resource_map = extract_resources_by_identifier(manifest_xml) + for item in resource_map: + resource_map_item = resource_map[item] + item_link = resource_map_item.get("href") + assignment_settings = None + for file in resource_map_item.get("files", []): + if file.endswith("assignment_settings.xml"): + assignment_settings = file + if item_link and item_link.endswith(".html"): + file_path = resource_map_item["href"] + html_content = course_archive.read(file_path) + if assignment_settings: + xml_content = course_archive.read(assignment_settings) + workflow_state = _workflow_state_from_xml(xml_content) + else: + workflow_state = _workflow_state_from_html(html_content) + title = _title_from_html(html_content) + + lom_elem = ( + resource_map_item.get("metadata", {}) + .get("lom", {}) + .get("educational", {}) + ) + # Determine if the content is intended for authors or instructors only + intended_role = lom_elem.get("intendedEndUserRole", {}).get("value") + authors_only = intended_role and intended_role.lower() != "student" + + if workflow_state in ["active", "published"] and not authors_only: + publish_status["active"].append( + { + "title": title, + "path": file_path, + } + ) + else: + publish_status["unpublished"].append( + { + "title": title, + "path": file_path, + } + ) + return publish_status + + def extract_resources_by_identifierref(manifest_xml: str) -> dict: """ Extract resources from an IMS manifest file and @@ -426,29 +581,24 @@ def extract_resources_by_identifierref(manifest_xml: str) -> dict: """ root = ElementTree.fromstring(manifest_xml) - namespaces = { - "imscp": "http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1", - "lom": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/resource", - "lomimscc": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/manifest", - } # Dictionary to hold resources keyed by identifierref resources_dict = defaultdict(list) # Find all item elements with identifierref attributes - for item in root.findall(".//imscp:item[@identifierref]", namespaces): + for item in root.findall(".//imscp:item[@identifierref]", NAMESPACES): identifierref = item.get("identifierref") title = ( - item.find("imscp:title", namespaces).text - if item.find("imscp:title", namespaces) is not None + item.find("imscp:title", NAMESPACES).text + if item.find("imscp:title", NAMESPACES) is not None else "No Title" ) resource = root.find( - f'.//imscp:resource[@identifier="{identifierref}"]', namespaces + f'.//imscp:resource[@identifier="{identifierref}"]', NAMESPACES ) if resource is not None: # Get all file elements within the resource files = [ file_elem.get("href") - for file_elem in resource.findall("imscp:file", namespaces) + for file_elem in resource.findall("imscp:file", NAMESPACES) ] resources_dict[identifierref].append( @@ -463,17 +613,9 @@ def extract_resources_by_identifier(manifest_xml: str) -> dict: file and return a map keyed by identifier. """ root = ElementTree.fromstring(manifest_xml) - - namespaces = { - "imscp": "http://www.imsglobal.org/xsd/imsccv1p1/imscp_v1p1", - "lom": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/resource", - "lomimscc": "http://ltsc.ieee.org/xsd/imsccv1p1/LOM/manifest", - } - resources_dict = {} - # Find all resource elements - for resource in root.findall(".//imscp:resource[@identifier]", namespaces): + for resource in root.findall(".//imscp:resource[@identifier]", NAMESPACES): identifier = resource.get("identifier") resource_type = resource.get("type") href = resource.get("href") @@ -481,16 +623,13 @@ def extract_resources_by_identifier(manifest_xml: str) -> dict: # Get all file elements within the resource files = [ file_elem.get("href") - for file_elem in resource.findall("imscp:file", namespaces) + for file_elem in resource.findall("imscp:file", NAMESPACES) ] - # Extract metadata if present metadata = {} - metadata_elem = resource.find("imscp:metadata", namespaces) + metadata_elem = resource.find("imscp:metadata", NAMESPACES) if metadata_elem is not None: - # You can expand this to extract specific metadata fields as needed - metadata["has_metadata"] = True - + metadata.update(_compact_element(metadata_elem)) resources_dict[identifier] = { "identifier": identifier, "type": resource_type, @@ -498,7 +637,6 @@ def extract_resources_by_identifier(manifest_xml: str) -> dict: "files": files, "metadata": metadata, } - return resources_dict @@ -538,6 +676,9 @@ def pdf_to_base64_images(pdf_path, fmt="JPEG", max_size=2000, quality=85): def _pdf_to_markdown(pdf_path): + """ + Convert a PDF file to markdown using an llm + """ markdown = "" for im in pdf_to_base64_images(pdf_path): response = completion( diff --git a/learning_resources/etl/canvas_test.py b/learning_resources/etl/canvas_test.py index 0f9928e2ae..5f9e5536e8 100644 --- a/learning_resources/etl/canvas_test.py +++ b/learning_resources/etl/canvas_test.py @@ -6,12 +6,16 @@ from unittest.mock import MagicMock import pytest +from defusedxml import ElementTree from learning_resources.constants import LearningResourceType, PlatformType from learning_resources.etl.canvas import ( + _compact_element, is_file_published, parse_canvas_settings, + parse_files_meta, parse_module_meta, + parse_web_content, run_for_canvas_archive, transform_canvas_content_files, transform_canvas_problem_files, @@ -591,7 +595,7 @@ def test_is_file_published(file_meta, expected): def test_published_module_and_files_meta_content_ingestion(mocker, tmp_path): """ - Test published files from files_meta and module_meta are retained, + Test published files from files_meta and module_meta are retained """ module_xml = b""" @@ -692,3 +696,213 @@ def test_published_module_and_files_meta_content_ingestion(mocker, tmp_path): assert "/file1.html" in result_paths assert "/file3.html" in result_paths assert bulk_unpub.mock_calls[0].args[0] == [stale_contentfile.id] + + +@pytest.mark.parametrize( + ("element", "expected"), + [ + # Test case: Element with no children, only text + ( + ElementTree.fromstring("""Sample Text"""), + "Sample Text", + ), + # Test case: Element with children, nested structure + ( + ElementTree.fromstring( + """ + Child 1 Text + + Subchild Text + + + """ + ), + { + "child1": "Child 1 Text", + "child2": {"subchild": "Subchild Text"}, + }, + ), + # Test case: Element with mixed text and children + ( + ElementTree.fromstring( + """Parent TextChild Text""" + ), + { + "child": "Child Text", + }, + ), + ], +) +def test_compact_element(element, expected): + """ + Test _compact_element function with nested tags. + """ + result = _compact_element(element) + assert result == expected + + +def test_parse_web_content_returns_active_and_unpublished(tmp_path): + """ + Test that parse_web_content returns active and unpublished items + """ + manifest_xml = b""" + + + + + + + + + + + """ + html_content_active = b""" + Active Content + Content + + """ + html_content_unpublished = b""" + Unpublished Content + Content + + """ + zip_path = tmp_path / "canvas_course.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("imsmanifest.xml", manifest_xml) + zf.writestr("file1.html", html_content_active) + zf.writestr("file2.html", html_content_unpublished) + + result = parse_web_content(zip_path) + assert "active" in result + assert "unpublished" in result + + assert any( + item["title"] == "Active Content" and item["path"] == "file1.html" + for item in result["active"] + ) + assert any( + item["title"] == "Unpublished Content" and item["path"] == "file2.html" + for item in result["unpublished"] + ) + + +def test_parse_web_content_handles_missing_workflow_state(tmp_path): + """ + Test that parse_web_content handles missing workflow_state gracefully + """ + manifest_xml = b""" + + + + + + + + """ + html_content = b""" + No Workflow State + Content + + """ + zip_path = tmp_path / "canvas_course.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("imsmanifest.xml", manifest_xml) + zf.writestr("file1.html", html_content) + + result = parse_web_content(zip_path) + assert "active" in result + assert "unpublished" in result + assert len(result["active"]) == 0 + assert len(result["unpublished"]) == 1 + assert result["unpublished"][0]["title"] == "No Workflow State" + assert result["unpublished"][0]["path"] == "file1.html" + + +def test_parse_web_content_excludes_instructor_only_content(tmp_path): + """ + Test that parse_web_content excludes content intended for Instructors only + """ + manifest_xml = b""" + + + + + + + + + Instructor + + + + + + + + """ + html_content = b""" + Instructor Only Content + Content + + """ + zip_path = tmp_path / "canvas_course.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("imsmanifest.xml", manifest_xml) + zf.writestr("file1.html", html_content) + + result = parse_web_content(zip_path) + assert "active" in result + assert "unpublished" in result + assert len(result["active"]) == 0 + assert len(result["unpublished"]) == 1 + assert result["unpublished"][0]["title"] == "Instructor Only Content" + assert result["unpublished"][0]["path"] == "file1.html" + + +def test_parse_files_meta_excludes_tutorbot_folder(tmp_path, settings): + """ + Test that parse_files_meta explicitly excludes files in the tutorbot folder + even if they are marked as published in the files_meta.xml. + """ + settings.CANVAS_TUTORBOT_FOLDER = "web_resources/ai/tutor/" + files_meta_xml = b""" + + + Published File + false + false + web_resources/ai/tutor/tutorfile.html + + + Regular File + false + false + regular_folder/file2.html + + + """ + manifest_xml = b""" + + + + + + + + + + + """ + zip_path = tmp_path / "canvas_course.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("course_settings/files_meta.xml", files_meta_xml) + zf.writestr("imsmanifest.xml", manifest_xml) + + result = parse_files_meta(zip_path) + + # Ensure the file in the tutorbot folder is excluded + assert len(result["active"]) == 1 + assert result["active"][0]["path"].name == "file2.html" + assert len(result["unpublished"]) == 1 + assert result["unpublished"][0]["path"].name == "tutorfile.html" From c896a339ac00b3514e97c3e975ac3946d287fad5 Mon Sep 17 00:00:00 2001 From: Doof Date: Wed, 10 Sep 2025 13:45:18 +0000 Subject: [PATCH 10/10] Release 0.42.3 --- RELEASE.rst | 13 +++++++++++++ main/settings.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/RELEASE.rst b/RELEASE.rst index 5de3bd8312..389c1bfe6c 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -1,6 +1,19 @@ Release Notes ============= +Version 0.42.3 +-------------- + +- ingest canvas html content (#2502) +- Fix userlist stale data (#2501) +- redirect org users to org dashboard (#2482) +- Set page size to 30 for the course query for programs in the org dash (#2494) +- Update dependency Django to v4.2.24 (#2496) +- adding fix for flaky test (#2491) +- Toggle NextJS image optimization with an env var (#2385) +- Stop running Sloan news/event tasks (#2493) +- Update actions/checkout digest to 08eba0b (#2454) + Version 0.42.2 (Released September 08, 2025) -------------- diff --git a/main/settings.py b/main/settings.py index d9e0e5acc7..3db1565ee2 100644 --- a/main/settings.py +++ b/main/settings.py @@ -34,7 +34,7 @@ from main.settings_pluggy import * # noqa: F403 from openapi.settings_spectacular import open_spectacular_settings -VERSION = "0.42.2" +VERSION = "0.42.3" log = logging.getLogger()