From dde8a75cf4a466bbd54bf7ef68b52330b7bfa3be Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 13:12:34 -0400 Subject: [PATCH 01/18] removing unused arg --- learning_resources/etl/canvas.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py index 040e86a151..cbb71954e4 100644 --- a/learning_resources/etl/canvas.py +++ b/learning_resources/etl/canvas.py @@ -9,10 +9,10 @@ from pathlib import Path from tempfile import TemporaryDirectory +import pypdfium2 as pdfium from defusedxml import ElementTree from django.conf import settings from litellm import completion -from pdf2image import convert_from_path from PIL import Image from learning_resources.constants import ( @@ -323,7 +323,7 @@ def extract_resources_by_identifierref(manifest_xml: str) -> dict: return dict(resources_dict) -def pdf_to_base64_images(pdf_path, dpi=200, fmt="JPEG", max_size=2000, quality=85): +def pdf_to_base64_images(pdf_path, fmt="JPEG", max_size=2000, quality=85): """ Convert a PDF file to a list of base64 encoded images (one per page). Resizes images to reduce file size while keeping good OCR quality. @@ -338,26 +338,24 @@ def pdf_to_base64_images(pdf_path, dpi=200, fmt="JPEG", max_size=2000, quality=8 Returns: list: List of base64 encoded strings (one per page) """ - images = convert_from_path(pdf_path, dpi=dpi) - base64_images = [] - for image in images: + pdf = pdfium.PdfDocument(pdf_path) + for page_index in range(len(pdf)): + page = pdf.get_page(page_index) + image = page.render(scale=2).to_pil() + page.close() # Resize the image if it's too large (preserving aspect ratio) if max(image.size) > max_size: image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS) - buffered = BytesIO() - # Save with optimized settings if fmt.upper() == "JPEG": image.save(buffered, format="JPEG", quality=quality, optimize=True) else: # PNG image.save(buffered, format="PNG", optimize=True) - img_str = base64.b64encode(buffered.getvalue()).decode("utf-8") - base64_images.append(img_str) - - return base64_images + yield img_str + pdf.close() def _pdf_to_markdown(pdf_path): From b45bb856d54ca5b53f25d2a877675c544d7b7271 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 13:15:54 -0400 Subject: [PATCH 02/18] adding dep --- poetry.lock | 25 ++++++++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 782f978038..391412383a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6389,6 +6389,29 @@ docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] full = ["Pillow (>=8.0.0)", "cryptography"] image = ["Pillow (>=8.0.0)"] +[[package]] +name = "pypdfium2" +version = "4.30.0" +description = "Python bindings to PDFium" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab"}, + {file = "pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de"}, + {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e6e50f5ce7f65a40a33d7c9edc39f23140c57e37144c2d6d9e9262a2a854854"}, + {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3d0dd3ecaffd0b6dbda3da663220e705cb563918249bda26058c6036752ba3a2"}, + {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc3bf29b0db8c76cdfaac1ec1cde8edf211a7de7390fbf8934ad2aa9b4d6dfad"}, + {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1f78d2189e0ddf9ac2b7a9b9bd4f0c66f54d1389ff6c17e9fd9dc034d06eb3f"}, + {file = "pypdfium2-4.30.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:5eda3641a2da7a7a0b2f4dbd71d706401a656fea521b6b6faa0675b15d31a163"}, + {file = "pypdfium2-4.30.0-py3-none-musllinux_1_1_i686.whl", hash = "sha256:0dfa61421b5eb68e1188b0b2231e7ba35735aef2d867d86e48ee6cab6975195e"}, + {file = "pypdfium2-4.30.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:f33bd79e7a09d5f7acca3b0b69ff6c8a488869a7fab48fdf400fec6e20b9c8be"}, + {file = "pypdfium2-4.30.0-py3-none-win32.whl", hash = "sha256:ee2410f15d576d976c2ab2558c93d392a25fb9f6635e8dd0a8a3a5241b275e0e"}, + {file = "pypdfium2-4.30.0-py3-none-win_amd64.whl", hash = "sha256:90dbb2ac07be53219f56be09961eb95cf2473f834d01a42d901d13ccfad64b4c"}, + {file = "pypdfium2-4.30.0-py3-none-win_arm64.whl", hash = "sha256:119b2969a6d6b1e8d55e99caaf05290294f2d0fe49c12a3f17102d01c441bd29"}, + {file = "pypdfium2-4.30.0.tar.gz", hash = "sha256:48b5b7e5566665bc1015b9d69c1ebabe21f6aee468b509531c3c8318eeee2e16"}, +] + [[package]] name = "pyreadline3" version = "3.5.4" @@ -9105,4 +9128,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "~3.12" -content-hash = "63e448c31390942ead05c25627a91c91d5bfec50d2e6460432ef7868c5e0ffe8" +content-hash = "339ddac52de367d3a8b7b96b769e5314b2b70b92f36d4f174d70ebd2fe291341" diff --git a/pyproject.toml b/pyproject.toml index 551a4983fc..5cc46c0a16 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,6 +112,7 @@ uwsgitop = "^0.12" wrapt = "^1.14.1" youtube-transcript-api = "^1.0.0" pdf2image = "^1.17.0" +pypdfium2 = "^4.30.0" [tool.poetry.group.dev.dependencies] bpython = "^0.25" From 3e0041526b96a3f2e200bf7a768642ef8526d27b Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 13:25:48 -0400 Subject: [PATCH 03/18] removing old dep --- poetry.lock | 17 +---------------- pyproject.toml | 1 - 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/poetry.lock b/poetry.lock index 391412383a..85e9185648 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5512,21 +5512,6 @@ pygments = "*" [package.extras] testing = ["ipython", "pexpect", "pytest", "pytest-cov"] -[[package]] -name = "pdf2image" -version = "1.17.0" -description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list." -optional = false -python-versions = "*" -groups = ["main"] -files = [ - {file = "pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2"}, - {file = "pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57"}, -] - -[package.dependencies] -pillow = "*" - [[package]] name = "pexpect" version = "4.9.0" @@ -9128,4 +9113,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "~3.12" -content-hash = "339ddac52de367d3a8b7b96b769e5314b2b70b92f36d4f174d70ebd2fe291341" +content-hash = "04bc62389781a0c453f7df3965a1b6bc2e728c5ed68377392776108ab0ea09b6" diff --git a/pyproject.toml b/pyproject.toml index 5cc46c0a16..08b107de84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,6 @@ uwsgi = "^2.0.29" uwsgitop = "^0.12" wrapt = "^1.14.1" youtube-transcript-api = "^1.0.0" -pdf2image = "^1.17.0" pypdfium2 = "^4.30.0" [tool.poetry.group.dev.dependencies] From 462c8cf77355fc098846acb4911295fc4fb773f6 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 13:30:09 -0400 Subject: [PATCH 04/18] try install from source --- poetry.lock | 29 +++++++++++------------------ pyproject.toml | 5 ++++- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/poetry.lock b/poetry.lock index 85e9185648..03880f3ba3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6376,26 +6376,19 @@ image = ["Pillow (>=8.0.0)"] [[package]] name = "pypdfium2" -version = "4.30.0" +version = "5.0.0b2+13.g9701b0e9" description = "Python bindings to PDFium" optional = false -python-versions = ">=3.6" +python-versions = ">= 3.6" groups = ["main"] -files = [ - {file = "pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab"}, - {file = "pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de"}, - {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e6e50f5ce7f65a40a33d7c9edc39f23140c57e37144c2d6d9e9262a2a854854"}, - {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3d0dd3ecaffd0b6dbda3da663220e705cb563918249bda26058c6036752ba3a2"}, - {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc3bf29b0db8c76cdfaac1ec1cde8edf211a7de7390fbf8934ad2aa9b4d6dfad"}, - {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1f78d2189e0ddf9ac2b7a9b9bd4f0c66f54d1389ff6c17e9fd9dc034d06eb3f"}, - {file = "pypdfium2-4.30.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:5eda3641a2da7a7a0b2f4dbd71d706401a656fea521b6b6faa0675b15d31a163"}, - {file = "pypdfium2-4.30.0-py3-none-musllinux_1_1_i686.whl", hash = "sha256:0dfa61421b5eb68e1188b0b2231e7ba35735aef2d867d86e48ee6cab6975195e"}, - {file = "pypdfium2-4.30.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:f33bd79e7a09d5f7acca3b0b69ff6c8a488869a7fab48fdf400fec6e20b9c8be"}, - {file = "pypdfium2-4.30.0-py3-none-win32.whl", hash = "sha256:ee2410f15d576d976c2ab2558c93d392a25fb9f6635e8dd0a8a3a5241b275e0e"}, - {file = "pypdfium2-4.30.0-py3-none-win_amd64.whl", hash = "sha256:90dbb2ac07be53219f56be09961eb95cf2473f834d01a42d901d13ccfad64b4c"}, - {file = "pypdfium2-4.30.0-py3-none-win_arm64.whl", hash = "sha256:119b2969a6d6b1e8d55e99caaf05290294f2d0fe49c12a3f17102d01c441bd29"}, - {file = "pypdfium2-4.30.0.tar.gz", hash = "sha256:48b5b7e5566665bc1015b9d69c1ebabe21f6aee468b509531c3c8318eeee2e16"}, -] +files = [] +develop = false + +[package.source] +type = "git" +url = "https://github.com/pypdfium2-team/pypdfium2" +reference = "HEAD" +resolved_reference = "9701b0e9a35a1e3dd51aac7904ebab3120eaa15a" [[package]] name = "pyreadline3" @@ -9113,4 +9106,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "~3.12" -content-hash = "04bc62389781a0c453f7df3965a1b6bc2e728c5ed68377392776108ab0ea09b6" +content-hash = "1d6cd2aeb0c18dd44ff2c1d0b93b7ffb1fda259c67e30990fbc63340b9d37798" diff --git a/pyproject.toml b/pyproject.toml index 08b107de84..bdedd72aa8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,8 @@ django-cors-headers = "^4.0.0" django-filter = "^2.4.0" django-guardian = "^3.0.0" django-health-check = { git = "https://github.com/revsys/django-health-check", rev="b0500d14c338040984f02ee34ffbe6643b005084" } # pragma: allowlist secret +pypdfium2 = { git = "https://github.com/pypdfium2-team/pypdfium2"} # pragma: allowlist secret + django-imagekit = "^5.0.0" django-ipware = "^7.0.0" django-json-widget = "^2.0.0" @@ -111,7 +113,8 @@ uwsgi = "^2.0.29" uwsgitop = "^0.12" wrapt = "^1.14.1" youtube-transcript-api = "^1.0.0" -pypdfium2 = "^4.30.0" + + [tool.poetry.group.dev.dependencies] bpython = "^0.25" From 4a9865c7a6f6beb8dd56dab31fd541c73510cdba Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 13:36:28 -0400 Subject: [PATCH 05/18] pin python --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 49d82b6bac..af931530c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,6 +45,7 @@ jobs: - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: + python-version: "3.12" python-version-file: "pyproject.toml" cache: "poetry" From 169fd9af78a1e0b98c50144ac8539648411f34e8 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 13:59:52 -0400 Subject: [PATCH 06/18] pin python --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index af931530c9..916d967f12 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,7 +46,6 @@ jobs: - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.12" - python-version-file: "pyproject.toml" cache: "poetry" - name: Validate lockfile From defdb9811f9ed90b52e8d0c8830f074380138031 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 14:05:07 -0400 Subject: [PATCH 07/18] test --- .github/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 916d967f12..148a3cb600 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,9 +45,10 @@ jobs: - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version-file: "pyproject.toml" cache: "poetry" - + - name: Check Python version + run: python --version - name: Validate lockfile run: poetry check --lock From 78d5e4a5fe543de9c010beb62b23e7ee9febe532 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 14:22:53 -0400 Subject: [PATCH 08/18] explicitely use right version of python --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 148a3cb600..8e3f965e3f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,7 +51,8 @@ jobs: run: python --version - name: Validate lockfile run: poetry check --lock - + - name: Set Poetry Python + run: poetry env use python3.12 - name: Install dependencies run: poetry install --no-interaction From 32e6c1916e86d811fe5adbd8d102140e8d71e2db Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 14:29:54 -0400 Subject: [PATCH 09/18] explicitely use right version of python --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8e3f965e3f..5f98aef921 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,8 @@ jobs: with: python-version-file: "pyproject.toml" cache: "poetry" + - name: Install poetry with pip + run: python -m pip install poetry - name: Check Python version run: python --version - name: Validate lockfile @@ -54,7 +56,9 @@ jobs: - name: Set Poetry Python run: poetry env use python3.12 - name: Install dependencies - run: poetry install --no-interaction + run: | + source $(poetry env info --path)/bin/activate + poetry install --no-interaction - name: Create test local state run: ./scripts/test/stub-data.sh From f7f7da078b6f598dc70680d50a2988d5a57be583 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 14:56:16 -0400 Subject: [PATCH 10/18] remobing unused steps --- .github/workflows/ci.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5f98aef921..59a0a78b5a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,17 +49,12 @@ jobs: cache: "poetry" - name: Install poetry with pip run: python -m pip install poetry - - name: Check Python version - run: python --version - name: Validate lockfile run: poetry check --lock - - name: Set Poetry Python - run: poetry env use python3.12 - name: Install dependencies run: | source $(poetry env info --path)/bin/activate poetry install --no-interaction - - name: Create test local state run: ./scripts/test/stub-data.sh From eea03eef348276c282ad449c59da4f2f325cf4e5 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 13 Aug 2025 15:32:18 -0400 Subject: [PATCH 11/18] remove comment --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bdedd72aa8..50cd1668e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ django-cors-headers = "^4.0.0" django-filter = "^2.4.0" django-guardian = "^3.0.0" django-health-check = { git = "https://github.com/revsys/django-health-check", rev="b0500d14c338040984f02ee34ffbe6643b005084" } # pragma: allowlist secret -pypdfium2 = { git = "https://github.com/pypdfium2-team/pypdfium2"} # pragma: allowlist secret +pypdfium2 = { git = "https://github.com/pypdfium2-team/pypdfium2"} django-imagekit = "^5.0.0" django-ipware = "^7.0.0" From 8dde57791a14535b6a03604041bcedf1389fa113 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Thu, 14 Aug 2025 10:43:57 -0400 Subject: [PATCH 12/18] fixing other bug with canvas task json serialization --- learning_resources/etl/canvas.py | 2 +- learning_resources/tasks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/learning_resources/etl/canvas.py b/learning_resources/etl/canvas.py index cbb71954e4..6e0ef24042 100644 --- a/learning_resources/etl/canvas.py +++ b/learning_resources/etl/canvas.py @@ -71,7 +71,7 @@ def sync_canvas_archive(bucket, key: str, overwrite): run.checksum = checksum run.save() - return resource_readable_id, run + return resource_readable_id def _course_url(course_archive_path) -> str: diff --git a/learning_resources/tasks.py b/learning_resources/tasks.py index 51d3914b73..16db1722b0 100644 --- a/learning_resources/tasks.py +++ b/learning_resources/tasks.py @@ -511,7 +511,7 @@ def sync_canvas_courses(canvas_course_ids, overwrite): for archive in latest_archives.values(): key = archive.key log.info("Ingesting canvas course %s", key) - resource_readable_id, canvas_run = ingest_canvas_course( + resource_readable_id = ingest_canvas_course( key, overwrite=overwrite, ) From d5584b4e70e9d0381a00952129a136cca7a67639 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Thu, 14 Aug 2025 11:15:32 -0400 Subject: [PATCH 13/18] adding latext .tex files --- learning_resources/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/learning_resources/constants.py b/learning_resources/constants.py index 78d673da4e..ef2c835fdd 100644 --- a/learning_resources/constants.py +++ b/learning_resources/constants.py @@ -147,6 +147,7 @@ class LearningResourceRelationTypes(TextChoices): ".json", ".md", ".pdf", + ".tex", ".ppt", ".pptx", ".rtf", From 985c168dd01094eafda9c5b59e7470bf432732ad Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Thu, 14 Aug 2025 11:21:26 -0400 Subject: [PATCH 14/18] adding migration --- ...marizerconfiguration_allowed_extensions.py | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 learning_resources/migrations/0094_alter_contentsummarizerconfiguration_allowed_extensions.py diff --git a/learning_resources/migrations/0094_alter_contentsummarizerconfiguration_allowed_extensions.py b/learning_resources/migrations/0094_alter_contentsummarizerconfiguration_allowed_extensions.py new file mode 100644 index 0000000000..20f381762c --- /dev/null +++ b/learning_resources/migrations/0094_alter_contentsummarizerconfiguration_allowed_extensions.py @@ -0,0 +1,67 @@ +# Generated by Django 4.2.23 on 2025-08-14 15:20 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("learning_resources", "0093_tutorproblem_view_group"), + ] + + operations = [ + migrations.AlterField( + model_name="contentsummarizerconfiguration", + name="allowed_extensions", + field=django.contrib.postgres.fields.ArrayField( + base_field=models.CharField( + choices=[ + (".csv", ".csv"), + (".doc", ".doc"), + (".docx", ".docx"), + (".htm", ".htm"), + (".html", ".html"), + (".json", ".json"), + (".m", ".m"), + (".mat", ".mat"), + (".md", ".md"), + (".pdf", ".pdf"), + (".ppt", ".ppt"), + (".pptx", ".pptx"), + (".ps", ".ps"), + (".py", ".py"), + (".r", ".r"), + (".rtf", ".rtf"), + (".sjson", ".sjson"), + (".srt", ".srt"), + (".txt", ".txt"), + (".vtt", ".vtt"), + (".xls", ".xls"), + (".xlsx", ".xlsx"), + (".xml", ".xml"), + (".doc", ".doc"), + (".docx", ".docx"), + (".htm", ".htm"), + (".html", ".html"), + (".json", ".json"), + (".md", ".md"), + (".pdf", ".pdf"), + (".tex", ".tex"), + (".ppt", ".ppt"), + (".pptx", ".pptx"), + (".rtf", ".rtf"), + (".sjson", ".sjson"), + (".srt", ".srt"), + (".txt", ".txt"), + (".vtt", ".vtt"), + (".xml", ".xml"), + ], + max_length=128, + ), + blank=True, + default=list, + null=True, + size=None, + ), + ), + ] From 05cfd559f7a046c08f0722401914e0b39e9f509b Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Thu, 14 Aug 2025 11:35:35 -0400 Subject: [PATCH 15/18] fix test --- learning_resources/tasks_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py index ceeb2cf3bb..249cdb6cbd 100644 --- a/learning_resources/tasks_test.py +++ b/learning_resources/tasks_test.py @@ -658,7 +658,7 @@ def test_sync_canvas_courses(settings, mocker, django_assert_num_queries, canvas # Patch ingest_canvas_course to return the readable_ids for the two non-stale courses mock_ingest_course = mocker.patch( "learning_resources.tasks.ingest_canvas_course", - side_effect=[("course1", lr1.runs.first()), ("course2", lr2.runs.first())], + side_effect=["course1", "course2"], ) sync_canvas_courses(canvas_course_ids=canvas_ids, overwrite=False) From e46f516d54fad3de6b8ed2b193a3223583b8114e Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Thu, 14 Aug 2025 12:14:22 -0400 Subject: [PATCH 16/18] pinning to pypi and removing unused ci steps --- .github/workflows/ci.yml | 2 -- poetry.lock | 29 ++++++++++++++++++----------- pyproject.toml | 3 ++- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 59a0a78b5a..13406a36bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,8 +47,6 @@ jobs: with: python-version-file: "pyproject.toml" cache: "poetry" - - name: Install poetry with pip - run: python -m pip install poetry - name: Validate lockfile run: poetry check --lock - name: Install dependencies diff --git a/poetry.lock b/poetry.lock index 03880f3ba3..85e9185648 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6376,19 +6376,26 @@ image = ["Pillow (>=8.0.0)"] [[package]] name = "pypdfium2" -version = "5.0.0b2+13.g9701b0e9" +version = "4.30.0" description = "Python bindings to PDFium" optional = false -python-versions = ">= 3.6" +python-versions = ">=3.6" groups = ["main"] -files = [] -develop = false - -[package.source] -type = "git" -url = "https://github.com/pypdfium2-team/pypdfium2" -reference = "HEAD" -resolved_reference = "9701b0e9a35a1e3dd51aac7904ebab3120eaa15a" +files = [ + {file = "pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab"}, + {file = "pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de"}, + {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e6e50f5ce7f65a40a33d7c9edc39f23140c57e37144c2d6d9e9262a2a854854"}, + {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3d0dd3ecaffd0b6dbda3da663220e705cb563918249bda26058c6036752ba3a2"}, + {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc3bf29b0db8c76cdfaac1ec1cde8edf211a7de7390fbf8934ad2aa9b4d6dfad"}, + {file = "pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1f78d2189e0ddf9ac2b7a9b9bd4f0c66f54d1389ff6c17e9fd9dc034d06eb3f"}, + {file = "pypdfium2-4.30.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:5eda3641a2da7a7a0b2f4dbd71d706401a656fea521b6b6faa0675b15d31a163"}, + {file = "pypdfium2-4.30.0-py3-none-musllinux_1_1_i686.whl", hash = "sha256:0dfa61421b5eb68e1188b0b2231e7ba35735aef2d867d86e48ee6cab6975195e"}, + {file = "pypdfium2-4.30.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:f33bd79e7a09d5f7acca3b0b69ff6c8a488869a7fab48fdf400fec6e20b9c8be"}, + {file = "pypdfium2-4.30.0-py3-none-win32.whl", hash = "sha256:ee2410f15d576d976c2ab2558c93d392a25fb9f6635e8dd0a8a3a5241b275e0e"}, + {file = "pypdfium2-4.30.0-py3-none-win_amd64.whl", hash = "sha256:90dbb2ac07be53219f56be09961eb95cf2473f834d01a42d901d13ccfad64b4c"}, + {file = "pypdfium2-4.30.0-py3-none-win_arm64.whl", hash = "sha256:119b2969a6d6b1e8d55e99caaf05290294f2d0fe49c12a3f17102d01c441bd29"}, + {file = "pypdfium2-4.30.0.tar.gz", hash = "sha256:48b5b7e5566665bc1015b9d69c1ebabe21f6aee468b509531c3c8318eeee2e16"}, +] [[package]] name = "pyreadline3" @@ -9106,4 +9113,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "~3.12" -content-hash = "1d6cd2aeb0c18dd44ff2c1d0b93b7ffb1fda259c67e30990fbc63340b9d37798" +content-hash = "04bc62389781a0c453f7df3965a1b6bc2e728c5ed68377392776108ab0ea09b6" diff --git a/pyproject.toml b/pyproject.toml index 50cd1668e6..d76f251f33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ django-cors-headers = "^4.0.0" django-filter = "^2.4.0" django-guardian = "^3.0.0" django-health-check = { git = "https://github.com/revsys/django-health-check", rev="b0500d14c338040984f02ee34ffbe6643b005084" } # pragma: allowlist secret -pypdfium2 = { git = "https://github.com/pypdfium2-team/pypdfium2"} + django-imagekit = "^5.0.0" django-ipware = "^7.0.0" @@ -113,6 +113,7 @@ uwsgi = "^2.0.29" uwsgitop = "^0.12" wrapt = "^1.14.1" youtube-transcript-api = "^1.0.0" +pypdfium2 = "^4.30.0" From f07a4a0432fa91d64b9f5f3389aacbef7afcde3a Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Thu, 14 Aug 2025 12:44:22 -0400 Subject: [PATCH 17/18] pushing fix for ci --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 13406a36bf..59a0a78b5a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,8 @@ jobs: with: python-version-file: "pyproject.toml" cache: "poetry" + - name: Install poetry with pip + run: python -m pip install poetry - name: Validate lockfile run: poetry check --lock - name: Install dependencies From e3c729534a592326a79fd627db0ac4404b8c9161 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Thu, 14 Aug 2025 12:59:37 -0400 Subject: [PATCH 18/18] test --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 59a0a78b5a..121cffd28f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,10 +51,13 @@ jobs: run: python -m pip install poetry - name: Validate lockfile run: poetry check --lock + - name: Set Poetry Python + run: poetry env use python3.12 - name: Install dependencies run: | source $(poetry env info --path)/bin/activate poetry install --no-interaction + - name: Create test local state run: ./scripts/test/stub-data.sh