py-pdf · MartinThoma · Sep 3, 2023 · Sep 3, 2023 · Sep 3, 2023
diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml
@@ -76,6 +76,9 @@ jobs:
     - name: Install pypdf
       run: |
         pip install .
+    - name: Prepare  # runs tests.download_test_pdfs() before the pytest step
+      run: |
+        python -c "from tests import download_test_pdfs; download_test_pdfs()"
     - name: Test with pytest
       run: |
         python -m coverage run --parallel-mode -m pytest tests -vv

diff --git a/tests/__init__.py b/tests/__init__.py
@@ -104,3 +104,14 @@ def is_sublist(child_list, parent_list):
     if parent_list[0] == child_list[0]:
         return is_sublist(child_list[1:], parent_list[1:])
     return is_sublist(child_list, parent_list[1:])
+
+
+def download_test_pdfs():
+    """
+    Request every PDF listed below via get_data_from_url ahead of the test run.
+
+    Having the files locally beforehand is mainly meant to avoid pytest timeouts.
+    """
+    remote_files = {"2201.00214.pdf": "https://arxiv.org/pdf/2201.00214.pdf"}
+    for filename, link in remote_files.items():
+        get_data_from_url(link, name=filename)
diff --git a/tests/test_writer.py b/tests/test_writer.py
@@ -1565,7 +1565,7 @@ def test_watermarking_speed():
     name = "bgwatermark.pdf"
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     url = "https://arxiv.org/pdf/2201.00214.pdf"
-    name = "src_doc.pdf"
+    name = "2201.00214.pdf"
     writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
     for p in writer.pages:
         p.merge_page(reader.pages[0], over=False)