From 755023defc58ac26edcdae78c661a976c4793814 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Thu, 25 Aug 2022 23:03:48 +0200
Subject: [PATCH 1/4] ROB: Fix errors/warnings in extract_text when
 /Resources is missing

Fixes #1272 (in text) and fixes #1269 (in XForm)
---
 PyPDF2/_page.py    | 5 ++++-
 tests/test_page.py | 9 ++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py
index 77a15ab32..a12822881 100644
--- a/PyPDF2/_page.py
+++ b/PyPDF2/_page.py
@@ -1140,7 +1140,10 @@ def _extract_text(
         cmaps: Dict[
             str, Tuple[str, float, Union[str, Dict[int, str]], Dict[str, str]]
         ] = {}
-        resources_dict = cast(DictionaryObject, obj["/Resources"])
+        try:
+            resources_dict = cast(DictionaryObject, obj["/Resources"])
+        except Exception:
+            return ""  # no resources means no text is possible (no font)
         if "/Font" in resources_dict:
             for f in cast(DictionaryObject, resources_dict["/Font"]):
                 cmaps[f] = build_char_map(f, space_width, obj)
diff --git a/tests/test_page.py b/tests/test_page.py
index 2a9c97b00..9797e75e0 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -238,6 +238,13 @@ def test_extract_text_single_quote_op():
         page.extract_text()
 
 
+def test_no_ressources_on_text_extract():
+    url = "https://raw.githubusercontent.com/eagletrt/wiki/0f3f16309604f665a47595c890d15af1b3aec6d6/fenice-telemetry-tx/PCB%20Outputs/Pdf/Edge%20Mount%20SMA/TelemetryTX_EM.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name="tika-964029.pdf")))
+    for page in reader.pages:
+        page.extract_text()
+
+
 def test_iss_1142():
     # check fix for problem of context save/restore (q/Q)
     url = "https://github.com/py-pdf/PyPDF2/files/9150656/ST.2019.PDF"
@@ -280,7 +287,7 @@ def test_extract_text_page_pdf_impossible_decode_xform(caplog):
     for page in reader.pages:
         page.extract_text()
     warn_msgs = normalize_warnings(caplog.text)
-    assert warn_msgs == [" impossible to decode XFormObject /Meta203"]
+    assert warn_msgs == [""]  # text extraction recognise no texg
 
 
 def test_extract_text_operator_t_star():  # L1266, L1267

From 61a306b436bedf0d50ae8110347fa73fc81de488 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Fri, 26 Aug 2022 08:02:15 +0200
Subject: [PATCH 2/4] Update tests/test_page.py

Co-authored-by: Matthew Peveler <matt.peveler@gmail.com>
---
 tests/test_page.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_page.py b/tests/test_page.py
index 9797e75e0..e74690d80 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -287,7 +287,7 @@ def test_extract_text_page_pdf_impossible_decode_xform(caplog):
     for page in reader.pages:
         page.extract_text()
     warn_msgs = normalize_warnings(caplog.text)
-    assert warn_msgs == [""]  # text extraction recognise no texg
+    assert warn_msgs == [""]  # text extraction recognise no text
 
 
 def test_extract_text_operator_t_star():  # L1266, L1267

From 2cb4c5f90d93e7c0d89bf82763d88cacc31f7019 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Fri, 26 Aug 2022 08:04:20 +0200
Subject: [PATCH 3/4] Update tests/test_page.py

Co-authored-by: Matthew Peveler <matt.peveler@gmail.com>
---
 tests/test_page.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_page.py b/tests/test_page.py
index e74690d80..90e40c0a5 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -239,7 +239,7 @@ def test_extract_text_single_quote_op():
 
 
 def test_no_ressources_on_text_extract():
-    url = "https://raw.githubusercontent.com/eagletrt/wiki/0f3f16309604f665a47595c890d15af1b3aec6d6/fenice-telemetry-tx/PCB%20Outputs/Pdf/Edge%20Mount%20SMA/TelemetryTX_EM.pdf"
+    url = "https://github.com/py-pdf/PyPDF2/files/9428434/TelemetryTX_EM.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name="tika-964029.pdf")))
     for page in reader.pages:
         page.extract_text()

From a4feaba0204246bec4cc733648e2bf15ec3a4747 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 27 Aug 2022 21:08:17 +0200
Subject: [PATCH 4/4] Look for inherited /Resources in /Parent nodes

---
 PyPDF2/_page.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py
index a12822881..f818ff544 100644
--- a/PyPDF2/_page.py
+++ b/PyPDF2/_page.py
@@ -1141,9 +1141,14 @@ def _extract_text(
             str, Tuple[str, float, Union[str, Dict[int, str]], Dict[str, str]]
         ] = {}
         try:
-            resources_dict = cast(DictionaryObject, obj["/Resources"])
+            objr = obj
+            while NameObject("/Resources") not in objr:
+                # /Resources can be inherited sometimes so we look to parents
+                objr = objr["/Parent"].get_object()
+                # if there is no parent, no /Resources is available => an exception will be raised
+            resources_dict = cast(DictionaryObject, objr["/Resources"])
         except Exception:
-            return ""  # no resources means no text is possible (no font)
+            return ""  # no resources means no text is possible (no font); we consider the file as not damaged, so no need to check for TJ or Tj
         if "/Font" in resources_dict:
             for f in cast(DictionaryObject, resources_dict["/Font"]):
                 cmaps[f] = build_char_map(f, space_width, obj)