py-pdf · MartinThoma · Oct 9, 2022 · Sep 27, 2022 · Sep 27, 2022 · Sep 28, 2022
diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py
@@ -1490,8 +1490,13 @@ def process_operation(operator: bytes, operands: List) -> None:
             # Table 5.5 page 406
             elif operator == b"Td":
                 check_crlf_space = True
-                tm_matrix[4] += float(operands[0])
-                tm_matrix[5] += float(operands[1])
+                # Apply the (tx, ty) offset through the linear part of tm.
+                # Special case: for a translation-only tm = [1 0 0 1 e f]
+                # this reduces to tm[4] += tx, tm[5] += ty.
+                tx = float(operands[0])
+                ty = float(operands[1])
+                tm_matrix[4] += tx * tm_matrix[0] + ty * tm_matrix[2]
+                tm_matrix[5] += tx * tm_matrix[1] + ty * tm_matrix[3]
             elif operator == b"Tm":
                 check_crlf_space = True
                 tm_matrix = [

diff --git a/resources/Sample_Td-matrix.pdf b/resources/Sample_Td-matrix.pdf
diff --git a/tests/test_page.py b/tests/test_page.py
@@ -297,7 +297,14 @@ def test_iss_1142():
     name = "st2019.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     txt = reader.pages[3].extract_text()
-    assert txt.find("有限公司郑州分公司") > 0
+    # The following text is contained in two different cells:
+    # assert txt.find("有限公司郑州分公司") > 0
+    # 有限公司 = limited company
+    # 郑州分公司 = branch office in Zhengzhou
+    # First cell (see page 4/254):
+    assert txt.find("郑州药素电子商务有限公司") > 0
+    # Next cell (first cell in next line):
+    assert txt.find("郑州分公司") > 0
 
 
 @pytest.mark.parametrize(
@@ -604,6 +611,25 @@ def filter_first_table(r):
     assert texts.font_dict["/Encoding"] == "/WinAnsiEncoding"
     assert text_dat_of_date.font_size == 9.96
 
+    # Test 3: Read a table in a document whose Tm operator scales
+    #         but does not translate
+    reader = PdfReader(RESOURCE_ROOT / "Sample_Td-matrix.pdf")
+    page_td_model = reader.pages[0]
+    # Collect the (tm[4], tm[5]) translation seen after each Td operator.
+    list_Td = []
+
+    def visitor_td(op, args, cm, tm):
+        if op == b"Td":
+            list_Td.append((tm[4], tm[5]))
+
+    page_td_model.extract_text(visitor_operand_after=visitor_td)
+    assert len(list_Td) == 4
+    # Check the translations of the four Td-executions.
+    assert list_Td[0] == (210.0, 110.0)
+    assert list_Td[1] == (410.0, 110.0)
+    assert list_Td[2] == (210.0, 210.0)
+    assert list_Td[3] == (410.0, 210.0)
+
 
 @pytest.mark.parametrize(
     ("pdf_path", "password", "embedded", "unembedded"),