Merge pull request #8 from TakamiChie/master

Fixed a problem when loading old Japanese PDFs.
sfneal · Jun 9, 2021 · 462cb9c
2 parents 18c0559 + 8691814
commit 462cb9c
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 1 deletion.
diff --git a/PyPDF3/generic.py b/PyPDF3/generic.py
@@ -489,7 +489,11 @@ def readFromStream(stream, pdf):
             # with a '#' followed by the symbol's hex number
             if not pdf.strict:
                 warnings.warn("Illegal character in Name Object", utils.PdfReadWarning)
-                return NameObject(name)
+                try:
+                    ss = name.decode("Shift-JIS").split("+")
+                    return NameObject(f"{ss[0]}+{''.join([f'#{c:X}' for c in ss[1].encode()])}")
+                except (UnicodeEncodeError, UnicodeDecodeError) as e:
+                    return NameObject(name)
             else:
                 raise utils.PdfReadError("Illegal character in Name Object")
 

diff --git a/Resources/includeSJISfontname.pdf b/Resources/includeSJISfontname.pdf
diff --git a/tests/tests.py b/tests/tests.py
@@ -1,3 +1,4 @@
+import tempfile
 import os
 import sys
 import unittest
@@ -61,6 +62,30 @@ def test_PdfReaderJpegImage(self):
                              msg='PDF extracted image differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n' 
                              % (imagetext, binascii.hexlify(data).decode()))
 
+    def test_PdfReaderincludeSJISfontname(self):
+        '''
+        Round-trip a PDF that includes a Shift JIS font name: read it with strict=False,
+        copy its pages to a temporary file, re-open that file and look up the /F1 font of
+        page 0. Expected outcome: every step completes without raising an exception.
+        '''
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with open(os.path.join(RESOURCE_ROOT, 'includeSJISfontname.pdf'), 'rb') as inputfile:
+                # Read the fixture PDF in non-strict mode
+                ipdf = PdfFileReader(inputfile, strict=False)
+                tempfn = os.path.join(tmpdir, "temp.pdf")
+
+                writer = PdfFileWriter()
+                for page in ipdf.pages:
+                    writer.addPage(page)
+                with open(tempfn, "wb") as pdf:
+                    writer.write(pdf)
+
+                with open(tempfn, "rb") as pdf:
+                    pdf = PdfFileReader(pdf)  # rebinds `pdf` from the file handle to the reader
+                    pdf.getPage(0)["/Resources"]["/Font"]["/F1"]  # result discarded; only the lookup is exercised
+                    self.assertTrue(True,msg='No error occurred')  # always passes; the test fails only if a step above raises
+
 class AddJsTestCase(unittest.TestCase):
 
     def setUp(self):