Skip to content
This repository has been archived by the owner on Jul 11, 2024. It is now read-only.

Commit

Permalink
Merge pull request #8 from TakamiChie/master
Browse files Browse the repository at this point in the history
Fixed a problem when loading old Japanese PDFs.
  • Loading branch information
sfneal committed Jun 9, 2021
2 parents 18c0559 + 8691814 commit 462cb9c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 1 deletion.
6 changes: 5 additions & 1 deletion PyPDF3/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,11 @@ def readFromStream(stream, pdf):
# with a '#' followed by the symbol's hex number
if not pdf.strict:
warnings.warn("Illegal character in Name Object", utils.PdfReadWarning)
return NameObject(name)
try:
ss = name.decode("Shift-JIS").split("+")
return NameObject(f"{ss[0]}+{''.join([f'#{c:X}' for c in ss[1].encode()])}")
except (UnicodeEncodeError, UnicodeDecodeError) as e:
return NameObject(name)
else:
raise utils.PdfReadError("Illegal character in Name Object")

Expand Down
Binary file added Resources/includeSJISfontname.pdf
Binary file not shown.
25 changes: 25 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import tempfile
import os
import sys
import unittest
Expand Down Expand Up @@ -61,6 +62,30 @@ def test_PdfReaderJpegImage(self):
msg='PDF extracted image differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n'
% (imagetext, binascii.hexlify(data).decode()))

def test_PdfReaderincludeSJISfontname(self):
    '''
    Round-trip a PDF whose font name contains Shift JIS bytes.

    The resource file is opened in non-strict mode, every page is copied
    into a new document that is written to a temporary directory, and the
    written copy is then reopened. Expected outcome: the ``/F1`` font entry
    of the first page can be looked up in the rewritten file without an
    error being raised.
    '''
    source_path = os.path.join(RESOURCE_ROOT, 'includeSJISfontname.pdf')

    with tempfile.TemporaryDirectory() as tmpdir:
        tempfn = os.path.join(tmpdir, "temp.pdf")

        # Keep the source file open until the copy has been written, in
        # case page data is still read from it while writing.
        with open(source_path, 'rb') as inputfile:
            ipdf = PdfFileReader(inputfile, strict=False)

            writer = PdfFileWriter()
            for page in ipdf.pages:
                writer.addPage(page)
            with open(tempfn, "wb") as outputfile:
                writer.write(outputfile)

        # Reopen the rewritten file while the temporary directory still
        # exists and resolve the font entry from it.
        with open(tempfn, "rb") as rewritten:
            reader = PdfFileReader(rewritten)
            font = reader.getPage(0)["/Resources"]["/Font"]["/F1"]
            # Assert on the looked-up object instead of a constant True so
            # the assertion is tied to the result of the lookup.
            self.assertIsNotNone(
                font,
                msg='Font /F1 could not be read back from the rewritten PDF.')

class AddJsTestCase(unittest.TestCase):

def setUp(self):
Expand Down

0 comments on commit 462cb9c

Please sign in to comment.