Skip to content

Commit

Permalink
Update hocr-pdf
Browse files: browse the repository at this point in the history
- Use base64.b64decode instead of base64.decodestring; fix #145
- Add warning when no JPG images are found in directory; fix #123
- Improve error handling; fix #35
  • Loading branch information
zuphilip committed Mar 3, 2019
1 parent 68f525f commit d756f75
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions hocr-pdf
Expand Up @@ -50,11 +50,15 @@ class StdoutWrapper:

def export_pdf(playground, default_dpi, savefile=False):
"""Create a searchable PDF from a pile of HOCR + JPEG"""
images = sorted(glob.glob(os.path.join(playground, '*.jpg')))
if len(images) == 0:
print("WARNING: No JPG images found in the folder", playground,
"\nScript cannot proceed without them and will terminate now.\n")
sys.exit(0)
load_invisible_font()
pdf = Canvas(savefile if savefile else StdoutWrapper(), pageCompression=1)
pdf.setCreator('hocr-tools')
pdf.setTitle(os.path.basename(playground))
images = sorted(glob.glob(os.path.join(playground, '*.jpg')))
dpi = default_dpi
for image in images:
im = Image.open(image)
Expand Down Expand Up @@ -142,7 +146,7 @@ pylRc2Zn+XDQWZIL8iO5KC9S+1RekOex1uOyZGR/w/Hf1lhzqVfFsxE39B/ws7Rm3N3nDrhPuMfc
w3R/aE28KsfY2J+RPNp+j+KaOoCey4h+Dd48b9O5G0v2K7j0AM6s+5WQ/E0wVoK+pA6/3bup7bJf
CMGjwvxTsr74/f/F95m3TH9x8o0/TU//N+7/D/ScVcA=
""".encode('latin1')
uncompressed = bytearray(zlib.decompress(base64.decodestring(font)))
uncompressed = bytearray(zlib.decompress(base64.b64decode(font)))
ttf = io.BytesIO(uncompressed)
setattr(ttf, "name", "(invisible.ttf)")
pdfmetrics.registerFont(TTFont('invisible', ttf))
Expand All @@ -162,4 +166,6 @@ if __name__ == "__main__":
help="Save to this file instead of outputting to stdout"
)
args = parser.parse_args()
if not os.path.isdir(args.imgdir):
sys.exit("ERROR: Given path '" + args.imgdir + "' is not a directory")
export_pdf(args.imgdir, 300, args.savefile)

0 comments on commit d756f75

Please sign in to comment.