Permalink
Browse files

Made output more compatible with the hOCR spec.

- add the properties "image" and "bbox" to the title of the "ocr_page" element.
- correct the orientation of the coordinate system (use regions.bbox instead of regions.bboxMath)
  • Loading branch information...
jze committed Dec 2, 2016
1 parent 976a3ba commit 060ff214f4c3b76d90469a1ce025988d0c97ab94
Showing with 4 additions and 2 deletions.
  1. +4 −2 ocropus-hocr
View
@@ -63,7 +63,9 @@ for arg in args.files:
base,_ = ocrolib.allsplitext(arg)
try:
E("===",arg)
- P("<div class='ocr_page' title='file %s'>"%arg)
+ image = ocrolib.read_image_binary(arg)
+ height, width = image.shape
+ P("<div class='ocr_page' title='image %s; bbox 0 0 %d %d'>"%(arg,width,height))
# to proceed, we need a pseg file and a
# subdirectory containing text lines
@@ -88,7 +90,7 @@ for arg in args.files:
# and insert paragraph breaks as needed
id = regions.id(i)
- y0,x0,y1,x1 = regions.bboxMath(i)
+ y0,x0,y1,x1 = regions.bbox(i)
if last_coords is not None:
lx0,ly0 = last_coords
dx,dy = x0-lx0,y1-ly0

0 comments on commit 060ff21

Please sign in to comment.