Skip to content

Commit

Permalink
differentiate image number and page number
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed Dec 6, 2021
1 parent 553e0fd commit 27dffe8
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
13 changes: 11 additions & 2 deletions mets_mods2tei/api/mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(self):
self.mods = None
self.page_map = {}
self.order_map = {}
self.orderlabel_map = {}
self.img_map = {}
self.alto_map = {}
self.struct_links = {}
Expand Down Expand Up @@ -324,6 +325,8 @@ def __spur(self):
self.page_map[div.get_ID()] = div
if div.get_ORDER():
self.order_map[div.get_ID()] = div.get_ORDER()
if div.get_ORDERLABEL():
self.orderlabel_map[div.get_ID()] = div.get_ORDERLABEL()
for fptr in div.get_fptr():
if fptr.get_FILEID() in fulltext_map:
self.alto_map[div.get_ID()] = fulltext_map[fptr.get_FILEID()]
Expand Down Expand Up @@ -514,6 +517,12 @@ def get_alto(self, phys_id):

def get_order(self, phys_id):
    """
    Return the logical (manually set) page number for a given physical ID.

    The value is looked up in ``self.order_map``, which is keyed by the
    ID of a physical ``div`` and holds that div's ``ORDER`` value.

    :param phys_id: ID of the physical structure element (page).
    :return: the stored ``ORDER`` value, or the string ``"0"`` when no
        order was recorded for ``phys_id``.
    """
    # The fallback is the string "0", not an int and not the former "-1":
    # only this single return must remain, otherwise the older default
    # would shadow it.
    return self.order_map.get(phys_id, "0")

def get_orderlabel(self, phys_id):
    """
    Look up the logical (manually set) page label of a physical ID.

    :param phys_id: ID of the physical structure element (page).
    :return: the label stored in ``self.orderlabel_map`` for ``phys_id``,
        or an empty string when none was recorded.
    """
    labels = self.orderlabel_map
    if phys_id in labels:
        return labels[phys_id]
    # No label known for this page.
    return ""
9 changes: 8 additions & 1 deletion mets_mods2tei/api/tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,14 @@ def __add_ocr_to_node(self, node, mets):
self.alto_map[alto_link] = alto

pb = etree.SubElement(node, "%spb" % TEI)
pb.set("facs", "#f{:04d}".format(int(mets.get_order(struct_link))))
try:
pagenum = list(mets.page_map.keys()).index(struct_link)
pb.set("facs", "#f{:04d}".format(pagenum + 1))
except ValueError:
self.logger.warning("cannot determine image number for '%s'", struct_link)
pagenum = mets.get_orderlabel(struct_link) or mets.get_order(struct_link)
if pagenum:
pb.set("n", str(pagenum))
pb.set("corresp", mets.get_img(struct_link))

for text_block in alto.get_text_blocks():
Expand Down

0 comments on commit 27dffe8

Please sign in to comment.