Skip to content

Commit

Permalink
feat: Annotate generated documents with specific tags
Browse files Browse the repository at this point in the history
  • Loading branch information
Rithsek99 committed Feb 14, 2024
1 parent 2f7c194 commit 4207890
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 26 deletions.
25 changes: 23 additions & 2 deletions marie/pipe/extract_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
split_filename,
store_assets,
)
from marie.renderer import PdfRenderer, TextRenderer, TiffRenderer
from marie.renderer import PdfRenderer, PngRenderer, TextRenderer, TiffRenderer
from marie.renderer.adlib_renderer import AdlibRenderer
from marie.renderer.blob_renderer import BlobRenderer
from marie.utils.docs import docs_from_image, frames_from_file
Expand Down Expand Up @@ -259,6 +259,7 @@ def execute_frames_pipeline(
# TODO : Convert to execution pipeline
self.render_pdf(ref_id, frames, ocr_results, root_asset_dir)
self.render_tiff(ref_id, frames, ocr_results, root_asset_dir)
# self.render_png(ref_id, frames, ocr_results, root_asset_dir)
self.render_blobs(ref_id, frames, ocr_results, root_asset_dir)
self.render_adlib(ref_id, frames, ocr_results, root_asset_dir)

Expand Down Expand Up @@ -417,7 +418,27 @@ def render_tiff(self, ref_id: str, frames, results, root_asset_dir) -> None:
renderer.render(
frames,
results,
output_filename=os.path.join(output_dir, "results.tif"),
output_filename=os.path.join(output_dir, "results_clean.tif"),
**{
"overlay": False,
},
)

def render_png(self, ref_id: str, frames, results, root_asset_dir) -> None:
output_dir = ensure_exists(os.path.join(root_asset_dir, "png"))
renderer = PngRenderer(config={})
renderer.render(
frames,
results,
output_filename=os.path.join(output_dir, "results.png"),
**{
"overlay": True,
},
)
renderer.render(
frames,
results,
output_filename=os.path.join(output_dir, "results_clean.png"),
**{
"overlay": False,
},
Expand Down
1 change: 1 addition & 0 deletions marie/renderer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import sys

from .png_renderer import PngRenderer
from .renderer import ResultRenderer

from .text_renderer import TextRenderer # isort:skip depends on ResultRenderer
Expand Down
5 changes: 1 addition & 4 deletions marie/renderer/pdf_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,7 @@ def render(
logger.error(e, stack_info=True, exc_info=True)

# add specific tag
metadata = {
'/Producer': self.name,
'/Creator': self.name,
}
metadata = {'/Producer': self.name}
writer.addMetadata(metadata)
with open(output_filename, "wb") as output:
writer.write(output)
Expand Down
22 changes: 19 additions & 3 deletions marie/renderer/tiff_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,30 @@ def __render_page(
text = "".join([c + "\n" for c in text])
continue

font_size = determine_font_size(h)
# TIFF rendering
left_pad = 5
px0 = x
py0 = y
lh = h

# Find baseline for the word
if wid in word2line:
line = word2line[wid]
line_bbox = line["bbox"]
ly = line_bbox[1]
lh = line_bbox[3]
py0 = ly + lh * 0.2 # best fit
# py0 = ly
# py0 = img_h - ly - lh * 0.80
# py0 = img_h - y # + (h / 2)

font_size = determine_font_size(lh)
try:
font = ImageFont.truetype("Helvetica.ttf", font_size)
except IOError:
font = ImageFont.load_default()

draw.text((x, y), text, font=font, fill=(0, 0, 0))
draw.text((px0 + left_pad, py0), text, font=font, fill=(0, 0, 0))

return img_pil
except Exception as indent:
Expand Down Expand Up @@ -115,7 +132,6 @@ def render(
metadata = {"Producer": self.name, "Number of Pages": len(images)}
description = json.dumps(metadata)
# Save as a multi-page TIFF
# images[0].save(output_filename, format='tiff', save_all=True, append_images=images[1:], compression="tiff_deflate", metadata=description, duration=500)
with tifffile.TiffWriter(output_filename, bigtiff=True) as t:
for img in images:
t.write(data=np.array(img), description=description, compression=8)
Expand Down
31 changes: 14 additions & 17 deletions tests/integration/check_pdf_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def from_json_file(filename):
img_path = "./assets/english/Lines/005.png"
img_path = "~/tmp/analysis/marie-issues/107/195668453-0004.png"
img_path = "~/tmp/analysis/marie-issues/106/195664193-0006.png"
img_path = "~/Desktop/11302023_28082_5_452_.tif"
img_path = os.path.expanduser(img_path)

if not os.path.exists(img_path):
Expand Down Expand Up @@ -104,13 +105,6 @@ def from_json_file(filename):
indent=2,
cls=NumpyEncoder,
)

# output_filename = "/tmp/fragments/result.pdf"
# print("Testing pdf render")
#
# renderer = PdfRenderer(config={"preserve_interword_spaces": True})
# renderer.render(image, result, output_filename)

if True:
key = img_path.split("/")[-1]
image = cv2.imread(img_path)
Expand All @@ -119,18 +113,21 @@ def from_json_file(filename):
print("Testing pdf render")
results = from_json_file("/tmp/fragments/results.json")

# renderer = PdfRenderer(config={"preserve_interword_spaces": True})
# renderer.render(image, results, output_filename)

renderer = PdfRenderer(config={"preserve_interword_spaces": True})
renderer.render(
frames=[image],
results=results,
output_filename=output_filename
results=[results],
output_filename=output_filename,
**{
"overlay": True,
}
)

# print("Testing text render")
#
# output_filename = "/tmp/fragments/result.txt"
# renderer = TextRenderer(config={"preserve_interword_spaces": True})
# renderer.render(image, result, output_filename)
renderer.render(
frames=[image],
results=[results],
output_filename=output_filename.replace(".pdf", "_clean.pdf"),
**{
"overlay": False,
}
)
129 changes: 129 additions & 0 deletions tests/integration/check_png_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import io
import json
import os

import cv2
import numpy
from PIL import Image

from marie.boxes import BoxProcessorUlimDit
from marie.boxes.box_processor import PSMode
from marie.document import TrOcrProcessor
from marie.numpyencoder import NumpyEncoder
from marie.renderer.png_renderer import PngRenderer
from marie.utils.utils import ensure_exists


def __scale_width(src, target_size, crop_size, method=Image.BICUBIC):
    """Resize a BGR image to ``target_size`` pixels wide, keeping aspect ratio.

    Args:
        src: Image as a BGR numpy array (it is passed through
            ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)``).
        target_size: Desired output width in pixels.
        crop_size: Lower bound for the output height in pixels.
        method: PIL resampling filter handed to ``Image.resize``.

    Returns:
        A BGR numpy array. When ``src`` is already ``target_size`` wide and at
        least ``crop_size`` tall, ``src`` itself is returned unchanged.
    """
    img = Image.fromarray(cv2.cvtColor(src, cv2.COLOR_BGR2RGB))
    ow, oh = img.size
    if ow == target_size and oh >= crop_size:
        # Previously this path returned the RGB PIL image while the resize
        # path returned a BGR array; return the BGR input so both paths agree.
        return src

    w = target_size
    # Scale the height proportionally, but never below crop_size.
    h = int(max(target_size * oh / ow, crop_size))

    resized = numpy.array(img.resize((w, h), method))
    # Reverse the channel axis: RGB -> BGR.
    return resized[:, :, ::-1]


# https://stackoverflow.com/questions/23853632/which-kind-of-interpolation-best-for-resizing-image
def __scale_height(img, target_size, crop_size, method=Image.LANCZOS):
    """Resize a BGR image to ``target_size`` pixels tall, keeping aspect ratio.

    The input is converted from BGR to RGB and the result is returned as a
    PIL image. ``crop_size`` is accepted but not used by this function. The
    computed scale factor is printed to stdout.
    """
    rgb_image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    src_w, src_h = rgb_image.size

    ratio = src_h / target_size
    print(ratio)

    new_size = (int(src_w / ratio), int(target_size))
    return rgb_image.resize(new_size, method)


def from_json_file(filename):
    """Load and return the JSON document stored in the UTF-8 file *filename*."""
    with io.open(filename, mode="r", encoding="utf-8") as handle:
        return json.load(handle)


if __name__ == "__main__":
    # Manual integration check: run box detection and OCR on a local sample
    # image, store the OCR result as JSON, then render that result to PNG
    # twice (once with "overlay" enabled, once with it disabled).
    import faulthandler

    # Dump Python tracebacks if the interpreter crashes inside native code.
    faulthandler.enable()

    work_dir_boxes = ensure_exists("/tmp/boxes")
    work_dir_icr = ensure_exists("/tmp/icr")
    ensure_exists("/tmp/fragments")

    img_path = "~/Desktop/11302023_28082_5_452_.tif"
    img_path = os.path.expanduser(img_path)

    if not os.path.exists(img_path):
        # FileNotFoundError is an Exception subclass, so existing handlers
        # that catch Exception keep working.
        raise FileNotFoundError(f"File not found : {img_path}")

    if True:
        # Step 1: extract boxes, run OCR and persist the results.
        key = os.path.basename(img_path)
        image = cv2.imread(img_path)

        box = BoxProcessorUlimDit(work_dir=work_dir_boxes, cuda=True)
        icr = TrOcrProcessor(work_dir=work_dir_icr, cuda=True)

        (
            boxes,
            fragments,
            lines,
            _,
            lines_bboxes,
        ) = box.extract_bounding_boxes(key, "field", image, PSMode.SPARSE)

        print(lines)
        result, overlay_image = icr.recognize(
            key, "test", image, boxes, fragments, lines, return_overlay=True
        )

        print("Results -----------------")
        print(result['meta'])
        print(result['words'])
        print(result['lines'])
        cv2.imwrite("/tmp/fragments/overlay.png", overlay_image)
        json_path = os.path.join("/tmp/fragments", "results.json")

        # ensure_ascii=False emits raw non-ASCII characters, and the file is
        # read back as UTF-8 by from_json_file, so write it as UTF-8 instead
        # of relying on the locale's default encoding.
        with open(json_path, "w", encoding="utf-8") as json_file:
            json.dump(
                result,
                json_file,
                sort_keys=True,
                separators=(",", ": "),
                ensure_ascii=False,
                indent=2,
                cls=NumpyEncoder,
            )

    if True:
        # Step 2: render the stored results to PNG.
        key = os.path.basename(img_path)
        image = cv2.imread(img_path)

        output_filename = "/tmp/fragments/result.png"
        print("Testing PNG render")
        results = from_json_file("/tmp/fragments/results.json")

        renderer = PngRenderer(config={"preserve_interword_spaces": True})
        renderer.render(
            frames=[image],
            results=[results],
            output_filename=output_filename,
            **{
                "overlay": True,
            }
        )
        renderer.render(
            frames=[image],
            results=[results],
            output_filename=output_filename.replace(".png", "_clean.png"),
            **{
                "overlay": False,
            }
        )
8 changes: 8 additions & 0 deletions tests/integration/check_tiff_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,14 @@ def from_json_file(filename):
frames=[image],
results=[results],
output_filename=output_filename,
**{
"overlay": True,
}
)
renderer.render(
frames=[image],
results=[results],
output_filename=output_filename.replace(".tif", "_clean.tif"),
**{
"overlay": False,
}
Expand Down

0 comments on commit 4207890

Please sign in to comment.