Skip to content

Commit

Permalink
New feature: modified_pdf, export a PDF with just the annotated pages
Browse files (browse the repository at this point in the history)
  • Loading branch information
lucasrla committed Dec 27, 2020
1 parent 51383c3 commit e56c4fd
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 7 deletions.
8 changes: 7 additions & 1 deletion remarks/__main__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,6 +43,12 @@ def main():
action="store_true",
help="Create a '*_remarks.pdf' file with all annotated pages merged into the original (unannotated) PDF",
)
parser.add_argument(
"--modified_pdf",
dest="modified_pdf",
action="store_true",
help="Create a '*_remarks-only.pdf' file with all annotated pages",
)
parser.add_argument(
"-v",
"--version",
Expand All @@ -54,7 +60,7 @@ def main():
"-h", "--help", action="help", help="Show this help message",
)

parser.set_defaults(combined_pdf=False)
parser.set_defaults(combined_pdf=False, modified_pdf=False)

args = parser.parse_args()
args_dict = vars(args)
Expand Down
22 changes: 16 additions & 6 deletions remarks/remarks.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,6 +37,7 @@ def run_remarks(
pdf_name=None,
ann_type=None,
combined_pdf=False,
modified_pdf=False,
):
for path in pathlib.Path(f"{input_dir}/").glob("*.pdf"):
pages = list_pages_uuids(path)
Expand All @@ -52,11 +53,14 @@ def run_remarks(
page_magnitude = math.floor(math.log10(len(pages))) + 1
in_device_path = get_ui_path(path)

_dir = pathlib.Path(f"{output_dir}/{in_device_path}/{name}/")
_dir.mkdir(parents=True, exist_ok=True)
out_path = pathlib.Path(f"{output_dir}/{in_device_path}/{name}/")
out_path.mkdir(parents=True, exist_ok=True)

pdf_src = fitz.open(path)

if modified_pdf:
mod_pdf = fitz.open()

print(f"Working on PDF file: {path}")
print(f'PDF visibleName: "{name}"')
print(f"PDF in-device directory: {in_device_path}")
Expand Down Expand Up @@ -84,7 +88,7 @@ def run_remarks(
if "svg" in targets:
svg_str = draw_svg(parsed_data)

subdir = prepare_subdir(_dir, "svg")
subdir = prepare_subdir(out_path, "svg")
with open(f"{subdir}/{page_idx:0{page_magnitude}}.svg", "w") as f:
f.write(svg_str)

Expand Down Expand Up @@ -123,15 +127,15 @@ def run_remarks(
ann_page = draw_pdf(parsed_data, ann_page)

if "pdf" in targets:
subdir = prepare_subdir(_dir, "pdf")
subdir = prepare_subdir(out_path, "pdf")
ann_doc.save(f"{subdir}/{page_idx:0{page_magnitude}}.pdf")

if "png" in targets:
# (2, 2) is a short-hand for 2x zoom on x and y
# ref: https://pymupdf.readthedocs.io/en/latest/page.html#Page.getPixmap
pixmap = ann_page.getPixmap(matrix=fitz.Matrix(2, 2))

subdir = prepare_subdir(_dir, "png")
subdir = prepare_subdir(out_path, "png")
pixmap.writePNG(f"{subdir}/{page_idx:0{page_magnitude}}.png")

if "md" in targets:
Expand All @@ -142,7 +146,7 @@ def run_remarks(

# TODO: maybe also add highlighted image (pixmap) extraction?

subdir = prepare_subdir(_dir, "md")
subdir = prepare_subdir(out_path, "md")
with open(f"{subdir}/{page_idx:0{page_magnitude}}.md", "w") as f:
f.write(md_str)

Expand All @@ -157,6 +161,9 @@ def run_remarks(
f"Found highlighted text but couldn't create markdown from page #{page_idx}"
)

if modified_pdf:
mod_pdf.insertPDF(ann_doc, start_at=-1)

if combined_pdf:
x_max, y_max = get_ann_max_bound(parsed_data)
ann_outside = (x_max > pdf_w_adj) or (y_max > pdf_h_adj)
Expand All @@ -177,4 +184,7 @@ def run_remarks(
if combined_pdf:
pdf_src.save(f"{output_dir}/{name} _remarks.pdf")

if modified_pdf:
mod_pdf.save(f"{output_dir}/{name} _remarks-only.pdf")

pdf_src.close()

0 comments on commit e56c4fd

Please sign in to comment.