PDF 1.7 defines 25 different annotation types:
- Text
- Link
- FreeText
- Line, Square, Circle, Polygon, PolyLine, Highlight, Underline, Squiggly, StrikeOut
- Stamp, Caret, Ink
- Popup
- FileAttachment
- Sound, Movie
- Widget, Screen
- PrinterMark
- TrapNet
- Watermark
- 3D
In general, annotations can be read like this:
from pypdf import PdfReader

reader = PdfReader("commented.pdf")

# Print the subtype and rectangle of every annotation on every page.
for page in reader.pages:
    if "/Annots" not in page:
        continue  # nothing to report for a page without an /Annots entry
    for ref in page["/Annots"]:
        # Entries are resolved with get_object() before their keys are read.
        resolved = ref.get_object()
        print({"subtype": resolved["/Subtype"], "location": resolved["/Rect"]})
Reading the most common ones is described below.
from pypdf import PdfReader

reader = PdfReader("example.pdf")

# Print the /Contents entry of every annotation whose subtype is /Text.
for page in reader.pages:
    if "/Annots" not in page:
        continue  # skip pages without an /Annots entry
    for ref in page["/Annots"]:
        note = ref.get_object()
        if note["/Subtype"] == "/Text":
            print(note["/Contents"])
from pypdf import PdfReader

reader = PdfReader("commented.pdf")

# Read the corner coordinates of every /Highlight annotation.
for page in reader.pages:
    if "/Annots" in page:
        for annot in page["/Annots"]:
            obj = annot.get_object()
            if obj["/Subtype"] == "/Highlight":
                coords = obj["/QuadPoints"]
                # /QuadPoints holds 8 numbers (four x/y corner pairs) per
                # quadrilateral, and one highlight may contain several
                # quadrilaterals (e.g. when it spans multiple lines), so
                # unpack the array in groups of 8 instead of all at once.
                for start in range(0, len(coords), 8):
                    x1, y1, x2, y2, x3, y3, x4, y4 = coords[start : start + 8]
from pypdf import PdfReader

reader = PdfReader("example.pdf")

# Collect the data of every /FileAttachment annotation, keyed by the "/F"
# entry of the annotation's "/FS" dictionary.
attachments = {}
for page in reader.pages:
    if "/Annots" in page:
        for annotation in page["/Annots"]:
            # Resolve the entry once and use it for every lookup below; the
            # loop variable is `annotation`, so no other name may be used here.
            annotobj = annotation.get_object()
            if annotobj["/Subtype"] == "/FileAttachment":
                fileobj = annotobj["/FS"]
                # "/EF" -> "/F" is the object whose get_data() returns the
                # attachment's content.
                attachments[fileobj["/F"]] = fileobj["/EF"]["/F"].get_data()