In [1]:
import fitz  # PyMuPDF

def _as_checkbox_state(value):
    """Translate a user-supplied value into the on/off state of a checkbox.

    Strings count as "checked" when they read yes/true/1 (case-insensitive,
    surrounding whitespace ignored); any other value is judged by its
    truthiness, so plain booleans and integers work as well.
    """
    if isinstance(value, str):
        return value.strip().lower() in ("yes", "true", "1")
    return bool(value)


def fill_pdf_fields(pdf_path, output_path, field_data):
    """Fill the form widgets of a PDF and save the result as a new file.

    Args:
        pdf_path: Path of the fillable PDF to read.
        output_path: Path the filled copy is written to.
        field_data: Mapping of widget field name -> value. Widgets whose
            name is not in the mapping are left untouched. For checkboxes
            the value may be a bool or a yes/true/1 style string.

    Side effects:
        Writes ``output_path`` and prints the set of widget type codes
        encountered, followed by a confirmation message.
    """
    seen_types = set()
    pdf_document = fitz.open(pdf_path)

    try:
        for page in pdf_document:
            for widget in page.widgets():
                field_type = widget.field_type
                field_name = widget.field_name
                seen_types.add(field_type)

                # Skip widgets of unknown type and fields we have no data for.
                if not field_type or field_name not in field_data:
                    continue

                field_value = field_data[field_name]

                if field_type == fitz.PDF_WIDGET_TYPE_CHECKBOX:
                    # True/False lets PyMuPDF pick the widget's own on-state
                    # name; a literal "1"/"0" only works when the form
                    # happens to use those names.
                    widget.field_value = _as_checkbox_state(field_value)
                else:
                    widget.field_value = field_value

                widget.update()  # Write the new value back into the PDF

        pdf_document.save(output_path)
    finally:
        # Release the file handle even when filling or saving fails.
        pdf_document.close()

    print(seen_types)
    print(f"PDF saved to {output_path}")

# Example usage
if __name__ == "__main__":
    # Values to enter, keyed by the widget field names used in the form.
    # NOTE(review): the yes/no style entries are presumably checkboxes -
    # confirm against the form's widget types.
    field_data = {
        "SUSPECT ADVERSE REACTION REPORT": "Adverse Reaction Report by raja sekhar",
        "1a COUNTRY": 'INDIA',
        "7  13 DESCRIBE REACTIONS including relevant testslab data": "Patient experienced severe headache and nausea.",
        "PatientDied": "yes",
        "3_Sex": "Male",
        "InpatientHospitalisation": "no",
        "Persistence_Incapacity": "yes",
        "LifeThreatening": "No",
        "14_SuspectDrugs": "Drug A",
        "15_DailyDoses": "2 times a day",
        "16_RoutesOfAdmin": "Oral",
    }

    # Source form and destination of the filled copy.
    pdf_path = "C:/Users/sekhar/Downloads/Fillable-Form_CIOMS-to-E2B.pdf"
    output_path = "C:/Users/sekhar/Downloads/test2.pdf"

    fill_pdf_fields(
        pdf_path=pdf_path,
        output_path=output_path,
        field_data=field_data,
    )


{2, 3, 5, 7}
PDF saved to C:/Users/sekhar/Downloads/test2.pdf


In [6]:
import pymupdf
# Dump the plain text of every page into output.txt, one page after another.
# Both handles are managed by `with`, so they are released even if text
# extraction or writing fails part-way through.
with pymupdf.open('C:/Users/sekhar/Downloads/Fillable-Form_CIOMS-to-E2B.pdf') as doc, \
        open('output.txt', 'wb') as out:
    for page in doc:
        text = page.get_text().encode('utf-8')  # plain text of the page, encoded as UTF-8
        out.write(text)  # write text of page
        out.write(b'\x0c')  # write page delimiter (form feed, 0x0C)
print(out)  # shows the (now closed) writer, i.e. which file was produced

<_io.BufferedWriter name='output.txt'>


In [8]:
import pymupdf

doc = pymupdf.open('C:/Users/sekhar/Downloads/Fillable-Form_CIOMS-to-E2B.pdf')

# Visit every page, report how many images it references and save each as PNG.
for page_index, page in enumerate(doc):
    image_list = page.get_images()

    if not image_list:
        print('No images found on page', page_index)
    else:
        print(f'found {len(image_list)} images on page {page_index}')

    # Image numbering in the file names starts at 1; pages are numbered from 0.
    for image_index, img in enumerate(image_list, start=1):
        xref = img[0]  # first entry of an image record is its XREF
        pix = pymupdf.Pixmap(doc, xref)

        # More than three colour components (alpha excluded) means CMYK,
        # which has to be converted to RGB first.
        colour_components = pix.n - pix.alpha
        if colour_components > 3:
            pix = pymupdf.Pixmap(pymupdf.csRGB, pix)

        pix.save("page_%s-image_%s.png" % (page_index, image_index))
        pix = None  # drop the reference so the pixmap can be freed

No images found on page 0
found 1 images on page 1
No images found on page 2
No images found on page 3
No images found on page 4
No images found on page 5


In [9]:
import pymupdf
from pprint import pprint

doc = pymupdf.open('C:/Users/sekhar/Downloads/Fillable-Form_CIOMS-to-E2B.pdf')

# Look for tables on the second page (page indices are 0-based).
page = doc.load_page(1)
tabs = page.find_tables()
print(f"{len(tabs.tables)} found on {page} ")

# Show the cell contents of the first detected table, if there is one.
if len(tabs.tables) > 0:
    first_table = tabs[0]
    pprint(first_table.extract())


0 found on page 1 of C:/Users/sekhar/Downloads/Fillable-Form_CIOMS-to-E2B.pdf 


In [10]:
import pymupdf

# Report every annotation in the document that is currently bound to `doc`
# (this cell does not open a file itself).
for page in doc:
    page_annots = page.annots()
    for annot in page_annots:
        print(f'Annotation on page: {page.number} with type: {annot.type} and rect: {annot.rect}')

In [11]:
import pymupdf

# Text whose occurrences are removed from every page
search_text = "PATIENT DIED"

# Open the PDF document; the `with` block closes it even if redaction or
# saving raises
with pymupdf.open('C:/Users/sekhar/Downloads/Fillable-Form_CIOMS-to-E2B.pdf') as doc:
    # Iterate over each page of the document
    for page in doc:
        # Find all instances of the search text on the current page
        instances = page.search_for(search_text)

        # Mark each instance for redaction
        for inst in instances:
            page.add_redact_annot(inst)

        # Apply the redactions to the current page
        page.apply_redactions()

    # Save the modified document
    doc.save('redacted_document.pdf')

In [12]:
import pymupdf

# Area to redact, given as [x0, y0, x1, y1], and the fill colour as an
# RGB triple (pure red)
rect = [0,0,200,200]
red = (1,0,0)

# Open the PDF document and pick its first page
doc = pymupdf.open('C:/Users/sekhar/Downloads/Fillable-Form_CIOMS-to-E2B.pdf')
page = doc[0]

# Add a redaction annotation which will have a red fill color
page.add_redact_annot(rect, fill=red)

# Apply the redactions to the current page, but ignore vector graphics
page.apply_redactions(graphics=0)

# Save the modified document, then release the file
doc.save('redactied_document.pdf')
doc.close()

In [14]:
import pymupdf  # import the bindings
# fname = sys.argv[1]  # get filename from command line
doc = pymupdf.open('C:/Users/sekhar/Downloads/Fillable-Form_CIOMS-to-E2B.pdf')  # open document

# Render each page with the default pixmap settings and store it as a PNG
# named after the page number.
for page in doc:
    png_name = "page-%i.png" % page.number
    pix = page.get_pixmap()
    pix.save(png_name)

In [12]:
import pymupdf

# Open the PDF document (raw string: the backslashes of the Windows path
# must not be read as escape sequences)
doc = pymupdf.open(r"D:\Sekhar\Fillable-Form_CIOMS-to-E2B.pdf")

# Get the second page (page indices are 0-based)
page = doc[1]
print(page.rect)

Rect(0.0, 0.0, 595.3200073242188, 841.9199829101562)
