In [2]:
import layoutparser as lp
import cv2
import fitz
import numpy as np
from PIL import Image
import os

def detect_elements(image):
    model = lp.Detectron2LayoutModel('lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config',
                                     extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.4],
                                     label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"})
    layout = model.detect(image)
    table_blocks = lp.Layout([b for b in layout if b.type == "Table"])
    figure_blocks = lp.Layout([b for b in layout if b.type == "Figure"])
    return table_blocks, figure_blocks

def convert_to_fitz_coordinates(blocks, scale_factor):
    fitz_coordinates = []
    for block in blocks:
        x1, y1, x2, y2 = block.coordinates
        fitz_coords = fitz.Rect(
            x1 * scale_factor, 
            y1 * scale_factor, 
            x2 * scale_factor, 
            y2 * scale_factor
        )
        fitz_coordinates.append(fitz_coords)
    return fitz_coordinates

def process_pdf(pdf_path):
    # Open the PDF
    doc = fitz.open(pdf_path)
    
    for page_num in range(len(doc)):
        # Convert PDF page to image
        pix = doc[page_num].get_pixmap(matrix=fitz.Matrix(2, 2))
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        img_np = np.array(img)
        
        # Detect tables and figures
        table_blocks, figure_blocks = detect_elements(img_np)
        
        # Convert coordinates
        scale_factor = 1 / 2  # because we used fitz.Matrix(2, 2) earlier
        table_coordinates = convert_to_fitz_coordinates(table_blocks, scale_factor)
        figure_coordinates = convert_to_fitz_coordinates(figure_blocks, scale_factor)
        
        # Draw rectangles on the page
        for rect in table_coordinates:
            doc[page_num].draw_rect(rect, color=(1, 0, 0), width=2)  # Red for tables
        for rect in figure_coordinates:
            doc[page_num].draw_rect(rect, color=(0, 0, 1), width=2)  # Blue for figures
    
    # Save the modified PDF
    output_path = "output_" + os.path.basename(pdf_path)
    doc.save(output_path)
    doc.close()
    
    return output_path

# Usage
pdf_path = "/Users/pratham/Desktop/Bynd/PDF-AI3/reports/sunPharmaReport_removed.pdf"
output_pdf = process_pdf(pdf_path)
print(f"Processed PDF saved as: {output_pdf}")

The checkpoint state_dict contains keys that are not used by the model:
  [35mproposal_generator.anchor_generator.cell_anchors.{0, 1, 2, 3, 4}[0m
The checkpoint state_dict contains keys that are not used by the model:
  [35mproposal_generator.anchor_generator.cell_anchors.{0, 1, 2, 3, 4}[0m
The checkpoint state_dict contains keys that are not used by the model:
  [35mproposal_generator.anchor_generator.cell_anchors.{0, 1, 2, 3, 4}[0m
The checkpoint state_dict contains keys that are not used by the model:
  [35mproposal_generator.anchor_generator.cell_anchors.{0, 1, 2, 3, 4}[0m
The checkpoint state_dict contains keys that are not used by the model:
  [35mproposal_generator.anchor_generator.cell_anchors.{0, 1, 2, 3, 4}[0m
The checkpoint state_dict contains keys that are not used by the model:
  [35mproposal_generator.anchor_generator.cell_anchors.{0, 1, 2, 3, 4}[0m
The checkpoint state_dict contains keys that are not used by the model:
  [35mproposal_generator.anchor_generat

Processed PDF saved as: output_sunPharmaReport_removed.pdf
