# Setup

Start by creating the functions that export each page of a PDF as a JPG image.

In [1]:
import os
import fitz  # PyMuPDF
from PIL import Image


def convert_pdf_to_jpg(pdf_file_path, output_folder, max_width, resolution):
    """Render every page of a PDF to a JPG file in ``output_folder``.

    Each page is rendered at ``resolution`` DPI. If the rendered page is wider
    than ``max_width`` pixels, it is rendered again with a proportionally
    smaller zoom so that its width fits within ``max_width`` while the aspect
    ratio is preserved.

    Args:
        pdf_file_path: Path of the PDF file to convert.
        output_folder: Existing folder that receives the JPG files. Files are
            named ``<pdf base name>_page<N>.jpg`` with N starting at 1.
        max_width: Maximum width, in pixels, of a saved image.
        resolution: Target rendering resolution in DPI.
    """
    # Base name of the PDF (no directory, no extension) used for output names
    base_filename = os.path.splitext(os.path.basename(pdf_file_path))[0]

    # PDF user space is 72 points per inch, so DPI / 72 is the zoom factor
    base_zoom = resolution / 72

    pdf_document = fitz.open(pdf_file_path)
    try:
        for page_number in range(pdf_document.page_count):
            page = pdf_document[page_number]
            image = page.get_pixmap(matrix=fitz.Matrix(base_zoom, base_zoom))

            # Too wide: render again with the zoom scaled down by the same
            # ratio on both axes, which caps the width and keeps proportions.
            if image.width > max_width:
                zoom = base_zoom * max_width / image.width
                image = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

            # Page numbers in file names are 1-based
            image_file_path = os.path.join(
                output_folder, f"{base_filename}_page{page_number + 1}.jpg"
            )
            image.save(image_file_path)
    finally:
        # Release the document even if rendering or saving a page failed
        pdf_document.close()


def batch_convert_pdf_to_jpg(input_folder, output_folder, max_width, resolution):
    """Convert every PDF located directly in ``input_folder`` to JPG images.

    The output folder is created when missing. Each file whose name ends in
    ``.pdf`` (in any letter case) is passed to ``convert_pdf_to_jpg`` and a
    confirmation line is printed once it has been converted.

    Args:
        input_folder: Folder that is scanned (non-recursively) for PDF files.
        output_folder: Folder that receives the generated JPG files.
        max_width: Maximum image width in pixels, forwarded to the converter.
        resolution: Rendering resolution in DPI, forwarded to the converter.
    """
    # exist_ok avoids the race between a separate existence check and creation
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        # Compare the extension case-insensitively so that files such as
        # "DRAWING.PDF" are not silently skipped
        if not filename.lower().endswith(".pdf"):
            continue

        pdf_file_path = os.path.join(input_folder, filename)
        convert_pdf_to_jpg(pdf_file_path, output_folder, max_width, resolution)

        # Log each converted file
        print(f"Converted {filename} to JPG images.")

Then set the input and output locations and run the conversion.

In [3]:
# Conversion settings; output_folder is reused by the cells further down.
input_folder = "datasets/pdf_input"  # Folder containing the source PDF files
output_folder = "fiftyone/coco_test"  # Folder that receives the JPG images

max_width = 4961  # Maximum image width in pixels (long edge of A3 at 300 DPI)
# Landscape A3 (420 mm x 297 mm) is about 4961x3508 pixels at 300 DPI
resolution = 300  # Rendering resolution in DPI

# Convert every PDF file found in the input folder
batch_convert_pdf_to_jpg(input_folder, output_folder, max_width, resolution)

Converted 14780-8120-25-22-0020_P&ID OXIDATION UNIT CHARCOAL ADSORBER B_Z1.pdf to JPG images.
Converted 14780-8120-25-22-0019_P&ID OXIDATION UNIT CHARCOAL ADSORBER A_Z1.pdf to JPG images.
Converted 14780-8120-25-21-0005_P&ID Q-MAX UNIT ALKYLATION REACTOR FEED HEATER_Z1.pdf to JPG images.
Converted 14780-8120-25-21-0003A_P&ID Q-MAX UNIT PROPYLENE FEED ARSINE GUARD BED_Z1.pdf to JPG images.
Converted 14780-8120-25-22-0021_P&ID OXIDATION UNIT CHARCOAL ADSORBER C_Z1.pdf to JPG images.
Converted 14780-8120-25-21-0006_P&ID Q-MAX UNIT ALKYLATION REACTOR FEED_Z1.pdf to JPG images.
Converted 14780-8120-25-21-0003_P&ID Q-MAX UNIT PROPYLENE FEED GUARD BEDS_Z1.pdf to JPG images.
Converted 14780-8120-25-22-0023_P&ID OXIDATION UNIT CHP SUMP_Z1.pdf to JPG images.
Converted 14780-8120-25-21-0004_P&ID Q-MAX UNIT PROPYLENE FEED SURGE DRUM_Z1.pdf to JPG images.
Converted 14780-8120-25-22-0022_P&ID OXIDATION UNIT CHP PROCESS WATER PUMPS_Z1.pdf to JPG images.


Create a Coco object and generate the JSON file used for batch prediction.

In [4]:
# Create coco_dataset.json
# import utils
from sahi.utils.coco import Coco, CocoCategory, CocoImage, CocoAnnotation, CocoPrediction
from sahi.utils.file import load_json, save_json, list_files
from PIL import Image

# Function to determine image WxH
def get_image_size(image_path):
    """Return the ``(width, height)`` in pixels of the image at ``image_path``."""
    with Image.open(image_path) as picture:
        dimensions = picture.size
    # The size attribute is unpacked as (width, height)
    columns, rows = dimensions
    return columns, rows

# Paths: the rendered pages are read from the conversion output folder and the
# COCO description is written next to them.
image_path = output_folder
coco_dataset_json_template = "datasets/json/pid_dataset.json"  # supplies the category list
coco_json_output = os.path.join(output_folder, "coco_dataset.json")

# Collect the JPG/PNG images and process them in name order
image_files = list_files(output_folder, [".jpg", ".png"])
image_files.sort()

coco = Coco()

# Register every image under its bare file name (no directory part)
for image_file in image_files:
    filename = os.path.basename(image_file)
    print(filename)

    width, height = get_image_size(image_file)
    coco.add_image(CocoImage(file_name=filename, width=width, height=height))

# Copy the categories from the template; ids follow the template order from 0
data = load_json(coco_dataset_json_template)
categories = data["categories"]

for index, category in enumerate(categories):
    coco.add_category(
        CocoCategory(
            id=index,
            name=category["name"],
            supercategory=category["supercategory"],
        )
    )

# Write the COCO dataset description to disk
save_json(coco.json, coco_json_output)

There are 15 listed files in folder: coco_test/
14780-8120-25-21-0003A_P&ID Q-MAX UNIT PROPYLENE FEED ARSINE GUARD BED_Z1_page1.jpg
14780-8120-25-21-0003_P&ID Q-MAX UNIT PROPYLENE FEED GUARD BEDS_Z1_page1.jpg
14780-8120-25-21-0004_P&ID Q-MAX UNIT PROPYLENE FEED SURGE DRUM_Z1_page1.jpg
14780-8120-25-21-0005_P&ID Q-MAX UNIT ALKYLATION REACTOR FEED HEATER_Z1_page1.jpg
14780-8120-25-21-0006_P&ID Q-MAX UNIT ALKYLATION REACTOR FEED_Z1_page1.jpg
14780-8120-25-22-0004_P&ID OXIDATION UNIT FRESH FEED HEADER_Z1_page1.jpg
14780-8120-25-22-0005A_P&ID OXIDATION UNIT CAUSTIC WASH CIRCULATION PUMPS_Z1_page1.jpg
14780-8120-25-22-0005_P&ID OXIDATION UNIT FEED WASH COLUMN_Z1_page1.jpg
14780-8120-25-22-0006_P&ID OXIDATION UNIT COMBINED FEED SURGE DRUM_Z1_page1.jpg
14780-8120-25-22-0007_P&ID OXIDATION UNIT OXIDIZER AIR COMPRESSOR_Z1_page1.jpg
14780-8120-25-22-0019_P&ID OXIDATION UNIT CHARCOAL ADSORBER A_Z1_page1.jpg
14780-8120-25-22-0020_P&ID OXIDATION UNIT CHARCOAL ADSORBER B_Z1_page1.jpg
14780-8120-25-22

Import the necessary modules.

In [5]:
# Import module
from sahi.predict import predict
import fiftyone as fo
import fiftyone.utils.coco as fouc
import fiftyone.utils.annotations as foua

08/22/2023 14:52:47 - INFO - httpx -   HTTP Request: POST https://www.google-analytics.com/collect "HTTP/1.1 200 OK"


Run the batch prediction and get the result.

In [6]:
# Set up the arguments for sahi.prediction
# Arguments for the sahi batch prediction
source_path = output_folder  # folder holding the rendered page images
dataset_json_path = coco_json_output  # COCO file listing those images
model_type = "yolov8"
model_path = "yolo_weights/best_20230813.pt"  # model weight file
model_config_path = "datasets/yaml/dataset.yaml"
model_device = "cpu"  # or 'cuda:0'
image_size = 640  # height and width, in pixels, of each slice

# Standard (whole-image) prediction is switched off and sliced prediction is
# left on; the result comes back as a dict.
result = predict(
    source=source_path,
    dataset_json_path=dataset_json_path,
    model_type=model_type,
    model_path=model_path,
    model_config_path=model_config_path,
    model_device=model_device,
    model_confidence_threshold=0.1,
    no_standard_prediction=True,
    no_sliced_prediction=False,
    slice_height=image_size,
    slice_width=image_size,
    overlap_height_ratio=0.1,
    overlap_width_ratio=0.1,
    export_pickle=False,
    export_crop=False,
    novisual=True,
    return_dict=True,
)

# Location of the exported prediction file inside the run's export directory
prediction_path = f'{result["export_dir"]}/result.json'
print("The result is save to", prediction_path)

indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 15/15 [00:00<00:00, 43721.03it/s]


Performing inference on images:   0%|          | 0/15 [00:00<?, ?it/s]

Performing prediction on 54 number of slices.


Performing inference on images:   7%|▋         | 1/15 [00:23<05:22, 23.04s/it]

Prediction time is: 22597.80 ms


Performing inference on images:   7%|▋         | 1/15 [00:23<05:22, 23.04s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  13%|█▎        | 2/15 [00:34<03:32, 16.34s/it]

Prediction time is: 11273.80 ms


Performing inference on images:  13%|█▎        | 2/15 [00:35<03:32, 16.34s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  20%|██        | 3/15 [00:46<02:53, 14.42s/it]

Prediction time is: 11840.87 ms


Performing inference on images:  20%|██        | 3/15 [00:47<02:53, 14.42s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  27%|██▋       | 4/15 [00:59<02:31, 13.76s/it]

Prediction time is: 12397.18 ms


Performing inference on images:  27%|██▋       | 4/15 [00:59<02:31, 13.76s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  33%|███▎      | 5/15 [01:12<02:13, 13.34s/it]

Prediction time is: 12278.02 ms


Performing inference on images:  33%|███▎      | 5/15 [01:12<02:13, 13.34s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  40%|████      | 6/15 [01:24<01:55, 12.89s/it]

Prediction time is: 11740.65 ms


Performing inference on images:  40%|████      | 6/15 [01:24<01:55, 12.89s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  47%|████▋     | 7/15 [01:36<01:40, 12.55s/it]

Prediction time is: 11584.49 ms


Performing inference on images:  47%|████▋     | 7/15 [01:36<01:40, 12.55s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  53%|█████▎    | 8/15 [01:48<01:27, 12.45s/it]

Prediction time is: 11932.45 ms


Performing inference on images:  53%|█████▎    | 8/15 [01:48<01:27, 12.45s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  60%|██████    | 9/15 [02:00<01:14, 12.38s/it]

Prediction time is: 11932.80 ms


Performing inference on images:  60%|██████    | 9/15 [02:00<01:14, 12.38s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  67%|██████▋   | 10/15 [02:12<01:01, 12.32s/it]

Prediction time is: 11912.11 ms


Performing inference on images:  67%|██████▋   | 10/15 [02:13<01:01, 12.32s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  73%|███████▎  | 11/15 [02:25<00:50, 12.53s/it]

Prediction time is: 12608.49 ms


Performing inference on images:  73%|███████▎  | 11/15 [02:25<00:50, 12.53s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  80%|████████  | 12/15 [02:37<00:36, 12.33s/it]

Prediction time is: 11606.62 ms


Performing inference on images:  80%|████████  | 12/15 [02:37<00:36, 12.33s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  87%|████████▋ | 13/15 [02:49<00:24, 12.21s/it]

Prediction time is: 11659.95 ms


Performing inference on images:  87%|████████▋ | 13/15 [02:49<00:24, 12.21s/it]

Performing prediction on 54 number of slices.


Performing inference on images:  93%|█████████▎| 14/15 [03:02<00:12, 12.31s/it]

Prediction time is: 12299.70 ms


Performing inference on images:  93%|█████████▎| 14/15 [03:02<00:12, 12.31s/it]

Performing prediction on 54 number of slices.


Performing inference on images: 100%|██████████| 15/15 [03:14<00:00, 12.98s/it]

Prediction time is: 12357.03 ms
Prediction results are successfully exported to runs/predict/exp
The result is save to runs/predict/exp/result.json





Create a FiftyOne dataset for viewing and exporting.

In [7]:
# Load COCO formatted dataset
# Build a FiftyOne dataset from the image folder and its COCO JSON file
coco_dataset = fo.Dataset.from_dir(
    data_path=source_path,
    labels_path=dataset_json_path,
    dataset_type=fo.types.COCODetectionDataset,
    include_id=True,
)

# Show the class list that was imported with the dataset
print(coco_dataset.default_classes)

# Attach the exported predictions to the "predictions" field of the dataset
classes = coco_dataset.default_classes
fouc.add_coco_labels(coco_dataset, "predictions", prediction_path, classes)
coco_dataset.save()

# Check that the "predictions" field has been populated
print(coco_dataset.count("predictions"))

 100% |███████████████████| 15/15 [41.8ms elapsed, 0s remaining, 359.2 samples/s]     


08/22/2023 15:01:46 - INFO - eta.core.utils -    100% |███████████████████| 15/15 [41.8ms elapsed, 0s remaining, 359.2 samples/s]     


['angle valve', 'ball valve', 'butterfly valve', 'check valve', 'gate valve', 'globe valve', 'plug valve', 'safety valve', 'three way valve', 'control valve']
15


View the dataset in the FiftyOne app.

In [8]:
# Start the fiftyone session
# Launch the FiftyOne App on the dataset and keep the session handle
session = fo.launch_app(coco_dataset)

# Print the session summary (dataset name, sample count, session URL)
print(session)

# Intended to block execution until the App is closed.
# NOTE(review): in the recorded notebook run this call only printed
# "Notebook sessions cannot wait", so it did not block there.
session.wait()

Dataset:          2023.08.22.15.01.45
Media type:       image
Num samples:      15
Selected samples: 0
Selected labels:  0
Session URL:      http://localhost:5151/
Notebook sessions cannot wait




In [9]:
# Close the FiftyOne App session once the review is finished
session.close()

Export the result images.

In [10]:
# Set the path for saving inferenced result
# Annotated images are written to a sub-folder of the conversion output folder
draw_labels_path = f"{output_folder}/draw-labels"

# Rendering options: small font, thin boxes, no names, attributes or
# confidences, and per-object label colours
config = foua.DrawConfig(
    dict(
        font_size=5,
        bbox_linewidth=1,
        show_object_names=False,
        show_object_attrs=False,
        show_all_confidences=False,
        per_object_label_colors=True,
    )
)

# Render the dataset's labels onto the images, replacing any earlier export
coco_dataset.draw_labels(
    output_dir=draw_labels_path,
    config=config,
    label_fields=None,
    overwrite=True,
)

 100% |█████████████████████| 0/0 [16.5ms elapsed, ? remaining, ? samples/s] 


08/22/2023 15:05:44 - INFO - eta.core.utils -    100% |█████████████████████| 0/0 [16.5ms elapsed, ? remaining, ? samples/s] 


[]