In [4]:
import fitz  # PyMuPDF
import os

def extract_images_from_pdf(input_pdf, output_folder):
    """Extract every embedded image of a PDF and write each one to disk.

    Files are named ``page_<P>_img_<N>.<ext>``, where ``<P>`` is the 1-based
    page number and ``<N>`` is a running counter over the whole document
    (it is NOT reset on each page), so names never collide within one run.

    Args:
        input_pdf: Path of the PDF to read; handed to ``fitz.open``.
        output_folder: Directory that receives the image files. It is
            created, including missing parents, when it does not exist.

    Returns:
        None. Progress is reported through ``print``.
    """
    # exist_ok=True replaces the racy "check, then create" pair of calls.
    os.makedirs(output_folder, exist_ok=True)

    image_counter = 0

    # The context manager closes the document even when extraction or a file
    # write raises, so the PDF handle is never left open in the kernel.
    with fitz.open(input_pdf) as pdf_document:
        for page_number, page in enumerate(pdf_document, start=1):
            image_list = page.get_images(full=True)

            # Print the number of images found on the page
            print(f"[INFO] Found {len(image_list)} images on page {page_number}")

            for img in image_list:
                # First entry of each image record is the object's xref.
                xref = img[0]
                base_image = pdf_document.extract_image(xref)
                image_bytes = base_image["image"]

                # The extension reported by the extractor decides the file type.
                image_ext = base_image["ext"]
                image_counter += 1
                image_filename = f"{output_folder}/page_{page_number}_img_{image_counter}.{image_ext}"

                with open(image_filename, "wb") as image_file:
                    image_file.write(image_bytes)

                print(f"[INFO] Saved image: {image_filename}")




In [5]:
# Directory that receives the extracted image files.
output_folder_path = "D:/VSCODE/INTEREXT/cat/out"
# PDF to process, given as a relative path.
input_pdf_path = "urban.pdf"

extract_images_from_pdf(input_pdf=input_pdf_path, output_folder=output_folder_path)

[INFO] Found 1 images on page 1
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_1_img_1.jpeg
[INFO] Found 1 images on page 2
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_2_img_2.jpeg
[INFO] Found 346 images on page 3
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_3.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_4.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_5.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_6.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_7.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_8.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_9.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_10.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_11.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_12.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_13.png
[INFO] Saved image: D:/VSCODE/INTEREXT/cat/out/page_3_img_14.png