# PyMuPDF
#### Extracted the image from every page successfully; however, the saved images need a smaller file size

In [4]:
import os
import fitz  # PyMuPDF
import io
from PIL import Image

In [5]:
# Output directory for the extracted images
output_dir = "extracted_images"
# Desired output image format (used as the file extension and, upper-cased,
# as the format name handed to PIL when saving)
output_format = "png"
# Minimum width and height an image must have to be saved
min_width = 100
min_height = 100
# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and avoids the check-then-create race of os.path.exists()
os.makedirs(output_dir, exist_ok=True)

In [7]:
# Path of the PDF to extract images from (relative to the working directory)
file = "Largen & Spawls 2010 part VI snakes.pdf"
# Open the document with PyMuPDF; `pdf_file` is the handle the extraction
# loop indexes into and calls extract_image() on
pdf_file = fitz.open(file)

In [8]:
# Walk every page of the PDF and save each embedded image that meets the
# minimum dimensions into `output_dir` as `image<page>_<n>.<output_format>`.
for page_index, page in enumerate(pdf_file):
    # List the images referenced by this page; the first item of each entry
    # is the image's xref
    image_list = page.get_images(full=True)
    # Report the number of images found (page numbers in messages are 0-based,
    # while saved file names use page_index + 1)
    if image_list:
        print(f"[+] Found a total of {len(image_list)} images in page {page_index}")
    else:
        print(f"[!] No images found on page {page_index}")
    # Image numbering within a page starts at 1
    for image_index, img in enumerate(image_list, start=1):
        xref = img[0]
        # Pull the raw encoded image bytes out of the document
        base_image = pdf_file.extract_image(xref)
        image_bytes = base_image["image"]
        # NOTE(review): every image is re-encoded to `output_format`; if file
        # size matters, writing `image_bytes` unchanged with the extension in
        # base_image["ext"] may be smaller -- confirm before changing.
        # The context manager releases the PIL image once it has been handled.
        with Image.open(io.BytesIO(image_bytes)) as image:
            if image.width >= min_width and image.height >= min_height:
                out_path = os.path.join(
                    output_dir,
                    f"image{page_index + 1}_{image_index}.{output_format}",
                )
                # Saving by path lets PIL open and close the file itself,
                # instead of leaking a handle from a bare open(..., "wb")
                image.save(out_path, format=output_format.upper())
            else:
                print(f"[-] Skipping image {image_index} on page {page_index} due to its small size.")


[+] Found a total of 1 images in page 0
[+] Found a total of 1 images in page 1
[+] Found a total of 1 images in page 2
[+] Found a total of 1 images in page 3
[+] Found a total of 1 images in page 4
[+] Found a total of 1 images in page 5
[+] Found a total of 1 images in page 6
[+] Found a total of 1 images in page 7
[+] Found a total of 1 images in page 8
[+] Found a total of 1 images in page 9
[+] Found a total of 1 images in page 10
[+] Found a total of 1 images in page 11
[+] Found a total of 1 images in page 12
[+] Found a total of 1 images in page 13
[+] Found a total of 1 images in page 14
[+] Found a total of 1 images in page 15
[+] Found a total of 1 images in page 16
[+] Found a total of 1 images in page 17
[+] Found a total of 1 images in page 18
[+] Found a total of 1 images in page 19
[+] Found a total of 1 images in page 20
[+] Found a total of 1 images in page 21
[+] Found a total of 1 images in page 22
[+] Found a total of 1 images in page 23
[+] Found a total of 1 ima