In [1]:
import fitz  # PyMuPDF 라이브러리
import os

In [2]:
def extract_images_from_pdf(pdf_path, output_dir="extracted_images"):
    """Extract every embedded image from a PDF with PyMuPDF and save it to disk.

    Each page's image list is walked in order and every entry is written to
    ``output_dir`` as ``image_<page>_<index>.<ext>``. Both numbers are 1-based
    and ``<ext>`` is the extension reported by ``doc.extract_image``. One line
    is printed per saved file, followed by the total number of files written.

    Args:
        pdf_path: Path of the PDF file to open.
        output_dir: Directory that receives the image files. It is created
            (including missing parents) when it does not exist yet.

    Returns:
        None. Results are written to disk and reported through ``print``.

    Raises:
        Whatever ``fitz.open``, ``doc.extract_image`` or the file write raise;
        the document is closed before the exception propagates.
    """
    # exist_ok=True replaces a separate exists() check, which could race with
    # another process creating the directory between the check and the call.
    os.makedirs(output_dir, exist_ok=True)

    doc = fitz.open(pdf_path)
    image_count = 0
    try:
        # Pages and images are numbered from 1 in the output file names.
        for page_num, page in enumerate(doc, start=1):
            image_list = page.get_images(full=True)

            for img_index, img in enumerate(image_list, start=1):
                # The first item of each entry is the xref that
                # extract_image() takes to look the image up.
                xref = img[0]
                base_image = doc.extract_image(xref)
                image_bytes = base_image["image"]
                image_ext = base_image["ext"]

                image_filename = f"image_{page_num}_{img_index}.{image_ext}"
                image_path = os.path.join(output_dir, image_filename)

                with open(image_path, "wb") as image_file:
                    image_file.write(image_bytes)

                image_count += 1
                print(f"저장 완료: {image_path}")

        print(f"\n총 {image_count}개의 이미지를 추출했습니다.")
    finally:
        # Release the PDF handle even when extraction or writing fails.
        doc.close()

In [3]:
# Path of the PDF to process; the raw string keeps the Windows backslashes literal.
# NOTE(review): this is an absolute, machine-specific path — consider deriving it
# from a configurable data directory so the notebook runs on other machines.
pdf_file_path = r"C:\ITStudy\100_SelfProject\2nd_TechSeminar\Data\CRAFT.pdf"
# Extract the images; no output_dir is passed, so the function's default is used.
extract_images_from_pdf(pdf_file_path)

저장 완료: extracted_images\image_1_1.jpeg
저장 완료: extracted_images\image_1_2.png
저장 완료: extracted_images\image_1_3.jpeg
저장 완료: extracted_images\image_1_4.jpeg
저장 완료: extracted_images\image_1_5.png
저장 완료: extracted_images\image_1_6.jpeg
저장 완료: extracted_images\image_1_7.png
저장 완료: extracted_images\image_1_8.jpeg
저장 완료: extracted_images\image_1_9.jpeg
저장 완료: extracted_images\image_3_1.png
저장 완료: extracted_images\image_3_2.jpeg
저장 완료: extracted_images\image_3_3.png
저장 완료: extracted_images\image_3_4.png
저장 완료: extracted_images\image_3_5.png
저장 완료: extracted_images\image_3_6.jpeg
저장 완료: extracted_images\image_4_1.png
저장 완료: extracted_images\image_4_2.png
저장 완료: extracted_images\image_4_3.png
저장 완료: extracted_images\image_4_4.png
저장 완료: extracted_images\image_4_5.png
저장 완료: extracted_images\image_4_6.png
저장 완료: extracted_images\image_4_7.png
저장 완료: extracted_images\image_4_8.png
저장 완료: extracted_images\image_4_9.png
저장 완료: extracted_images\image_4_10.png
저장 완료: extracted_images\image_4_11.png
저장

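표 이미지는 추출 불가! 표는 PDF에 임베드된 이미지 객체가 아니라 텍스트·벡터 그래픽으로 그려진 것으로 보이며(확인 필요), 그런 경우 `page.get_images()` 목록에 포함되지 않아 위 방식으로는 추출되지 않는다.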