# Main

In [29]:
# Dependencies (install once, e.g. with %pip in a notebook cell):
# pip install pillow
# pip install PyMuPDF

In [1]:
import fitz  # PyMuPDF
from PIL import Image
import os

def pdf_to_images(pdf_path, output_format="jpg", dpi=None):
    """Render every page of a PDF to an image file, one file per page.

    Images are written to a folder named ``<pdf name>_png`` or
    ``<pdf name>_jpg`` (relative to the current working directory) as
    ``page_<n>.<ext>``, with pages numbered from 1.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_format: ``"png"`` selects PNG; ``"jpg"`` or ``"jpeg"`` selects
            JPEG. Matching is case-insensitive. Any other value falls back to
            JPEG and is saved with a ``.jpg`` extension, so the extension
            always agrees with the encoded format.
        dpi: Optional render resolution forwarded to ``Page.get_pixmap``.
            ``None`` (the default) calls ``get_pixmap()`` with no arguments.

    Returns:
        None. When ``pdf_path`` does not exist a message is printed and
        nothing is written.
    """
    # Check if the file exists
    if not os.path.exists(pdf_path):
        print(f"The file '{pdf_path}' does not exist.")
        return

    # Extract the PDF file name (without extension)
    pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]

    # Resolve folder suffix, file extension and Pillow format together so
    # they can never disagree (e.g. JPEG data stored in a ".tiff" file).
    requested = output_format.lower()
    if requested == "png":
        folder_suffix, extension, pil_format = "png", "png", "PNG"
    else:
        folder_suffix, pil_format = "jpg", "JPEG"
        extension = requested if requested in ("jpg", "jpeg") else "jpg"

    output_folder = f"{pdf_name}_{folder_suffix}"
    os.makedirs(output_folder, exist_ok=True)

    # Only pass dpi when the caller asked for it, so the default call is
    # exactly the original ``get_pixmap()``.
    render_options = {} if dpi is None else {"dpi": dpi}

    # Open the PDF file; the finally clause closes it even if a page fails
    # to render or save.
    pdf_document = fitz.open(pdf_path)
    try:
        for page_number in range(pdf_document.page_count):
            page = pdf_document[page_number]

            # Rasterize the page. "RGB" below relies on get_pixmap's default
            # of an RGB pixmap without an alpha channel.
            pixmap = page.get_pixmap(**render_options)
            img = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)

            # Save the image
            image_filename = f"{output_folder}/page_{page_number + 1}.{extension}"
            img.save(image_filename, format=pil_format)

            print(f"Page {page_number + 1} saved as {image_filename}")
    finally:
        pdf_document.close()

In [3]:
# Convert this bill to one JPG per page; files land in "<pdf name>_jpg/".
pdf_to_images("LT E-Bill Tehsil Office, Ramtek.pdf", output_format="jpg")

Page 1 saved as LT E-Bill Tehsil Office, Ramtek_jpg/page_1.jpg
Page 2 saved as LT E-Bill Tehsil Office, Ramtek_jpg/page_2.jpg
Page 3 saved as LT E-Bill Tehsil Office, Ramtek_jpg/page_3.jpg


## Rough

In [8]:
import fitz  # PyMuPDF
from PIL import Image
import os

def pdf_to_jpg(pdf_path):
    """Render every page of a PDF to a JPG file, one file per page.

    Files are written as ``page_<n>.jpg`` (pages numbered from 1) into a
    folder named ``<pdf name>_jpg`` relative to the current working directory.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        None. When ``pdf_path`` does not exist a message is printed and
        nothing is written.
    """
    # Check if the file exists
    if not os.path.exists(pdf_path):
        print(f"The file '{pdf_path}' does not exist.")
        return

    # Extract the PDF file name (without extension)
    pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]

    # Create output folder
    output_folder = f"{pdf_name}_jpg"
    os.makedirs(output_folder, exist_ok=True)

    # Open the PDF file; the finally clause closes it even if a page fails
    # to render or save.
    pdf_document = fitz.open(pdf_path)
    try:
        for page_number in range(pdf_document.page_count):
            page = pdf_document[page_number]

            # Rasterize the page and wrap the raw samples in a Pillow image.
            pixmap = page.get_pixmap()
            img = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)

            # No explicit format: Pillow picks JPEG from the ".jpg" extension.
            image_filename = f"{output_folder}/page_{page_number + 1}.jpg"
            img.save(image_filename)

            print(f"Page {page_number + 1} saved as {image_filename}")
    finally:
        pdf_document.close()

# if __name__ == "__main__":
#     # Replace 'input.pdf' with the path to your PDF file
#     pdf_path = "input.pdf"

#     pdf_to_jpg(pdf_path)

In [9]:
# Render every page of this bill to JPG files in "<pdf name>_jpg/".
pdf_to_jpg("HT_LTIP E-Bill suncity .pdf")

Page 1 saved as HT_LTIP E-Bill suncity _jpg/page_1.jpg
Page 2 saved as HT_LTIP E-Bill suncity _jpg/page_2.jpg
Page 3 saved as HT_LTIP E-Bill suncity _jpg/page_3.jpg
Page 4 saved as HT_LTIP E-Bill suncity _jpg/page_4.jpg


In [2]:
import PyPDF2
from PIL import Image
import os

def pdf_to_jpg(pdf_path):
    """Open a PDF with PyPDF2 and walk its pages; no image is written.

    Prints a message and stops when ``pdf_path`` is missing. Otherwise it
    creates the ``<pdf name>_jpg`` folder and reads each page object in turn
    without rendering or saving anything.

    NOTE(review): this reuses the name ``pdf_to_jpg``, so running it replaces
    any earlier definition with that name in the same kernel.
    """
    if os.path.exists(pdf_path):
        # Folder name is the PDF's base name (extension stripped) + "_jpg".
        stem, _ext = os.path.splitext(os.path.basename(pdf_path))
        target_dir = f"{stem}_jpg"
        os.makedirs(target_dir, exist_ok=True)

        with open(pdf_path, 'rb') as handle:
            reader = PyPDF2.PdfReader(handle)

            # Touch every page by index; the page object is not used further.
            for index in range(len(reader.pages)):
                current_page = reader.pages[index]
    else:
        print(f"The file '{pdf_path}' does not exist.")
            
           


In [4]:
def pdf_to_png(pdf_path):
    """Render every page of a PDF to a PNG file, one file per page.

    Files are written as ``page_<n>.png`` (pages numbered from 1) into a
    folder named ``<pdf name>_png`` relative to the current working directory.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        None. When ``pdf_path`` does not exist a message is printed and
        nothing is written.
    """
    # Check if the file exists
    if not os.path.exists(pdf_path):
        print(f"The file '{pdf_path}' does not exist.")
        return

    # Extract the PDF file name (without extension)
    pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]

    # Create output folder
    output_folder = f"{pdf_name}_png"
    os.makedirs(output_folder, exist_ok=True)

    # Open the PDF file; the finally clause closes it even if a page fails
    # to render or save.
    pdf_document = fitz.open(pdf_path)
    try:
        for page_number in range(pdf_document.page_count):
            page = pdf_document[page_number]

            # Rasterize the page and wrap the raw samples in a Pillow image.
            pixmap = page.get_pixmap()
            img = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)

            # Save the image as a PNG file
            image_filename = f"{output_folder}/page_{page_number + 1}.png"
            img.save(image_filename, format="PNG")

            print(f"Page {page_number + 1} saved as {image_filename}")
    finally:
        pdf_document.close()

In [10]:
# Render every page of this bill to PNG files in "<pdf name>_png/".
pdf_to_png("HT_LTIP E-Bill suncity .pdf")

Page 1 saved as HT_LTIP E-Bill suncity _png/page_1.png
Page 2 saved as HT_LTIP E-Bill suncity _png/page_2.png
Page 3 saved as HT_LTIP E-Bill suncity _png/page_3.png
Page 4 saved as HT_LTIP E-Bill suncity _png/page_4.png


In [27]:
# Same conversion through the general helper, this time requesting PNG output.
pdf_to_images("HT_LTIP E-Bill Vipul gupta.pdf", output_format="png")

Page 1 saved as HT_LTIP E-Bill Vipul gupta_png/page_1.png
Page 2 saved as HT_LTIP E-Bill Vipul gupta_png/page_2.png
Page 3 saved as HT_LTIP E-Bill Vipul gupta_png/page_3.png
Page 4 saved as HT_LTIP E-Bill Vipul gupta_png/page_4.png
