In [None]:
!pip install pymupdf
!pip install pillow



In [None]:
import fitz
from PIL import Image
from io import BytesIO

In [None]:
def _recompress_image(image_bytes, scale, quality, dpi):
    """Re-encode raw image bytes as a JPEG, downscaled by ``scale``.

    Args:
        image_bytes (bytes): Encoded image data as extracted from the PDF.
        scale (float): Factor applied to both dimensions (expected in (0, 1]).
        quality (int): JPEG quality passed to Pillow.
        dpi (int): DPI value written into the JPEG metadata.

    Returns:
        bytes: The JPEG-encoded image.
    """
    with Image.open(BytesIO(image_bytes)) as source:
        image = source

        # The JPEG encoder cannot write alpha/palette modes (e.g. RGBA, P, LA);
        # convert those up front so the resize below also works on real pixels.
        if image.mode not in ("1", "L", "RGB", "CMYK"):
            image = image.convert("RGB")

        width, height = image.size
        # Clamp to at least 1px: a 0-sized target makes Pillow's resize fail.
        target_size = (max(1, int(width * scale)), max(1, int(height * scale)))
        if target_size != image.size:
            image = image.resize(target_size, Image.Resampling.LANCZOS)

        buffer = BytesIO()
        image.save(buffer, format="JPEG", quality=quality, dpi=(dpi, dpi))

    return buffer.getvalue()


def compress_pdf(input_pdf, output_pdf, dpi=72, quality=50, source_dpi=300):
    """
    Compress a PDF by rebuilding it from downscaled, JPEG-recompressed images.

    Every page of the input is recreated with the same dimensions, and each
    image referenced by the page is re-encoded and placed at the position(s)
    it occupied on the original page.

    Note:
        Only images are carried over. Text, vector graphics, annotations and
        other non-image page content are NOT copied into the output, so this
        is intended for image-based (e.g. scanned) PDFs.

    Args:
        input_pdf (str): Path to the input PDF file.
        output_pdf (str): Path to save the compressed output PDF.
        dpi (int): Target resolution for the images (default is 72).
        quality (int): JPEG quality for compressing images (default is 50, lower means more compression).
        source_dpi (int): Resolution the embedded images are assumed to have
            (default is 300). Images are scaled by ``dpi / source_dpi`` and are
            never enlarged.

    Raises:
        ValueError: If ``dpi`` or ``source_dpi`` is not positive.
    """
    if dpi <= 0:
        raise ValueError("dpi must be a positive number")
    if source_dpi <= 0:
        raise ValueError("source_dpi must be a positive number")

    # Cap at 1.0 so a target above the assumed source resolution never upscales
    # (which would make the "compressed" file larger).
    scale = min(1.0, dpi / source_dpi)

    doc = fitz.open(input_pdf)
    try:
        # New, empty document that receives the compressed pages
        compressed_doc = fitz.open()
        try:
            for page in doc:
                # Same page size as the original
                new_page = compressed_doc.new_page(width=page.rect.width, height=page.rect.height)

                for img in page.get_images(full=True):
                    xref = img[0]  # XREF is the reference number for the image

                    base_image = doc.extract_image(xref)
                    jpeg_bytes = _recompress_image(base_image["image"], scale, quality, dpi)

                    # Put the image back where it was displayed. One image may be
                    # shown several times on a page; unusable boxes are ignored.
                    placements = [
                        rect
                        for rect in page.get_image_rects(img)
                        if not (rect.is_empty or rect.is_infinite)
                    ]
                    if not placements:
                        # Position unknown: fall back to covering the whole page.
                        placements = [page.rect]

                    for rect in placements:
                        new_page.insert_image(rect, stream=jpeg_bytes)

            # Save the new compressed PDF
            compressed_doc.save(output_pdf)
        finally:
            compressed_doc.close()
    finally:
        # Always release the input file handle, even if compression failed.
        doc.close()

    print(f"PDF compression completed. Saved as {output_pdf}")

In [None]:
# Demo run: compress a sample PDF and write the result to the current
# working directory.
#
# Tunables:
#   - a smaller DPI lowers the image resolution
#   - a smaller JPEG quality (0 to 100) gives more compression
desired_dpi = 72
jpeg_quality = 80

input_pdf = "/content/24102024163001(38mb).pdf"
output_pdf = "compressed_output.pdf"

compress_pdf(
    input_pdf=input_pdf,
    output_pdf=output_pdf,
    dpi=desired_dpi,
    quality=jpeg_quality,
)

PDF compression completed. Saved as compressed_output.pdf
