<a href="https://colab.research.google.com/github/mghulamqadir/pdf-to-images-tool/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip -q install pymupdf pillow

import shutil, zipfile
from pathlib import Path
import fitz
from PIL import Image as PILImage
import ipywidgets as widgets
from IPython.display import display, clear_output, Image as IPyImage, HTML
from google.colab import files

# ---------- Core conversion ----------
def pdf_to_compressed_jpgs(pdf_path: Path, out_dir: Path, zoom: float, jpg_quality: int, max_width, prefix: str):
    """Render every page of a PDF to its own compressed JPEG file.

    Any existing ``out_dir`` is deleted and recreated first, so afterwards it
    only holds the images produced by this call.

    Args:
        pdf_path: PDF file to render.
        out_dir: Directory that receives the JPEGs (wiped beforehand).
        zoom: Scale factor applied to both axes when rasterising a page.
        jpg_quality: JPEG quality handed to Pillow (coerced to ``int``).
        max_width: Maximum output width in pixels; wider pages are scaled
            down keeping their aspect ratio. A falsy value (``None``/``0``)
            disables resizing.
        prefix: File-name prefix; files are named ``{prefix}_{NNN}.jpg``
            with a 1-based, zero-padded page number.

    Returns:
        List of the written image paths, in page order.
    """
    if out_dir.exists():
        shutil.rmtree(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Loop invariants: convert the settings once instead of once per page
    scale = float(zoom)
    matrix = fitz.Matrix(scale, scale)
    quality = int(jpg_quality)

    image_paths = []
    doc = fitz.open(str(pdf_path))
    try:
        for page_number, page in enumerate(doc, start=1):
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            img = PILImage.frombytes("RGB", (pix.width, pix.height), pix.samples)

            if max_width and img.width > max_width:
                # Keep the aspect ratio; never let the height truncate to 0
                new_h = max(1, int(img.height * (max_width / img.width)))
                img = img.resize((max_width, new_h), PILImage.LANCZOS)

            out_file = out_dir / f"{prefix}_{page_number:03d}.jpg"
            img.save(out_file, "JPEG", quality=quality, optimize=True, progressive=True)
            image_paths.append(out_file)
    finally:
        # Release the document even when rendering or saving a page fails
        doc.close()

    return image_paths

def make_zip(image_paths, zip_path: Path):
    """Pack the given files into a fresh DEFLATE-compressed ZIP archive.

    An existing file at ``zip_path`` is removed first. Each entry is stored
    flat under its bare file name, without any directory part.

    Returns:
        ``zip_path``, unchanged, for convenience.
    """
    # Remove a leftover archive before writing the new one
    if zip_path.exists():
        zip_path.unlink()

    archive = zipfile.ZipFile(zip_path, mode="w", compression=zipfile.ZIP_DEFLATED)
    with archive:
        for image in image_paths:
            archive.write(image, arcname=image.name)

    return zip_path

def unique_name(existing: set[str], filename: str) -> str:
    """Return ``filename``, or a numbered variant if it is already taken.

    A clash is resolved by inserting `` (k)`` between stem and suffix,
    starting at ``k = 2``: ``'file.pdf'`` becomes ``'file (2).pdf'``, then
    ``'file (3).pdf'`` and so on. ``existing`` is only read, never modified.
    """
    parts = Path(filename)
    if filename not in existing:
        return filename

    counter = 2
    candidate = f"{parts.stem} ({counter}){parts.suffix}"
    # Count upwards until a free name is found
    while candidate in existing:
        counter += 1
        candidate = f"{parts.stem} ({counter}){parts.suffix}"
    return candidate

# ---------- UI ----------
# Heading shown above all controls
title = widgets.HTML("<h3>üìÑ‚û°Ô∏èüñºÔ∏è PDF ‚Üí Separate JPG per Page (Supports duplicate names)</h3>")

# Step 1: upload PDFs, pick one of the stored uploads, or reset everything
uploader = widgets.FileUpload(accept=".pdf", multiple=True)
pdf_selector = widgets.Dropdown(options=[], description="PDF")
clear_btn = widgets.Button(
    description="Clear uploads",
    button_style="warning",
    icon="trash",
)

# Step 2: rendering and compression settings
zoom_slider = widgets.FloatSlider(
    value=2.0,
    min=1.0,
    max=4.0,
    step=0.25,
    description="Zoom",
    continuous_update=False,
)
quality_slider = widgets.IntSlider(
    value=65,
    min=30,
    max=95,
    step=1,
    description="JPG Quality",
    continuous_update=False,
)
max_width_dd = widgets.Dropdown(
    options=[
        ("No resize", None),
        ("1200px (smaller)", 1200),
        ("1600px (balanced)", 1600),
        ("2000px (clearer)", 2000),
    ],
    value=1600,
    description="Max Width",
)
prefix_txt = widgets.Text(value="page", description="Prefix")
preview_count = widgets.IntSlider(
    value=10,
    min=1,
    max=50,
    step=1,
    description="Preview",
    continuous_update=False,
)

# Action buttons; the download button starts disabled until a ZIP exists
convert_btn = widgets.Button(
    description="Convert & Preview",
    button_style="success",
    icon="play",
)
download_zip_btn = widgets.Button(
    description="Download ZIP",
    button_style="primary",
    icon="download",
    disabled=True,
)

# Output areas for status messages and for the image preview
status_out = widgets.Output()
preview_out = widgets.Output()

# Shared state: our own copy of the uploads (so duplicate names can coexist)
# plus the results of the most recent conversion
state = {
    "uploads": {},        # display_name -> {"orig_name": str, "content": bytes}
    "selected": None,     # display name currently chosen in the dropdown
    "zip_path": None,     # Path of the last ZIP that was built
    "image_paths": [],    # Paths of the last generated JPEGs
}

def refresh_dropdown(select_latest=True):
    """Sync the PDF dropdown with the display names stored in ``state``.

    When ``select_latest`` is true and at least one upload exists, the last
    stored name becomes both the dropdown value and ``state["selected"]``.
    """
    names = [*state["uploads"]]
    pdf_selector.options = names

    if not (names and select_latest):
        return

    newest = names[-1]
    pdf_selector.value = newest
    state["selected"] = newest

def on_upload_change(change):
    """Observer for the uploader: move new uploads into ``state["uploads"]``.

    Every uploaded file is stored under a display name made unique with
    ``unique_name``, so files that share a name do not overwrite each other.
    The uploader's value is iterated as a mapping of file name to a dict
    holding the file's ``"content"``.
    """
    # Nothing to store (also the case right after the value was emptied)
    if not uploader.value:
        return

    taken = set(state["uploads"])
    for orig_name, item in uploader.value.items():
        display_name = unique_name(taken, orig_name)
        taken.add(display_name)
        state["uploads"][display_name] = {
            "orig_name": orig_name,
            "content": item["content"],
        }

    # Empty the uploader so later uploads (even same-named ones) are picked up
    uploader.value.clear()

    refresh_dropdown(select_latest=True)

    stored = len(state["uploads"])
    with status_out:
        clear_output()
        print(f"‚úÖ Stored uploads: {stored}")
        print(f"Selected: {state['selected']}")

def on_pdf_select(change):
    """Observer for the dropdown: remember the newly selected display name."""
    state.update(selected=change["new"])

def render_preview(paths, n, thumb_width=420):
    """Show the first ``n`` images of ``paths`` inside ``preview_out``.

    The preview area is cleared first. Each image is shown below a caption
    with its file name and displayed ``thumb_width`` pixels wide. With no
    paths, a short placeholder message is printed instead.
    """
    with preview_out:
        clear_output()

        if not paths:
            print("No images yet.")
            return

        total = len(paths)
        shown = min(n, total)
        display(HTML(f"<b>Showing {shown} of {total} images</b><br><br>"))

        # zip against range(shown) stops after the first `shown` paths
        for _, image_path in zip(range(shown), paths):
            caption = f"<div style='font-weight:600;margin:10px 0 6px 0;'>üñºÔ∏è {image_path.name}</div>"
            display(HTML(caption))
            display(IPyImage(filename=str(image_path), width=thumb_width))

def on_convert(_):
    """Convert the selected upload to JPEGs, zip them and refresh the preview.

    Click handler for ``convert_btn``. Status messages are printed into
    ``status_out``. On success the image paths and the ZIP path are stored in
    ``state`` and the download button is enabled; the preview is re-rendered
    afterwards from whatever ``state["image_paths"]`` holds.
    """
    with status_out:
        clear_output()

        selected = state["selected"]
        # .get() also covers a selection that is no longer among the uploads
        upload = state["uploads"].get(selected) if selected else None
        if upload is None:
            print("‚ùå Upload PDF(s) first.")
            return

        # Drop earlier results up front: the conversion wipes the shared output
        # folder, so after a failure the old image paths would point nowhere
        state["zip_path"] = None
        state["image_paths"] = []
        download_zip_btn.disabled = True

        pdf_path = Path(selected)  # use display name so duplicates are unique on disk
        pdf_path.write_bytes(upload["content"])

        # The prefix becomes part of each file name: besides spaces, replace
        # path separators so it cannot point outside the output folder
        unsafe_chars = str.maketrans({" ": "_", "/": "_", "\\": "_"})
        prefix = prefix_txt.value.strip().translate(unsafe_chars) or "page"
        out_dir = Path("compressed_images")
        zip_path = Path(f"{pdf_path.stem}_images.zip")

        print(f"‚úÖ Using: {selected}")
        print("üîÑ Converting...")

        paths = pdf_to_compressed_jpgs(
            pdf_path=pdf_path,
            out_dir=out_dir,
            zoom=zoom_slider.value,
            jpg_quality=quality_slider.value,
            max_width=max_width_dd.value,
            prefix=prefix,
        )

        make_zip(paths, zip_path)

        # Publish the results only after both steps succeeded
        state["zip_path"] = zip_path
        state["image_paths"] = paths
        download_zip_btn.disabled = False

        size_mb = zip_path.stat().st_size / (1024 * 1024)
        print(f"‚úÖ Done! Pages: {len(paths)}")
        print(f"üßæ ZIP ready: {zip_path.name} ({size_mb:.2f} MB)")

    render_preview(state["image_paths"], preview_count.value)

def on_preview_change(change):
    """Observer for the preview slider: redraw with the new image count.

    Does nothing for non-``value`` changes or while no images exist yet.
    """
    is_value_change = change["name"] == "value"
    if not (is_value_change and state["image_paths"]):
        return
    render_preview(state["image_paths"], change["new"])

def on_download_zip(_):
    """Click handler: download the last built ZIP if it still exists on disk."""
    archive = state["zip_path"]
    if not archive:
        return
    if Path(archive).exists():
        files.download(str(archive))

def on_clear(_):
    """Click handler: forget all uploads and results and reset the UI.

    Empties the stored uploads and conversion results, clears the dropdown,
    disables the download button and wipes both output areas.
    """
    state["uploads"].clear()
    state.update(selected=None, zip_path=None, image_paths=[])

    pdf_selector.options = []
    download_zip_btn.disabled = True

    with status_out:
        clear_output()
        print("üßπ Cleared uploads and results.")

    with preview_out:
        clear_output()
# Wire the handlers: observers react to changes of a widget's "value" trait,
# on_click callbacks react to button presses
uploader.observe(on_upload_change, names="value")
pdf_selector.observe(on_pdf_select, names="value")
preview_count.observe(on_preview_change, names="value")
convert_btn.on_click(on_convert)
download_zip_btn.on_click(on_download_zip)
clear_btn.on_click(on_clear)

# Layout: one vertical stack, top to bottom — heading, upload step, settings,
# action buttons, status messages, then the preview area
ui = widgets.VBox([
    title,
    widgets.HTML("<b>1) Upload PDF(s)</b>"),
    uploader,
    widgets.HBox([pdf_selector, clear_btn]),
    widgets.HTML("<b>2) Settings</b>"),
    widgets.HBox([zoom_slider, quality_slider]),
    widgets.HBox([max_width_dd, prefix_txt]),
    preview_count,
    widgets.HBox([convert_btn, download_zip_btn]),
    status_out,
    widgets.HTML("<hr><b>Preview</b>"),
    preview_out
])

# Render the assembled interface in the notebook cell output
display(ui)


VBox(children=(HTML(value='<h3>üìÑ‚û°Ô∏èüñºÔ∏è PDF ‚Üí Separate JPG per Page (Supports duplicate names)</h3>'), HTML(value‚Ä¶