In [6]:
# Install required packages for Google Colab
!pip install gradio PyPDF2 reportlab

import gradio as gr
import PyPDF2
from PyPDF2 import PdfWriter, PdfReader
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.colors import Color
from reportlab.lib.units import inch
import io
import os
import tempfile
from typing import List, Tuple, Optional
import zipfile

class PDFToolkit:
    """PDF operations used as callbacks by the Gradio interface.

    Each public method accepts the uploaded file(s) — either an object
    exposing the path as ``.name`` or a plain path — and reports failures
    through its return value instead of raising, so the UI can always show a
    status message.  Page numbers supplied by the user are 1-based.
    """

    def __init__(self):
        pass

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _path_of(file) -> str:
        """Return the filesystem path behind an uploaded file or plain path."""
        # Check for path types first: pathlib.Path also has a ``.name``
        # attribute, but it only holds the final path component.
        if isinstance(file, (str, os.PathLike)):
            return os.fspath(file)
        return file.name

    @staticmethod
    def _temp_path(suffix: str) -> str:
        """Create an empty temporary file and return its path.

        ``mkstemp`` creates the file atomically, unlike the deprecated
        ``mktemp`` which only returns a name that another process could claim.
        """
        fd, path = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        return path

    @classmethod
    def _save(cls, writer) -> str:
        """Write ``writer`` to a new temporary PDF and return its path.

        The temporary file is removed again if writing fails.
        """
        path = cls._temp_path('.pdf')
        try:
            with open(path, 'wb') as output_file:
                writer.write(output_file)
        except Exception:
            os.unlink(path)
            raise
        return path

    @staticmethod
    def _parse_page_spec(spec: str, total_pages: int) -> List[int]:
        """Turn a spec such as ``"1,3,5-7"`` into 0-based page indices.

        Args:
            spec: Comma-separated 1-based page numbers and ``start-end`` ranges.
            total_pages: Number of pages in the document, used for validation.

        Returns:
            The indices in the order they were written (duplicates kept).

        Raises:
            ValueError: If a token is not a number/range, a range is reversed,
                a page lies outside ``1..total_pages``, or no page is given.
        """
        indices: List[int] = []
        for token in (spec or "").split(','):
            token = token.strip()
            if not token:
                continue  # tolerate stray or trailing commas
            first, dash, last = token.partition('-')
            try:
                start = int(first)
                end = int(last) if dash else start
            except ValueError:
                raise ValueError(
                    f"'{token}' is not a valid page number or range."
                ) from None
            if start > end:
                raise ValueError(
                    f"Range '{token}' is reversed; write the lower page first."
                )
            if start < 1 or end > total_pages:
                raise ValueError(
                    f"Page {token} is out of range. PDF has {total_pages} pages."
                )
            indices.extend(range(start - 1, end))  # Convert to 0-based
        if not indices:
            raise ValueError("Please specify at least one page.")
        return indices

    @staticmethod
    def _watermark_overlay(text: str, opacity: float, left: float,
                           bottom: float, width: float, height: float):
        """Build a one-page PDF overlay with ``text`` centred on the given box.

        The text is drawn in gray Helvetica-Bold 50pt, rotated 45 degrees
        around the centre of the box described by ``left``/``bottom``/
        ``width``/``height`` (PDF points).
        """
        buffer = io.BytesIO()
        c = canvas.Canvas(buffer, pagesize=(width, height))
        c.setFillColorRGB(0.5, 0.5, 0.5, alpha=opacity)  # Gray color with opacity
        c.setFont("Helvetica-Bold", 50)
        text_width = c.stringWidth(text, "Helvetica-Bold", 50)

        c.saveState()
        c.translate(left + width / 2, bottom + height / 2)  # Center of page
        c.rotate(45)
        c.drawString(-text_width / 2, 0, text)  # Center the text
        c.restoreState()
        c.save()

        buffer.seek(0)
        return PdfReader(buffer).pages[0]

    # ------------------------------------------------------------------
    # Public operations
    # ------------------------------------------------------------------
    def merge_pdfs(self, files: List) -> Tuple[Optional[str], str]:
        """Merge multiple PDF files into one.

        Args:
            files: Uploaded files, concatenated in the order given.

        Returns:
            ``(output_path, status)``; ``output_path`` is ``None`` on failure
            or when fewer than two files were supplied.
        """
        if not files or len(files) < 2:
            return None, "Please upload at least 2 PDF files to merge."

        try:
            merger = PdfWriter()
            for file in files:
                for page in PdfReader(self._path_of(file)).pages:
                    merger.add_page(page)

            return self._save(merger), "PDFs merged successfully!"

        except Exception as e:
            return None, f"Error merging PDFs: {str(e)}"

    def split_pdf(self, file, pages_per_split: int = 1) -> Tuple[Optional[str], str]:
        """Split a PDF into parts of ``pages_per_split`` pages, zipped together.

        Args:
            file: Uploaded PDF.
            pages_per_split: Pages per part.  Floats are truncated; ``None``
                and values below 1 fall back to 1.

        Returns:
            ``(zip_path, status)``; ``zip_path`` is ``None`` on failure.
        """
        if not file:
            return None, "Please upload a PDF file to split."

        zip_path = None
        try:
            # A numeric UI field may deliver a float or None, but range()
            # below only accepts ints.
            chunk = int(pages_per_split) if pages_per_split else 1
            if chunk <= 0:
                chunk = 1

            reader = PdfReader(self._path_of(file))
            total_pages = len(reader.pages)

            zip_path = self._temp_path('.zip')
            with zipfile.ZipFile(zip_path, 'w') as zipf:
                for i in range(0, total_pages, chunk):
                    end_page = min(i + chunk, total_pages)
                    writer = PdfWriter()
                    for j in range(i, end_page):
                        writer.add_page(reader.pages[j])

                    # Build each part in memory so no intermediate temp file
                    # can be left behind if something fails midway.
                    part = io.BytesIO()
                    writer.write(part)
                    zipf.writestr(f"split_{i+1}-{end_page}.pdf", part.getvalue())

            part_count = (total_pages + chunk - 1) // chunk
            return zip_path, f"PDF split into {part_count} files!"

        except Exception as e:
            if zip_path and os.path.exists(zip_path):
                os.unlink(zip_path)
            return None, f"Error splitting PDF: {str(e)}"

    def remove_pages(self, file, pages_to_remove: str) -> Tuple[Optional[str], str]:
        """Remove specific pages from a PDF.

        Args:
            file: Uploaded PDF.
            pages_to_remove: 1-based spec such as ``"1,3,5-7"``.

        Returns:
            ``(output_path, status)``; ``output_path`` is ``None`` when the
            spec is invalid, would remove every page, or processing fails.
        """
        if not file:
            return None, "Please upload a PDF file."

        try:
            reader = PdfReader(self._path_of(file))
            total_pages = len(reader.pages)

            # A set gives O(1) membership tests and collapses duplicates.
            doomed = set(self._parse_page_spec(pages_to_remove, total_pages))
            if len(doomed) >= total_pages:
                return None, "Cannot remove every page from the PDF."

            writer = PdfWriter()
            for i, page in enumerate(reader.pages):
                if i not in doomed:
                    writer.add_page(page)

            remaining = len(writer.pages)
            output_path = self._save(writer)
            return output_path, f"Removed pages: {pages_to_remove}. New PDF has {remaining} pages."

        except Exception as e:
            return None, f"Error removing pages: {str(e)}"

    def compress_pdf(self, file) -> Tuple[Optional[str], str]:
        """Basic PDF compression by compressing each page's content streams.

        Args:
            file: Uploaded PDF.

        Returns:
            ``(output_path, status)``; the status reports the size change
            relative to the uploaded file.
        """
        if not file:
            return None, "Please upload a PDF file to compress."

        try:
            source_path = self._path_of(file)
            original_size = os.path.getsize(source_path)

            reader = PdfReader(source_path)
            writer = PdfWriter()
            for page in reader.pages:
                # Compress content streams
                page.compress_content_streams()
                writer.add_page(page)

            output_path = self._save(writer)
            compressed_size = os.path.getsize(output_path)

            # Guard the ratio against an empty source file.
            reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0
            if reduction < 0:
                # Rewriting can grow an already well-compressed file; say so
                # instead of reporting a negative "reduction".
                return output_path, f"PDF rewritten, but the result is {-reduction:.1f}% larger than the original."

            return output_path, f"PDF compressed! Size reduced by {reduction:.1f}%"

        except Exception as e:
            return None, f"Error compressing PDF: {str(e)}"

    def rotate_pages(self, file, rotation_angle: int, page_range: str = "all") -> Tuple[Optional[str], str]:
        """Rotate pages in a PDF.

        Args:
            file: Uploaded PDF.
            rotation_angle: Angle in degrees passed to ``page.rotate``.
            page_range: ``"all"`` (case-insensitive) or a 1-based spec such
                as ``"1,3,5-7"``; ``None`` is treated as ``"all"``.

        Returns:
            ``(output_path, status)``; ``output_path`` is ``None`` on failure.
        """
        if not file:
            return None, "Please upload a PDF file to rotate."

        try:
            angle = int(rotation_angle)
            reader = PdfReader(self._path_of(file))
            writer = PdfWriter()
            total_pages = len(reader.pages)

            spec = "all" if page_range is None else page_range.strip()
            if spec.lower() == "all":
                pages_to_rotate = set(range(total_pages))
            else:
                pages_to_rotate = set(self._parse_page_spec(spec, total_pages))

            for i, page in enumerate(reader.pages):
                if i in pages_to_rotate:
                    page.rotate(angle)
                writer.add_page(page)

            return self._save(writer), f"Pages rotated by {rotation_angle} degrees!"

        except Exception as e:
            return None, f"Error rotating pages: {str(e)}"

    def extract_text(self, file) -> str:
        """Extract text from a PDF, one ``--- Page N ---`` section per page.

        Args:
            file: Uploaded PDF.

        Returns:
            The extracted text, or an error/prompt message.
        """
        if not file:
            return "Please upload a PDF file to extract text from."

        try:
            reader = PdfReader(self._path_of(file))
            sections = [
                # ``or ""`` keeps a page without extractable text from
                # breaking the concatenation.
                f"--- Page {page_num} ---\n{page.extract_text() or ''}\n"
                for page_num, page in enumerate(reader.pages, start=1)
            ]
            return "\n".join(sections).strip()

        except Exception as e:
            return f"Error extracting text: {str(e)}"

    def add_watermark(self, file, watermark_text: str, opacity: float = 0.3) -> Tuple[Optional[str], str]:
        """Add a diagonal text watermark to every page of a PDF.

        Args:
            file: Uploaded PDF.
            watermark_text: Text to stamp; must contain non-whitespace.
            opacity: Fill alpha handed to reportlab.

        Returns:
            ``(output_path, status)``; ``output_path`` is ``None`` on failure.
        """
        if not file:
            return None, "Please upload a PDF file to add watermark."

        if not watermark_text or not watermark_text.strip():
            return None, "Please enter watermark text."

        try:
            reader = PdfReader(self._path_of(file))
            writer = PdfWriter()

            # One overlay per distinct media box, so the text is centred on
            # every page size rather than only on letter-sized pages.
            overlays = {}
            for page in reader.pages:
                box = page.mediabox
                key = (float(box.left), float(box.bottom),
                       float(box.width), float(box.height))
                if key not in overlays:
                    overlays[key] = self._watermark_overlay(watermark_text, opacity, *key)
                page.merge_page(overlays[key])
                writer.add_page(page)

            return self._save(writer), f"Watermark '{watermark_text}' added successfully!"

        except Exception as e:
            return None, f"Error adding watermark: {str(e)}"

    def rearrange_pages(self, file, new_order: str) -> Tuple[Optional[str], str]:
        """Rearrange pages in a PDF according to the specified order.

        Args:
            file: Uploaded PDF.
            new_order: 1-based page numbers (and optional ``start-end``
                ranges) in the desired order, e.g. ``"3,1,4,2"``.  Pages may
                be repeated or omitted.

        Returns:
            ``(output_path, status)``; ``output_path`` is ``None`` when the
            order is empty or invalid, or processing fails.
        """
        if not file:
            return None, "Please upload a PDF file to rearrange."

        if not (new_order or "").strip():
            return None, "Please specify a valid page order."

        try:
            reader = PdfReader(self._path_of(file))
            total_pages = len(reader.pages)

            try:
                page_order = self._parse_page_spec(new_order, total_pages)
            except ValueError as err:
                # Report bad tokens instead of silently dropping them.
                return None, str(err)

            writer = PdfWriter()
            for page_idx in page_order:
                writer.add_page(reader.pages[page_idx])

            return self._save(writer), f"Pages rearranged! New order: {new_order}"

        except Exception as e:
            return None, f"Error rearranging pages: {str(e)}"

# Module-level toolkit instance; create_interface() wires its methods to the UI buttons
toolkit = PDFToolkit()

# Create Gradio interface optimized for Colab
def create_interface():
    """Build the tabbed Gradio UI and return the ``gr.Blocks`` app.

    One tab is created per toolkit operation (merge, split, remove, compress,
    rotate, extract text, watermark, rearrange).  Each tab wires its button to
    the matching method of the module-level ``toolkit`` instance.  The app is
    only constructed here; launching it is left to the caller.
    """
    with gr.Blocks(title="PDF Toolkit - Personal iLovePDF Clone", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 📄 PDF Toolkit - Personal iLovePDF Clone")
        gr.Markdown("A comprehensive PDF manipulation tool running in Google Colab!")

        with gr.Tabs():
            # Merge PDFs Tab
            with gr.TabItem("🔗 Merge PDFs"):
                merge_files = gr.File(label="Upload PDF files to merge", file_count="multiple", file_types=[".pdf"])
                merge_btn = gr.Button("Merge PDFs", variant="primary")
                merge_output = gr.File(label="Download Merged PDF")
                merge_status = gr.Textbox(label="Status", interactive=False)

                merge_btn.click(
                    toolkit.merge_pdfs,
                    inputs=[merge_files],
                    outputs=[merge_output, merge_status]
                )

            # Split PDF Tab
            with gr.TabItem("✂️ Split PDF"):
                split_file = gr.File(label="Upload PDF to split", file_types=[".pdf"])
                # precision=0 makes the component hand over an int; the split
                # callback feeds this value to range(), which rejects floats.
                split_pages = gr.Number(label="Pages per split", value=1, minimum=1, precision=0)
                split_btn = gr.Button("Split PDF", variant="primary")
                split_output = gr.File(label="Download Split PDFs (ZIP)")
                split_status = gr.Textbox(label="Status", interactive=False)

                split_btn.click(
                    toolkit.split_pdf,
                    inputs=[split_file, split_pages],
                    outputs=[split_output, split_status]
                )

            # Remove Pages Tab
            with gr.TabItem("🗑️ Remove Pages"):
                remove_file = gr.File(label="Upload PDF", file_types=[".pdf"])
                pages_to_remove = gr.Textbox(
                    label="Pages to remove",
                    placeholder="e.g., 1,3,5-7 (page numbers separated by commas, ranges with hyphens)"
                )
                remove_btn = gr.Button("Remove Pages", variant="primary")
                remove_output = gr.File(label="Download Modified PDF")
                remove_status = gr.Textbox(label="Status", interactive=False)

                remove_btn.click(
                    toolkit.remove_pages,
                    inputs=[remove_file, pages_to_remove],
                    outputs=[remove_output, remove_status]
                )

            # Compress PDF Tab
            with gr.TabItem("🗜️ Compress PDF"):
                compress_file = gr.File(label="Upload PDF to compress", file_types=[".pdf"])
                compress_btn = gr.Button("Compress PDF", variant="primary")
                compress_output = gr.File(label="Download Compressed PDF")
                compress_status = gr.Textbox(label="Status", interactive=False)

                compress_btn.click(
                    toolkit.compress_pdf,
                    inputs=[compress_file],
                    outputs=[compress_output, compress_status]
                )

            # Rotate Pages Tab
            with gr.TabItem("🔄 Rotate Pages"):
                rotate_file = gr.File(label="Upload PDF", file_types=[".pdf"])
                rotation_angle = gr.Dropdown(
                    choices=[90, 180, 270],
                    label="Rotation angle",
                    value=90
                )
                rotate_pages = gr.Textbox(
                    label="Pages to rotate",
                    value="all",
                    placeholder="'all' or page numbers like '1,3,5-7'"
                )
                rotate_btn = gr.Button("Rotate Pages", variant="primary")
                rotate_output = gr.File(label="Download Rotated PDF")
                rotate_status = gr.Textbox(label="Status", interactive=False)

                rotate_btn.click(
                    toolkit.rotate_pages,
                    inputs=[rotate_file, rotation_angle, rotate_pages],
                    outputs=[rotate_output, rotate_status]
                )

            # Extract Text Tab
            with gr.TabItem("📝 Extract Text"):
                extract_file = gr.File(label="Upload PDF to extract text", file_types=[".pdf"])
                extract_btn = gr.Button("Extract Text", variant="primary")
                extracted_text = gr.Textbox(label="Extracted Text", lines=10, max_lines=20)

                extract_btn.click(
                    toolkit.extract_text,
                    inputs=[extract_file],
                    outputs=[extracted_text]
                )

            # Add Watermark Tab
            with gr.TabItem("💧 Add Watermark"):
                watermark_file = gr.File(label="Upload PDF", file_types=[".pdf"])
                watermark_text = gr.Textbox(label="Watermark text", placeholder="CONFIDENTIAL")
                watermark_opacity = gr.Slider(
                    label="Opacity",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.3,
                    step=0.1
                )
                watermark_btn = gr.Button("Add Watermark", variant="primary")
                watermark_output = gr.File(label="Download Watermarked PDF")
                watermark_status = gr.Textbox(label="Status", interactive=False)

                watermark_btn.click(
                    toolkit.add_watermark,
                    inputs=[watermark_file, watermark_text, watermark_opacity],
                    outputs=[watermark_output, watermark_status]
                )

            # Rearrange Pages Tab
            with gr.TabItem("🔀 Rearrange Pages"):
                rearrange_file = gr.File(label="Upload PDF", file_types=[".pdf"])
                new_order = gr.Textbox(
                    label="New page order",
                    placeholder="e.g., 3,1,4,2 (page numbers in desired order)"
                )
                rearrange_btn = gr.Button("Rearrange Pages", variant="primary")
                rearrange_output = gr.File(label="Download Rearranged PDF")
                rearrange_status = gr.Textbox(label="Status", interactive=False)

                rearrange_btn.click(
                    toolkit.rearrange_pages,
                    inputs=[rearrange_file, new_order],
                    outputs=[rearrange_output, rearrange_status]
                )

        # Add Colab-specific information
        gr.Markdown("""
        ## 💡 Google Colab Tips:
        - **File Upload**: Click the upload areas to select files from your computer
        - **Downloads**: Processed files will be available for download directly in the interface
        - **Large Files**: Colab has memory limits, so very large PDFs might timeout
        - **Page numbers**: Always use 1-based numbering (first page is 1, not 0)
        - **Sharing**: The interface is running locally in your Colab environment

        ## 📋 Usage Examples:
        - **Remove pages**: "1,3,5-7" removes pages 1, 3, and pages 5 through 7
        - **Rotate pages**: "all" rotates all pages, or "1,3-5" for specific pages
        - **Rearrange**: "3,1,4,2" puts page 3 first, then page 1, then 4, then 2
        - **Split**: Set to 1 for individual pages, higher for groups
        """)

    return demo

# Entry point: build the UI and start serving it
if __name__ == "__main__":
    demo = create_interface()
    # debug=True keeps the cell running so errors and logs stay visible;
    # share=True requests a public link so the app is reachable from Colab.
    demo.launch(debug=True, share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://8419a7edb5430bdacd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://8419a7edb5430bdacd.gradio.live
