Merge Pdf

In [None]:
import os
from PyPDF2 import PdfMerger
import ipywidgets as widgets
from IPython.display import display

# Folder containing the PDF files
folder_path = "FoSS"

# Get all PDF files in the folder
pdf_files = [f for f in os.listdir(folder_path) if f.endswith(".pdf")]

# Create a multiple selection widget
file_selector = widgets.SelectMultiple(
    options=pdf_files,
    value=[],
    description="PDF Files",
    rows=10
)

# Create an output filename input widget
output_name = widgets.Text(
    value="merged_output.pdf",
    description="Output File:",
    placeholder="Enter output file name"
)

# Create a button to trigger the merge
merge_button = widgets.Button(
    description="Merge PDFs",
    button_style="success"  # Green button
)

# Display widgets
display(file_selector, output_name, merge_button)

# Callback function for merging PDFs
def merge_pdfs(button):
    selected_files = list(file_selector.value)
    if not selected_files:
        print("No files selected!")
        return

    # Full paths for selected files
    full_paths = [os.path.join(folder_path, f) for f in selected_files]

    # Merge PDFs
    merger = PdfMerger()
    for file_path in full_paths:
        merger.append(file_path)

    output_path = os.path.join(folder_path, output_name.value)
    merger.write(output_path)
    merger.close()

    print(f"Merged PDFs into {output_path}")

# Attach the callback function to the button click event
merge_button.on_click(merge_pdfs)


Merged Motivation_upasana_Riedenhaldenstrasse47.pdf and UpasanaSwisscomContract.pdf into upasana_ridehnehaldenstrasse47_merged.pdf


Split PDF

In [None]:
from PyPDF2 import PdfReader, PdfWriter

input_pdf = "file.pdf"
output_pdf = "extracted_pages.pdf"

reader = PdfReader(input_pdf)
writer = PdfWriter()

# Extract specific pages (e.g., pages 2 to 4)
for page_num in range(1, 4):  # PyPDF2 is zero-indexed
    writer.add_page(reader.pages[page_num])

# Write the new PDF
with open(output_pdf, "wb") as out_file:
    writer.write(out_file)

print(f"Extracted pages written to {output_pdf}")


Compress PDF

In [2]:
import pikepdf
import os

input_pdf = "upasana_ridehnehaldenstrasse47_merged.pdf"
output_pdf = f"compressed_{input_pdf}.pdf"

# Get the size of the original PDF
original_size = os.path.getsize(input_pdf)


# Open the PDF and remove unused objects
with pikepdf.open(input_pdf) as pdf:
    # Reduce file size by removing embedded fonts or unused objects
    pdf.save(output_pdf, linearize=True)

# Get the size of the compressed PDF
compressed_size = os.path.getsize(output_pdf)

# Print sizes
print(f"Original PDF size: {original_size / 1024:.2f} KB")
print(f"Compressed PDF size: {compressed_size / 1024:.2f} KB")
print(f"Compression reduced the size by {((original_size - compressed_size) / original_size) * 100:.2f}%")

Original PDF size: 386.45 KB
Compressed PDF size: 387.19 KB
Compression reduced the size by -0.19%


Extract Text from PDF

In [None]:
from PyPDF2 import PdfReader

input_pdf = "file.pdf"

reader = PdfReader(input_pdf)

for page_num, page in enumerate(reader.pages):
    print(f"--- Page {page_num + 1} ---")
    print(page.extract_text())


Add a Watermark to a PDF

In [None]:
from PyPDF2 import PdfReader, PdfWriter

input_pdf = "file.pdf"
watermark_pdf = "watermark.pdf"
output_pdf = "watermarked.pdf"

reader = PdfReader(input_pdf)
watermark = PdfReader(watermark_pdf).pages[0]  # Assume the watermark is on the first page

writer = PdfWriter()

for page in reader.pages:
    page.merge_page(watermark)  # Overlay the watermark
    writer.add_page(page)

with open(output_pdf, "wb") as out_file:
    writer.write(out_file)

print(f"Watermarked PDF saved as {output_pdf}")


Rotate Pages

In [None]:
from PyPDF2 import PdfReader, PdfWriter

input_pdf = "file.pdf"
output_pdf = "rotated.pdf"

reader = PdfReader(input_pdf)
writer = PdfWriter()

# Rotate all pages 90 degrees clockwise
for page in reader.pages:
    page.rotate(90)  # Options: 90, 180, 270
    writer.add_page(page)

with open(output_pdf, "wb") as out_file:
    writer.write(out_file)

print(f"Rotated PDF saved as {output_pdf}")


Encrypt/Password-Protect a PDF

In [None]:
from PyPDF2 import PdfWriter

input_pdf = "file.pdf"
output_pdf = "encrypted.pdf"
password = "mypassword"

writer = PdfWriter()
writer.append(input_pdf)
writer.encrypt(password)

with open(output_pdf, "wb") as out_file:
    writer.write(out_file)

print(f"Encrypted PDF saved as {output_pdf}")


Decrypt a Password-Protected PDF

In [None]:
from PyPDF2 import PdfReader, PdfWriter

input_pdf = "encrypted.pdf"
output_pdf = "decrypted.pdf"
password = "mypassword"

reader = PdfReader(input_pdf)
reader.decrypt(password)  # Provide the correct password

writer = PdfWriter()
for page in reader.pages:
    writer.add_page(page)

with open(output_pdf, "wb") as out_file:
    writer.write(out_file)

print(f"Decrypted PDF saved as {output_pdf}")


Add Metadata to a PDF

In [None]:
from PyPDF2 import PdfReader, PdfWriter

input_pdf = "file.pdf"
output_pdf = "metadata_added.pdf"

reader = PdfReader(input_pdf)
writer = PdfWriter()

writer.append(input_pdf)

# Add metadata
writer.add_metadata({
    "/Title": "My Document Title",
    "/Author": "Author Name",
    "/Subject": "PDF Metadata Example"
})

with open(output_pdf, "wb") as out_file:
    writer.write(out_file)

print(f"PDF with metadata saved as {output_pdf}")
