In [None]:
!pip install pymupdf pillow fpdf



With high quality and DPI 👇

In [None]:
# --- Install dependencies in Colab ---
!pip install pymupdf pillow fpdf tqdm

# --- Mount Google Drive ---
from google.colab import drive
drive.mount('/content/drive')

import fitz  # PyMuPDF
from PIL import Image, ImageEnhance
from fpdf import FPDF
import os
from tqdm import tqdm   # üëà Progress bar

def convert_to_scanned(input_pdf, output_folder=None, dpi=200, quality=70,
                       contrast=1.5, brightness=1.2):
    """Rebuild a PDF from grayscale page images so it looks like a scan.

    Every page is rendered to a JPEG, converted to grayscale, contrast- and
    brightness-adjusted, re-saved as JPEG and placed on its own page of a new
    PDF named ``<input name>_scanned.pdf``.

    Args:
        input_pdf: Path of the PDF to convert.
        output_folder: Folder for the result. When ``None``, the folder of
            ``input_pdf`` is used (the current directory for a bare filename).
        dpi: Resolution passed to ``page.get_pixmap``.
        quality: JPEG quality used when each page image is re-saved.
        contrast: Factor passed to ``ImageEnhance.Contrast(...).enhance``.
        brightness: Factor passed to ``ImageEnhance.Brightness(...).enhance``.

    Returns:
        The path of the PDF that was written.

    Raises:
        FileNotFoundError: If ``input_pdf`` does not exist. This is checked
            first, so no output folder is created for a missing input.

    Note:
        Each image is placed at (0, 0) with size 210 x 297 (A4), regardless of
        the source page's own proportions.
    """
    import tempfile  # function-scope import: only this function needs it

    # Validate the input before touching the file system.
    if not os.path.exists(input_pdf):
        raise FileNotFoundError(f"‚ö†Ô∏è File not found: {input_pdf}")

    # If no output folder is given, use the input's folder. dirname() is ""
    # for a bare filename, and os.makedirs("") fails, so fall back to ".".
    if output_folder is None:
        folder = os.path.dirname(input_pdf) or "."
    else:
        folder = output_folder
    os.makedirs(folder, exist_ok=True)

    base_name = os.path.splitext(os.path.basename(input_pdf))[0]
    output_pdf = os.path.join(folder, f"{base_name}_scanned.pdf")

    pdf = FPDF()

    # Scratch images go to a private temp directory instead of the output
    # folder, so an existing page_N.jpg there is never overwritten/deleted and
    # nothing is left behind if a page fails. The document is closed on exit.
    with fitz.open(input_pdf) as doc, tempfile.TemporaryDirectory() as tmp_dir:
        print(f"üìÑ Total pages: {len(doc)}")
        print("üîÑ Converting to scanned style...")

        for page_num in tqdm(range(len(doc)), desc="Processing Pages", unit="page"):
            page = doc.load_page(page_num)

            # Render the page to an image file. The name stays unique per page
            # (NOTE(review): fpdf appears to key embedded images by file name).
            pix = page.get_pixmap(dpi=dpi)
            img_path = os.path.join(tmp_dir, f"page_{page_num+1}.jpg")
            pix.save(img_path)

            # Grayscale copy; the file handle is released before overwriting.
            with Image.open(img_path) as rendered:
                img = rendered.convert("L")
            img = ImageEnhance.Contrast(img).enhance(contrast)
            img = ImageEnhance.Brightness(img).enhance(brightness)

            # Save the compressed image over the rendered one.
            img.save(img_path, "JPEG", quality=quality)

            # Add the image to the PDF (A4 size).
            pdf.add_page()
            pdf.image(img_path, 0, 0, 210, 297)

            os.remove(img_path)  # keep scratch usage to one page at a time

    pdf.output(output_pdf, "F")
    print(f"‚úÖ Scanner-style PDF saved in Drive: {output_pdf}")
    return output_pdf


# ==== Run: high-quality conversion ====
# Source PDF on Drive and the Drive folder that receives the result.
input_pdf = "/content/drive/MyDrive/urdunovelbank2/Urdunovelbank2/Hashim Nadeem novels/Aik Mohabbat Aur Sahi.pdf"
output_folder = "/content/drive/MyDrive/books"

# Left as the cell's last expression so the returned path is displayed.
convert_to_scanned(
    input_pdf=input_pdf,
    output_folder=output_folder,
)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üìÑ Total pages: 187
üîÑ Converting to scanned style...


Processing Pages: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 187/187 [02:00<00:00,  1.55page/s]


‚úÖ Scanner-style PDF saved in Drive: /content/drive/MyDrive/books/Aik Mohabbat Aur Sahi_scanned.pdf


'/content/drive/MyDrive/books/Aik Mohabbat Aur Sahi_scanned.pdf'

With low quality and DPI 👇

In [None]:
# --- Install dependencies in Colab ---
!pip install pymupdf pillow fpdf tqdm

# --- Mount Google Drive ---
from google.colab import drive
drive.mount('/content/drive')

import fitz  # PyMuPDF
from PIL import Image, ImageEnhance
from fpdf import FPDF
import os
from tqdm import tqdm

def convert_to_scanned(input_pdf, output_folder=None, dpi=120, quality=40,
                       contrast=1.3, brightness=1.1):
    """Rebuild a PDF from compressed grayscale page images (scan look, small file).

    Every page is rendered to a JPEG at ``dpi``, converted to grayscale,
    contrast- and brightness-adjusted, re-saved as an optimized JPEG at
    ``quality`` and placed on its own page of a new PDF named
    ``<input name>_scanned.pdf``.

    Args:
        input_pdf: Path of the PDF to convert.
        output_folder: Folder for the result. When ``None``, the folder of
            ``input_pdf`` is used (the current directory for a bare filename).
        dpi: Resolution passed to ``page.get_pixmap``.
        quality: JPEG quality used when each page image is re-saved.
        contrast: Factor passed to ``ImageEnhance.Contrast(...).enhance``.
        brightness: Factor passed to ``ImageEnhance.Brightness(...).enhance``.

    Returns:
        The path of the PDF that was written.

    Raises:
        FileNotFoundError: If ``input_pdf`` does not exist. This is checked
            first, so no output folder is created for a missing input.

    Note:
        Each image is placed at (0, 0) with size 210 x 297 (A4), regardless of
        the source page's own proportions.
    """
    import tempfile  # function-scope import: only this function needs it

    # Validate the input before touching the file system.
    if not os.path.exists(input_pdf):
        raise FileNotFoundError(f"‚ö†Ô∏è File not found: {input_pdf}")

    # dirname() is "" for a bare filename, and os.makedirs("") fails,
    # so fall back to the current directory.
    if output_folder is None:
        folder = os.path.dirname(input_pdf) or "."
    else:
        folder = output_folder
    os.makedirs(folder, exist_ok=True)

    base_name = os.path.splitext(os.path.basename(input_pdf))[0]
    output_pdf = os.path.join(folder, f"{base_name}_scanned.pdf")

    pdf = FPDF()

    # Scratch images go to a private temp directory instead of the output
    # folder, so an existing page_N.jpg there is never overwritten/deleted and
    # nothing is left behind if a page fails. The document is closed on exit.
    with fitz.open(input_pdf) as doc, tempfile.TemporaryDirectory() as tmp_dir:
        print(f"üìÑ Total pages: {len(doc)}")
        print("üîÑ Converting to scanned style with compression...")

        for page_num in tqdm(range(len(doc)), desc="Processing Pages", unit="page"):
            page = doc.load_page(page_num)

            # Render the page at the requested DPI. The name stays unique per
            # page (NOTE(review): fpdf appears to key images by file name).
            pix = page.get_pixmap(dpi=dpi)
            img_path = os.path.join(tmp_dir, f"page_{page_num+1}.jpg")
            pix.save(img_path)

            # Grayscale copy; the file handle is released before overwriting.
            with Image.open(img_path) as rendered:
                img = rendered.convert("L")
            img = ImageEnhance.Contrast(img).enhance(contrast)
            img = ImageEnhance.Brightness(img).enhance(brightness)

            # Save with stronger compression.
            img.save(img_path, "JPEG", quality=quality, optimize=True)

            # Add the image to the PDF (A4 size).
            pdf.add_page()
            pdf.image(img_path, 0, 0, 210, 297)

            os.remove(img_path)  # keep scratch usage to one page at a time

    pdf.output(output_pdf, "F")
    print(f"‚úÖ Optimized scanned PDF saved in Drive: {output_pdf}")
    return output_pdf


# ==== Run: low-DPI, strongly compressed conversion ====
# Source PDF on Drive and the Drive folder that receives the result.
input_pdf = "/content/drive/MyDrive/urdunovelbank2/Urdunovelbank2/Hashim Nadeem novels/Aik Mohabbat Aur Sahi.pdf"
output_folder = "/content/drive/MyDrive/books"

# Left as the cell's last expression so the returned path is displayed.
convert_to_scanned(
    input_pdf=input_pdf,
    output_folder=output_folder,
    dpi=120,
    quality=40,
)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üìÑ Total pages: 187
üîÑ Converting to scanned style with compression...


Processing Pages: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 187/187 [00:53<00:00,  3.49page/s]


‚úÖ Optimized scanned PDF saved in Drive: /content/drive/MyDrive/books/Aik Mohabbat Aur Sahi_scanned.pdf


'/content/drive/MyDrive/books/Aik Mohabbat Aur Sahi_scanned.pdf'

Remove text here 👇

In [21]:
import fitz  # PyMuPDF

def remove_url_text(input_pdf, output_pdf, target="Courtesy www.pdfbooksfree.pk"):
    """Redact every occurrence of ``target`` in a PDF and save a cleaned copy.

    Each page is searched for ``target``; every hit gets a redaction
    annotation with a white fill, and the page's redactions are then applied.
    The result is saved to ``output_pdf`` with garbage collection, deflate
    compression and content-stream cleaning enabled.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path of the cleaned PDF to write; must differ from
            ``input_pdf``.
        target: Text to search for and redact.

    Raises:
        ValueError: If ``target`` is empty, or if ``output_pdf`` refers to the
            same path as ``input_pdf``.
    """
    import os  # function-scope import: this cell only imports fitz

    # Fail fast on arguments that cannot produce a meaningful result.
    if not target:
        raise ValueError("target must be a non-empty string")
    if os.path.abspath(input_pdf) == os.path.abspath(output_pdf):
        # NOTE(review): PyMuPDF is expected to reject a non-incremental save
        # onto the open file anyway; checking here avoids wasted page work.
        raise ValueError("output_pdf must be different from input_pdf")

    # The context manager closes the document even if a page fails.
    with fitz.open(input_pdf) as doc:
        for page in doc:
            text_instances = page.search_for(target)
            for inst in text_instances:
                # Mark the matched rectangle for redaction with a white fill.
                page.add_redact_annot(inst, fill=(1, 1, 1))
            page.apply_redactions()

        doc.save(output_pdf, garbage=4, deflate=True, clean=True)

    print(f"‚úÖ Removed all '{target}' from PDF: {output_pdf}")


# ==== Run: strip the courtesy line ====
# Original PDF on Drive and the path for the cleaned copy next to it.
input_pdf = "/content/drive/MyDrive/urdunovelbank2/Urdunovelbank2/Hashim Nadeem novels/Aik Mohabbat Aur Sahi.pdf"
output_pdf = "/content/drive/MyDrive/urdunovelbank2/Urdunovelbank2/Hashim Nadeem novels/Aik Mohabbat Aur Sahi_clean.pdf"

remove_url_text(
    input_pdf=input_pdf,
    output_pdf=output_pdf,
)


‚úÖ Removed all 'Courtesy www.pdfbooksfree.pk' from PDF: /content/drive/MyDrive/urdunovelbank2/Urdunovelbank2/Hashim Nadeem novels/Aik Mohabbat Aur Sahi_clean.pdf


Compress PDF here 👇

In [20]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install Ghostscript (provides the `gs` command that compress_pdf runs)
!apt-get -y install ghostscript

import subprocess

def compress_pdf(input_path, output_path, quality="screen"):
    """Compress a PDF by re-writing it with Ghostscript's ``pdfwrite`` device.

    Args:
        input_path: Path of the PDF to compress.
        output_path: Path of the compressed PDF to write; must differ from
            ``input_path``.
        quality: Ghostscript ``-dPDFSETTINGS`` preset, one of ``"screen"``,
            ``"ebook"``, ``"printer"``, ``"prepress"`` or ``"default"``.

    Raises:
        ValueError: If ``quality`` is not one of the presets above, or if
            ``output_path`` refers to the same path as ``input_path``.
        FileNotFoundError: If ``input_path`` is not an existing file.
        subprocess.CalledProcessError: If ``gs`` exits with a non-zero status.
    """
    import os  # function-scope import: this cell only imports subprocess

    # Only known presets may be interpolated into the gs argument.
    allowed_qualities = ("screen", "ebook", "printer", "prepress", "default")
    if quality not in allowed_qualities:
        raise ValueError(
            f"quality must be one of {allowed_qualities}, got {quality!r}"
        )

    # Writing onto the file being read would destroy the source PDF.
    if os.path.abspath(input_path) == os.path.abspath(output_path):
        raise ValueError("output_path must be different from input_path")

    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"File not found: {input_path}")

    # Arguments are passed as a list (no shell), so spaces and parentheses in
    # the paths need no quoting.
    gs_command = [
        "gs",
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        f"-dPDFSETTINGS=/{quality}",
        "-dNOPAUSE",
        "-dQUIET",
        "-dBATCH",
        f"-sOutputFile={output_path}",
        input_path,
    ]
    # check=True turns a failing gs run into an exception.
    subprocess.run(gs_command, check=True)
    print(f"‚úÖ Compressed PDF save ho gaya:\n{output_path}")


# === Example run: compress the scanned PDF ===
# NOTE(review): this input sits in the "Hashim Nadeem novels" folder, while the
# conversion cells in this notebook write their *_scanned.pdf to
# /content/drive/MyDrive/books. Confirm the file exists at this path.
input_pdf = "/content/drive/MyDrive/urdunovelbank2/Urdunovelbank2/Hashim Nadeem novels/Aik Mohabbat Aur Sahi_scanned.pdf"
output_pdf = "/content/drive/MyDrive/urdunovelbank2/Urdunovelbank2/Hashim Nadeem novels/Aik Mohabbat Aur Sahi (Compressed).pdf"

# Quality: screen (smallest size), ebook (good quality),
# printer/prepress (higher quality, larger file).
compress_pdf(
    input_path=input_pdf,
    output_path=output_pdf,
    quality="printer",
)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ghostscript is already the newest version (9.55.0~dfsg1-0ubuntu5.13).
0 upgraded, 0 newly installed, 0 to remove and 38 not upgraded.
‚úÖ Compressed PDF save ho gaya:
/content/drive/MyDrive/urdunovelbank2/Urdunovelbank2/Hashim Nadeem novels/Aik Mohabbat Aur Sahi (Compressed).pdf


Ghostscript quality levels and their effect:

/screen → smallest file size (72 dpi images; best for on-screen reading)

/ebook → somewhat better quality (150 dpi); medium file size

/printer → print quality (300 dpi); large file size

/prepress → high-end print (300+ dpi, color preserved); largest file size