In [None]:
import fitz  # PyMuPDF
import os
import io
from PIL import Image
import numpy as np
from skimage.metrics import structural_similarity as ssim
import matplotlib.pyplot as plt

def read_file_from_local(path):
    """
    Read a local file and return its raw contents.

    Args:
        path (str): Path of the file to read

    Returns:
        bytes: The file contents, or b"" if the file could not be read
            (the error is printed rather than raised)
    """
    try:
        with open(path, 'rb') as f:
            return f.read()
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error reading file: {e}")
        # The file is read in binary mode, so the failure value must be
        # bytes as well; returning a str here would give callers two
        # different types to handle.
        return b""

def compute_ssim(img1, img2):
    """
    Measure how structurally similar two images are using SSIM.

    Thin wrapper around ``ssim(..., full=True)``.

    Args:
        img1 (numpy array): First image
        img2 (numpy array): Second image

    Returns:
        float: Mean SSIM score over the image (1.0 for identical images)
        numpy array: The full SSIM image produced alongside the score
    """
    # full=True makes ssim return the SSIM image in addition to the mean score.
    mean_score, ssim_image = ssim(img1, img2, full=True)
    return mean_score, ssim_image

def extract_images_from_pdf(pdf_path):
    """
    Render each page of a PDF to a PIL image.

    Args:
        pdf_path (str): Path to the PDF file

    Returns:
        list: One PIL image per rendered page, in page order. If the
            document cannot be opened or a page fails to render, the error
            is printed and the images collected so far are returned
            (possibly an empty list); no exception is raised.
    """
    images = []
    try:
        # The context manager closes the document even when rendering a
        # page raises, so the file handle is never leaked.
        with fitz.open(pdf_path) as pdf_document:
            for page in pdf_document:
                pix = page.get_pixmap()
                # NOTE(review): frombytes("RGB", ...) needs 3 bytes per pixel;
                # this relies on get_pixmap()'s defaults producing RGB
                # samples without an alpha channel.
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                images.append(img)
    except Exception as e:
        # Deliberately best-effort: report the problem and return what we have.
        print(f"Error extracting images from PDF: {e}")
    return images

def compare_pdfs(pdf1_path, pdf2_path, threshold=0.99):
    """
    Compare two PDFs visually, page by page, using SSIM.

    Each page is rendered to an image, converted to grayscale and scored
    against the page at the same position in the other document.

    Args:
        pdf1_path (str): Path to the first PDF
        pdf2_path (str): Path to the second PDF
        threshold (float): Minimum SSIM score a page pair must reach to be
            considered a match (default 0.99)

    Returns:
        bool: True if both PDFs have the same number of pages and every
            page pair scores at least ``threshold``; False otherwise,
            including when no pages could be extracted or an error occurs
    """
    try:
        images1 = extract_images_from_pdf(pdf1_path)
        images2 = extract_images_from_pdf(pdf2_path)

        if len(images1) != len(images2):
            print("PDFs have different number of pages.")
            return False

        # Page extraction reports failures by returning fewer (or no) images.
        # Two empty results must not be reported as "identical": nothing
        # was actually compared.
        if not images1:
            print("No pages could be extracted from either PDF.")
            return False

        for img1, img2 in zip(images1, images2):
            gray1 = np.array(img1.convert('L'))
            gray2 = np.array(img2.convert('L'))

            # SSIM is only defined for equally sized images; differing page
            # sizes are a plain mismatch rather than an error.
            if gray1.shape != gray2.shape:
                print("PDFs have pages with different dimensions.")
                return False

            ssim_score, _ = compute_ssim(gray1, gray2)
            if ssim_score < threshold:
                return False

        return True
    except Exception as e:
        print(f"Error comparing PDFs: {e}")
        return False

# Example usage: point these at two PDFs on disk, compare them and print
# the verdict.
pdf1_path = r"C:\Users\acer\Downloads\document1.pdf"
pdf2_path = r"C:\Users\acer\Downloads\document2.pdf"

are_identical = compare_pdfs(pdf1_path=pdf1_path, pdf2_path=pdf2_path)
print(f"PDFs are identical: {are_identical}")