In [1]:
!pip install pillow
!pip install python-docx
!pip install pymupdf
!pip install spire Spire.Doc

Collecting python-docx
  Using cached python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Using cached python_docx-1.1.2-py3-none-any.whl (244 kB)
Installing collected packages: python-docx
Successfully installed python-docx-1.1.2
Collecting pymupdf
  Downloading PyMuPDF-1.24.9-cp310-none-win_amd64.whl.metadata (3.4 kB)
Collecting PyMuPDFb==1.24.9 (from pymupdf)
  Downloading PyMuPDFb-1.24.9-py3-none-win_amd64.whl.metadata (1.4 kB)
Downloading PyMuPDF-1.24.9-cp310-none-win_amd64.whl (3.2 MB)
   ---------------------------------------- 0.0/3.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/3.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/3.2 MB 165.2 kB/s eta 0:00:20
   ---------------------------------------- 0.0/3.2 MB 187.9 kB/s eta 0:00:17
   ---------------------------------------- 0.0/3.2 MB 187.9 kB/s eta 0:00:17
    --------------------------------------- 0.0/3.2 MB 164.3 kB/s eta 0:00:20
    --------------------------------------- 0.0/3.2 

In [4]:
import os
import fitz  # PyMuPDF
from PIL import Image, ImageOps, ImageChops

def create_output_folder():
    """
    Create an 'out' folder in the current directory if it doesn't already exist.

    Returns:
    - output_folder: The path to the created 'out' folder.
    """
    output_folder = os.path.join(os.getcwd(), r"C:\Users\Hami\Desktop\pdf-2-img-out")
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    return output_folder

def convert_image_to_jpeg(input_path, output_folder):
    """
    Convert an image file to JPEG format and save it to the output folder.

    Args:
    - input_path: The path to the input image file.
    - output_folder: The path to the output folder.
    """
    image = Image.open(input_path)
    image = image.convert('RGB')  # Convert the image to RGB format
    output_path = os.path.join(output_folder, f"{os.path.splitext(os.path.basename(input_path))[0]}.jpg")
    image.save(output_path, 'JPEG')  # Save the image as JPEG

def trim_whitespace(image):
    """
    Trim the white space around the image.

    Args:
    - image: The image to be trimmed.

    Returns:
    - trimmed_image: The trimmed image.
    """
    # Convert image to grayscale
    gray_image = ImageOps.grayscale(image)
    # Invert the image
    inverted_image = ImageChops.invert(gray_image)
    # Get bounding box of non-black areas
    bbox = inverted_image.getbbox()
    # Crop the image to the bounding box
    trimmed_image = image.crop(bbox)
    return trimmed_image

def convert_pdf_to_images(pdf_path, output_folder, zoom=2):
    """
    Convert each page of a PDF to a separate JPEG image, crop it to remove white space, and save it to the output folder.

    Args:
    - pdf_path: The path to the input PDF file.
    - output_folder: The path to the output folder.
    - zoom: The zoom factor for the PDF pages (default is 2).
    """
    try:
        pdf_document = fitz.open(pdf_path)
        name_with_extension = os.path.basename(pdf_path)
        name = os.path.splitext(name_with_extension)[0]

        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)  # Load the specified page
            matrix = fitz.Matrix(zoom, zoom)  # Create a transformation matrix for zooming
            pix = page.get_pixmap(matrix=matrix)  # Render the page to an image

            # Convert the pixmap to a PIL image
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

            # Trim the white space around the image
            trimmed_image = trim_whitespace(image)

            output_path = os.path.join(output_folder, f"{name}_page_{page_num + 1}.jpg")
            trimmed_image.save(output_path, 'JPEG')  # Save the image as JPEG

        print(f'Successfully converted {pdf_path} to images in {output_folder}')
    except Exception as e:
        print(f'Error converting {pdf_path}: {e}')
        print("\nTroubleshooting steps:")
        print("- Ensure the PDF file is not corrupted by opening it in a PDF viewer.")
        print("- Verify that the PDF file is accessible and not locked by another process.")
        print("- Check if other PDF files in the same directory are converted successfully.")

def convert_file(input_path):
    """
    Convert an input file to JPEG format based on its file extension.

    Args:
    - input_path: The path to the input file.
    """
    file_extension = os.path.splitext(input_path)[1].lower()
    output_folder = create_output_folder()

    if file_extension in ['.png', '.jpeg', '.jpg', '.bmp', '.gif']:
        convert_image_to_jpeg(input_path, output_folder)
    elif file_extension == '.pdf':
        convert_pdf_to_images(input_path, output_folder)
    else:
        print(f"File format {file_extension} is not supported.")

if __name__ == "__main__":
    input_files = [r"C:\Users\Hami\Desktop\check-ds\Scan_20240729 (2).pdf",r"C:\Users\Hami\Desktop\check-ds\Scan_20240729 (3).pdf",r"C:\Users\Hami\Desktop\check-ds\Scan_20240729 (4).pdf",r"C:\Users\Hami\Desktop\check-ds\Scan_20240729 (5).pdf",r"C:\Users\Hami\Desktop\check-ds\Scan_20240729.pdf"]
    for file in input_files:
        convert_file(file)


Successfully converted C:\Users\Hami\Desktop\check-ds\Scan_20240729 (2).pdf to images in C:\Users\Hami\Desktop\pdf-2-img-out
Successfully converted C:\Users\Hami\Desktop\check-ds\Scan_20240729 (3).pdf to images in C:\Users\Hami\Desktop\pdf-2-img-out
Successfully converted C:\Users\Hami\Desktop\check-ds\Scan_20240729 (4).pdf to images in C:\Users\Hami\Desktop\pdf-2-img-out
Successfully converted C:\Users\Hami\Desktop\check-ds\Scan_20240729 (5).pdf to images in C:\Users\Hami\Desktop\pdf-2-img-out
Successfully converted C:\Users\Hami\Desktop\check-ds\Scan_20240729.pdf to images in C:\Users\Hami\Desktop\pdf-2-img-out


In [10]:
import os
import re
import pytesseract
from PIL import Image, ExifTags

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def correct_image_orientation(image_path, output_folder):
    try:
        image = Image.open(image_path).convert("RGBA")

        if image.info.get('dpi', (0, 0))[0] == 0:  # If DPI is invalid, set it to a standard value
            image.info['dpi'] = (300, 300)

        try:
            for orientation in ExifTags.TAGS.keys():
                if ExifTags.TAGS[orientation] == 'Orientation':
                    break
            exif = dict(image._getexif().items())
            if exif[orientation] == 3:
                image = image.rotate(180, expand=True)
            elif exif[orientation] == 6:
                image = image.rotate(270, expand=True)
            elif exif[orientation] == 8:
                image = image.rotate(90, expand=True)
        except (AttributeError, KeyError, IndexError):
            pass

        osd = pytesseract.image_to_osd(image)
        rotation = int(re.search('(?<=Rotate: )\d+', osd).group(0))

        if rotation == 0:
            print(f"{image_path}: yes")
        else:
            print(f"{image_path}: no")
            if rotation == 90:
                image = image.rotate(270, expand=True)
            elif rotation == 180:
                image = image.rotate(180, expand=True)
            elif rotation == 270:
                image = image.rotate(90, expand=True)

        # Remove fully transparent areas
        bbox = image.getbbox()
        cropped_image = image.crop(bbox)

        # Convert RGBA to RGB if needed
        if cropped_image.mode == 'RGBA':
            cropped_image = cropped_image.convert('RGB')

        corrected_image_path = os.path.join(output_folder, os.path.basename(image_path))
        cropped_image.save(corrected_image_path)

        return corrected_image_path
    except pytesseract.TesseractError as e:
        print(f"Error processing {image_path}: {e}")
        return None

def process_directory(directory):
    output_folder = os.path.join(os.getcwd(), r"C:\Users\Hami\Desktop\rotated_image")
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpeg', '.jpg', '.bmp', '.gif')):
            correct_image_orientation(file_path, output_folder)

if __name__ == "__main__":
    input_directory = r"C:\Users\Hami\Desktop\pdf-2-img-out"
    process_directory(input_directory)


C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_1.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_10.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_11.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_12.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_13.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_2.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_3.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_4.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_5.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_6.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_7.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_8.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (2)_page_9.jpg: no
C:\Users\Hami\Desktop\pdf-2-img-out\Scan_20240729 (3)_page_1.jpg: no
C:\Users\Hami\Desktop\pdf-2-im

In [8]:
!pip install --trusted-host https://mirror-pypi.runflare.com --index-url https://mirror-pypi.runflare.com/simple/ paddleocr paddlepaddle pillow numpy


Looking in indexes: https://mirror-pypi.runflare.com/simple/
Collecting paddlepaddle
  Downloading https://mirror-pypi.runflare.com/packages/39/5b/7aa9df3ad815dd594791f6daf3af2e19294da2628a0d91f80f22f4e8dd3d/paddlepaddle-2.6.1-cp39-cp39-win_amd64.whl.metadata
     - 0 bytes ? 0:00:00
     - 8.8 kB ? 0:00:00
Collecting httpx (from paddlepaddle)
  Downloading https://mirror-pypi.runflare.com/packages/41/7b/ddacf6dcebb42466abd03f368782142baa82e08fc0c1f8eaa05b4bae87d5/httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting astor (from paddlepaddle)
  Downloading https://mirror-pypi.runflare.com/packages/c3/88/97eef84f48fa04fbd6750e62dcceafba6c63c81b7ac1420856c8dcc0a3f9/astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting opt-einsum==3.3.0 (from paddlepaddle)
  Downloading https://mirror-pypi.runflare.com/packages/bc/19/404708a7e54ad2798907210462fd950c3442ea51acc8790f3da48d2bee8b/opt_einsum-3.3.0-py3-none-any.whl.metadata (6.5 kB)
Collecting protobuf<=3.20.2,>=3.1.0 (from paddle

In [2]:
import os
import numpy as np
import easyocr
from PIL import Image

def crop_image_based_on_text(image_path, output_folder, border=150):
    try:
        # Load image using PIL
        image = Image.open(image_path).convert("RGB")
        image_np = np.array(image)

        # Initialize EasyOCR reader
        reader = easyocr.Reader(['en', 'fa'], gpu=True)

        # Detect text regions
        results = reader.readtext(image_np, detail=1)

        # Get bounding box of all detected text regions
        if results:
            x_min = min([min(box[0][0], box[1][0], box[2][0], box[3][0]) for box, _, _ in results])
            y_min = min([min(box[0][1], box[1][1], box[2][1], box[3][1]) for box, _, _ in results])
            x_max = max([max(box[0][0], box[1][0], box[2][0], box[3][0]) for box, _, _ in results])
            y_max = max([max(box[0][1], box[1][1], box[2][1], box[3][1]) for box, _, _ in results])

            # Add border to the bounding box
            x_min = max(x_min - border, 0)
            y_min = max(y_min - border, 0)
            x_max = min(x_max + border, image.width)
            y_max = min(y_max + border, image.height)

            # Crop the image
            cropped_image = image.crop((x_min, y_min, x_max, y_max))

            # Save the cropped image without rotating
            output_path = os.path.join(output_folder, os.path.basename(image_path))
            cropped_image.save(output_path)
            print(f"Cropped image saved to {output_path}")
        else:
            print(f"No text found in {image_path}")

    except Exception as e:
        print(f"Error processing {image_path}: {e}")

def process_directory(directory, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpeg', '.jpg', '.bmp', '.gif')):
            crop_image_based_on_text(file_path, output_folder)

if __name__ == "__main__":
    input_dir = r"C:\Users\Hami\Desktop\rotated_image"
    output_dir = r"C:\Users\Hami\Desktop\final_image"

    process_directory(input_dir, output_dir)


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_1.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_10.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_11.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_12.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_13.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_2.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_3.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_4.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_5.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_6.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_7.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_8.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_9.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_1.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_10.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_11.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_12.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_13.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_14.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_2.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_3.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_4.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_5.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_6.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_7.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_8.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_9.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_1.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_10.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_11.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_12.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_13.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_14.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_15.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_16.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_17.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_18.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_19.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_2.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_20.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_21.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_22.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_23.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_24.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_25.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_3.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_4.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_5.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_6.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_7.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_8.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (4)_page_9.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_1.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_10.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_11.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_12.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_13.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_14.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_15.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_16.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_17.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_18.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_19.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_2.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_20.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_21.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_22.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_23.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_24.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_25.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_26.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_27.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_28.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_29.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_3.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_30.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_31.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_32.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_33.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_34.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_35.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_36.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_37.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_38.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_39.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_4.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_40.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_41.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_42.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_43.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_44.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_45.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_46.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_47.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_48.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_49.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_5.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_50.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_6.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_7.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_8.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729 (5)_page_9.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729_page_10.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729_page_2.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729_page_3.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729_page_4.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729_page_5.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729_page_6.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729_page_7.jpg


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729_page_8.jpg
Cropped image saved to C:\Users\Hami\Desktop\final_image\Scan_20240729_page_9.jpg


In [6]:
!pip install pytesseract

Collecting pytesseract
  Using cached pytesseract-0.3.10-py3-none-any.whl.metadata (11 kB)
Using cached pytesseract-0.3.10-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.10




In [7]:
import os
import re
import pytesseract
from PIL import Image, ExifTags

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def correct_image_orientation(image_path, output_folder):
    try:
        image = Image.open(image_path).convert("RGBA")

        if image.info.get('dpi', (0, 0))[0] == 0:  # If DPI is invalid, set it to a standard value
            image.info['dpi'] = (300, 300)

        try:
            for orientation in ExifTags.TAGS.keys():
                if ExifTags.TAGS[orientation] == 'Orientation':
                    break
            exif = dict(image._getexif().items())
            if exif[orientation] == 3:
                image = image.rotate(180, expand=True)
            elif exif[orientation] == 6:
                image = image.rotate(270, expand=True)
            elif exif[orientation] == 8:
                image = image.rotate(90, expand=True)
        except (AttributeError, KeyError, IndexError):
            pass

        osd = pytesseract.image_to_osd(image)
        rotation = int(re.search('(?<=Rotate: )\d+', osd).group(0))

        if rotation == 0:
            print(f"{image_path}: yes")
        else:
            print(f"{image_path}: no")
            if rotation == 90:
                image = image.rotate(270, expand=True)
            elif rotation == 180:
                image = image.rotate(180, expand=True)
            elif rotation == 270:
                image = image.rotate(90, expand=True)

        # Remove fully transparent areas
        bbox = image.getbbox()
        cropped_image = image.crop(bbox)

        # Convert RGBA to RGB if needed
        if cropped_image.mode == 'RGBA':
            cropped_image = cropped_image.convert('RGB')

        corrected_image_path = os.path.join(output_folder, os.path.basename(image_path))
        cropped_image.save(corrected_image_path)

        return corrected_image_path
    except pytesseract.TesseractError as e:
        print(f"Error processing {image_path}: {e}")
        return None

def process_directory(directory):
    output_folder = os.path.join(os.getcwd(), r"C:\Users\Hami\Desktop\frotated_image")
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpeg', '.jpg', '.bmp', '.gif')):
            correct_image_orientation(file_path, output_folder)

if __name__ == "__main__":
    input_directory = r"C:\Users\Hami\Desktop\final_image"
    process_directory(input_directory)


C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_1.jpg: no
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_10.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_11.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_12.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_13.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_2.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_3.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_4.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_5.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_6.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_7.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_8.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (2)_page_9.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_20240729 (3)_page_1.jpg: yes
C:\Users\Hami\Desktop\final_image\Scan_202407