In [77]:
from result import Ok, Err, Result
from pdf2image import convert_from_path
import pytesseract as tess
from pathlib import Path
from PIL import Image
import tempfile
import shutil
import fitz
import uuid
import os

# System temp directory; the functions in this cell write the working PDF copies
# and the rendered PNGs here.
TEMP_FOLDER: Path = Path(tempfile.gettempdir())

def generate_random_filename() -> str:
    """Return a freshly generated random UUID as text, without any file extension."""
    return str(uuid.uuid4())

def copy_file_to_temp(file_path: Path) -> Path:
    """
    Copy a file into the temp folder under a random ``<uuid>.pdf`` name.

    Args:
        file_path: File to copy; its contents are copied with ``shutil.copyfile``.

    Returns:
        Path of the newly written copy inside ``TEMP_FOLDER``.
    """
    temp_copy = TEMP_FOLDER / (generate_random_filename() + ".pdf")
    shutil.copyfile(src=file_path, dst=temp_copy)
    return temp_copy
    
def convert_to_png (temp_file_path: Path, dpi: int = 500) -> Path:
    """
    Render the first page of a PDF to a PNG stored in the temp folder.

    Only page 1 is rasterised: the result of the conversion is indexed with
    ``[0]`` and nothing else is used, so rendering the remaining pages at high
    resolution would only cost time and memory.

    Args:
        temp_file_path: PDF to render.
        dpi: Rendering resolution passed to ``convert_from_path``
            (defaults to 500, the value this notebook has always used).

    Returns:
        Path of the written PNG: ``TEMP_FOLDER/<stem of temp_file_path>.png``.
    """
    first_page_images = convert_from_path(
        pdf_path=temp_file_path,
        dpi=dpi,
        first_page=1,
        last_page=1,
    )
    output_path = TEMP_FOLDER / f"{Path(temp_file_path).stem}.png"
    first_page_images[0].save(fp=output_path, format="PNG")
    return output_path

def get_width (doc: fitz.Document) -> float:
    """
    Return the width of the document's first page (``doc[0].rect.width``).

    The value is a float (the sample run in this notebook shows ``612.0``),
    which is why the return annotation is ``float`` rather than ``int``.
    """
    return doc[0].rect.width

def get_height (doc: fitz.Document) -> float:
    """
    Return the height of the document's first page (``doc[0].rect.height``).

    The value is a float (the sample run in this notebook shows ``792.0``),
    which is why the return annotation is ``float`` rather than ``int``.
    """
    return doc[0].rect.height

def get_page_orientation(doc: fitz.Document) -> Result[str,str]:
    """
    Classify the first page as "vertical" or "horizontal" from its dimensions.

    Returns:
        ``Ok("vertical")`` when the width is smaller than the height,
        ``Ok("horizontal")`` when it is larger, and an ``Err`` with an
        explanatory message when a dimension is not positive or when neither
        comparison holds (for example a square page).
    """
    width = get_width(doc)
    if width <= 0:
        return Err("Width is not greater than 0")

    height = get_height(doc)
    if height <= 0:
        return Err("Height is not greater than 0")

    if width < height:
        return Ok("vertical")
    if width > height:
        return Ok("horizontal")

    # Neither side is strictly larger than the other.
    return Err("Page Dimensions are not valid")
    
def get_text_orientation(temp_image_path: Path) -> int:
    """
    Run Tesseract orientation/script detection (OSD) on an image.

    The image is opened in a ``with`` block so its file handle is released as
    soon as detection finishes. The OSD result is requested as a dict and the
    ``rotate`` entry is read by name instead of picking the ninth
    whitespace-separated token out of the text report.

    Args:
        temp_image_path: Image file to analyse.

    Returns:
        The ``rotate`` value reported by Tesseract, as an int.
    """
    with Image.open(temp_image_path) as page_image:
        osd = tess.image_to_osd(page_image, output_type=tess.Output.DICT)
    return int(osd["rotate"])

def get_plan_type(path: Path) -> Result[str,str]:
    """
    Classify a PDF plan from its page orientation and its text orientation.

    The PDF is copied to the temp folder and every check runs on that copy.
    Page orientation is evaluated first; the high-resolution render and the
    OCR-based text orientation check only run for vertical pages, because no
    other page orientation can produce an ``Ok`` result. The document handle
    is closed and the temporary PDF/PNG are removed before returning.

    Args:
        path: Source PDF file.

    Returns:
        ``Ok("vertical plan and horizontal text")`` for a vertical page with
        text orientation 0, ``Ok("vertical plan and vertical text")`` for a
        vertical page with text orientation 90, otherwise an ``Err`` whose
        message explains why the plan was rejected.
    """
    temp_file_path = copy_file_to_temp(file_path=path)
    temp_image_path = None
    try:
        doc = fitz.open(temp_file_path)
        try:
            result_page_orientation = get_page_orientation(doc=doc)
        finally:
            # Release the handle before the temp copy is deleted below.
            doc.close()

        if not result_page_orientation.is_ok():
            # Already an Err carrying the dimension problem; pass it through.
            return result_page_orientation

        page_orientation = result_page_orientation.unwrap()
        if page_orientation == "horizontal":
            return Err("Plan type is horizontal")
        if page_orientation != "vertical":
            return Err("Plan type not allowed")

        temp_image_path = convert_to_png(temp_file_path=temp_file_path)
        text_orientation = get_text_orientation(temp_image_path=temp_image_path)

        if text_orientation == 0:
            return Ok("vertical plan and horizontal text")
        if text_orientation == 90:
            return Ok("vertical plan and vertical text")
        return Err(f"Plan type is vertical, but text orientation {text_orientation} is not allowed")
    finally:
        # Best-effort cleanup: a file that is already gone or still locked
        # must not hide the classification result or the original exception.
        for leftover in (temp_image_path, temp_file_path):
            if leftover is None:
                continue
            try:
                leftover.unlink()
            except OSError:
                pass

def get_file_paths_from_folder_path(folder_path: Path) -> list:
    """
    List the PDF files directly inside a folder.

    The extension check is case-insensitive (``.pdf``, ``.PDF``, ``.Pdf``, ...),
    sub-directories are skipped even if their name ends in ".pdf", and the
    result is sorted so repeated runs process the files in the same order.

    Args:
        folder_path: Folder to scan (not recursive); a ``Path`` or a string.

    Returns:
        Sorted list of ``Path`` objects, each being ``folder_path`` joined
        with the file name.
    """
    folder = Path(folder_path)
    return sorted(
        entry
        for entry in folder.iterdir()
        if entry.is_file() and entry.name.lower().endswith(".pdf")
    )


In [78]:
# Classify every PDF found in ./inputs and print one "<file stem>: <outcome>" line per file.
pdf_file_path = get_file_paths_from_folder_path(Path("./inputs"))

for path in pdf_file_path:
    result = get_plan_type(path=path)
    # The success and failure branches printed exactly the same thing, so a
    # single print covers both outcomes.
    print(f"{path.stem}: {result.value}")

114319A: Plan type is horizontal
114319B: Plan type is vertical, but text orientation is not allowed
114319B_rotated: Plan type is horizontal
114319C: vertical plan and vertical text
114319D: vertical plan and vertical text
114319E: vertical plan and vertical text


In [72]:
# NOTE(review): no visible cell assigns `temp_image_path` at notebook top level (it only
# appears as a local variable / parameter inside functions), so this inspection cell
# presumably relies on stale kernel state — confirm, then define it explicitly or drop the cell.
temp_image_path

WindowsPath('C:/Users/joel_/AppData/Local/Temp/7b89207c-f897-4fa8-8b81-d80762e77f7f.png')

In [19]:
import uuid
from pdf2image import convert_from_path
import pytesseract as tess
from pathlib import Path
from PIL import Image
import tempfile
import shutil
import fitz

class PDFDrawing:
    """
    First page of a PDF drawing, prepared for plan-type classification.

    On construction the source PDF is copied into the system temp folder, the
    copy is opened with ``fitz`` and its first page is rendered to a PNG stored
    next to the copy. Call :meth:`close` (or use the instance in a ``with``
    block) to release the document and delete both temporary files.
    """
    def __init__ (self, path):
        """
        Args:
            path: Location of the source PDF. It is only used as the source of
                the temporary copy.
        """
        self.path : str = path
        self.temp_folder: Path = Path(tempfile.gettempdir())
        # Everything below operates on the temporary copy, not on `path`.
        self.temp_file_path = self.__copy_file_to_temp()
        self.doc : fitz.Document = fitz.open(self.temp_file_path) # type: ignore
        self.temp_image_path = self.__convert_to_png()

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the resources when the ``with`` block ends."""
        self.close()

    def close(self) -> None:
        """
        Close the document and delete the temporary PDF and PNG.

        Safe to call more than once. File removal is best effort: a file that
        is already gone or still locked is skipped silently. After closing,
        the methods that read the document or the rendered image can no
        longer be used.
        """
        if not self.doc.is_closed:
            self.doc.close()
        for leftover in (self.temp_image_path, self.temp_file_path):
            try:
                leftover.unlink()
            except OSError:
                pass

    def __copy_file_to_temp(self) -> Path:
        """Copy the source PDF to ``<temp folder>/<uuid>.pdf`` and return that path."""
        output_path = self.temp_folder / f"{self.__generate_random_filename()}.pdf"
        shutil.copyfile(src=self.path, dst=output_path)
        return output_path

    def __generate_random_filename(self) -> str:
        """
        Return a random UUID string without extension.

        Callers append their own extension; returning a name that already
        ended in ".png" produced files such as ``<uuid>.png.pdf``.
        """
        return str(uuid.uuid4())

    def __convert_to_png (self) -> Path:
        """
        Render the first page of the temporary PDF to ``<temp folder>/<stem>.png``.

        Only page 1 is rasterised because only the first image is saved.
        """
        first_page_images = convert_from_path(
            pdf_path=self.temp_file_path,
            dpi=500,
            first_page=1,
            last_page=1,
        )
        output_path = self.temp_folder / f"{Path(self.temp_file_path).stem}.png"
        first_page_images[0].save(fp=output_path, format="PNG")
        return output_path

    def get_width (self) -> float:
        """Return the first page's width (``doc[0].rect.width``), a float."""
        return self.doc[0].rect.width

    def get_height (self) -> float:
        """Return the first page's height (``doc[0].rect.height``), a float."""
        return self.doc[0].rect.height

    def get_page_orientation(self) -> str:
        """
        Return "vertical" when the first page is taller than wide, else "horizontal".

        A square page is therefore reported as "horizontal".

        Raises:
            AssertionError: If the width or the height is not greater than 0.
        """
        width, height = self.get_width(), self.get_height()
        assert width > 0, "Width is not greater than 0"
        assert height > 0, "Height is not greater than 0"

        return "vertical" if width < height else "horizontal"

    def get_text_orientation(self) -> int:
        """
        Run Tesseract orientation/script detection (OSD) on the rendered page.

        The image is opened in a ``with`` block so its file handle is released
        once detection finishes, and the ``rotate`` entry is read by name from
        the dict-style OSD result.

        Returns:
            The ``rotate`` value reported by Tesseract, as an int.
        """
        with Image.open(self.temp_image_path) as page_image:
            osd = tess.image_to_osd(page_image, output_type=tess.Output.DICT)
        return int(osd["rotate"])

    def get_plan_type(self) -> str:
        """
        Combine page orientation and text orientation into a plan-type label.

        Returns:
            "vertical plan and horizontal text" for a vertical page with text
            orientation 0, or "vertical plan and vertical text" for a vertical
            page with text orientation 90.

        Raises:
            Exception: "Plan type not allowed" for every other combination.
        """
        if self.get_page_orientation() != "vertical":
            raise Exception("Plan type not allowed")

        # Detect once: every call to get_text_orientation runs a full OCR pass.
        text_orientation = self.get_text_orientation()
        if text_orientation == 0:
            return "vertical plan and horizontal text"
        if text_orientation == 90:
            return "vertical plan and vertical text"
        raise Exception("Plan type not allowed")
            

In [20]:
# Build a PDFDrawing for one sample file; the constructor copies the PDF to the
# temp folder, opens the copy and renders its first page to a PNG.
path = "./inputs/114319E.PDF"

pdf_drawing = PDFDrawing(path=path)

In [18]:
# Show the first page's (width, height) as reported by its rect.
pdf_drawing.get_width(), pdf_drawing.get_height()

(612.0, 792.0)

In [13]:
# Page orientation derived from comparing the first page's width and height.
pdf_drawing.get_page_orientation()

'vertical'

In [21]:
# Run Tesseract OSD on the rendered first-page PNG and show the parsed orientation value.
pdf_drawing.get_text_orientation()

90

In [15]:
# Plan-type label built from page and text orientation; raises for disallowed combinations.
pdf_drawing.get_plan_type()

'vertical plan and vertical text'