In [19]:
import uuid
from pdf2image import convert_from_path
import pytesseract as tess
from pathlib import Path
from PIL import Image
import tempfile
import shutil
import fitz

class PDFDrawing:
    """
    First-page view of a PDF drawing, used to classify its layout.

    On construction the source PDF is copied into the system temporary
    directory, the copy is opened with ``fitz`` and its first page is
    rendered to a PNG that Tesseract can inspect.

    Call :meth:`close` (or use the instance as a context manager) when done
    to release the document handle and delete the temporary files.
    """

    def __init__(self, path):
        """
        Create the temporary working copies for the PDF at *path*.

        If opening or rendering the copy fails, whatever was already
        created is cleaned up and the original exception is re-raised.
        """
        self.path: str = path
        self.temp_folder: Path = Path(tempfile.gettempdir())
        self.temp_file_path = self.__copy_file_to_temp()
        try:
            self.doc: fitz.Document = fitz.open(self.temp_file_path)  # type: ignore
            self.temp_image_path = self.__convert_to_png()
        except Exception:
            # Do not leave the document handle or the temp copy behind.
            self.close()
            raise

    def __enter__(self) -> "PDFDrawing":
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Release resources on leaving the ``with`` block."""
        self.close()

    def close(self) -> None:
        """
        Close the document and delete the temporary PDF and PNG.

        Safe to call more than once. The page-size and orientation methods
        must not be used after closing.
        """
        if getattr(self, "_closed", False):
            return
        self._closed = True

        # The handle is closed first so the file can be removed on platforms
        # that refuse to delete open files.
        doc = getattr(self, "doc", None)
        if doc is not None:
            doc.close()

        for attribute in ("temp_image_path", "temp_file_path"):
            leftover = getattr(self, attribute, None)
            if leftover is None:
                continue
            try:
                Path(leftover).unlink()
            except FileNotFoundError:
                pass

    def __copy_file_to_temp(self) -> Path:
        """Copy the source PDF to ``<temp_folder>/<random>.pdf`` and return that path."""
        output_path = self.temp_folder / f"{self.__generate_random_filename()}.pdf"
        shutil.copyfile(src=self.path, dst=output_path)
        return output_path

    def __generate_random_filename(self) -> str:
        """
        Return a random, extension-less file stem.

        Callers append the extension they need; a suffix baked in here would
        produce names such as ``<uuid>.png.pdf``.
        """
        return str(uuid.uuid4())

    def __convert_to_png(self) -> Path:
        """Render the first page of the temp PDF at 500 dpi to a PNG and return its path."""
        # Only page 1 is saved, so only page 1 is rendered.
        images = convert_from_path(
            pdf_path=self.temp_file_path,
            dpi=500,
            first_page=1,
            last_page=1,
        )
        stem = Path(self.temp_file_path).stem
        output_path = self.temp_folder / f"{stem}.png"
        images[0].save(fp=output_path, format="PNG")
        return output_path

    def get_width(self) -> float:
        """Return the width of the first page's rectangle."""
        return self.doc[0].rect.width

    def get_height(self) -> float:
        """Return the height of the first page's rectangle."""
        return self.doc[0].rect.height

    def get_page_orientation(self) -> str:
        """
        Return ``"vertical"`` when the first page is taller than wide,
        otherwise ``"horizontal"`` (square pages count as horizontal).

        Raises:
            AssertionError: if the page width or height is not positive.
        """
        width = self.get_width()
        height = self.get_height()

        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type is unchanged.
        if not width > 0:
            raise AssertionError("Width is not greater than 0")
        if not height > 0:
            raise AssertionError("Height is not greater than 0")

        return "vertical" if width < height else "horizontal"

    @staticmethod
    def _parse_osd_rotation(osd_text: str) -> int:
        """
        Extract the integer value of the ``Rotate:`` line from Tesseract OSD text.

        Raises:
            ValueError: if no ``Rotate`` line is present or its value is not
                an integer.
        """
        for line in osd_text.splitlines():
            key, separator, value = line.partition(":")
            if separator and key.strip() == "Rotate":
                return int(value.strip())
        raise ValueError("Tesseract OSD output has no 'Rotate' field")

    def get_text_orientation(self) -> int:
        """
        Return the ``Rotate`` value Tesseract's OSD reports for the page image.

        NOTE: this is the OSD "Rotate" field (what the previous positional
        lookup ``split()[8]`` resolved to), not "Orientation in degrees".
        """
        with Image.open(self.temp_image_path) as page_image:
            osd_text = tess.image_to_osd(page_image)
        return self._parse_osd_rotation(osd_text)

    def get_plan_type(self) -> str:
        """
        Classify the drawing from its page orientation and text orientation.

        Only vertical pages whose text orientation is 0 or 90 are supported.

        Raises:
            Exception: ("Plan type not allowed") for any other combination.
        """
        if self.get_page_orientation() != "vertical":
            raise Exception("Plan type not allowed")

        # OSD detection is expensive, so it is run once and the result reused.
        text_orientation = self.get_text_orientation()
        if text_orientation == 0:
            return "vertical plan and horizontal text"
        if text_orientation == 90:
            return "vertical plan and vertical text"
        raise Exception("Plan type not allowed")
            

In [20]:
# Sample drawing that the following cells inspect.
path = "./inputs/114319E.PDF"
pdf_drawing = PDFDrawing(path)

In [18]:
# Display the first page's (width, height) as a tuple.
pdf_drawing.get_width(), pdf_drawing.get_height()

(612.0, 792.0)

In [13]:
# Display whether the page is "vertical" (width < height) or "horizontal".
pdf_drawing.get_page_orientation()

'vertical'

In [21]:
# Display the integer text-orientation value read from Tesseract's OSD output.
pdf_drawing.get_text_orientation()

90

In [15]:
# Display the plan classification; raises if the combination is not allowed.
pdf_drawing.get_plan_type()

'vertical plan and vertical text'