From 5c9c0b19d26ae391aea0e69c2c819261cc04460c Mon Sep 17 00:00:00 2001 From: Andrew Baumann Date: Fri, 20 Aug 2021 17:22:41 -0700 Subject: [PATCH] finish annotating layout --- pdfminer/layout.py | 69 ++++++++++++++++++++++++++----------------- pdfminer/pdfinterp.py | 14 ++++++--- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 634ba43a..7f34f160 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -5,8 +5,9 @@ from .utils import INF from .utils import Matrix -from .utils import Rect from .utils import Plane +from .utils import Point +from .utils import Rect from .utils import apply_matrix_pt from .utils import bbox2str from .utils import fsplit @@ -15,6 +16,7 @@ from .utils import uniq from .pdfcolor import PDFColorSpace from .pdftypes import PDFStream +from .pdfinterp import Color from .pdfinterp import PDFGraphicState from .pdffont import PDFFont @@ -66,13 +68,13 @@ class LAParams: """ def __init__(self, - line_overlap=0.5, - char_margin=2.0, - line_margin=0.5, - word_margin=0.1, - boxes_flow=0.5, - detect_vertical=False, - all_texts=False): + line_overlap: float = 0.5, + char_margin: float = 2.0, + line_margin: float = 0.5, + word_margin: float = 0.1, + boxes_flow: float = 0.5, + detect_vertical: bool = False, + all_texts: bool = False): self.line_overlap = line_overlap self.char_margin = char_margin self.line_margin = line_margin @@ -84,7 +86,7 @@ def __init__(self, self._validate() return - def _validate(self): + def _validate(self) -> None: if self.boxes_flow is not None: boxes_flow_err_msg = ("LAParam boxes_flow should be None, or a " "number between -1 and +1") @@ -200,8 +202,10 @@ def voverlap(self, obj: "LTComponent") -> float: class LTCurve(LTComponent): """A generic Bezier curve""" - def __init__(self, linewidth, pts, stroke=False, fill=False, evenodd=False, - stroking_color=None, non_stroking_color=None): + def __init__(self, linewidth: float, pts: List[Point], + stroke: bool = False, fill: bool = False, + evenodd: bool = False, stroking_color: Optional[Color] = None, + non_stroking_color: Optional[Color] = None): LTComponent.__init__(self, get_bound(pts)) self.pts = pts self.linewidth = linewidth @@ -212,7 +216,7 @@ def __init__(self, linewidth, pts, stroke=False, fill=False, evenodd=False, self.non_stroking_color = non_stroking_color return - def get_pts(self): + def get_pts(self) -> str: return ','.join('%.3f,%.3f' % p for p in self.pts) @@ -222,8 +226,10 @@ class LTLine(LTCurve): Could be used for separating text or figures. """ - def __init__(self, linewidth, p0, p1, stroke=False, fill=False, - evenodd=False, stroking_color=None, non_stroking_color=None): + def __init__(self, linewidth: float, p0: Point, p1: Point, + stroke: bool = False, fill: bool = False, + evenodd: bool = False, stroking_color: Optional[Color] = None, + non_stroking_color: Optional[Color] = None): LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd, stroking_color, non_stroking_color) return @@ -235,8 +241,10 @@ class LTRect(LTCurve): Could be used for framing another pictures or figures. """ - def __init__(self, linewidth, bbox, stroke=False, fill=False, - evenodd=False, stroking_color=None, non_stroking_color=None): + def __init__(self, linewidth: float, bbox: Rect, + stroke: bool = False, fill: bool = False, + evenodd: bool = False, stroking_color: Optional[Color] = None, + non_stroking_color: Optional[Color] = None): (x0, y0, x1, y1) = bbox LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke, @@ -405,7 +413,7 @@ class LTTextLine(LTTextContainer): the text's writing mode. """ - def __init__(self, word_margin): + def __init__(self, word_margin: float): LTTextContainer.__init__(self) self.word_margin = word_margin return @@ -458,26 +466,30 @@ def find_neighbors(self, plane: Plane, ratio: float self._is_right_aligned_with(obj, tolerance=d) or self._is_centrally_aligned_with(obj, tolerance=d)))] - def _is_left_aligned_with(self, other, tolerance=0): + def _is_left_aligned_with(self, other: LTComponent, tolerance: float = 0 + ) -> bool: """ Whether the left-hand edge of `other` is within `tolerance`. """ return abs(other.x0 - self.x0) <= tolerance - def _is_right_aligned_with(self, other, tolerance=0): + def _is_right_aligned_with(self, other: LTComponent, tolerance: float = 0 + ) -> bool: """ Whether the right-hand edge of `other` is within `tolerance`. """ return abs(other.x1 - self.x1) <= tolerance - def _is_centrally_aligned_with(self, other, tolerance=0): + def _is_centrally_aligned_with(self, other: LTComponent, + tolerance: float = 0) -> bool: """ Whether the horizontal center of `other` is within `tolerance`. """ return abs( (other.x0 + other.x1) / 2 - (self.x0 + self.x1) / 2) <= tolerance - def _is_same_height_as(self, other, tolerance): + def _is_same_height_as(self, other: LTComponent, tolerance: float = 0 + ) -> bool: return abs(other.height - self.height) <= tolerance @@ -515,26 +527,29 @@ def find_neighbors(self, plane: Plane, ratio: float self._is_upper_aligned_with(obj, tolerance=d) or self._is_centrally_aligned_with(obj, tolerance=d)))] - def _is_lower_aligned_with(self, other, tolerance=0): + def _is_lower_aligned_with(self, other: LTComponent, tolerance: float = 0 + ) -> bool: """ Whether the lower edge of `other` is within `tolerance`. """ return abs(other.y0 - self.y0) <= tolerance - def _is_upper_aligned_with(self, other, tolerance=0): + def _is_upper_aligned_with(self, other: LTComponent, tolerance: float = 0 + ) -> bool: """ Whether the upper edge of `other` is within `tolerance`. """ return abs(other.y1 - self.y1) <= tolerance - def _is_centrally_aligned_with(self, other, tolerance=0): + def _is_centrally_aligned_with(self, other: LTComponent, + tolerance: float = 0) -> bool: """ Whether the vertical center of `other` is within `tolerance`. """ return abs( (other.y0 + other.y1) / 2 - (self.y0 + self.y1) / 2) <= tolerance - def _is_same_width_as(self, other, tolerance): + def _is_same_width_as(self, other: LTComponent, tolerance: float) -> bool: return abs(other.width - self.width) <= tolerance @@ -578,7 +593,7 @@ def get_writing_mode(self) -> str: class LTTextGroup(LTTextContainer): - def __init__(self, objs): + def __init__(self, objs: Iterable[LTContainerElement]): LTTextContainer.__init__(self) self.extend(objs) return @@ -874,7 +889,7 @@ class LTPage(LTLayoutContainer): LTCurve and LTLine. """ - def __init__(self, pageid, bbox: Rect, rotate=0): + def __init__(self, pageid: Any, bbox: Rect, rotate: float = 0): LTLayoutContainer.__init__(self, bbox) self.pageid = pageid self.rotate = rotate diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index f4e7cb20..2015ccd6 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -1,6 +1,6 @@ import re import logging -from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union from io import BytesIO from .cmapdb import CMapDB from .cmapdb import CMap @@ -102,10 +102,16 @@ def reset(self) -> None: return +Color = Union[ + float, # Greyscale + Tuple[float, float, float], # R, G, B + Tuple[float, float, float, float]] # C, M, Y, K + + class PDFGraphicState: def __init__(self): - self.linewidth: int = 0 + self.linewidth: float = 0 self.linecap = None self.linejoin = None self.miterlimit = None @@ -114,10 +120,10 @@ def __init__(self): self.flatness = None # stroking color - self.scolor = None + self.scolor: Optional[Color] = None # non stroking color - self.ncolor = None + self.ncolor: Optional[Color] = None return def copy(self) -> "PDFGraphicState":