Skip to content

Commit

Permalink
finish annotating layout
Browse files Browse the repository at this point in the history
  • Loading branch information
0xabu committed Aug 21, 2021
1 parent 0e6871c commit 5c9c0b1
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 31 deletions.
69 changes: 42 additions & 27 deletions pdfminer/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@

from .utils import INF
from .utils import Matrix
from .utils import Rect
from .utils import Plane
from .utils import Point
from .utils import Rect
from .utils import apply_matrix_pt
from .utils import bbox2str
from .utils import fsplit
Expand All @@ -15,6 +16,7 @@
from .utils import uniq
from .pdfcolor import PDFColorSpace
from .pdftypes import PDFStream
from .pdfinterp import Color
from .pdfinterp import PDFGraphicState
from .pdffont import PDFFont

Expand Down Expand Up @@ -66,13 +68,13 @@ class LAParams:
"""

def __init__(self,
line_overlap=0.5,
char_margin=2.0,
line_margin=0.5,
word_margin=0.1,
boxes_flow=0.5,
detect_vertical=False,
all_texts=False):
line_overlap: float = 0.5,
char_margin: float = 2.0,
line_margin: float = 0.5,
word_margin: float = 0.1,
boxes_flow: float = 0.5,
detect_vertical: bool = False,
all_texts: bool = False):
self.line_overlap = line_overlap
self.char_margin = char_margin
self.line_margin = line_margin
Expand All @@ -84,7 +86,7 @@ def __init__(self,
self._validate()
return

def _validate(self):
def _validate(self) -> None:
if self.boxes_flow is not None:
boxes_flow_err_msg = ("LAParam boxes_flow should be None, or a "
"number between -1 and +1")
Expand Down Expand Up @@ -200,8 +202,10 @@ def voverlap(self, obj: "LTComponent") -> float:
class LTCurve(LTComponent):
"""A generic Bezier curve"""

def __init__(self, linewidth, pts, stroke=False, fill=False, evenodd=False,
stroking_color=None, non_stroking_color=None):
def __init__(self, linewidth: float, pts: List[Point],
stroke: bool = False, fill: bool = False,
evenodd: bool = False, stroking_color: Optional[Color] = None,
non_stroking_color: Optional[Color] = None):
LTComponent.__init__(self, get_bound(pts))
self.pts = pts
self.linewidth = linewidth
Expand All @@ -212,7 +216,7 @@ def __init__(self, linewidth, pts, stroke=False, fill=False, evenodd=False,
self.non_stroking_color = non_stroking_color
return

def get_pts(self):
def get_pts(self) -> str:
return ','.join('%.3f,%.3f' % p for p in self.pts)


Expand All @@ -222,8 +226,10 @@ class LTLine(LTCurve):
Could be used for separating text or figures.
"""

def __init__(self, linewidth, p0, p1, stroke=False, fill=False,
evenodd=False, stroking_color=None, non_stroking_color=None):
def __init__(self, linewidth: float, p0: Point, p1: Point,
stroke: bool = False, fill: bool = False,
evenodd: bool = False, stroking_color: Optional[Color] = None,
non_stroking_color: Optional[Color] = None):
LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd,
stroking_color, non_stroking_color)
return
Expand All @@ -235,8 +241,10 @@ class LTRect(LTCurve):
Could be used for framing other pictures or figures.
"""

def __init__(self, linewidth, bbox, stroke=False, fill=False,
evenodd=False, stroking_color=None, non_stroking_color=None):
def __init__(self, linewidth: float, bbox: Rect,
stroke: bool = False, fill: bool = False,
evenodd: bool = False, stroking_color: Optional[Color] = None,
non_stroking_color: Optional[Color] = None):
(x0, y0, x1, y1) = bbox
LTCurve.__init__(self, linewidth,
[(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke,
Expand Down Expand Up @@ -405,7 +413,7 @@ class LTTextLine(LTTextContainer):
the text's writing mode.
"""

def __init__(self, word_margin):
def __init__(self, word_margin: float):
LTTextContainer.__init__(self)
self.word_margin = word_margin
return
Expand Down Expand Up @@ -458,26 +466,30 @@ def find_neighbors(self, plane: Plane, ratio: float
self._is_right_aligned_with(obj, tolerance=d) or
self._is_centrally_aligned_with(obj, tolerance=d)))]

def _is_left_aligned_with(self, other, tolerance=0):
def _is_left_aligned_with(self, other: LTComponent, tolerance: float = 0
) -> bool:
"""
Whether the left-hand edge of `other` is within `tolerance`.
"""
return abs(other.x0 - self.x0) <= tolerance

def _is_right_aligned_with(self, other, tolerance=0):
def _is_right_aligned_with(self, other: LTComponent, tolerance: float = 0
) -> bool:
"""
Whether the right-hand edge of `other` is within `tolerance`.
"""
return abs(other.x1 - self.x1) <= tolerance

def _is_centrally_aligned_with(self, other, tolerance=0):
def _is_centrally_aligned_with(self, other: LTComponent,
tolerance: float = 0) -> bool:
"""
Whether the horizontal center of `other` is within `tolerance`.
"""
return abs(
(other.x0 + other.x1) / 2 - (self.x0 + self.x1) / 2) <= tolerance

def _is_same_height_as(self, other, tolerance):
def _is_same_height_as(self, other: LTComponent, tolerance: float = 0
) -> bool:
return abs(other.height - self.height) <= tolerance


Expand Down Expand Up @@ -515,26 +527,29 @@ def find_neighbors(self, plane: Plane, ratio: float
self._is_upper_aligned_with(obj, tolerance=d) or
self._is_centrally_aligned_with(obj, tolerance=d)))]

def _is_lower_aligned_with(self, other, tolerance=0):
def _is_lower_aligned_with(self, other: LTComponent, tolerance: float = 0
) -> bool:
"""
Whether the lower edge of `other` is within `tolerance`.
"""
return abs(other.y0 - self.y0) <= tolerance

def _is_upper_aligned_with(self, other, tolerance=0):
def _is_upper_aligned_with(self, other: LTComponent, tolerance: float = 0
) -> bool:
"""
Whether the upper edge of `other` is within `tolerance`.
"""
return abs(other.y1 - self.y1) <= tolerance

def _is_centrally_aligned_with(self, other, tolerance=0):
def _is_centrally_aligned_with(self, other: LTComponent,
tolerance: float = 0) -> bool:
"""
Whether the vertical center of `other` is within `tolerance`.
"""
return abs(
(other.y0 + other.y1) / 2 - (self.y0 + self.y1) / 2) <= tolerance

def _is_same_width_as(self, other, tolerance):
def _is_same_width_as(self, other: LTComponent, tolerance: float) -> bool:
return abs(other.width - self.width) <= tolerance


Expand Down Expand Up @@ -578,7 +593,7 @@ def get_writing_mode(self) -> str:


class LTTextGroup(LTTextContainer):
def __init__(self, objs):
def __init__(self, objs: Iterable[LTContainerElement]):
LTTextContainer.__init__(self)
self.extend(objs)
return
Expand Down Expand Up @@ -874,7 +889,7 @@ class LTPage(LTLayoutContainer):
LTCurve and LTLine.
"""

def __init__(self, pageid, bbox: Rect, rotate=0):
def __init__(self, pageid: Any, bbox: Rect, rotate: float = 0):
LTLayoutContainer.__init__(self, bbox)
self.pageid = pageid
self.rotate = rotate
Expand Down
14 changes: 10 additions & 4 deletions pdfminer/pdfinterp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re
import logging
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
from io import BytesIO
from .cmapdb import CMapDB
from .cmapdb import CMap
Expand Down Expand Up @@ -102,10 +102,16 @@ def reset(self) -> None:
return


Color = Union[
float, # Greyscale
Tuple[float, float, float], # R, G, B
Tuple[float, float, float, float]] # C, M, Y, K


class PDFGraphicState:

def __init__(self):
self.linewidth: int = 0
self.linewidth: float = 0
self.linecap = None
self.linejoin = None
self.miterlimit = None
Expand All @@ -114,10 +120,10 @@ def __init__(self):
self.flatness = None

# stroking color
self.scolor = None
self.scolor: Optional[Color] = None

# non stroking color
self.ncolor = None
self.ncolor: Optional[Color] = None
return

def copy(self) -> "PDFGraphicState":
Expand Down

0 comments on commit 5c9c0b1

Please sign in to comment.