Skip to content

Commit

Permalink
tighten up types, avoid Any in favour of explicit casts
Browse files: browse the repository at this point in the history
  • Loading branch information
0xabu committed Sep 6, 2021
1 parent e58fd48 commit b52a059
Show file tree
Hide file tree
Showing 13 changed files with 255 additions and 209 deletions.
6 changes: 3 additions & 3 deletions pdfminer/ccitt.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def feedbytes(self, data: bytes) -> None:
self._parse_bit(byte & m)
return

def _parse_bit(self, x: Any) -> None:
def _parse_bit(self, x: object) -> None:
if x:
v = self._state[1]
else:
Expand Down Expand Up @@ -356,7 +356,7 @@ def feedbytes(self, data: bytes) -> None:
break
return

def _parse_mode(self, mode: Any) -> BitParserState:
def _parse_mode(self, mode: object) -> BitParserState:
if mode == 'p':
self._do_pass()
self._flush_line()
Expand Down Expand Up @@ -561,7 +561,7 @@ def output_line(self, y: int, bits: Sequence[int]) -> None:
return


def ccittfaxdecode(data: bytes, params: Dict[str, Any]) -> bytes:
def ccittfaxdecode(data: bytes, params: Dict[str, object]) -> bytes:
K = params.get('K')
cols = cast(int, params.get('Columns'))
bytealign = cast(bool, params.get('EncodedByteAlign'))
Expand Down
32 changes: 17 additions & 15 deletions pdfminer/cmapdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
import pickle as pickle
import struct
import logging
from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, Optional,
TextIO, Tuple, Union)
from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List,
MutableMapping, Optional, TextIO, Tuple, Union, cast)
from .psparser import PSStackParser
from .psparser import PSSyntaxError
from .psparser import PSEOF
Expand All @@ -41,14 +41,14 @@ class CMapBase:

debug = 0

def __init__(self, **kwargs: Union[str, int]):
self.attrs = kwargs.copy()
def __init__(self, **kwargs: object):
self.attrs: MutableMapping[str, object] = kwargs.copy()
return

def is_vertical(self) -> bool:
return self.attrs.get('WMode', 0) != 0

def set_attr(self, k: str, v: Any) -> None:
def set_attr(self, k: str, v: object) -> None:
self.attrs[k] = v
return

Expand All @@ -70,7 +70,7 @@ class CMap(CMapBase):

def __init__(self, **kwargs: Union[str, int]):
CMapBase.__init__(self, **kwargs)
self.code2cid: Dict[int, Any] = {}
self.code2cid: Dict[int, object] = {}
return

def __repr__(self) -> str:
Expand All @@ -79,10 +79,10 @@ def __repr__(self) -> str:
def use_cmap(self, cmap: CMapBase) -> None:
assert isinstance(cmap, CMap), str(type(cmap))

def copy(dst: Dict[Any, Any], src: Dict[Any, Any]) -> None:
def copy(dst: Dict[int, object], src: Dict[int, object]) -> None:
for (k, v) in src.items():
if isinstance(v, dict):
d: Dict[Any, Any] = {}
d: Dict[int, object] = {}
dst[k] = d
copy(d, v)
else:
Expand All @@ -95,16 +95,18 @@ def decode(self, code: bytes) -> Iterator[int]:
d = self.code2cid
for i in iter(code):
if i in d:
d = d[i]
if isinstance(d, int):
yield d
x = d[i]
if isinstance(x, int):
yield x
d = self.code2cid
else:
d = cast(Dict[int, object], x)
else:
d = self.code2cid
return

def dump(self, out: TextIO = sys.stdout,
code2cid: Optional[Dict[int, Any]] = None,
code2cid: Optional[Dict[int, object]] = None,
code: Tuple[int, ...] = ()) -> None:
if code2cid is None:
code2cid = self.code2cid
Expand All @@ -114,7 +116,7 @@ def dump(self, out: TextIO = sys.stdout,
if isinstance(v, int):
out.write('code %r = cid %d\n' % (c, v))
else:
self.dump(out=out, code2cid=v, code=c)
self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c)
return


Expand Down Expand Up @@ -167,9 +169,9 @@ def add_code2cid(self, code: str, cid: int) -> None:
for c in code[:-1]:
ci = ord(c)
if ci in d:
d = d[ci]
d = cast(Dict[int, object], d[ci])
else:
t: Dict[int, Any] = {}
t: Dict[int, object] = {}
d[ci] = t
d = t
ci = ord(code[-1])
Expand Down
8 changes: 4 additions & 4 deletions pdfminer/converter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import io
import logging
from pdfminer.pdfcolor import PDFColorSpace
from typing import (Any, BinaryIO, Dict, Generic, List, Optional, Sequence,
TextIO, Tuple, TypeVar, Union, cast)
from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO,
Tuple, TypeVar, Union, cast)
import re

from . import utils
Expand Down Expand Up @@ -72,7 +72,7 @@ def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
return

def end_figure(self, _: Any) -> None:
def end_figure(self, _: str) -> None:
fig = self.cur_item
assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
self.cur_item = self._stack.pop()
Expand Down Expand Up @@ -416,7 +416,7 @@ def begin_div(self, color: str, borderwidth: int, x: float, y: float,
self.write(s)
return

def end_div(self, color: Any) -> None:
def end_div(self, color: str) -> None:
if self._font is not None:
self.write('</span>')
self._font = self._fontstack.pop()
Expand Down
4 changes: 2 additions & 2 deletions pdfminer/encodingdb.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import re
from typing import Any, Dict, Iterable, Optional, cast
from typing import Dict, Iterable, Optional, cast

from .glyphlist import glyphname2unicode
from .latin_enc import ENCODING
Expand Down Expand Up @@ -96,7 +96,7 @@ class EncodingDB:
}

@classmethod
def get_encoding(cls, name: str, diff: Optional[Iterable[Any]] = None
def get_encoding(cls, name: str, diff: Optional[Iterable[object]] = None
) -> Dict[int, str]:
cid2unicode = cls.encodings.get(name, cls.std2unicode)
if diff:
Expand Down
16 changes: 8 additions & 8 deletions pdfminer/layout.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import heapq
import logging
from typing import (Any, Dict, Generic, Iterable, Iterator, List, Optional,
from typing import (Dict, Generic, Iterable, Iterator, List, Optional,
Sequence, Set, Tuple, TypeVar, Union, cast)

from .utils import INF
Expand Down Expand Up @@ -137,16 +137,16 @@ def __repr__(self) -> str:
(self.__class__.__name__, bbox2str(self.bbox)))

# Disable comparison.
def __lt__(self, _: Any) -> bool:
def __lt__(self, _: object) -> bool:
raise ValueError

def __le__(self, _: Any) -> bool:
def __le__(self, _: object) -> bool:
raise ValueError

def __gt__(self, _: Any) -> bool:
def __gt__(self, _: object) -> bool:
raise ValueError

def __ge__(self, _: Any) -> bool:
def __ge__(self, _: object) -> bool:
raise ValueError

def set_bbox(self, bbox: Rect) -> None:
Expand Down Expand Up @@ -349,7 +349,7 @@ def __repr__(self) -> str:
def get_text(self) -> str:
return self._text

def is_compatible(self, obj: Any) -> bool:
def is_compatible(self, obj: object) -> bool:
"""Returns True if two characters can coexist in the same line."""
return True

Expand Down Expand Up @@ -730,7 +730,7 @@ def group_textlines(self, laparams: LAParams, lines: Iterable[LTTextLine]
"""Group neighboring lines to textboxes"""
plane: Plane[LTTextLine] = Plane(self.bbox)
plane.extend(lines)
boxes: Dict[Any, LTTextBox] = {}
boxes: Dict[LTTextLine, LTTextBox] = {}
for line in lines:
neighbors = line.find_neighbors(plane, laparams.line_margin)
members = [line]
Expand Down Expand Up @@ -914,7 +914,7 @@ class LTPage(LTLayoutContainer):
LTCurve and LTLine.
"""

def __init__(self, pageid: Any, bbox: Rect, rotate: float = 0):
def __init__(self, pageid: int, bbox: Rect, rotate: float = 0):
LTLayoutContainer.__init__(self, bbox)
self.pageid = pageid
self.rotate = rotate
Expand Down
51 changes: 30 additions & 21 deletions pdfminer/pdfdevice.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import (Any, BinaryIO, Iterable, List, Optional, Sequence,
TYPE_CHECKING)
from pdfminer.psparser import PSLiteral
from typing import (BinaryIO, Iterable, List, Optional, Sequence,
TYPE_CHECKING, Union, cast)
from . import utils
from .utils import Matrix, Point, Rect, PathSegment
from .pdfcolor import PDFColorSpace
Expand All @@ -12,6 +13,10 @@
from .pdfinterp import PDFGraphicState
from .pdfinterp import PDFResourceManager
from .pdfinterp import PDFTextState
from .pdfinterp import PDFStackT


PDFTextSeq = Iterable[Union[int, float, bytes]]


class PDFDevice:
Expand All @@ -29,7 +34,8 @@ def __repr__(self) -> str:
def __enter__(self) -> "PDFDevice":
return self

def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
def __exit__(self, exc_type: object, exc_val: object, exc_tb: object
) -> None:
self.close()

def close(self) -> None:
Expand All @@ -39,13 +45,15 @@ def set_ctm(self, ctm: Matrix) -> None:
self.ctm = ctm
return

def begin_tag(self, tag: Any, props: Any = None) -> None:
def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
) -> None:
return

def end_tag(self) -> None:
return

def do_tag(self, tag: Any, props: Any = None) -> None:
def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
) -> None:
return

def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
Expand All @@ -68,15 +76,15 @@ def paint_path(self, graphicstate: "PDFGraphicState", stroke: bool,
def render_image(self, name: str, stream: PDFStream) -> None:
return

def render_string(self, textstate: "PDFTextState", seq: Iterable[Any],
def render_string(self, textstate: "PDFTextState", seq: PDFTextSeq,
ncs: PDFColorSpace, graphicstate: "PDFGraphicState"
) -> None:
return


class PDFTextDevice(PDFDevice):

def render_string(self, textstate: "PDFTextState", seq: Iterable[Any],
def render_string(self, textstate: "PDFTextState", seq: PDFTextSeq,
ncs: PDFColorSpace, graphicstate: "PDFGraphicState"
) -> None:
assert self.ctm is not None
Expand All @@ -103,17 +111,16 @@ def render_string(self, textstate: "PDFTextState", seq: Iterable[Any],
graphicstate)
return

def render_string_horizontal(self, seq: Iterable[Any],
matrix: Matrix, pos: Point, font: PDFFont,
fontsize: float, scaling: float,
charspace: float, wordspace: float,
rise: float, dxscale: float,
def render_string_horizontal(self, seq: PDFTextSeq, matrix: Matrix,
pos: Point, font: PDFFont, fontsize: float,
scaling: float, charspace: float,
wordspace: float, rise: float, dxscale: float,
ncs: PDFColorSpace,
graphicstate: "PDFGraphicState") -> Point:
(x, y) = pos
needcharspace = False
for obj in seq:
if utils.isnumber(obj):
if isinstance(obj, (int, float)):
x -= obj*dxscale
needcharspace = True
else:
Expand All @@ -128,7 +135,7 @@ def render_string_horizontal(self, seq: Iterable[Any],
needcharspace = True
return (x, y)

def render_string_vertical(self, seq: Iterable[Any], matrix: Matrix,
def render_string_vertical(self, seq: PDFTextSeq, matrix: Matrix,
pos: Point, font: PDFFont, fontsize: float,
scaling: float, charspace: float,
wordspace: float, rise: float, dxscale: float,
Expand All @@ -137,7 +144,7 @@ def render_string_vertical(self, seq: Iterable[Any], matrix: Matrix,
(x, y) = pos
needcharspace = False
for obj in seq:
if utils.isnumber(obj):
if isinstance(obj, (int, float)):
y -= obj*dxscale
needcharspace = True
else:
Expand Down Expand Up @@ -166,10 +173,10 @@ def __init__(self, rsrcmgr: "PDFResourceManager", outfp: BinaryIO,
self.outfp = outfp
self.codec = codec
self.pageno = 0
self._stack: List[Any] = []
self._stack: List[PSLiteral] = []
return

def render_string(self, textstate: "PDFTextState", seq: Iterable[Any],
def render_string(self, textstate: "PDFTextState", seq: PDFTextSeq,
ncs: PDFColorSpace, graphicstate: "PDFGraphicState"
) -> None:
font = textstate.font
Expand Down Expand Up @@ -201,26 +208,28 @@ def end_page(self, page: PDFPage) -> None:
self.pageno += 1
return

def begin_tag(self, tag: Any, props: Any = None) -> None:
def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
) -> None:
s = ''
if isinstance(props, dict):
s = ''.join([
' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v))
for (k, v) in sorted(props.items())
])
out_s = '<{}{}>'.format(utils.enc(tag.name), s)
out_s = '<{}{}>'.format(utils.enc(cast(str, tag.name)), s)
self._write(out_s)
self._stack.append(tag)
return

def end_tag(self) -> None:
assert self._stack, str(self.pageno)
tag = self._stack.pop(-1)
out_s = '</%s>' % utils.enc(tag.name)
out_s = '</%s>' % utils.enc(cast(str, tag.name))
self._write(out_s)
return

def do_tag(self, tag: Any, props: Any = None) -> None:
def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
) -> None:
self.begin_tag(tag, props)
self._stack.pop(-1)
return
Expand Down

0 comments on commit b52a059

Please sign in to comment.