From b52a0594e1998a492c172538a9b35491c5fc5f52 Mon Sep 17 00:00:00 2001 From: Andrew Baumann Date: Sun, 5 Sep 2021 22:37:28 -0700 Subject: [PATCH] tighten up types, avoid Any in favour of explicit casts --- pdfminer/ccitt.py | 6 +- pdfminer/cmapdb.py | 32 +++--- pdfminer/converter.py | 8 +- pdfminer/encodingdb.py | 4 +- pdfminer/layout.py | 16 +-- pdfminer/pdfdevice.py | 51 +++++---- pdfminer/pdfdocument.py | 37 ++++--- pdfminer/pdffont.py | 8 +- pdfminer/pdfinterp.py | 230 ++++++++++++++++++++++------------------ pdfminer/pdfpage.py | 26 +++-- pdfminer/pdftypes.py | 32 +++--- pdfminer/psparser.py | 4 +- pdfminer/utils.py | 10 +- 13 files changed, 255 insertions(+), 209 deletions(-) diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py index d57f211b..ceae95a6 100644 --- a/pdfminer/ccitt.py +++ b/pdfminer/ccitt.py @@ -62,7 +62,7 @@ def feedbytes(self, data: bytes) -> None: self._parse_bit(byte & m) return - def _parse_bit(self, x: Any) -> None: + def _parse_bit(self, x: object) -> None: if x: v = self._state[1] else: @@ -356,7 +356,7 @@ def feedbytes(self, data: bytes) -> None: break return - def _parse_mode(self, mode: Any) -> BitParserState: + def _parse_mode(self, mode: object) -> BitParserState: if mode == 'p': self._do_pass() self._flush_line() @@ -561,7 +561,7 @@ def output_line(self, y: int, bits: Sequence[int]) -> None: return -def ccittfaxdecode(data: bytes, params: Dict[str, Any]) -> bytes: +def ccittfaxdecode(data: bytes, params: Dict[str, object]) -> bytes: K = params.get('K') cols = cast(int, params.get('Columns')) bytealign = cast(bool, params.get('EncodedByteAlign')) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 2a80f8f7..76a4e394 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -16,8 +16,8 @@ import pickle as pickle import struct import logging -from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, - TextIO, Tuple, Union) +from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, + MutableMapping, Optional, TextIO, Tuple, Union, cast) from .psparser import PSStackParser from .psparser import PSSyntaxError from .psparser import PSEOF @@ -41,14 +41,14 @@ class CMapBase: debug = 0 - def __init__(self, **kwargs: Union[str, int]): - self.attrs = kwargs.copy() + def __init__(self, **kwargs: object): + self.attrs: MutableMapping[str, object] = kwargs.copy() return def is_vertical(self) -> bool: return self.attrs.get('WMode', 0) != 0 - def set_attr(self, k: str, v: Any) -> None: + def set_attr(self, k: str, v: object) -> None: self.attrs[k] = v return @@ -70,7 +70,7 @@ class CMap(CMapBase): def __init__(self, **kwargs: Union[str, int]): CMapBase.__init__(self, **kwargs) - self.code2cid: Dict[int, Any] = {} + self.code2cid: Dict[int, object] = {} return def __repr__(self) -> str: @@ -79,10 +79,10 @@ def __repr__(self) -> str: def use_cmap(self, cmap: CMapBase) -> None: assert isinstance(cmap, CMap), str(type(cmap)) - def copy(dst: Dict[Any, Any], src: Dict[Any, Any]) -> None: + def copy(dst: Dict[int, object], src: Dict[int, object]) -> None: for (k, v) in src.items(): if isinstance(v, dict): - d: Dict[Any, Any] = {} + d: Dict[int, object] = {} dst[k] = d copy(d, v) else: @@ -95,16 +95,18 @@ def decode(self, code: bytes) -> Iterator[int]: d = self.code2cid for i in iter(code): if i in d: - d = d[i] - if isinstance(d, int): - yield d + x = d[i] + if isinstance(x, int): + yield x d = self.code2cid + else: + d = cast(Dict[int, object], x) else: d = self.code2cid return def dump(self, out: TextIO = sys.stdout, - code2cid: Optional[Dict[int, Any]] = None, + code2cid: Optional[Dict[int, object]] = None, code: Tuple[int, ...] = ()) -> None: if code2cid is None: code2cid = self.code2cid @@ -114,7 +116,7 @@ def dump(self, out: TextIO = sys.stdout, if isinstance(v, int): out.write('code %r = cid %d\n' % (c, v)) else: - self.dump(out=out, code2cid=v, code=c) + self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c) return @@ -167,9 +169,9 @@ def add_code2cid(self, code: str, cid: int) -> None: for c in code[:-1]: ci = ord(c) if ci in d: - d = d[ci] + d = cast(Dict[int, object], d[ci]) else: - t: Dict[int, Any] = {} + t: Dict[int, object] = {} d[ci] = t d = t ci = ord(code[-1]) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index a84d1066..793078c0 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -1,8 +1,8 @@ import io import logging from pdfminer.pdfcolor import PDFColorSpace -from typing import (Any, BinaryIO, Dict, Generic, List, Optional, Sequence, - TextIO, Tuple, TypeVar, Union, cast) +from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO, + Tuple, TypeVar, Union, cast) import re from . import utils @@ -72,7 +72,7 @@ def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm)) return - def end_figure(self, _: Any) -> None: + def end_figure(self, _: str) -> None: fig = self.cur_item assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) self.cur_item = self._stack.pop() @@ -416,7 +416,7 @@ def begin_div(self, color: str, borderwidth: int, x: float, y: float, self.write(s) return - def end_div(self, color: Any) -> None: + def end_div(self, color: str) -> None: if self._font is not None: self.write('') self._font = self._fontstack.pop() diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py index f5aa0af1..0f6bd68e 100644 --- a/pdfminer/encodingdb.py +++ b/pdfminer/encodingdb.py @@ -1,6 +1,6 @@ import logging import re -from typing import Any, Dict, Iterable, Optional, cast +from typing import Dict, Iterable, Optional, cast from .glyphlist import glyphname2unicode from .latin_enc import ENCODING @@ -96,7 +96,7 @@ class EncodingDB: } @classmethod - def get_encoding(cls, name: str, diff: Optional[Iterable[Any]] = None + def get_encoding(cls, name: str, diff: Optional[Iterable[object]] = None ) -> Dict[int, str]: cid2unicode = cls.encodings.get(name, cls.std2unicode) if diff: diff --git a/pdfminer/layout.py b/pdfminer/layout.py index d78c0b53..1cbdf142 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -1,6 +1,6 @@ import heapq import logging -from typing import (Any, Dict, Generic, Iterable, Iterator, List, Optional, +from typing import (Dict, Generic, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, TypeVar, Union, cast) from .utils import INF @@ -137,16 +137,16 @@ def __repr__(self) -> str: (self.__class__.__name__, bbox2str(self.bbox))) # Disable comparison. - def __lt__(self, _: Any) -> bool: + def __lt__(self, _: object) -> bool: raise ValueError - def __le__(self, _: Any) -> bool: + def __le__(self, _: object) -> bool: raise ValueError - def __gt__(self, _: Any) -> bool: + def __gt__(self, _: object) -> bool: raise ValueError - def __ge__(self, _: Any) -> bool: + def __ge__(self, _: object) -> bool: raise ValueError def set_bbox(self, bbox: Rect) -> None: @@ -349,7 +349,7 @@ def __repr__(self) -> str: def get_text(self) -> str: return self._text - def is_compatible(self, obj: Any) -> bool: + def is_compatible(self, obj: object) -> bool: """Returns True if two characters can coexist in the same line.""" return True @@ -730,7 +730,7 @@ def group_textlines(self, laparams: LAParams, lines: Iterable[LTTextLine] """Group neighboring lines to textboxes""" plane: Plane[LTTextLine] = Plane(self.bbox) plane.extend(lines) - boxes: Dict[Any, LTTextBox] = {} + boxes: Dict[LTTextLine, LTTextBox] = {} for line in lines: neighbors = line.find_neighbors(plane, laparams.line_margin) members = [line] @@ -914,7 +914,7 @@ class LTPage(LTLayoutContainer): LTCurve and LTLine. """ - def __init__(self, pageid: Any, bbox: Rect, rotate: float = 0): + def __init__(self, pageid: int, bbox: Rect, rotate: float = 0): LTLayoutContainer.__init__(self, bbox) self.pageid = pageid self.rotate = rotate diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 1279df4e..f14861a2 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -1,5 +1,6 @@ -from typing import (Any, BinaryIO, Iterable, List, Optional, Sequence, - TYPE_CHECKING) +from pdfminer.psparser import PSLiteral +from typing import (BinaryIO, Iterable, List, Optional, Sequence, + TYPE_CHECKING, Union, cast) from . import utils from .utils import Matrix, Point, Rect, PathSegment from .pdfcolor import PDFColorSpace @@ -12,6 +13,10 @@ from .pdfinterp import PDFGraphicState from .pdfinterp import PDFResourceManager from .pdfinterp import PDFTextState + from .pdfinterp import PDFStackT + + +PDFTextSeq = Iterable[Union[int, float, bytes]] class PDFDevice: @@ -29,7 +34,8 @@ def __repr__(self) -> str: def __enter__(self) -> "PDFDevice": return self - def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: + def __exit__(self, exc_type: object, exc_val: object, exc_tb: object + ) -> None: self.close() def close(self) -> None: @@ -39,13 +45,15 @@ def set_ctm(self, ctm: Matrix) -> None: self.ctm = ctm return - def begin_tag(self, tag: Any, props: Any = None) -> None: + def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None + ) -> None: return def end_tag(self) -> None: return - def do_tag(self, tag: Any, props: Any = None) -> None: + def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None + ) -> None: return def begin_page(self, page: PDFPage, ctm: Matrix) -> None: @@ -68,7 +76,7 @@ def paint_path(self, graphicstate: "PDFGraphicState", stroke: bool, def render_image(self, name: str, stream: PDFStream) -> None: return - def render_string(self, textstate: "PDFTextState", seq: Iterable[Any], + def render_string(self, textstate: "PDFTextState", seq: PDFTextSeq, ncs: PDFColorSpace, graphicstate: "PDFGraphicState" ) -> None: return @@ -76,7 +84,7 @@ def render_string(self, textstate: "PDFTextState", seq: Iterable[Any], class PDFTextDevice(PDFDevice): - def render_string(self, textstate: "PDFTextState", seq: Iterable[Any], + def render_string(self, textstate: "PDFTextState", seq: PDFTextSeq, ncs: PDFColorSpace, graphicstate: "PDFGraphicState" ) -> None: assert self.ctm is not None @@ -103,17 +111,16 @@ def render_string(self, textstate: "PDFTextState", seq: Iterable[Any], graphicstate) return - def render_string_horizontal(self, seq: Iterable[Any], - matrix: Matrix, pos: Point, font: PDFFont, - fontsize: float, scaling: float, - charspace: float, wordspace: float, - rise: float, dxscale: float, + def render_string_horizontal(self, seq: PDFTextSeq, matrix: Matrix, + pos: Point, font: PDFFont, fontsize: float, + scaling: float, charspace: float, + wordspace: float, rise: float, dxscale: float, ncs: PDFColorSpace, graphicstate: "PDFGraphicState") -> Point: (x, y) = pos needcharspace = False for obj in seq: - if utils.isnumber(obj): + if isinstance(obj, (int, float)): x -= obj*dxscale needcharspace = True else: @@ -128,7 +135,7 @@ def render_string_horizontal(self, seq: Iterable[Any], needcharspace = True return (x, y) - def render_string_vertical(self, seq: Iterable[Any], matrix: Matrix, + def render_string_vertical(self, seq: PDFTextSeq, matrix: Matrix, pos: Point, font: PDFFont, fontsize: float, scaling: float, charspace: float, wordspace: float, rise: float, dxscale: float, @@ -137,7 +144,7 @@ def render_string_vertical(self, seq: Iterable[Any], matrix: Matrix, (x, y) = pos needcharspace = False for obj in seq: - if utils.isnumber(obj): + if isinstance(obj, (int, float)): y -= obj*dxscale needcharspace = True else: @@ -166,10 +173,10 @@ def __init__(self, rsrcmgr: "PDFResourceManager", outfp: BinaryIO, self.outfp = outfp self.codec = codec self.pageno = 0 - self._stack: List[Any] = [] + self._stack: List[PSLiteral] = [] return - def render_string(self, textstate: "PDFTextState", seq: Iterable[Any], + def render_string(self, textstate: "PDFTextState", seq: PDFTextSeq, ncs: PDFColorSpace, graphicstate: "PDFGraphicState" ) -> None: font = textstate.font @@ -201,14 +208,15 @@ def end_page(self, page: PDFPage) -> None: self.pageno += 1 return - def begin_tag(self, tag: Any, props: Any = None) -> None: + def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None + ) -> None: s = '' if isinstance(props, dict): s = ''.join([ ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v)) for (k, v) in sorted(props.items()) ]) - out_s = '<{}{}>'.format(utils.enc(tag.name), s) + out_s = '<{}{}>'.format(utils.enc(cast(str, tag.name)), s) self._write(out_s) self._stack.append(tag) return @@ -216,11 +224,12 @@ def begin_tag(self, tag: Any, props: Any = None) -> None: def end_tag(self) -> None: assert self._stack, str(self.pageno) tag = self._stack.pop(-1) - out_s = '' % utils.enc(tag.name) + out_s = '' % utils.enc(cast(str, tag.name)) self._write(out_s) return - def do_tag(self, tag: Any, props: Any = None) -> None: + def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None + ) -> None: self.begin_tag(tag, props) self._stack.pop(-1) return diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index ea7de322..e1bfaa47 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -53,7 +53,7 @@ class PDFTextExtractionNotAllowed(PDFEncryptionError): class PDFTextExtractionNotAllowedError(PDFTextExtractionNotAllowed): - def __init__(self, *args: Any) -> None: + def __init__(self, *args: object) -> None: from warnings import warn warn('PDFTextExtractionNotAllowedError will be removed in the future. ' 'Use PDFTextExtractionNotAllowed instead.', DeprecationWarning) @@ -67,7 +67,7 @@ def __init__(self, *args: Any) -> None: class PDFBaseXRef: - def get_trailer(self) -> Dict[Any, Any]: + def get_trailer(self) -> Dict[str, Any]: raise NotImplementedError def get_objids(self) -> Iterable[int]: @@ -87,7 +87,7 @@ class PDFXRef(PDFBaseXRef): def __init__(self) -> None: self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {} - self.trailer: Dict[Any, Any] = {} + self.trailer: Dict[str, Any] = {} return def __repr__(self) -> str: @@ -149,7 +149,7 @@ def load_trailer(self, parser: PDFParser) -> None: log.debug('trailer=%r', self.trailer) return - def get_trailer(self) -> Dict[Any, Any]: + def get_trailer(self) -> Dict[str, Any]: return self.trailer def get_objids(self) -> KeysView[int]: @@ -202,11 +202,11 @@ def load(self, parser: PDFParser) -> None: raise PDFSyntaxError('N is not defined: %r' % stream) n = 0 parser1 = PDFStreamParser(stream.get_data()) - objs: List[Any] = [] + objs: List[int] = [] try: while 1: (_, obj) = parser1.nextobject() - objs.append(obj) + objs.append(cast(int, obj)) except PSEOF: pass n = min(n, len(objs)//2) @@ -224,7 +224,7 @@ def __init__(self) -> None: self.fl1: Optional[int] = None self.fl2: Optional[int] = None self.fl3: Optional[int] = None - self.ranges: List[Tuple[Any, Any]] = [] + self.ranges: List[Tuple[int, int]] = [] return def __repr__(self) -> str: @@ -242,7 +242,8 @@ def load(self, parser: PDFParser) -> None: index_array = stream.get('Index', (0, size)) if len(index_array) % 2 != 0: raise PDFSyntaxError('Invalid index number') - self.ranges.extend(cast(Tuple[Any, Any], choplist(2, index_array))) + self.ranges.extend(cast(Iterator[Tuple[int, int]], + choplist(2, index_array))) (self.fl1, self.fl2, self.fl3) = stream['W'] assert (self.fl1 is not None and self.fl2 is not None and self.fl3 is not None) @@ -584,8 +585,8 @@ def __init__(self, parser: PDFParser, password: str = '', self.encryption: Optional[Tuple[Any, Any]] = None self.decipher: Optional[DecipherCallable] = None self._parser = None - self._cached_objs: Dict[int, Tuple[Any, int]] = {} - self._parsed_objs: Dict[int, Tuple[List[Any], Any]] = {} + self._cached_objs: Dict[int, Tuple[object, int]] = {} + self._parsed_objs: Dict[int, Tuple[List[object], int]] = {} self._parser = parser self._parser.set_document(self) self.is_printable = self.is_modifiable = self.is_extractable = True @@ -652,7 +653,8 @@ def _initialize_password(self, password: str = '') -> None: self._parser.fallback = False # need to read streams with exact length return - def _getobj_objstm(self, stream: PDFStream, index: int, objid: int) -> Any: + def _getobj_objstm(self, stream: PDFStream, index: int, objid: int + ) -> object: if stream.objid in self._parsed_objs: (objs, n) = self._parsed_objs[stream.objid] else: @@ -667,19 +669,19 @@ def _getobj_objstm(self, stream: PDFStream, index: int, objid: int) -> Any: raise PDFSyntaxError('index too big: %r' % index) return obj - def _get_objects(self, stream: PDFStream) -> Tuple[List[Any], Any]: + def _get_objects(self, stream: PDFStream) -> Tuple[List[object], int]: if stream.get('Type') is not LITERAL_OBJSTM: if settings.STRICT: raise PDFSyntaxError('Not a stream object: %r' % stream) try: - n = stream['N'] + n = cast(int, stream['N']) except KeyError: if settings.STRICT: raise PDFSyntaxError('N is not defined: %r' % stream) n = 0 parser = PDFStreamParser(stream.get_data()) parser.set_document(self) - objs = [] + objs: List[object] = [] try: while 1: (_, obj) = parser.nextobject() @@ -688,7 +690,7 @@ def _get_objects(self, stream: PDFStream) -> Tuple[List[Any], Any]: pass return (objs, n) - def _getobj_parse(self, pos: int, objid: Any) -> Any: + def _getobj_parse(self, pos: int, objid: int) -> object: assert self._parser is not None self._parser.seek(pos) (_, objid1) = self._parser.nexttoken() # objid @@ -717,7 +719,7 @@ def _getobj_parse(self, pos: int, objid: Any) -> Any: return obj # can raise PDFObjectNotFound - def getobj(self, objid: int) -> Any: + def getobj(self, objid: int) -> object: """Get object from PDF :raises PDFException if PDFDocument is not initialized @@ -794,7 +796,8 @@ def lookup(d: Dict[str, Any]) -> Any: return None if 'Names' in d: objs = list_value(d['Names']) - names = dict(cast(Tuple[Any, Any], choplist(2, objs))) + names = dict(cast(Iterator[Tuple[Union[str, bytes], Any]], + choplist(2, objs))) return names[key] if 'Kids' in d: for c in list_value(d['Kids']): diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index dcfd41aa..582fc2ce 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -42,7 +42,7 @@ log = logging.getLogger(__name__) -def get_widths(seq: Iterable[Any]) -> Dict[int, float]: +def get_widths(seq: Iterable[object]) -> Dict[int, float]: """Build a mapping of character widths for horizontal writing.""" widths: Dict[int, float] = {} r: List[float] = [] @@ -63,7 +63,7 @@ def get_widths(seq: Iterable[Any]) -> Dict[int, float]: return widths -def get_widths2(seq: Iterable[Any]) -> Dict[int, Tuple[float, Point]]: +def get_widths2(seq: Iterable[object]) -> Dict[int, Tuple[float, Point]]: """Build a mapping of character widths for vertical writing.""" widths: Dict[int, Tuple[float, Point]] = {} r: List[float] = [] @@ -541,8 +541,8 @@ def __init__(self, descriptor: Mapping[str, Any], widths: FontWidthDict, else: self.default_width = default_width self.leading = num_value(descriptor.get('Leading', 0)) - self.bbox: Rect = list_value( # type: ignore[assignment] - resolve_all(descriptor.get('FontBBox', (0, 0, 0, 0)))) + self.bbox = cast(Rect, list_value( + resolve_all(descriptor.get('FontBBox', (0, 0, 0, 0))))) self.hscale = self.vscale = .001 # PDF RM 9.8.1 specifies /Descent should always be a negative number. diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 46c14720..324a76c6 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -1,12 +1,11 @@ import re import logging -from typing import (Any, Dict, Iterable, List, Mapping, Optional, Sequence, - Tuple, Union, cast) +from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast from io import BytesIO from .cmapdb import CMapDB from .cmapdb import CMap from .cmapdb import CMapBase -from .psparser import PSTypeError +from .psparser import PSLiteral, PSTypeError from .psparser import PSStackType from .psparser import PSEOF from .psparser import PSKeyword @@ -17,6 +16,7 @@ from .psparser import KWD from . import settings from .pdfdevice import PDFDevice +from .pdfdevice import PDFTextSeq from .pdfpage import PDFPage from .pdftypes import PDFException from .pdftypes import PDFStream @@ -113,12 +113,12 @@ class PDFGraphicState: def __init__(self) -> None: self.linewidth: float = 0 - self.linecap = None - self.linejoin = None - self.miterlimit = None - self.dash: Optional[Tuple[Any, Any]] = None - self.intent = None - self.flatness = None + self.linecap: Optional[object] = None + self.linejoin: Optional[object] = None + self.miterlimit: Optional[object] = None + self.dash: Optional[Tuple[object, object]] = None + self.intent: Optional[object] = None + self.flatness: Optional[object] = None # stroking color self.scolor: Optional[Color] = None @@ -159,10 +159,10 @@ class PDFResourceManager: def __init__(self, caching: bool = True): self.caching = caching - self._cached_fonts: Dict[Any, PDFFont] = {} + self._cached_fonts: Dict[object, PDFFont] = {} return - def get_procset(self, procs: Sequence[Any]) -> None: + def get_procset(self, procs: Sequence[object]) -> None: for proc in procs: if proc is LITERAL_PDF: pass @@ -180,7 +180,7 @@ def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase: raise return CMap() - def get_font(self, objid: Any, spec: Mapping[str, Any]) -> PDFFont: + def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont: if objid and objid in self._cached_fonts: font = self._cached_fonts[objid] else: @@ -227,7 +227,7 @@ def get_font(self, objid: Any, spec: Mapping[str, Any]) -> PDFFont: class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): - def __init__(self, streams: Sequence[Any]): + def __init__(self, streams: Sequence[object]): self.streams = streams self.istream = 0 # PSStackParser.__init__(fp=None) is safe only because we've overloaded @@ -327,6 +327,10 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None: return +PDFStackT = PSStackType[PDFStream] +"""Types that may appear on the PDF argument stack.""" + + class PDFPageInterpreter: """Processor for the content of a PDF page @@ -341,16 +345,16 @@ def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice): def dup(self) -> "PDFPageInterpreter": return self.__class__(self.rsrcmgr, self.device) - def init_resources(self, resources: Any) -> None: + def init_resources(self, resources: Dict[object, object]) -> None: """Prepare the fonts and XObjects listed in the Resource attribute.""" self.resources = resources - self.fontmap = {} + self.fontmap: Dict[object, PDFFont] = {} self.xobjmap = {} self.csmap: Dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy() if not resources: return - def get_colorspace(spec: Any) -> Optional[PDFColorSpace]: + def get_colorspace(spec: object) -> Optional[PDFColorSpace]: if isinstance(spec, list): name = literal_name(spec[0]) else: @@ -363,6 +367,7 @@ def get_colorspace(spec: Any) -> Optional[PDFColorSpace]: return PDFColorSpace(name, len(list_value(spec[1]))) else: return PREDEFINED_COLORSPACE.get(name) + for (k, v) in dict_value(resources).items(): log.debug('Resource: %r: %r', k, v) if k == 'Font': @@ -394,7 +399,7 @@ def init_state(self, ctm: Matrix) -> None: self.graphicstate = PDFGraphicState() self.curpath: List[PathSegment] = [] # argstack: stack for command arguments. - self.argstack: List[PSStackType[PDFStream]] = [] + self.argstack: List[PDFStackT] = [] # set some global states. self.scs: Optional[PDFColorSpace] = None self.ncs: Optional[PDFColorSpace] = None @@ -402,11 +407,11 @@ def init_state(self, ctm: Matrix) -> None: self.scs = self.ncs = next(iter(self.csmap.values())) return - def push(self, obj: PSStackType[PDFStream]) -> None: + def push(self, obj: PDFStackT) -> None: self.argstack.append(obj) return - def pop(self, n: int) -> List[PSStackType[PDFStream]]: + def pop(self, n: int) -> List[PDFStackT]: if n == 0: return [] x = self.argstack[-n:] @@ -434,77 +439,84 @@ def do_Q(self) -> None: self.set_current_state(self.gstack.pop()) return - def do_cm(self, a1: float, b1: float, c1: float, d1: float, e1: float, - f1: float) -> None: + def do_cm(self, a1: PDFStackT, b1: PDFStackT, c1: PDFStackT, d1: PDFStackT, + e1: PDFStackT, f1: PDFStackT) -> None: """Concatenate matrix to current transformation matrix""" - self.ctm = mult_matrix((a1, b1, c1, d1, e1, f1), self.ctm) + self.ctm = \ + mult_matrix(cast(Matrix, (a1, b1, c1, d1, e1, f1)), self.ctm) self.device.set_ctm(self.ctm) return - def do_w(self, linewidth: float) -> None: + def do_w(self, linewidth: PDFStackT) -> None: """Set line width""" - self.graphicstate.linewidth = linewidth + self.graphicstate.linewidth = cast(float, linewidth) return - def do_J(self, linecap: Any) -> None: + def do_J(self, linecap: PDFStackT) -> None: """Set line cap style""" self.graphicstate.linecap = linecap return - def do_j(self, linejoin: Any) -> None: + def do_j(self, linejoin: PDFStackT) -> None: """Set line join style""" self.graphicstate.linejoin = linejoin return - def do_M(self, miterlimit: Any) -> None: + def do_M(self, miterlimit: PDFStackT) -> None: """Set miter limit""" self.graphicstate.miterlimit = miterlimit return - def do_d(self, dash: Any, phase: Any) -> None: + def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None: """Set line dash pattern""" self.graphicstate.dash = (dash, phase) return - def do_ri(self, intent: Any) -> None: + def do_ri(self, intent: PDFStackT) -> None: """Set color rendering intent""" self.graphicstate.intent = intent return - def do_i(self, flatness: Any) -> None: + def do_i(self, flatness: PDFStackT) -> None: """Set flatness tolerance""" self.graphicstate.flatness = flatness return - def do_gs(self, name: Any) -> None: + def do_gs(self, name: PDFStackT) -> None: """Set parameters from graphics state parameter dictionary""" # todo return - def do_m(self, x: float, y: float) -> None: + def do_m(self, x: PDFStackT, y: PDFStackT) -> None: """Begin new subpath""" - self.curpath.append(('m', x, y)) + self.curpath.append(('m', cast(float, x), cast(float, y))) return - def do_l(self, x: float, y: float) -> None: + def do_l(self, x: PDFStackT, y: PDFStackT) -> None: """Append straight line segment to path""" - self.curpath.append(('l', x, y)) + self.curpath.append(('l', cast(float, x), cast(float, y))) return - def do_c(self, x1: float, y1: float, x2: float, y2: float, x3: float, - y3: float) -> None: + def do_c(self, x1: PDFStackT, y1: PDFStackT, x2: PDFStackT, + y2: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None: """Append curved segment to path (three control points)""" - self.curpath.append(('c', x1, y1, x2, y2, x3, y3)) + self.curpath.append(('c', cast(float, x1), cast(float, y1), + cast(float, x2), cast(float, y2), + cast(float, x3), cast(float, y3))) return - def do_v(self, x2: float, y2: float, x3: float, y3: float) -> None: + def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, + y3: PDFStackT) -> None: """Append curved segment to path (initial point replicated)""" - self.curpath.append(('v', x2, y2, x3, y3)) + self.curpath.append(('v', cast(float, x2), cast(float, y2), + cast(float, x3), cast(float, y3))) return - def do_y(self, x1: float, y1: float, x3: float, y3: float) -> None: + def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, + y3: PDFStackT) -> None: """Append curved segment to path (final point replicated)""" - self.curpath.append(('y', x1, y1, x3, y3)) + self.curpath.append(('y', cast(float, x1), cast(float, y1), + cast(float, x3), cast(float, y3))) return def do_h(self) -> None: @@ -512,8 +524,13 @@ def do_h(self) -> None: self.curpath.append(('h',)) return - def do_re(self, x: float, y: float, w: float, h: float) -> None: + def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, + h: PDFStackT) -> None: """Append rectangle to path""" + x = cast(float, x) + y = cast(float, y) + w = cast(float, w) + h = cast(float, h) self.curpath.append(('m', x, y)) self.curpath.append(('l', x+w, y)) self.curpath.append(('l', x+w, y+h)) @@ -591,7 +608,7 @@ def do_W_a(self) -> None: """Set clipping path using even-odd rule""" return - def do_CS(self, name: Any) -> None: + def do_CS(self, name: PDFStackT) -> None: """Set color space for stroking operations Introduced in PDF 1.1 @@ -603,7 +620,7 @@ def do_CS(self, name: Any) -> None: raise PDFInterpreterError('Undefined ColorSpace: %r' % name) return - def do_cs(self, name: Any) -> None: + def do_cs(self, name: PDFStackT) -> None: """Set color space for nonstroking operations""" try: self.ncs = self.csmap[literal_name(name)] @@ -612,34 +629,40 @@ def do_cs(self, name: Any) -> None: raise PDFInterpreterError('Undefined ColorSpace: %r' % name) return - def do_G(self, gray: float) -> None: + def do_G(self, gray: PDFStackT) -> None: """Set gray level for stroking operations""" - self.graphicstate.scolor = gray + self.graphicstate.scolor = cast(float, gray) return - def do_g(self, gray: float) -> None: + def do_g(self, gray: PDFStackT) -> None: """Set gray level for nonstroking operations""" - self.graphicstate.ncolor = gray + self.graphicstate.ncolor = cast(float, gray) return - def do_RG(self, r: float, g: float, b: float) -> None: + def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None: """Set RGB color for stroking operations""" - self.graphicstate.scolor = (r, g, b) + self.graphicstate.scolor = \ + (cast(float, r), cast(float, g), cast(float, b)) return - def do_rg(self, r: float, g: float, b: float) -> None: + def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None: """Set RGB color for nonstroking operations""" - self.graphicstate.ncolor = (r, g, b) + self.graphicstate.ncolor = \ + (cast(float, r), cast(float, g), cast(float, b)) return - def do_K(self, c: float, m: float, y: float, k: float) -> None: + def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT + ) -> None: """Set CMYK color for stroking operations""" - self.graphicstate.scolor = (c, m, y, k) + self.graphicstate.scolor = \ + (cast(float, c), cast(float, m), cast(float, y), cast(float, k)) return - def do_k(self, c: float, m: float, y: float, k: float) -> None: + def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT + ) -> None: """Set CMYK color for nonstroking operations""" - self.graphicstate.ncolor = (c, m, y, k) + self.graphicstate.ncolor = \ + (cast(float, c), cast(float, m), cast(float, y), cast(float, k)) return def do_SCN(self) -> None: @@ -650,7 +673,7 @@ def do_SCN(self) -> None: if settings.STRICT: raise PDFInterpreterError('No colorspace specified!') n = 1 - self.graphicstate.scolor = cast(Color, self.pop(n)) # Unchecked cast! + self.graphicstate.scolor = cast(Color, self.pop(n)) return def do_scn(self) -> None: @@ -661,7 +684,7 @@ def do_scn(self) -> None: if settings.STRICT: raise PDFInterpreterError('No colorspace specified!') n = 1 - self.graphicstate.ncolor = cast(Color, self.pop(n)) # Unchecked cast! + self.graphicstate.ncolor = cast(Color, self.pop(n)) return def do_SC(self) -> None: @@ -674,7 +697,7 @@ def do_sc(self) -> None: self.do_scn() return - def do_sh(self, name: Any) -> None: + def do_sh(self, name: object) -> None: """Paint area defined by shading pattern""" return @@ -700,24 +723,24 @@ def do_EX(self) -> None: """End compatibility section""" return - def do_MP(self, tag: Any) -> None: + def do_MP(self, tag: PDFStackT) -> None: """Define marked-content point""" - self.device.do_tag(tag) + self.device.do_tag(cast(PSLiteral, tag)) return - def do_DP(self, tag: Any, props: Any) -> None: + def do_DP(self, tag: PDFStackT, props: PDFStackT) -> None: """Define marked-content point with property list""" - self.device.do_tag(tag, props) + self.device.do_tag(cast(PSLiteral, tag), props) return - def do_BMC(self, tag: Any) -> None: + def do_BMC(self, tag: PDFStackT) -> None: """Begin marked-content sequence""" - self.device.begin_tag(tag) + self.device.begin_tag(cast(PSLiteral, tag)) return - def do_BDC(self, tag: Any, props: Any) -> None: + def do_BDC(self, tag: PDFStackT, props: PDFStackT) -> None: """Begin marked-content sequence with property list""" - self.device.begin_tag(tag, props) + self.device.begin_tag(cast(PSLiteral, tag), props) return def do_EMC(self) -> None: @@ -725,45 +748,45 @@ def do_EMC(self) -> None: self.device.end_tag() return - def do_Tc(self, space: float) -> None: + def do_Tc(self, space: PDFStackT) -> None: """Set character spacing. Character spacing is used by the Tj, TJ, and ' operators. :param space: a number expressed in unscaled text space units. """ - self.textstate.charspace = space + self.textstate.charspace = cast(float, space) return - def do_Tw(self, space: float) -> None: + def do_Tw(self, space: PDFStackT) -> None: """Set the word spacing. Word spacing is used by the Tj, TJ, and ' operators. :param space: a number expressed in unscaled text space units """ - self.textstate.wordspace = space + self.textstate.wordspace = cast(float, space) return - def do_Tz(self, scale: float) -> None: + def do_Tz(self, scale: PDFStackT) -> None: """Set the horizontal scaling. :param scale: is a number specifying the percentage of the normal width """ - self.textstate.scaling = scale + self.textstate.scaling = cast(float, scale) return - def do_TL(self, leading: float) -> None: + def do_TL(self, leading: PDFStackT) -> None: """Set the text leading. Text leading is used only by the T*, ', and " operators. :param leading: a number expressed in unscaled text space units """ - self.textstate.leading = -leading + self.textstate.leading = -cast(float, leading) return - def do_Tf(self, fontid: Any, fontsize: float) -> None: + def do_Tf(self, fontid: PDFStackT, fontsize: PDFStackT) -> None: """Set the text font :param fontid: the name of a font resource in the Font subdictionary @@ -776,41 +799,45 @@ def do_Tf(self, fontid: Any, fontsize: float) -> None: if settings.STRICT: raise PDFInterpreterError('Undefined Font id: %r' % fontid) self.textstate.font = self.rsrcmgr.get_font(None, {}) - self.textstate.fontsize = fontsize + self.textstate.fontsize = cast(float, fontsize) return - def do_Tr(self, render: int) -> None: + def do_Tr(self, render: PDFStackT) -> None: """Set the text rendering mode""" - self.textstate.render = render + self.textstate.render = cast(int, render) return - def do_Ts(self, rise: float) -> None: + def do_Ts(self, rise: PDFStackT) -> None: """Set the text rise :param rise: a number expressed in unscaled text space units """ - self.textstate.rise = rise + self.textstate.rise = cast(float, rise) return - def do_Td(self, tx: float, ty: float) -> None: + def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None: """Move text position""" + tx = cast(float, tx) + ty = cast(float, ty) (a, b, c, d, e, f) = self.textstate.matrix self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f) self.textstate.linematrix = (0, 0) return - def do_TD(self, tx: float, ty: float) -> None: + def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None: """Move text position and set leading""" + tx = cast(float, tx) + ty = cast(float, ty) (a, b, c, d, e, f) = self.textstate.matrix self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f) self.textstate.leading = ty self.textstate.linematrix = (0, 0) return - def do_Tm(self, a: float, b: float, c: float, d: float, e: float, f: float - ) -> None: + def do_Tm(self, a: PDFStackT, b: PDFStackT, c: PDFStackT, d: PDFStackT, + e: PDFStackT, f: PDFStackT) -> None: """Set text matrix and text line matrix""" - self.textstate.matrix = (a, b, c, d, e, f) + self.textstate.matrix = cast(Matrix, (a, b, c, d, e, f)) self.textstate.linematrix = (0, 0) return @@ -822,23 +849,23 @@ def do_T_a(self) -> None: self.textstate.linematrix = (0, 0) return - def do_TJ(self, seq: Iterable[Any]) -> None: + def do_TJ(self, seq: PDFStackT) -> None: """Show text, allowing individual glyph positioning""" if self.textstate.font is None: if settings.STRICT: raise PDFInterpreterError('No font specified!') return assert self.ncs is not None - self.device.render_string(self.textstate, seq, self.ncs, - self.graphicstate.copy()) + self.device.render_string(self.textstate, cast(PDFTextSeq, seq), + self.ncs, self.graphicstate.copy()) return - def do_Tj(self, s: Any) -> None: + def do_Tj(self, s: PDFStackT) -> None: """Show text""" self.do_TJ([s]) return - def do__q(self, s: Any) -> None: + def do__q(self, s: PDFStackT) -> None: """Move to next line and show text The ' (single quote) operator. @@ -847,7 +874,7 @@ def do__q(self, s: Any) -> None: self.do_TJ([s]) return - def do__w(self, aw: float, ac: float, s: Any) -> None: + def do__w(self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT) -> None: """Set word and character spacing, move to next line, and show text The " (double quote) operator. @@ -865,7 +892,7 @@ def do_ID(self) -> None: """Begin inline image data""" return - def do_EI(self, obj: Any) -> None: + def do_EI(self, obj: PDFStackT) -> None: """End inline image object""" if isinstance(obj, PDFStream) and 'W' in obj and 'H' in obj: iobjid = str(id(obj)) @@ -874,9 +901,9 @@ def do_EI(self, obj: Any) -> None: self.device.end_figure(iobjid) return - def do_Do(self, xobjid: Any) -> None: + def do_Do(self, xobjid_arg: PDFStackT) -> None: """Invoke named XObject""" - xobjid = literal_name(xobjid) + xobjid = cast(str, literal_name(xobjid_arg)) try: xobj = stream_value(self.xobjmap[xobjid]) except KeyError: @@ -887,9 +914,9 @@ def do_Do(self, xobjid: Any) -> None: subtype = xobj.get('Subtype') if subtype is LITERAL_FORM and 'BBox' in xobj: interpreter = self.dup() - bbox: Rect = list_value(xobj['BBox']) # type: ignore[assignment] - matrix: Matrix = list_value( # type: ignore[assignment] - xobj.get('Matrix', MATRIX_IDENTITY)) + bbox = cast(Rect, list_value(xobj['BBox'])) + matrix = cast(Matrix, list_value( + xobj.get('Matrix', MATRIX_IDENTITY))) # According to PDF reference 1.7 section 4.9.1, XObjects in # earlier PDFs (prior to v1.2) use the page's Resources entry # instead of having their own Resources entry. @@ -927,7 +954,8 @@ def process_page(self, page: PDFPage) -> None: self.device.end_page(page) return - def render_contents(self, resources: Any, streams: Sequence[Any], + def render_contents(self, resources: Dict[object, object], + streams: Sequence[object], ctm: Matrix = MATRIX_IDENTITY) -> None: """Render the content streams. @@ -940,7 +968,7 @@ def render_contents(self, resources: Any, streams: Sequence[Any], self.execute(list_value(streams)) return - def execute(self, streams: Sequence[Any]) -> None: + def execute(self, streams: Sequence[object]) -> None: try: parser = PDFContentParser(streams) except PSEOF: diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index 168e112f..a584b392 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -1,6 +1,6 @@ import logging -from typing import (Any, BinaryIO, Container, Dict, Iterator, List, Optional, - Tuple) +from pdfminer.utils import Rect +from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple import warnings from . import settings from .psparser import LIT @@ -34,7 +34,7 @@ class PDFPage: attrs: a dictionary of page attributes. contents: a list of PDFStream objects that represents the page content. lastmod: the last modified time of the page. - resources: a list of resources used by the page. + resources: a dictionary of resources used by the page. mediabox: the physical size of the page. cropbox: the crop rectangle of the page. rotate: the page rotation (in degree). @@ -42,7 +42,7 @@ class PDFPage: beads: a chain that represents natural reading order. """ - def __init__(self, doc: PDFDocument, pageid: Any, attrs: Any): + def __init__(self, doc: PDFDocument, pageid: object, attrs: object): """Initialize a page object. doc: a PDFDocument object. @@ -53,10 +53,11 @@ def __init__(self, doc: PDFDocument, pageid: Any, attrs: Any): self.pageid = pageid self.attrs = dict_value(attrs) self.lastmod = resolve1(self.attrs.get('LastModified')) - self.resources = resolve1(self.attrs.get('Resources', dict())) - self.mediabox = resolve1(self.attrs['MediaBox']) + self.resources: Dict[object, object] = \ + resolve1(self.attrs.get('Resources', dict())) + self.mediabox: Rect = resolve1(self.attrs['MediaBox']) if 'CropBox' in self.attrs: - self.cropbox = resolve1(self.attrs['CropBox']) + self.cropbox: Rect = resolve1(self.attrs['CropBox']) else: self.cropbox = self.mediabox self.rotate = (int_value(self.attrs.get('Rotate', 0))+360) % 360 @@ -68,7 +69,7 @@ def __init__(self, doc: PDFDocument, pageid: Any, attrs: Any): contents = [] if not isinstance(contents, list): contents = [contents] - self.contents: List[Any] = contents + self.contents: List[object] = contents return def __repr__(self) -> str: @@ -79,13 +80,16 @@ def __repr__(self) -> str: @classmethod def create_pages(cls, document: PDFDocument) -> Iterator["PDFPage"]: - def search(obj: Any, parent: Dict[str, Any] - ) -> Iterator[Tuple[int, Dict[Any, Any]]]: + def search(obj: object, parent: Dict[str, object] + ) -> Iterator[Tuple[int, + Dict[object, Dict[object, object]]]]: if isinstance(obj, int): objid = obj tree = dict_value(document.getobj(objid)).copy() else: - objid = obj.objid + # This looks broken. obj.objid means obj could be either + # PDFObjRef or PDFStream, but neither is valid for dict_value. + objid = obj.objid # type: ignore[attr-defined] tree = dict_value(obj).copy() for (k, v) in parent.items(): if k in cls.INHERITABLE_ATTRS and k not in tree: diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 12eca526..0e91b1ab 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -74,7 +74,7 @@ class PDFNotImplementedError(PDFException): class PDFObjRef(PDFObject): - def __init__(self, doc: Optional["PDFDocument"], objid: int, _: Any): + def __init__(self, doc: Optional["PDFDocument"], objid: int, _: object): if objid == 0: if settings.STRICT: raise PDFValueError('PDF object id cannot be 0.') @@ -85,7 +85,7 @@ def __init__(self, doc: Optional["PDFDocument"], objid: int, _: Any): def __repr__(self) -> str: return '' % (self.objid) - def resolve(self, default: Any = None) -> Any: + def resolve(self, default: object = None) -> Any: assert self.doc is not None try: return self.doc.getobj(self.objid) @@ -93,7 +93,7 @@ def resolve(self, default: Any = None) -> Any: return default -def resolve1(x: Any, default: Any = None) -> Any: +def resolve1(x: object, default: object = None) -> Any: """Resolves an object. If this is an array or dictionary, it may still contains @@ -104,7 +104,7 @@ def resolve1(x: Any, default: Any = None) -> Any: return x -def resolve_all(x: Any, default: Any = None) -> Any: +def resolve_all(x: object, default: object = None) -> Any: """Recursively resolves the given object and all the internals. Make sure there is no indirect reference within the nested object. @@ -120,7 +120,7 @@ def resolve_all(x: Any, default: Any = None) -> Any: return x -def decipher_all(decipher: DecipherCallable, objid: int, genno: int, x: Any +def decipher_all(decipher: DecipherCallable, objid: int, genno: int, x: object ) -> Any: """Recursively deciphers the given object. """ @@ -134,7 +134,7 @@ def decipher_all(decipher: DecipherCallable, objid: int, genno: int, x: Any return x -def int_value(x: Any) -> int: +def int_value(x: object) -> int: x = resolve1(x) if not isinstance(x, int): if settings.STRICT: @@ -143,7 +143,7 @@ def int_value(x: Any) -> int: return x -def float_value(x: Any) -> float: +def float_value(x: object) -> float: x = resolve1(x) if not isinstance(x, float): if settings.STRICT: @@ -152,7 +152,7 @@ def float_value(x: Any) -> float: return x -def num_value(x: Any) -> float: +def num_value(x: object) -> float: x = resolve1(x) if not isinstance(x, (int, float)): # == utils.isnumber(x) if settings.STRICT: @@ -161,7 +161,7 @@ def num_value(x: Any) -> float: return x -def uint_value(x: Any, n_bits: int) -> int: +def uint_value(x: object, n_bits: int) -> int: """Resolve number and interpret it as a two's-complement unsigned number""" xi = int_value(x) if xi > 0: @@ -170,7 +170,7 @@ def uint_value(x: Any, n_bits: int) -> int: return xi + cast(int, 2**n_bits) -def str_value(x: Any) -> bytes: +def str_value(x: object) -> bytes: x = resolve1(x) if not isinstance(x, bytes): if settings.STRICT: @@ -179,7 +179,7 @@ def str_value(x: Any) -> bytes: return x -def list_value(x: Any) -> Union[List[Any], Tuple[Any, ...]]: +def list_value(x: object) -> Union[List[Any], Tuple[Any, ...]]: x = resolve1(x) if not isinstance(x, (list, tuple)): if settings.STRICT: @@ -188,7 +188,7 @@ def list_value(x: Any) -> Union[List[Any], Tuple[Any, ...]]: return x -def dict_value(x: Any) -> Dict[Any, Any]: +def dict_value(x: object) -> Dict[Any, Any]: x = resolve1(x) if not isinstance(x, dict): if settings.STRICT: @@ -198,7 +198,7 @@ def dict_value(x: Any) -> Dict[Any, Any]: return x -def stream_value(x: Any) -> "PDFStream": +def stream_value(x: object) -> "PDFStream": x = resolve1(x) if not isinstance(x, PDFStream): if settings.STRICT: @@ -235,16 +235,16 @@ def __repr__(self) -> str: return '' % \ (self.objid, len(self.data), self.attrs) - def __contains__(self, name: Any) -> bool: + def __contains__(self, name: object) -> bool: return name in self.attrs def __getitem__(self, name: str) -> Any: return self.attrs[name] - def get(self, name: str, default: Any = None) -> Any: + def get(self, name: str, default: object = None) -> Any: return self.attrs.get(name, default) - def get_any(self, names: Iterable[str], default: Any = None) -> Any: + def get_any(self, names: Iterable[str], default: object = None) -> Any: for name in names: if name in self.attrs: return self.attrs[name] diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index 81192425..dd6dbddc 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -120,7 +120,7 @@ def intern(self, name: PSLiteral.NameType) -> _SymbolT: KEYWORD_DICT_END = KWD(b'>>') -def literal_name(x: Any) -> Any: +def literal_name(x: object) -> Any: if not isinstance(x, PSLiteral): if settings.STRICT: raise PSTypeError('Literal required: {!r}'.format(x)) @@ -136,7 +136,7 @@ def literal_name(x: Any) -> Any: return name -def keyword_name(x: Any) -> Any: +def keyword_name(x: object) -> Any: if not isinstance(x, PSKeyword): if settings.STRICT: raise PSTypeError('Keyword required: %r' % x) diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 0f9462e4..7b0af8dd 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -45,7 +45,7 @@ def __init__(self, filename: FileOrName, *args: Any, **kwargs: Any): def __enter__(self) -> AnyIO: return self.file_handler - def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any + def __exit__(self, exc_type: object, exc_val: object, exc_tb: object ) -> Literal[False]: if self.closing: self.file_handler.close() @@ -58,7 +58,7 @@ def make_compat_bytes(in_str: str) -> bytes: return in_str.encode() -def make_compat_str(o: Any) -> str: +def make_compat_str(o: object) -> str: """Converts everything to string, if bytes guessing the encoding.""" if isinstance(o, bytes): enc = chardet.detect(o) @@ -108,7 +108,7 @@ def paeth_predictor(left: int, above: int, upper_left: int) -> int: return upper_left -def apply_png_predictor(pred: Any, colors: int, columns: int, +def apply_png_predictor(pred: int, colors: int, columns: int, bitspercomponent: int, data: bytes) -> bytes: """Reverse the effect of the PNG predictor @@ -249,7 +249,7 @@ def apply_matrix_norm(m: Matrix, v: Point) -> Point: # Utility functions -def isnumber(x: Any) -> bool: +def isnumber(x: object) -> bool: return isinstance(x, (int, float)) @@ -453,7 +453,7 @@ def __iter__(self) -> Iterator[LTComponentT]: def __len__(self) -> int: return len(self._objs) - def __contains__(self, obj: Any) -> bool: + def __contains__(self, obj: object) -> bool: return obj in self._objs def _getrange(self, bbox: Rect) -> Iterator[Point]: