From ff4b6a9bd46b352583d823d39065652c9a6f05f4 Mon Sep 17 00:00:00 2001
From: Andrew Baumann <Andrew.Baumann@microsoft.com>
Date: Fri, 20 Aug 2021 22:49:06 -0700
Subject: [PATCH] turn on more strict checks, and untangle the layout mess with
 generics

Status:
$ mypy pdfminer
pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
pdfminer/pdfdevice.py:191: error: Argument 1 to "write" of "IO" has incompatible type "str"; expected "bytes"
pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
Found 5 errors in 4 files (checked 27 source files)

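pdfdevice.py:191 appears to be a real bug: a str is passed to write() on a
binary stream. This patch does not touch that line, so the error remains.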
---
 mypy.ini              | 18 +++++++++
 pdfminer/converter.py | 12 +++---
 pdfminer/layout.py    | 89 ++++++++++++++++++++++++-------------------
 pdfminer/pdfdevice.py | 36 ++++++++---------
 pdfminer/pdfinterp.py | 17 +++++----
 pdfminer/pdfpage.py   |  4 +-
 pdfminer/psparser.py  |  2 +-
 pdfminer/utils.py     | 14 ++++---
 8 files changed, 113 insertions(+), 79 deletions(-)
 create mode 100644 mypy.ini

diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 00000000..c276f0d3
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,18 @@
+[mypy]
+warn_unused_configs = True
+disallow_any_generics = True
+disallow_subclassing_any = True
+#disallow_untyped_calls = True
+#disallow_untyped_defs = True
+#disallow_incomplete_defs = True
+#check_untyped_defs = True
+disallow_untyped_decorators = True
+no_implicit_optional = True
+warn_redundant_casts = True
+warn_unused_ignores = True
+warn_return_any = True
+no_implicit_reexport = True
+strict_equality = True
+
+[mypy-pdfminer.*]
+ignore_missing_imports = True
diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index ec1735bf..0b978876 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -1,7 +1,7 @@
 import io
 import logging
 from pdfminer.pdfcolor import PDFColorSpace
-from typing import List
+from typing import Any, List, Optional, Sequence
 import re
 import sys
 
@@ -27,7 +27,7 @@
 from .pdfinterp import PDFGraphicState, PDFResourceManager
 from .pdfpage import PDFPage
 from .pdftypes import PDFStream
-from .utils import Matrix, Rect
+from .utils import Matrix, Rect, PathSegment
 from .utils import apply_matrix_pt
 from .utils import bbox2str
 from .utils import enc
@@ -41,7 +41,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
     ctm: Matrix
 
     def __init__(self, rsrcmgr: PDFResourceManager, pageno: int = 1,
-                 laparams: LAParams = None):
+                 laparams: Optional[LAParams] = None):
         PDFTextDevice.__init__(self, rsrcmgr)
         self.pageno = pageno
         self.laparams = laparams
@@ -70,7 +70,7 @@ def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
         self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
         return
 
-    def end_figure(self, _) -> None:
+    def end_figure(self, _: Any) -> None:
         fig = self.cur_item
         assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
         self.cur_item = self._stack.pop()
@@ -85,8 +85,8 @@ def render_image(self, name: str, stream: PDFStream) -> None:
         self.cur_item.add(item)
         return
 
-    def paint_path(self, gstate: PDFGraphicState, stroke, fill, evenodd, path
-                   ) -> None:
+    def paint_path(self, gstate: PDFGraphicState, stroke: bool, fill: bool,
+                   evenodd: bool, path: Sequence[PathSegment]) -> None:
         """Paint paths described in section 4.4 of the PDF reference manual"""
         shape = ''.join(x[0] for x in path)
 
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index 7f34f160..f1c5652e 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -1,7 +1,7 @@
 import heapq
 import logging
 from typing import (Any, Dict, Generic, Iterable, Iterator, List, Optional,
-                    Sequence, Set, Tuple, TypeVar, cast)
+                    Sequence, Set, Tuple, TypeVar, Union, cast)
 
 from .utils import INF
 from .utils import Matrix
@@ -296,8 +296,9 @@ def get_text(self) -> str:
 class LTChar(LTComponent, LTText):
     """Actual letter in the text as a Unicode string."""
 
-    def __init__(self, matrix: Matrix, font: PDFFont, fontsize, scaling, rise,
-                 text: str, textwidth, textdisp, ncs: PDFColorSpace,
+    def __init__(self, matrix: Matrix, font: PDFFont, fontsize: float,
+                 scaling: float, rise: float, text: str, textwidth: float,
+                 textdisp: Point, ncs: PDFColorSpace,
                  graphicstate: PDFGraphicState):
         LTText.__init__(self)
         self._text = text
@@ -351,15 +352,15 @@ def is_compatible(self, obj: Any) -> bool:
         return True
 
 
-LTContainerElement = TypeVar('LTContainerElement', LTItem, LTComponent)
+LTItemT = TypeVar('LTItemT', bound=LTItem)
 
 
-class LTContainer(LTComponent, Generic[LTContainerElement]):
+class LTContainer(LTComponent, Generic[LTItemT]):
     """Object that can be extended and analyzed"""
 
     def __init__(self, bbox: Rect):
         LTComponent.__init__(self, bbox)
-        self._objs: List[LTContainerElement] = []
+        self._objs: List[LTItemT] = []
         return
 
     def __iter__(self):
@@ -368,11 +369,11 @@ def __iter__(self):
     def __len__(self):
         return len(self._objs)
 
-    def add(self, obj: LTContainerElement) -> None:
+    def add(self, obj: LTItemT) -> None:
         self._objs.append(obj)
         return
 
-    def extend(self, objs: Iterable[LTContainerElement]) -> None:
+    def extend(self, objs: Iterable[LTItemT]) -> None:
         for obj in objs:
             self.add(obj)
         return
@@ -383,19 +384,21 @@ def analyze(self, laparams: LAParams) -> None:
         return
 
 
-class LTExpandableContainer(LTContainer):
+class LTExpandableContainer(LTContainer[LTItemT]):
     def __init__(self):
         LTContainer.__init__(self, (+INF, +INF, -INF, -INF))
         return
 
-    def add(self, obj: LTComponent) -> None:
-        LTContainer.add(self, obj)
+    # Incompatible override: we take an LTComponent (with bounding box), but
+    # the superclass LTContainer only considers LTItem (no bounding box).
+    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
+        LTContainer.add(self, cast(LTItemT, obj))
         self.set_bbox((min(self.x0, obj.x0), min(self.y0, obj.y0),
                        max(self.x1, obj.x1), max(self.y1, obj.y1)))
         return
 
 
-class LTTextContainer(LTExpandableContainer, LTText):
+class LTTextContainer(LTExpandableContainer[LTItemT], LTText):
     def __init__(self):
         LTText.__init__(self)
         LTExpandableContainer.__init__(self)
@@ -406,7 +409,10 @@ def get_text(self) -> str:
                        if isinstance(obj, LTText))
 
 
-class LTTextLine(LTTextContainer):
+TextLineElement = Union[LTChar, LTAnno]
+
+
+class LTTextLine(LTTextContainer[TextLineElement]):
     """Contains a list of LTChar objects that represent a single text line.
 
     The characters are aligned either horizontally or vertically, depending on
@@ -414,7 +420,7 @@ class LTTextLine(LTTextContainer):
     """
 
     def __init__(self, word_margin: float):
-        LTTextContainer.__init__(self)
+        super().__init__()
         self.word_margin = word_margin
         return
 
@@ -428,27 +434,28 @@ def analyze(self, laparams: LAParams) -> None:
         LTContainer.add(self, LTAnno('\n'))
         return
 
-    def find_neighbors(self, plane, ratio):
+    def find_neighbors(self, plane: Plane, ratio: float) -> List["LTTextLine"]:
         raise NotImplementedError
 
 
 class LTTextLineHorizontal(LTTextLine):
-    def __init__(self, word_margin):
+    def __init__(self, word_margin: float):
         LTTextLine.__init__(self, word_margin)
-        self._x1 = +INF
+        self._x1: float = +INF
         return
 
-    def add(self, obj: LTComponent) -> None:
+    # Incompatible override: we take an LTComponent (with bounding box), but
+    # LTContainer only considers LTItem (no bounding box).
+    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
         if isinstance(obj, LTChar) and self.word_margin:
             margin = self.word_margin * max(obj.width, obj.height)
             if self._x1 < obj.x0 - margin:
                 LTContainer.add(self, LTAnno(' '))
         self._x1 = obj.x1
-        LTTextLine.add(self, obj)
+        super().add(obj)
         return
 
-    def find_neighbors(self, plane: Plane, ratio: float
-                       ) -> List["LTTextLineHorizontal"]:
+    def find_neighbors(self, plane: Plane, ratio: float) -> List[LTTextLine]:
         """
         Finds neighboring LTTextLineHorizontals in the plane.
 
@@ -494,22 +501,23 @@ def _is_same_height_as(self, other: LTComponent, tolerance: float = 0
 
 
 class LTTextLineVertical(LTTextLine):
-    def __init__(self, word_margin):
+    def __init__(self, word_margin: float):
         LTTextLine.__init__(self, word_margin)
-        self._y0 = -INF
+        self._y0: float = -INF
         return
 
-    def add(self, obj: LTComponent) -> None:
+    # Incompatible override: we take an LTComponent (with bounding box), but
+    # LTContainer only considers LTItem (no bounding box).
+    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
         if isinstance(obj, LTChar) and self.word_margin:
             margin = self.word_margin * max(obj.width, obj.height)
             if obj.y1 + margin < self._y0:
                 LTContainer.add(self, LTAnno(' '))
         self._y0 = obj.y0
-        LTTextLine.add(self, obj)
+        super().add(obj)
         return
 
-    def find_neighbors(self, plane: Plane, ratio: float
-                       ) -> List["LTTextLineVertical"]:
+    def find_neighbors(self, plane: Plane, ratio: float) -> List[LTTextLine]:
         """
         Finds neighboring LTTextLineVerticals in the plane.
 
@@ -553,7 +561,7 @@ def _is_same_width_as(self, other: LTComponent, tolerance: float) -> bool:
         return abs(other.width - self.width) <= tolerance
 
 
-class LTTextBox(LTTextContainer):
+class LTTextBox(LTTextContainer[LTTextLine]):
     """Represents a group of text chunks in a rectangular area.
 
     Note that this box is created by geometric analysis and does not
@@ -563,7 +571,7 @@ class LTTextBox(LTTextContainer):
 
     def __init__(self):
         LTTextContainer.__init__(self)
-        self.index = -1
+        self.index: int = -1
         return
 
     def __repr__(self):
@@ -574,7 +582,7 @@ def __repr__(self):
 
 class LTTextBoxHorizontal(LTTextBox):
     def analyze(self, laparams: LAParams) -> None:
-        LTTextBox.analyze(self, laparams)
+        super().analyze(laparams)
         self._objs.sort(key=lambda obj: -obj.y1)
         return
 
@@ -584,7 +592,7 @@ def get_writing_mode(self) -> str:
 
 class LTTextBoxVertical(LTTextBox):
     def analyze(self, laparams: LAParams) -> None:
-        LTTextBox.analyze(self, laparams)
+        super().analyze(laparams)
         self._objs.sort(key=lambda obj: -obj.x1)
         return
 
@@ -592,16 +600,19 @@ def get_writing_mode(self) -> str:
         return 'tb-rl'
 
 
-class LTTextGroup(LTTextContainer):
-    def __init__(self, objs: Iterable[LTContainerElement]):
-        LTTextContainer.__init__(self)
+TextGroupElement = Union[LTTextBox, "LTTextGroup"]
+
+
+class LTTextGroup(LTTextContainer[TextGroupElement]):
+    def __init__(self, objs: Iterable[TextGroupElement]):
+        super().__init__()
         self.extend(objs)
         return
 
 
 class LTTextGroupLRTB(LTTextGroup):
     def analyze(self, laparams: LAParams) -> None:
-        LTTextGroup.analyze(self, laparams)
+        super().analyze(laparams)
         # reorder the objects from top-left to bottom-right.
         self._objs.sort(
             key=lambda obj: (1 - laparams.boxes_flow) * obj.x0
@@ -611,7 +622,7 @@ def analyze(self, laparams: LAParams) -> None:
 
 class LTTextGroupTBRL(LTTextGroup):
     def analyze(self, laparams: LAParams) -> None:
-        LTTextGroup.analyze(self, laparams)
+        super().analyze(laparams)
         # reorder the objects from top-right to bottom-left.
         self._objs.sort(
             key=lambda obj: - (1 + laparams.boxes_flow) * (obj.x0 + obj.x1)
@@ -619,7 +630,7 @@ def analyze(self, laparams: LAParams) -> None:
         return
 
 
-class LTLayoutContainer(LTContainer):
+class LTLayoutContainer(LTContainer[LTComponent]):
     def __init__(self, bbox: Rect):
         LTContainer.__init__(self, bbox)
         self.groups: Optional[List[LTTextGroup]] = None
@@ -782,8 +793,8 @@ def isany(obj1: LTComponent, obj2: LTComponent) -> Set[LTComponent]:
             objs = set(plane.find((x0, y0, x1, y1)))
             return objs.difference((obj1, obj2))
 
-        dists: List[Tuple[bool, float, int, int, LTTextContainer,
-                          LTTextContainer]] = []
+        dists: List[Tuple[bool, float, int, int, Union[LTTextBox, LTTextGroup],
+                          Union[LTTextBox, LTTextGroup]]] = []
         for i in range(len(boxes)):
             box1 = boxes[i]
             for j in range(i+1, len(boxes)):
diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py
index e800d555..a2cb3487 100644
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@@ -1,4 +1,4 @@
-from typing import (Any, IO, Iterable, List, Optional, Sequence, Tuple,
+from typing import (Any, BinaryIO, Iterable, List, Optional, Sequence, Tuple,
                     TYPE_CHECKING)
 from . import utils
 from .utils import Matrix, Point, Rect
@@ -39,13 +39,13 @@ def set_ctm(self, ctm: Matrix) -> None:
         self.ctm = ctm
         return
 
-    def begin_tag(self, tag: Any, props=None) -> None:
+    def begin_tag(self, tag: Any, props: Any = None) -> None:
         return
 
     def end_tag(self) -> None:
         return
 
-    def do_tag(self, tag: Any, props=None) -> None:
+    def do_tag(self, tag: Any, props: Any = None) -> None:
         return
 
     def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
@@ -68,7 +68,7 @@ def paint_path(self, graphicstate: "PDFGraphicState", stroke: bool,
     def render_image(self, name: str, stream: PDFStream) -> None:
         return
 
-    def render_string(self, textstate: "PDFTextState", seq: Iterable,
+    def render_string(self, textstate: "PDFTextState", seq: Iterable[Any],
                       ncs: PDFColorSpace, graphicstate: "PDFGraphicState"
                       ) -> None:
         return
@@ -76,7 +76,7 @@ def render_string(self, textstate: "PDFTextState", seq: Iterable,
 
 class PDFTextDevice(PDFDevice):
 
-    def render_string(self, textstate: "PDFTextState", seq: Iterable,
+    def render_string(self, textstate: "PDFTextState", seq: Iterable[Any],
                       ncs: PDFColorSpace, graphicstate: "PDFGraphicState"
                       ) -> None:
         assert self.ctm is not None
@@ -103,10 +103,11 @@ def render_string(self, textstate: "PDFTextState", seq: Iterable,
                 graphicstate)
         return
 
-    def render_string_horizontal(self, seq: Iterable, matrix: Matrix,
-                                 pos: Point, font: PDFFont, fontsize: float,
-                                 scaling: float, charspace: float,
-                                 wordspace: float, rise: float, dxscale: float,
+    def render_string_horizontal(self, seq: Iterable[Any],
+                                 matrix: Matrix, pos: Point, font: PDFFont,
+                                 fontsize: float, scaling: float,
+                                 charspace: float, wordspace: float,
+                                 rise: float, dxscale: float,
                                  ncs: PDFColorSpace,
                                  graphicstate: "PDFGraphicState") -> Point:
         (x, y) = pos
@@ -127,10 +128,11 @@ def render_string_horizontal(self, seq: Iterable, matrix: Matrix,
                     needcharspace = True
         return (x, y)
 
-    def render_string_vertical(self, seq: Iterable, matrix: Matrix, pos: Point,
-                               font: PDFFont, fontsize: float, scaling: float,
-                               charspace: float, wordspace: float, rise: float,
-                               dxscale: float, ncs: PDFColorSpace,
+    def render_string_vertical(self, seq: Iterable[Any], matrix: Matrix,
+                               pos: Point, font: PDFFont, fontsize: float,
+                               scaling: float, charspace: float,
+                               wordspace: float, rise: float, dxscale: float,
+                               ncs: PDFColorSpace,
                                graphicstate: "PDFGraphicState") -> Point:
         (x, y) = pos
         needcharspace = False
@@ -158,7 +160,7 @@ def render_char(self, matrix: Matrix, font: PDFFont, fontsize: float,
 
 class TagExtractor(PDFDevice):
 
-    def __init__(self, rsrcmgr: "PDFResourceManager", outfp: IO,
+    def __init__(self, rsrcmgr: "PDFResourceManager", outfp: BinaryIO,
                  codec: str = 'utf-8'):
         PDFDevice.__init__(self, rsrcmgr)
         self.outfp = outfp
@@ -167,7 +169,7 @@ def __init__(self, rsrcmgr: "PDFResourceManager", outfp: IO,
         self._stack: List[Any] = []
         return
 
-    def render_string(self, textstate: "PDFTextState", seq: Iterable,
+    def render_string(self, textstate: "PDFTextState", seq: Iterable[Any],
                       ncs: PDFColorSpace, graphicstate: "PDFGraphicState"
                       ) -> None:
         font = textstate.font
@@ -200,7 +202,7 @@ def end_page(self, page: PDFPage) -> None:
         self.pageno += 1
         return
 
-    def begin_tag(self, tag: Any, props=None) -> None:
+    def begin_tag(self, tag: Any, props: Any = None) -> None:
         s = ''
         if isinstance(props, dict):
             s = ''.join(' {}="{}"'.format(utils.enc(k), utils.enc(str(v)))
@@ -217,7 +219,7 @@ def end_tag(self) -> None:
         self.outfp.write(utils.make_compat_bytes(out_s))
         return
 
-    def do_tag(self, tag: Any, props=None) -> None:
+    def do_tag(self, tag: Any, props: Any = None) -> None:
         self.begin_tag(tag, props)
         self._stack.pop(-1)
         return
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 2015ccd6..01902971 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -1,6 +1,7 @@
 import re
 import logging
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import (Any, Dict, Iterable, List, Mapping, Optional, Sequence,
+                    Tuple, Union)
 from io import BytesIO
 from .cmapdb import CMapDB
 from .cmapdb import CMap
@@ -32,7 +33,7 @@
 from .pdffont import PDFCIDFont
 from .pdfcolor import PDFColorSpace
 from .pdfcolor import PREDEFINED_COLORSPACE
-from .utils import Matrix, Point
+from .utils import Matrix, Point, PathSegment
 from .utils import choplist
 from .utils import mult_matrix
 from .utils import MATRIX_IDENTITY
@@ -179,7 +180,7 @@ def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
                 raise
             return CMap()
 
-    def get_font(self, objid: Any, spec) -> PDFFont:
+    def get_font(self, objid: Any, spec: Mapping[str, Any]) -> PDFFont:
         if objid and objid in self._cached_fonts:
             font = self._cached_fonts[objid]
         else:
@@ -340,7 +341,7 @@ def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice):
     def dup(self) -> "PDFPageInterpreter":
         return self.__class__(self.rsrcmgr, self.device)
 
-    def init_resources(self, resources) -> None:
+    def init_resources(self, resources: Any) -> None:
         """Prepare the fonts and XObjects listed in the Resource attribute."""
         self.resources = resources
         self.fontmap = {}
@@ -349,7 +350,7 @@ def init_resources(self, resources) -> None:
         if not resources:
             return
 
-        def get_colorspace(spec) -> Optional[PDFColorSpace]:
+        def get_colorspace(spec: Any) -> Optional[PDFColorSpace]:
             if isinstance(spec, list):
                 name = literal_name(spec[0])
             else:
@@ -391,7 +392,7 @@ def init_state(self, ctm: Matrix) -> None:
         self.device.set_ctm(self.ctm)
         self.textstate = PDFTextState()
         self.graphicstate = PDFGraphicState()
-        self.curpath: List[Tuple[str, float, float]] = []
+        self.curpath: List[PathSegment] = []
         # argstack: stack for command arguments.
         self.argstack: List[Any] = []
         # set some global states.
@@ -819,7 +820,7 @@ def do_T_a(self):
         self.textstate.linematrix = (0, 0)
         return
 
-    def do_TJ(self, seq: Iterable):
+    def do_TJ(self, seq: Iterable[Any]) -> None:
         """Show text, allowing individual glyph positioning"""
         if self.textstate.font is None:
             if settings.STRICT:
@@ -923,7 +924,7 @@ def process_page(self, page: PDFPage) -> None:
         self.device.end_page(page)
         return
 
-    def render_contents(self, resources, streams: Sequence,
+    def render_contents(self, resources: Any, streams: Sequence[Any],
                         ctm: Matrix = MATRIX_IDENTITY) -> None:
         """Render the content streams.
 
diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py
index b5b89a53..bfcd013b 100644
--- a/pdfminer/pdfpage.py
+++ b/pdfminer/pdfpage.py
@@ -41,7 +41,7 @@ class PDFPage:
       beads: a chain that represents natural reading order.
     """
 
-    def __init__(self, doc: PDFDocument, pageid: Any, attrs):
+    def __init__(self, doc: PDFDocument, pageid: Any, attrs: Any):
         """Initialize a page object.
 
         doc: a PDFDocument object.
@@ -67,7 +67,7 @@ def __init__(self, doc: PDFDocument, pageid: Any, attrs):
             contents = []
         if not isinstance(contents, list):
             contents = [contents]
-        self.contents: List = contents
+        self.contents: List[Any] = contents
         return
 
     def __repr__(self) -> str:
diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py
index caed19df..5668edca 100644
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@@ -198,7 +198,7 @@ def close(self) -> None:
     def tell(self) -> int:
         return self.bufpos+self.charpos
 
-    def poll(self, pos=None, n=80) -> None:
+    def poll(self, pos: Optional[int] = None, n: int = 80) -> None:
         pos0 = self.fp.tell()
         if not pos:
             pos = self.bufpos+self.charpos
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
index 807ce11b..25f09728 100644
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@@ -5,7 +5,7 @@
 import pathlib
 import struct
 from typing import (Any, Callable, Dict, Iterable, Iterator, List, Optional,
-                    Set, Tuple, TypeVar, Union, TYPE_CHECKING)
+                    Set, Tuple, TypeVar, Union, TYPE_CHECKING, cast)
 from html import escape
 
 if TYPE_CHECKING:
@@ -71,7 +71,8 @@ def shorten_str(s: str, size: int) -> str:
 
 
 def compatible_encode_method(bytesorstring: Union[bytes, str],
-                             encoding='utf-8', erraction='ignore') -> str:
+                             encoding: str = 'utf-8',
+                             erraction: str = 'ignore') -> str:
     """When Py2 str.encode is called, it often means bytes.encode in Py3.
 
      This does either.
@@ -127,6 +128,7 @@ def apply_png_predictor(pred: Any, colors: int, columns: int,
 Point = Tuple[float, float]
 Rect = Tuple[float, float, float, float]
 Matrix = Tuple[float, float, float, float, float, float]
+PathSegment = Tuple[str, float, float]
 
 #  Matrix operations
 MATRIX_IDENTITY: Matrix = (1, 0, 0, 1, 0, 0)
@@ -242,13 +244,13 @@ def nunpack(s: bytes, default: int = 0) -> int:
     elif length == 1:
         return ord(s)
     elif length == 2:
-        return struct.unpack('>H', s)[0]
+        return cast(int, struct.unpack('>H', s)[0])
     elif length == 3:
-        return struct.unpack('>L', b'\x00' + s)[0]
+        return cast(int, struct.unpack('>L', b'\x00' + s)[0])
     elif length == 4:
-        return struct.unpack('>L', s)[0]
+        return cast(int, struct.unpack('>L', s)[0])
     elif length == 8:
-        return struct.unpack('>Q', s)[0]
+        return cast(int, struct.unpack('>Q', s)[0])
     else:
         raise TypeError('invalid length: %d' % length)