From 9406040d8e4d627dab3d53ab88b16be43f20bcf1 Mon Sep 17 00:00:00 2001
From: Andrew Baumann <0xabu@users.noreply.github.com>
Date: Sat, 9 Oct 2021 07:23:28 -0700
Subject: [PATCH] Add type annotations (#661)

Squashed commit of the following:

commit fa229f7b7591c07aea4e5a4545f9e0c34246e1cd
Merge: eaab3c6 c3e3499
Author: Andrew Baumann <ab@ab.id.au>
Date:   Mon Sep 6 20:33:06 2021 -0700

    Merge branch 'develop' into mypy (and fixed types)

commit eaab3c65e2e3ab5f1f400cfc5186a3834c4ffe34
Author: Andrew Baumann <ab@ab.id.au>
Date:   Mon Sep 6 20:00:45 2021 -0700

    reformat all multi-line function defs to one-arg-per-line

commit 3fe2b69eed9197009d9da6776462f580ebf0dfa3
Author: Andrew Baumann <ab@ab.id.au>
Date:   Mon Sep 6 15:58:48 2021 -0700

    ccitt nit -- avoid casting needlessly

commit 15983d8c1e7162632fde43752c9d1c15938cd980
Author: Andrew Baumann <ab@ab.id.au>
Date:   Mon Sep 6 15:58:36 2021 -0700

    tweak CHANGELOG

commit 13dc0babf782938e7d5b5e482d4c5adf92d82702
Author: Andrew Baumann <ab@ab.id.au>
Date:   Mon Sep 6 15:43:46 2021 -0700

    add failing tests for dumppdf crash

commit 6b509c517876b8c15ac5a98a963884e23bd2e4d8
Author: Andrew Baumann <ab@ab.id.au>
Date:   Mon Sep 6 15:24:23 2021 -0700

    ccitt: apply misc PR feedback

commit feb031ba86d3f22e41cfbbda13f17c039359f1e6
Author: Andrew Baumann <ab@ab.id.au>
Date:   Mon Sep 6 15:18:26 2021 -0700

    add missing None return type to all __init__ methods

commit c0d62d6c54c7ec37b40bea54a3f6a7a618ec0ec6
Author: Andrew Baumann <ab@ab.id.au>
Date:   Mon Sep 6 15:13:08 2021 -0700

    minor cleanup, remove a few more Any types

commit b52a0594e1998a492c172538a9b35491c5fc5f52
Author: Andrew Baumann <ab@ab.id.au>
Date:   Sun Sep 5 22:37:28 2021 -0700

    tighten up types, avoid Any in favour of explicit casts

commit e58fd48bd14f31bebd2de8259f12630ac02756d6
Author: Andrew Baumann <ab@ab.id.au>
Date:   Sun Sep 5 14:10:49 2021 -0700

    annotate ccitt.py, and fix one definite bug (array.tostring was renamed tobytes)

commit 605290633e55595e5e0045840df5c5b1d9de843a
Author: Andrew Baumann <ab@ab.id.au>
Date:   Sat Sep 4 22:37:38 2021 -0700

    python 3.7 back-compat

commit 4dbcf8760f8a1d3e3d99f085476f86e6a043c80c
Author: Andrew Baumann <ab@ab.id.au>
Date:   Sat Sep 4 22:32:43 2021 -0700

    annotate pdfminer.jbig2

commit 0d40b7c03a8028dc44acd3f457eac71abd681827
Author: Andrew Baumann <ab@ab.id.au>
Date:   Sat Sep 4 22:31:33 2021 -0700

    annotate pdf2txt.py

commit 5f82eb4f5646b5d1285252689191e0a14557ec7b
Author: Andrew Baumann <ab@ab.id.au>
Date:   Sat Sep 4 09:16:31 2021 -0700

    cleanup: make Plane generic

commit 624fc92b88473ff36a174760883f34c22109da2b
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 23:16:51 2021 -0700

    bluntly ignore calls to cryptography.hazmat

commit 96b20439c169f40dbb114cabba6a582ad1ebe91e
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 23:01:06 2021 -0700

    finish annotating, and disallow_untyped_defs for pdfminer.* _except_ ccitt and jbig2

commit 0ab586347861b72b1d16880dc9293f9ad597e20a
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 21:51:56 2021 -0700

    annotate pdffont

commit 4b689f1bcbdaf654feb9de81023e318ca310a12e
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 18:30:02 2021 -0700

    annotate a couple more scripts; document sketchy code

commit 291981ff3d273952ec9c92ef8ab948473558b787
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 15:02:01 2021 -0700

    pacify flake8

commit 45d2ce91ff333f3b7e34322b16e9c52b99b7a972
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 14:31:48 2021 -0700

    annotate dumppdf, and comment likely bugs

commit 7278d83851cb336a1be3803a0993b5ec0ad39b4c
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 13:49:58 2021 -0700

    enable mypy on tests and tools, fix one implicit reexport bug

commit 4a83166ef4e4733cd2113f43188b585a4fda392b
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 13:25:59 2021 -0700

    pdfdocument: per dumppdf.py, get_dest accepts either bytes or str

commit 43701e1bee068df98f378a253c9c2150ee4ad9f7
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 13:25:00 2021 -0700

    layout: LAParams.boxes_flow may be None

commit 164f81652f1788e74837466f0ab593e94079bc0f
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 09:45:09 2021 -0700

    add whitespace, pacify flake8

commit 893b9fb9ec918032b36a30456fc0b7a217da86d8
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 09:40:33 2021 -0700

    support old Python without typing.Protocol

commit dc245084102b7b04c3f5599d75b5d62ba4290787
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Sep 3 09:12:03 2021 -0700

    Move "# type: ignore" comments to fix mypy on Python < 3.8

    The placement of these comments got more flexible in 3.8 due to
    https://github.com/python/mypy/issues/1032

    Satisfying older Python and fitting in flake8's 79-character line
    limit was quite a challenge!

commit da03afe7bd2cf3336e611f467f1c901455940ae8
Author: Andrew Baumann <ab@ab.id.au>
Date:   Thu Sep 2 22:59:58 2021 -0700

    fix text output from HTMLConverter

commit 5401276a2ed3b74a385ebcab5152485224146161
Author: Andrew Baumann <ab@ab.id.au>
Date:   Thu Sep 2 22:40:22 2021 -0700

    annotate high_level.py and the immediately-reachable internal APIs (mostly converters)

commit cc490513f8f17a7adc0bcbab2e0e86f37e832300
Author: Andrew Baumann <ab@ab.id.au>
Date:   Thu Sep 2 17:04:35 2021 -0700

     * expand and improve annotations in cmap, encryption/decompression and fonts
     * disallow untyped calls; this way, we have a core set of
       typed code that can grow over time
       (just not for ccitt, because there's a ton of work lurking there)
     * expand "# type: ignore" comments to suppress a specific error code

commit 92df54ba1d53d5dbbd5442757dd85be5b1851f99
Author: Andrew Baumann <ab@ab.id.au>
Date:   Wed Sep 1 20:50:59 2021 -0700

    update CHANGELOG

commit f72aaead45d0615e472a9b3190c9551a6b67b36e
Merge: ff787a9 8ea9f10
Author: Andrew Baumann <ab@ab.id.au>
Date:   Wed Sep 1 20:47:03 2021 -0700

    Merge branch 'develop' into mypy

commit ff787a93986c60361536a97182a41774f4a53ac3
Author: Andrew Baumann <ab@ab.id.au>
Date:   Sat Aug 21 21:46:14 2021 -0700

    be more precise about types on ps/pdf stacks, remove most of the Any annotations

commit be1550189e10717f6827dbb7009d6e8c8b3f4c62
Author: Andrew Baumann <ab@ab.id.au>
Date:   Sat Aug 21 10:13:58 2021 -0700

    silence missing imports, (maybe?) hook to tox

commit ff4b6a9bd46b352583d823d39065652c9a6f05f4
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Aug 20 22:49:06 2021 -0700

    turn on more strict checks, and untangle the layout mess with generics

    Status:
    $ mypy pdfminer
    pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
    pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
    pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
    pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
    pdfminer/pdfdevice.py:191: error: Argument 1 to "write" of "IO" has incompatible type "str"; expected "bytes"
    pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
    Found 5 errors in 4 files (checked 27 source files)

    pdfdevice.py:191 appears to be a real bug

commit 5c9c0b19d26ae391aea0e69c2c819261cc04460c
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Aug 20 17:22:41 2021 -0700

    finish annotating layout

commit 0e6871c16abb29df2868ab145b4ce451b4b6c777
Author: Andrew Baumann <ab@ab.id.au>
Date:   Fri Aug 20 16:54:46 2021 -0700

    general progress on annotations
     * finish utils
     * annotate more of pdfinterp, pdfdevice
     * document reason for # type: ignore comments
     * fix cyclic imports
     * satisfy flake8

commit 17d59f42917fbf9b2b2eb844d3e83a8f2a3f123a
Author: Andrew Baumann <ab@ab.id.au>
Date:   Thu Aug 19 21:38:50 2021 -0700

    WIP on type annotations

    With the possible exception of psparser.py, this is far from complete.

    $ mypy pdfminer
    pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
    pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
    pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
    pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
    pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
---
 CHANGELOG.md                |   1 +
 docs/source/conf.py         |   3 +-
 mypy.ini                    |  27 +++
 pdfminer/_saslprep.py       |   5 +-
 pdfminer/arcfour.py         |   7 +-
 pdfminer/ascii85.py         |   6 +-
 pdfminer/ccitt.py           | 102 ++++----
 pdfminer/cmapdb.py          | 114 +++++----
 pdfminer/converter.py       | 300 +++++++++++++++++-------
 pdfminer/encodingdb.py      |  23 +-
 pdfminer/high_level.py      |  63 +++--
 pdfminer/image.py           |  36 ++-
 pdfminer/jbig2.py           | 157 ++++++++-----
 pdfminer/latin_enc.py       |   7 +-
 pdfminer/layout.py          | 394 ++++++++++++++++++++-----------
 pdfminer/lzw.py             |  25 +-
 pdfminer/pdfcolor.py        |   7 +-
 pdfminer/pdfdevice.py       | 176 ++++++++++----
 pdfminer/pdfdocument.py     | 335 ++++++++++++++++----------
 pdfminer/pdffont.py         | 326 ++++++++++++++++----------
 pdfminer/pdfinterp.py       | 453 ++++++++++++++++++++++--------------
 pdfminer/pdfpage.py         |  45 ++--
 pdfminer/pdfparser.py       |  35 +--
 pdfminer/pdftypes.py        | 111 ++++++---
 pdfminer/psparser.py        | 148 +++++++-----
 pdfminer/runlength.py       |   2 +-
 pdfminer/utils.py           | 153 ++++++++----
 setup.py                    |   2 +-
 tests/test_tools_dumppdf.py |  12 +-
 tools/conv_afm.py           |   2 +-
 tools/conv_cmap.py          |   2 +-
 tools/conv_glyphlist.py     |   2 +-
 tools/dumppdf.py            |  89 ++++---
 tools/pdf2txt.py            |  55 +++--
 tools/pdfdiff.py            |  13 +-
 tools/pdfstats.py           |  15 +-
 tools/prof.py               |  22 +-
 tox.ini                     |   1 +
 38 files changed, 2165 insertions(+), 1111 deletions(-)
 create mode 100644 mypy.ini

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a508b25..29059ac5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Added
 - Add support for PDF 2.0 (ISO 32000-2) AES-256 encryption ([#614](https://github.com/pdfminer/pdfminer.six/pull/614))
 - Support for Paeth PNG filter compression (predictor value = 4) ([#537](https://github.com/pdfminer/pdfminer.six/pull/537))
+- Type annotations ([#661](https://github.com/pdfminer/pdfminer.six/pull/661))
 
 ### Fixed
 - `KeyError` when `'Encrypt'` but not `'ID'` present in `trailer` ([#594](https://github.com/pdfminer/pdfminer.six/pull/594))
diff --git a/docs/source/conf.py b/docs/source/conf.py
index fcbf595d..ccb6ec1d 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -12,6 +12,7 @@
 
 import os
 import sys
+from typing import List
 
 import pdfminer
 
@@ -48,7 +49,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
+exclude_patterns: List[str] = []
 
 
 # -- Options for HTML output -------------------------------------------------
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 00000000..eaddd861
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,27 @@
+[mypy]
+warn_unused_configs = True
+disallow_any_generics = True
+disallow_subclassing_any = True
+disallow_untyped_calls = True
+disallow_incomplete_defs = True
+disallow_untyped_decorators = True
+no_implicit_optional = True
+warn_redundant_casts = True
+warn_return_any = True
+no_implicit_reexport = True
+strict_equality = True
+
+# This seems impossible to turn on in a version-independent manner
+warn_unused_ignores = False
+
+[mypy-pdfminer.*]
+disallow_untyped_defs = True
+
+[mypy-cryptography.hazmat.*]
+ignore_missing_imports = True
+
+[mypy-nose.*]
+ignore_missing_imports = True
+
+[mypy-setuptools]
+ignore_missing_imports = True
diff --git a/pdfminer/_saslprep.py b/pdfminer/_saslprep.py
index 067a077f..32c68cb2 100644
--- a/pdfminer/_saslprep.py
+++ b/pdfminer/_saslprep.py
@@ -21,10 +21,11 @@
 __all__ = ['saslprep']
 
 import stringprep
+from typing import Callable, Tuple
 import unicodedata
 
 # RFC4013 section 2.3 prohibited output.
-_PROHIBITED = (
+_PROHIBITED: Tuple[Callable[[str], bool], ...] = (
     # A strict reading of RFC 4013 requires table c12 here, but
     # characters from it are mapped to SPACE in the Map step. Can
     # normalization reintroduce them somehow?
@@ -39,7 +40,7 @@
     stringprep.in_table_c9)
 
 
-def saslprep(data: str, prohibit_unassigned_code_points=True) -> str:
+def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str:
     """An implementation of RFC4013 SASLprep.
     :param data:
         The string to SASLprep.
diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py
index e40b0804..dd2697ce 100644
--- a/pdfminer/arcfour.py
+++ b/pdfminer/arcfour.py
@@ -5,9 +5,12 @@
 """
 
 
+from typing import Sequence
+
+
 class Arcfour:
 
-    def __init__(self, key):
+    def __init__(self, key: Sequence[int]) -> None:
         # because Py3 range is not indexable
         s = [i for i in range(256)]
         j = 0
@@ -19,7 +22,7 @@ def __init__(self, key):
         (self.i, self.j) = (0, 0)
         return
 
-    def process(self, data):
+    def process(self, data: bytes) -> bytes:
         (i, j) = (self.i, self.j)
         s = self.s
         r = b''
diff --git a/pdfminer/ascii85.py b/pdfminer/ascii85.py
index cde3f908..7c7c757f 100644
--- a/pdfminer/ascii85.py
+++ b/pdfminer/ascii85.py
@@ -9,7 +9,7 @@
 
 
 # ascii85decode(data)
-def ascii85decode(data):
+def ascii85decode(data: bytes) -> bytes:
     """
     In ASCII85 encoding, every four bytes are encoded with five ASCII
     letters, using 85 different types of characters (as 256**4 < 85**5).
@@ -47,7 +47,7 @@ def ascii85decode(data):
 trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
 
 
-def asciihexdecode(data):
+def asciihexdecode(data: bytes) -> bytes:
     """
     ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
     For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
@@ -57,7 +57,7 @@ def asciihexdecode(data):
     the EOD marker after reading an odd number of hexadecimal digits, it
     will behave as if a 0 followed the last digit.
     """
-    def decode(x):
+    def decode(x: bytes) -> bytes:
         i = int(x, 16)
         return bytes((i,))
 
diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py
index 1c00eb0e..4dadc813 100644
--- a/pdfminer/ccitt.py
+++ b/pdfminer/ccitt.py
@@ -11,25 +11,39 @@
 #    FOR GROUP 4 FACSIMILE APPARATUS"
 
 
-import sys
 import array
+from typing import (Any, Callable, Dict, Iterator, List, MutableSequence,
+                    Optional, Sequence, Union, cast)
 
 
-def get_bytes(data):
+def get_bytes(data: bytes) -> Iterator[int]:
     yield from data
 
 
+# Workaround https://github.com/python/mypy/issues/731
+BitParserState = MutableSequence[Any]
+# A better definition (not supported by mypy) would be:
+# BitParserState = MutableSequence[Union["BitParserState", int, str, None]]
+
+
 class BitParser:
-    def __init__(self):
+    _state: BitParserState
+
+    # _accept is declared Optional solely as a workaround for
+    # https://github.com/python/mypy/issues/708
+    _accept: Optional[Callable[[Any], BitParserState]]
+
+    def __init__(self) -> None:
         self._pos = 0
         return
 
     @classmethod
-    def add(cls, root, v, bits):
-        p = root
+    def add(cls, root: BitParserState, v: Union[int, str], bits: str) -> None:
+        p: BitParserState = root
         b = None
         for i in range(len(bits)):
             if 0 < i:
+                assert b is not None
                 if p[b] is None:
                     p[b] = [None, None]
                 p = p[b]
@@ -37,16 +51,17 @@ def add(cls, root, v, bits):
                 b = 1
             else:
                 b = 0
+        assert b is not None
         p[b] = v
         return
 
-    def feedbytes(self, data):
+    def feedbytes(self, data: bytes) -> None:
         for byte in get_bytes(data):
             for m in (128, 64, 32, 16, 8, 4, 2, 1):
                 self._parse_bit(byte & m)
         return
 
-    def _parse_bit(self, x):
+    def _parse_bit(self, x: object) -> None:
         if x:
             v = self._state[1]
         else:
@@ -55,6 +70,7 @@ def _parse_bit(self, x):
         if isinstance(v, list):
             self._state = v
         else:
+            assert self._accept is not None
             self._state = self._accept(v)
         return
 
@@ -318,14 +334,16 @@ class InvalidData(Exception):
     class ByteSkip(Exception):
         pass
 
-    def __init__(self, width, bytealign=False):
+    _color: int
+
+    def __init__(self, width: int, bytealign: bool = False) -> None:
         BitParser.__init__(self)
         self.width = width
         self.bytealign = bytealign
         self.reset()
         return
 
-    def feedbytes(self, data):
+    def feedbytes(self, data: bytes) -> None:
         for byte in get_bytes(data):
             try:
                 for m in (128, 64, 32, 16, 8, 4, 2, 1):
@@ -337,7 +355,7 @@ def feedbytes(self, data):
                 break
         return
 
-    def _parse_mode(self, mode):
+    def _parse_mode(self, mode: object) -> BitParserState:
         if mode == 'p':
             self._do_pass()
             self._flush_line()
@@ -361,7 +379,7 @@ def _parse_mode(self, mode):
         else:
             raise self.InvalidData(mode)
 
-    def _parse_horiz1(self, n):
+    def _parse_horiz1(self, n: Any) -> BitParserState:
         if n is None:
             raise self.InvalidData
         self._n1 += n
@@ -374,7 +392,7 @@ def _parse_horiz1(self, n):
         else:
             return self.BLACK
 
-    def _parse_horiz2(self, n):
+    def _parse_horiz2(self, n: Any) -> BitParserState:
         if n is None:
             raise self.InvalidData
         self._n2 += n
@@ -389,7 +407,7 @@ def _parse_horiz2(self, n):
         else:
             return self.BLACK
 
-    def _parse_uncompressed(self, bits):
+    def _parse_uncompressed(self, bits: Optional[str]) -> BitParserState:
         if not bits:
             raise self.InvalidData
         if bits.startswith('T'):
@@ -401,10 +419,10 @@ def _parse_uncompressed(self, bits):
             self._do_uncompressed(bits)
             return self.UNCOMPRESSED
 
-    def _get_bits(self):
+    def _get_bits(self) -> str:
         return ''.join(str(b) for b in self._curline[:self._curpos])
 
-    def _get_refline(self, i):
+    def _get_refline(self, i: int) -> str:
         if i < 0:
             return '[]'+''.join(str(b) for b in self._refline)
         elif len(self._refline) <= i:
@@ -414,7 +432,7 @@ def _get_refline(self, i):
                     '['+str(self._refline[i])+']' +
                     ''.join(str(b) for b in self._refline[i+1:]))
 
-    def reset(self):
+    def reset(self) -> None:
         self._y = 0
         self._curline = array.array('b', [1]*self.width)
         self._reset_line()
@@ -422,18 +440,18 @@ def reset(self):
         self._state = self.MODE
         return
 
-    def output_line(self, y, bits):
+    def output_line(self, y: int, bits: Sequence[int]) -> None:
         print(y, ''.join(str(b) for b in bits))
         return
 
-    def _reset_line(self):
+    def _reset_line(self) -> None:
         self._refline = self._curline
         self._curline = array.array('b', [1]*self.width)
         self._curpos = -1
         self._color = 1
         return
 
-    def _flush_line(self):
+    def _flush_line(self) -> None:
         if self.width <= self._curpos:
             self.output_line(self._y, self._curline)
             self._y += 1
@@ -442,7 +460,7 @@ def _flush_line(self):
                 raise self.ByteSkip
         return
 
-    def _do_vertical(self, dx):
+    def _do_vertical(self, dx: int) -> None:
         x1 = self._curpos+1
         while 1:
             if x1 == 0:
@@ -467,7 +485,7 @@ def _do_vertical(self, dx):
         self._color = 1-self._color
         return
 
-    def _do_pass(self):
+    def _do_pass(self) -> None:
         x1 = self._curpos+1
         while 1:
             if x1 == 0:
@@ -494,7 +512,7 @@ def _do_pass(self):
         self._curpos = x1
         return
 
-    def _do_horizontal(self, n1, n2):
+    def _do_horizontal(self, n1: int, n2: int) -> None:
         if self._curpos < 0:
             self._curpos = 0
         x = self._curpos
@@ -511,7 +529,7 @@ def _do_horizontal(self, n1, n2):
         self._curpos = x
         return
 
-    def _do_uncompressed(self, bits):
+    def _do_uncompressed(self, bits: str) -> None:
         for c in bits:
             self._curline[self._curpos] = int(c)
             self._curpos += 1
@@ -521,32 +539,33 @@ def _do_uncompressed(self, bits):
 
 class CCITTFaxDecoder(CCITTG4Parser):
 
-    def __init__(self, width, bytealign=False, reversed=False):
+    def __init__(self, width: int, bytealign: bool = False,
+                 reversed: bool = False) -> None:
         CCITTG4Parser.__init__(self, width, bytealign=bytealign)
         self.reversed = reversed
         self._buf = b''
         return
 
-    def close(self):
+    def close(self) -> bytes:
         return self._buf
 
-    def output_line(self, y, bits):
-        bytes = array.array('B', [0]*((len(bits)+7)//8))
+    def output_line(self, y: int, bits: Sequence[int]) -> None:
+        arr = array.array('B', [0]*((len(bits)+7)//8))
         if self.reversed:
             bits = [1-b for b in bits]
         for (i, b) in enumerate(bits):
             if b:
-                bytes[i//8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
-        self._buf += bytes.tostring()
+                arr[i//8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
+        self._buf += arr.tobytes()
         return
 
 
-def ccittfaxdecode(data, params):
+def ccittfaxdecode(data: bytes, params: Dict[str, object]) -> bytes:
     K = params.get('K')
-    cols = params.get('Columns')
-    bytealign = params.get('EncodedByteAlign')
-    reversed = params.get('BlackIs1')
     if K == -1:
+        cols = cast(int, params.get('Columns'))
+        bytealign = cast(bool, params.get('EncodedByteAlign'))
+        reversed = cast(bool, params.get('BlackIs1'))
         parser = CCITTFaxDecoder(cols, bytealign=bytealign, reversed=reversed)
     else:
         raise ValueError(K)
@@ -555,19 +574,20 @@ def ccittfaxdecode(data, params):
 
 
 # test
-def main(argv):
+def main(argv: List[str]) -> None:
     if not argv[1:]:
         import unittest
-        return unittest.main()
+        unittest.main()
+        return
 
     class Parser(CCITTG4Parser):
-        def __init__(self, width, bytealign=False):
-            import pygame
+        def __init__(self, width: int, bytealign: bool = False) -> None:
+            import pygame  # type: ignore[import]
             CCITTG4Parser.__init__(self, width, bytealign=bytealign)
             self.img = pygame.Surface((self.width, 1000))
             return
 
-        def output_line(self, y, bits):
+        def output_line(self, y: int, bits: Sequence[int]) -> None:
             for (x, b) in enumerate(bits):
                 if b:
                     self.img.set_at((x, y), (255, 255, 255))
@@ -575,7 +595,7 @@ def output_line(self, y, bits):
                     self.img.set_at((x, y), (0, 0, 0))
             return
 
-        def close(self):
+        def close(self) -> None:
             import pygame
             pygame.image.save(self.img, 'out.bmp')
             return
@@ -587,7 +607,3 @@ def close(self):
         parser.close()
         fp.close()
     return
-
-
-if __name__ == '__main__':
-    sys.exit(main(sys.argv))
diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py
index 35ced14f..853d877a 100644
--- a/pdfminer/cmapdb.py
+++ b/pdfminer/cmapdb.py
@@ -16,9 +16,12 @@
 import pickle as pickle
 import struct
 import logging
+from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List,
+                    MutableMapping, Optional, TextIO, Tuple, Union, cast)
 from .psparser import PSStackParser
 from .psparser import PSSyntaxError
 from .psparser import PSEOF
+from .psparser import PSKeyword
 from .psparser import PSLiteral
 from .psparser import literal_name
 from .psparser import KWD
@@ -38,44 +41,48 @@ class CMapBase:
 
     debug = 0
 
-    def __init__(self, **kwargs):
-        self.attrs = kwargs.copy()
+    def __init__(self, **kwargs: object) -> None:
+        self.attrs: MutableMapping[str, object] = kwargs.copy()
         return
 
-    def is_vertical(self):
+    def is_vertical(self) -> bool:
         return self.attrs.get('WMode', 0) != 0
 
-    def set_attr(self, k, v):
+    def set_attr(self, k: str, v: object) -> None:
         self.attrs[k] = v
         return
 
-    def add_code2cid(self, code, cid):
+    def add_code2cid(self, code: str, cid: int) -> None:
         return
 
-    def add_cid2unichr(self, cid, code):
+    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
+                       ) -> None:
         return
 
-    def use_cmap(self, cmap):
+    def use_cmap(self, cmap: "CMapBase") -> None:
         return
 
+    def decode(self, code: bytes) -> Iterable[int]:
+        raise NotImplementedError
+
 
 class CMap(CMapBase):
 
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Union[str, int]) -> None:
         CMapBase.__init__(self, **kwargs)
-        self.code2cid = {}
+        self.code2cid: Dict[int, object] = {}
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<CMap: %s>' % self.attrs.get('CMapName')
 
-    def use_cmap(self, cmap):
+    def use_cmap(self, cmap: CMapBase) -> None:
         assert isinstance(cmap, CMap), str(type(cmap))
 
-        def copy(dst, src):
+        def copy(dst: Dict[int, object], src: Dict[int, object]) -> None:
             for (k, v) in src.items():
                 if isinstance(v, dict):
-                    d = {}
+                    d: Dict[int, object] = {}
                     dst[k] = d
                     copy(d, v)
                 else:
@@ -83,20 +90,24 @@ def copy(dst, src):
         copy(self.code2cid, cmap.code2cid)
         return
 
-    def decode(self, code):
+    def decode(self, code: bytes) -> Iterator[int]:
         log.debug('decode: %r, %r', self, code)
         d = self.code2cid
         for i in iter(code):
             if i in d:
-                d = d[i]
-                if isinstance(d, int):
-                    yield d
+                x = d[i]
+                if isinstance(x, int):
+                    yield x
                     d = self.code2cid
+                else:
+                    d = cast(Dict[int, object], x)
             else:
                 d = self.code2cid
         return
 
-    def dump(self, out=sys.stdout, code2cid=None, code=None):
+    def dump(self, out: TextIO = sys.stdout,
+             code2cid: Optional[Dict[int, object]] = None,
+             code: Tuple[int, ...] = ()) -> None:
         if code2cid is None:
             code2cid = self.code2cid
             code = ()
@@ -105,13 +116,13 @@ def dump(self, out=sys.stdout, code2cid=None, code=None):
             if isinstance(v, int):
                 out.write('code %r = cid %d\n' % (c, v))
             else:
-                self.dump(out=out, code2cid=v, code=c)
+                self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c)
         return
 
 
 class IdentityCMap(CMapBase):
 
-    def decode(self, code):
+    def decode(self, code: bytes) -> Tuple[int, ...]:
         n = len(code)//2
         if n:
             return struct.unpack('>%dH' % n, code)
@@ -121,7 +132,7 @@ def decode(self, code):
 
 class IdentityCMapByte(IdentityCMap):
 
-    def decode(self, code):
+    def decode(self, code: bytes) -> Tuple[int, ...]:
         n = len(code)
         if n:
             return struct.unpack('>%dB' % n, code)
@@ -131,19 +142,19 @@ def decode(self, code):
 
 class UnicodeMap(CMapBase):
 
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Union[str, int]) -> None:
         CMapBase.__init__(self, **kwargs)
-        self.cid2unichr = {}
+        self.cid2unichr: Dict[int, str] = {}
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
 
-    def get_unichr(self, cid):
+    def get_unichr(self, cid: int) -> str:
         log.debug('get_unichr: %r, %r', self, cid)
         return self.cid2unichr[cid]
 
-    def dump(self, out=sys.stdout):
+    def dump(self, out: TextIO = sys.stdout) -> None:
         for (k, v) in sorted(self.cid2unichr.items()):
             out.write('cid %d = unicode %r\n' % (k, v))
         return
@@ -151,29 +162,31 @@ def dump(self, out=sys.stdout):
 
 class FileCMap(CMap):
 
-    def add_code2cid(self, code, cid):
+    def add_code2cid(self, code: str, cid: int) -> None:
         assert isinstance(code, str) and isinstance(cid, int),\
             str((type(code), type(cid)))
         d = self.code2cid
         for c in code[:-1]:
-            c = ord(c)
-            if c in d:
-                d = d[c]
+            ci = ord(c)
+            if ci in d:
+                d = cast(Dict[int, object], d[ci])
             else:
-                t = {}
-                d[c] = t
+                t: Dict[int, object] = {}
+                d[ci] = t
                 d = t
-        c = ord(code[-1])
-        d[c] = cid
+        ci = ord(code[-1])
+        d[ci] = cid
         return
 
 
 class FileUnicodeMap(UnicodeMap):
 
-    def add_cid2unichr(self, cid, code):
+    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
+                       ) -> None:
         assert isinstance(cid, int), str(type(cid))
         if isinstance(code, PSLiteral):
             # Interpret as an Adobe glyph name.
+            assert isinstance(code.name, str)
             self.cid2unichr[cid] = name2unicode(code.name)
         elif isinstance(code, bytes):
             # Interpret as UTF-16BE.
@@ -187,8 +200,8 @@ def add_cid2unichr(self, cid, code):
 
 class PyCMap(CMap):
 
-    def __init__(self, name, module):
-        CMap.__init__(self, CMapName=name)
+    def __init__(self, name: str, module: Any) -> None:
+        super().__init__(CMapName=name)
         self.code2cid = module.CODE2CID
         if module.IS_VERTICAL:
             self.attrs['WMode'] = 1
@@ -197,8 +210,8 @@ def __init__(self, name, module):
 
 class PyUnicodeMap(UnicodeMap):
 
-    def __init__(self, name, module, vertical):
-        UnicodeMap.__init__(self, CMapName=name)
+    def __init__(self, name: str, module: Any, vertical: bool) -> None:
+        super().__init__(CMapName=name)
         if vertical:
             self.cid2unichr = module.CID2UNICHR_V
             self.attrs['WMode'] = 1
@@ -209,14 +222,14 @@ def __init__(self, name, module, vertical):
 
 class CMapDB:
 
-    _cmap_cache = {}
-    _umap_cache = {}
+    _cmap_cache: Dict[str, PyCMap] = {}
+    _umap_cache: Dict[str, List[PyUnicodeMap]] = {}
 
     class CMapNotFound(CMapError):
         pass
 
     @classmethod
-    def _load_data(cls, name):
+    def _load_data(cls, name: str) -> Any:
         name = name.replace("\0", "")
         filename = '%s.pickle.gz' % name
         log.info('loading: %r', name)
@@ -234,7 +247,7 @@ def _load_data(cls, name):
             raise CMapDB.CMapNotFound(name)
 
     @classmethod
-    def get_cmap(cls, name):
+    def get_cmap(cls, name: str) -> CMapBase:
         if name == 'Identity-H':
             return IdentityCMap(WMode=0)
         elif name == 'Identity-V':
@@ -252,7 +265,7 @@ def get_cmap(cls, name):
         return cmap
 
     @classmethod
-    def get_unicode_map(cls, name, vertical=False):
+    def get_unicode_map(cls, name: str, vertical: bool = False) -> UnicodeMap:
         try:
             return cls._umap_cache[name][vertical]
         except KeyError:
@@ -263,16 +276,16 @@ def get_unicode_map(cls, name, vertical=False):
         return cls._umap_cache[name][vertical]
 
 
-class CMapParser(PSStackParser):
+class CMapParser(PSStackParser[PSKeyword]):
 
-    def __init__(self, cmap, fp):
+    def __init__(self, cmap: CMapBase, fp: BinaryIO) -> None:
         PSStackParser.__init__(self, fp)
         self.cmap = cmap
         # some ToUnicode maps don't have "begincmap" keyword.
         self._in_cmap = True
         return
 
-    def run(self):
+    def run(self) -> None:
         try:
             self.nextobject()
         except PSEOF:
@@ -296,7 +309,7 @@ def run(self):
     KEYWORD_BEGINNOTDEFRANGE = KWD(b'beginnotdefrange')
     KEYWORD_ENDNOTDEFRANGE = KWD(b'endnotdefrange')
 
-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
         if token is self.KEYWORD_BEGINCMAP:
             self._in_cmap = True
             self.popall()
@@ -380,6 +393,7 @@ def do_keyword(self, pos, token):
                     for i in range(e1-s1+1):
                         self.cmap.add_cid2unichr(s1+i, code[i])
                 else:
+                    assert isinstance(code, bytes)
                     var = code[-4:]
                     base = nunpack(var)
                     prefix = code[:-4]
@@ -410,7 +424,7 @@ def do_keyword(self, pos, token):
         return
 
 
-def main(argv):
+def main(argv: List[str]) -> None:
     args = argv[1:]
     for fname in args:
         fp = open(fname, 'rb')
@@ -422,4 +436,4 @@ def main(argv):
 
 
 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    main(sys.argv)
diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index 812f6682..bffbb89e 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -1,13 +1,19 @@
 import io
 import logging
+from pdfminer.pdfcolor import PDFColorSpace
+from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO,
+                    Tuple, TypeVar, Union, cast)
 import re
 
 from . import utils
+from .layout import LAParams, LTComponent, TextGroupElement
 from .layout import LTChar
 from .layout import LTContainer
 from .layout import LTCurve
 from .layout import LTFigure
 from .layout import LTImage
+from .layout import LTItem
+from .layout import LTLayoutContainer
 from .layout import LTLine
 from .layout import LTPage
 from .layout import LTRect
@@ -17,25 +23,38 @@
 from .layout import LTTextGroup
 from .layout import LTTextLine
 from .pdfdevice import PDFTextDevice
+from .pdffont import PDFFont
 from .pdffont import PDFUnicodeNotDefined
+from .pdfinterp import PDFGraphicState, PDFResourceManager
+from .pdfpage import PDFPage
+from .pdftypes import PDFStream
+from .utils import AnyIO, Point, Matrix, Rect, PathSegment
 from .utils import apply_matrix_pt
 from .utils import bbox2str
 from .utils import enc
 from .utils import mult_matrix
+from .image import ImageWriter
 
 log = logging.getLogger(__name__)
 
 
 class PDFLayoutAnalyzer(PDFTextDevice):
-
-    def __init__(self, rsrcmgr, pageno=1, laparams=None):
+    cur_item: LTLayoutContainer
+    ctm: Matrix
+
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None
+    ) -> None:
         PDFTextDevice.__init__(self, rsrcmgr)
         self.pageno = pageno
         self.laparams = laparams
-        self._stack = []
+        self._stack: List[LTLayoutContainer] = []
         return
 
-    def begin_page(self, page, ctm):
+    def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
         (x0, y0, x1, y1) = page.mediabox
         (x0, y0) = apply_matrix_pt(ctm, (x0, y0))
         (x1, y1) = apply_matrix_pt(ctm, (x1, y1))
@@ -43,7 +62,7 @@ def begin_page(self, page, ctm):
         self.cur_item = LTPage(self.pageno, mediabox)
         return
 
-    def end_page(self, page):
+    def end_page(self, page: PDFPage) -> None:
         assert not self._stack, str(len(self._stack))
         assert isinstance(self.cur_item, LTPage), str(type(self.cur_item))
         if self.laparams is not None:
@@ -52,19 +71,19 @@ def end_page(self, page):
         self.receive_layout(self.cur_item)
         return
 
-    def begin_figure(self, name, bbox, matrix):
+    def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
         self._stack.append(self.cur_item)
         self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
         return
 
-    def end_figure(self, _):
+    def end_figure(self, _: str) -> None:
         fig = self.cur_item
         assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
         self.cur_item = self._stack.pop()
         self.cur_item.add(fig)
         return
 
-    def render_image(self, name, stream):
+    def render_image(self, name: str, stream: PDFStream) -> None:
         assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
         item = LTImage(name, stream,
                        (self.cur_item.x0, self.cur_item.y0,
@@ -72,7 +91,14 @@ def render_image(self, name, stream):
         self.cur_item.add(item)
         return
 
-    def paint_path(self, gstate, stroke, fill, evenodd, path):
+    def paint_path(
+        self,
+        gstate: PDFGraphicState,
+        stroke: bool,
+        fill: bool,
+        evenodd: bool,
+        path: Sequence[PathSegment]
+    ) -> None:
         """Paint paths described in section 4.4 of the PDF reference manual"""
         shape = ''.join(x[0] for x in path)
 
@@ -90,7 +116,8 @@ def paint_path(self, gstate, stroke, fill, evenodd, path):
             # And, per Section 4.4's Table 4.9, all other path commands place
             # their point-position in their final two arguments. (Any preceding
             # arguments represent control points on Bézier curves.)
-            raw_pts = [p[-2:] if p[0] != 'h' else path[0][-2:] for p in path]
+            raw_pts = [cast(Point, p[-2:] if p[0] != 'h' else path[0][-2:])
+                       for p in path]
             pts = [apply_matrix_pt(self.ctm, pt) for pt in raw_pts]
 
             if shape in {'mlh', 'ml'}:
@@ -123,8 +150,17 @@ def paint_path(self, gstate, stroke, fill, evenodd, path):
                                 gstate.scolor, gstate.ncolor)
                 self.cur_item.add(curve)
 
-    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
-                    graphicstate):
+    def render_char(
+        self,
+        matrix: Matrix,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        rise: float,
+        cid: int,
+        ncs: PDFColorSpace,
+        graphicstate: PDFGraphicState
+    ) -> float:
         try:
             text = font.to_unichr(cid)
             assert isinstance(text, str), str(type(text))
@@ -137,40 +173,56 @@ def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
         self.cur_item.add(item)
         return item.adv
 
-    def handle_undefined_char(self, font, cid):
+    def handle_undefined_char(self, font: PDFFont, cid: int) -> str:
         log.info('undefined: %r, %r', font, cid)
         return '(cid:%d)' % cid
 
-    def receive_layout(self, ltpage):
+    def receive_layout(self, ltpage: LTPage) -> None:
         return
 
 
 class PDFPageAggregator(PDFLayoutAnalyzer):
-    def __init__(self, rsrcmgr, pageno=1, laparams=None):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None
+    ) -> None:
         PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
                                    laparams=laparams)
-        self.result = None
+        self.result: Optional[LTPage] = None
         return
 
-    def receive_layout(self, ltpage):
+    def receive_layout(self, ltpage: LTPage) -> None:
         self.result = ltpage
         return
 
-    def get_result(self):
+    def get_result(self) -> LTPage:
+        assert self.result is not None
         return self.result
 
 
-class PDFConverter(PDFLayoutAnalyzer):
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1,
-                 laparams=None):
+# Some PDFConverter children support only binary I/O
+IOType = TypeVar('IOType', TextIO, BinaryIO, AnyIO)
+
+
+class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        outfp: IOType,
+        codec: str = 'utf-8',
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None
+    ) -> None:
         PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
                                    laparams=laparams)
-        self.outfp = outfp
+        self.outfp: IOType = outfp
         self.codec = codec
         self.outfp_binary = self._is_binary_stream(self.outfp)
 
     @staticmethod
-    def _is_binary_stream(outfp):
+    def _is_binary_stream(outfp: AnyIO) -> bool:
         """Test if an stream is binary or not"""
         if 'b' in getattr(outfp, 'mode', ''):
             return True
@@ -187,24 +239,33 @@ def _is_binary_stream(outfp):
         return True
 
 
-class TextConverter(PDFConverter):
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
-                 showpageno=False, imagewriter=None):
-        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
-                              laparams=laparams)
+class TextConverter(PDFConverter[AnyIO]):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        outfp: AnyIO,
+        codec: str = 'utf-8',
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None,
+        showpageno: bool = False,
+        imagewriter: Optional[ImageWriter] = None
+    ) -> None:
+        super().__init__(rsrcmgr, outfp, codec=codec, pageno=pageno,
+                         laparams=laparams)
         self.showpageno = showpageno
         self.imagewriter = imagewriter
         return
 
-    def write_text(self, text):
+    def write_text(self, text: str) -> None:
         text = utils.compatible_encode_method(text, self.codec, 'ignore')
         if self.outfp_binary:
-            text = text.encode()
-        self.outfp.write(text)
+            cast(BinaryIO, self.outfp).write(text.encode())
+        else:
+            cast(TextIO, self.outfp).write(text)
         return
 
-    def receive_layout(self, ltpage):
-        def render(item):
+    def receive_layout(self, ltpage: LTPage) -> None:
+        def render(item: LTItem) -> None:
             if isinstance(item, LTContainer):
                 for child in item:
                     render(child)
@@ -224,17 +285,24 @@ def render(item):
     # Some dummy functions to save memory/CPU when all that is wanted
     # is text.  This stops all the image and drawing output from being
     # recorded and taking up RAM.
-    def render_image(self, name, stream):
+    def render_image(self, name: str, stream: PDFStream) -> None:
         if self.imagewriter is None:
             return
         PDFConverter.render_image(self, name, stream)
         return
 
-    def paint_path(self, gstate, stroke, fill, evenodd, path):
+    def paint_path(
+        self,
+        gstate: PDFGraphicState,
+        stroke: bool,
+        fill: bool,
+        evenodd: bool,
+        path: Sequence[PathSegment]
+    ) -> None:
         return
 
 
-class HTMLConverter(PDFConverter):
+class HTMLConverter(PDFConverter[AnyIO]):
     RECT_COLORS = {
         'figure': 'yellow',
         'textline': 'magenta',
@@ -249,12 +317,30 @@ class HTMLConverter(PDFConverter):
         'char': 'black',
     }
 
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
-                 scale=1, fontscale=1.0, layoutmode='normal', showpageno=True,
-                 pagemargin=50, imagewriter=None, debug=0, rect_colors=None,
-                 text_colors=None):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        outfp: AnyIO,
+        codec: str = 'utf-8',
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None,
+        scale: float = 1,
+        fontscale: float = 1.0,
+        layoutmode: str = 'normal',
+        showpageno: bool = True,
+        pagemargin: int = 50,
+        imagewriter: Optional[ImageWriter] = None,
+        debug: int = 0,
+        rect_colors: Optional[Dict[str, str]] = None,
+        text_colors: Optional[Dict[str, str]] = None
+    ) -> None:
         PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
                               laparams=laparams)
+
+        # write() assumes a codec for binary I/O, or no codec for text I/O.
+        if self.outfp_binary == (not self.codec):
+            raise ValueError("Codec is required for a binary I/O output")
+
         if text_colors is None:
             text_colors = {'char': 'black'}
         if rect_colors is None:
@@ -271,19 +357,20 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
         if debug:
             self.rect_colors.update(self.RECT_COLORS)
             self.text_colors.update(self.TEXT_COLORS)
-        self._yoffset = self.pagemargin
-        self._font = None
-        self._fontstack = []
+        self._yoffset: float = self.pagemargin
+        self._font: Optional[Tuple[str, float]] = None
+        self._fontstack: List[Optional[Tuple[str, float]]] = []
         self.write_header()
         return
 
-    def write(self, text):
+    def write(self, text: str) -> None:
         if self.codec:
-            text = text.encode(self.codec)
-        self.outfp.write(text)
+            cast(BinaryIO, self.outfp).write(text.encode(self.codec))
+        else:
+            cast(TextIO, self.outfp).write(text)
         return
 
-    def write_header(self):
+    def write_header(self) -> None:
         self.write('<html><head>\n')
         if self.codec:
             s = '<meta http-equiv="Content-Type" content="text/html; ' \
@@ -294,7 +381,7 @@ def write_header(self):
         self.write('</head><body>\n')
         return
 
-    def write_footer(self):
+    def write_footer(self) -> None:
         page_links = ['<a href="#{}">{}</a>'.format(i, i)
                       for i in range(1, self.pageno)]
         s = '<div style="position:absolute; top:0px;">Page: %s</div>\n' % \
@@ -303,28 +390,49 @@ def write_footer(self):
         self.write('</body></html>\n')
         return
 
-    def write_text(self, text):
+    def write_text(self, text: str) -> None:
         self.write(enc(text))
         return
 
-    def place_rect(self, color, borderwidth, x, y, w, h):
-        color = self.rect_colors.get(color)
-        if color is not None:
+    def place_rect(
+        self,
+        color: str,
+        borderwidth: int,
+        x: float,
+        y: float,
+        w: float,
+        h: float
+    ) -> None:
+        color2 = self.rect_colors.get(color)
+        if color2 is not None:
             s = '<span style="position:absolute; border: %s %dpx solid; ' \
                 'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' % \
-                (color, borderwidth, x * self.scale,
+                (color2, borderwidth, x * self.scale,
                  (self._yoffset - y) * self.scale, w * self.scale,
                  h * self.scale)
             self.write(
                 s)
         return
 
-    def place_border(self, color, borderwidth, item):
+    def place_border(
+        self,
+        color: str,
+        borderwidth: int,
+        item: LTComponent
+    ) -> None:
         self.place_rect(color, borderwidth, item.x0, item.y1, item.width,
                         item.height)
         return
 
-    def place_image(self, item, borderwidth, x, y, w, h):
+    def place_image(
+        self,
+        item: LTImage,
+        borderwidth: int,
+        x: float,
+        y: float,
+        w: float,
+        h: float
+    ) -> None:
         if self.imagewriter is not None:
             name = self.imagewriter.export_image(item)
             s = '<img src="%s" border="%d" style="position:absolute; ' \
@@ -335,19 +443,35 @@ def place_image(self, item, borderwidth, x, y, w, h):
             self.write(s)
         return
 
-    def place_text(self, color, text, x, y, size):
-        color = self.text_colors.get(color)
-        if color is not None:
+    def place_text(
+        self,
+        color: str,
+        text: str,
+        x: float,
+        y: float,
+        size: float
+    ) -> None:
+        color2 = self.text_colors.get(color)
+        if color2 is not None:
             s = '<span style="position:absolute; color:%s; left:%dpx; ' \
                 'top:%dpx; font-size:%dpx;">' % \
-                (color, x * self.scale, (self._yoffset - y) * self.scale,
+                (color2, x * self.scale, (self._yoffset - y) * self.scale,
                  size * self.scale * self.fontscale)
             self.write(s)
             self.write_text(text)
             self.write('</span>\n')
         return
 
-    def begin_div(self, color, borderwidth, x, y, w, h, writing_mode=False):
+    def begin_div(
+        self,
+        color: str,
+        borderwidth: int,
+        x: float,
+        y: float,
+        w: float,
+        h: float,
+        writing_mode: str = 'False'
+    ) -> None:
         self._fontstack.append(self._font)
         self._font = None
         s = '<div style="position:absolute; border: %s %dpx solid; ' \
@@ -358,14 +482,14 @@ def begin_div(self, color, borderwidth, x, y, w, h, writing_mode=False):
         self.write(s)
         return
 
-    def end_div(self, color):
+    def end_div(self, color: str) -> None:
         if self._font is not None:
             self.write('</span>')
         self._font = self._fontstack.pop()
         self.write('</div>')
         return
 
-    def put_text(self, text, fontname, fontsize):
+    def put_text(self, text: str, fontname: str, fontsize: float) -> None:
         font = (fontname, fontsize)
         if font != self._font:
             if self._font is not None:
@@ -379,19 +503,20 @@ def put_text(self, text, fontname, fontsize):
         self.write_text(text)
         return
 
-    def put_newline(self):
+    def put_newline(self) -> None:
         self.write('<br>')
         return
 
-    def receive_layout(self, ltpage):
-        def show_group(item):
+    def receive_layout(self, ltpage: LTPage) -> None:
+        def show_group(item: Union[LTTextGroup, TextGroupElement]) -> None:
             if isinstance(item, LTTextGroup):
                 self.place_border('textgroup', 1, item)
                 for child in item:
                     show_group(child)
             return
 
-        def render(item):
+        def render(item: LTItem) -> None:
+            child: LTItem
             if isinstance(item, LTPage):
                 self._yoffset += item.y1
                 self.place_border('page', 1, item)
@@ -455,31 +580,45 @@ def render(item):
         self._yoffset += self.pagemargin
         return
 
-    def close(self):
+    def close(self) -> None:
         self.write_footer()
         return
 
 
-class XMLConverter(PDFConverter):
+class XMLConverter(PDFConverter[AnyIO]):
 
     CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')
 
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
-                 imagewriter=None, stripcontrol=False):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        outfp: AnyIO,
+        codec: str = 'utf-8',
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None,
+        imagewriter: Optional[ImageWriter] = None,
+        stripcontrol: bool = False
+    ) -> None:
         PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
                               laparams=laparams)
+
+        # write() assumes a codec for binary I/O, or no codec for text I/O.
+        if self.outfp_binary == (not self.codec):
+            raise ValueError("Codec is required for a binary I/O output")
+
         self.imagewriter = imagewriter
         self.stripcontrol = stripcontrol
         self.write_header()
         return
 
-    def write(self, text):
+    def write(self, text: str) -> None:
         if self.codec:
-            text = text.encode(self.codec)
-        self.outfp.write(text)
+            cast(BinaryIO, self.outfp).write(text.encode(self.codec))
+        else:
+            cast(TextIO, self.outfp).write(text)
         return
 
-    def write_header(self):
+    def write_header(self) -> None:
         if self.codec:
             self.write('<?xml version="1.0" encoding="%s" ?>\n' % self.codec)
         else:
@@ -487,18 +626,18 @@ def write_header(self):
         self.write('<pages>\n')
         return
 
-    def write_footer(self):
+    def write_footer(self) -> None:
         self.write('</pages>\n')
         return
 
-    def write_text(self, text):
+    def write_text(self, text: str) -> None:
         if self.stripcontrol:
             text = self.CONTROL.sub('', text)
         self.write(enc(text))
         return
 
-    def receive_layout(self, ltpage):
-        def show_group(item):
+    def receive_layout(self, ltpage: LTPage) -> None:
+        def show_group(item: LTItem) -> None:
             if isinstance(item, LTTextBox):
                 self.write('<textbox id="%d" bbox="%s" />\n' %
                            (item.index, bbox2str(item.bbox)))
@@ -509,7 +648,8 @@ def show_group(item):
                 self.write('</textgroup>\n')
             return
 
-        def render(item):
+        def render(item: LTItem) -> None:
+            child: LTItem
             if isinstance(item, LTPage):
                 s = '<page id="%s" bbox="%s" rotate="%d">\n' % \
                     (item.pageid, bbox2str(item.bbox), item.rotate)
@@ -580,6 +720,6 @@ def render(item):
         render(ltpage)
         return
 
-    def close(self):
+    def close(self) -> None:
         self.write_footer()
         return
diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py
index 58998a90..3db476f5 100644
--- a/pdfminer/encodingdb.py
+++ b/pdfminer/encodingdb.py
@@ -1,5 +1,6 @@
 import logging
 import re
+from typing import Dict, Iterable, Optional, cast
 
 from .glyphlist import glyphname2unicode
 from .latin_enc import ENCODING
@@ -10,7 +11,7 @@
 log = logging.getLogger(__name__)
 
 
-def name2unicode(name):
+def name2unicode(name: str) -> str:
     """Converts Adobe glyph names to Unicode numbers.
 
     In contrast to the specification, this raises a KeyError instead of return
@@ -32,7 +33,7 @@ def name2unicode(name):
 
     else:
         if name in glyphname2unicode:
-            return glyphname2unicode.get(name)
+            return glyphname2unicode[name]
 
         elif name.startswith('uni'):
             name_without_uni = name.strip('uni')
@@ -59,7 +60,7 @@ def name2unicode(name):
                    'it does not match specification' % name)
 
 
-def raise_key_error_for_invalid_unicode(unicode_digit):
+def raise_key_error_for_invalid_unicode(unicode_digit: int) -> None:
     """Unicode values should not be in the range D800 through DFFF because
     that is used for surrogate pairs in UTF-16
 
@@ -72,10 +73,10 @@ def raise_key_error_for_invalid_unicode(unicode_digit):
 
 class EncodingDB:
 
-    std2unicode = {}
-    mac2unicode = {}
-    win2unicode = {}
-    pdf2unicode = {}
+    std2unicode: Dict[int, str] = {}
+    mac2unicode: Dict[int, str] = {}
+    win2unicode: Dict[int, str] = {}
+    pdf2unicode: Dict[int, str] = {}
     for (name, std, mac, win, pdf) in ENCODING:
         c = name2unicode(name)
         if std:
@@ -95,7 +96,11 @@ class EncodingDB:
     }
 
     @classmethod
-    def get_encoding(cls, name, diff=None):
+    def get_encoding(
+        cls,
+        name: str,
+        diff: Optional[Iterable[object]] = None
+    ) -> Dict[int, str]:
         cid2unicode = cls.encodings.get(name, cls.std2unicode)
         if diff:
             cid2unicode = cid2unicode.copy()
@@ -105,7 +110,7 @@ def get_encoding(cls, name, diff=None):
                     cid = x
                 elif isinstance(x, PSLiteral):
                     try:
-                        cid2unicode[cid] = name2unicode(x.name)
+                        cid2unicode[cid] = name2unicode(cast(str, x.name))
                     except (KeyError, ValueError) as e:
                         log.debug(str(e))
                     cid += 1
diff --git a/pdfminer/high_level.py b/pdfminer/high_level.py
index 33f661c0..f8c5ca4d 100644
--- a/pdfminer/high_level.py
+++ b/pdfminer/high_level.py
@@ -3,22 +3,36 @@
 import logging
 import sys
 from io import StringIO
+from typing import Any, BinaryIO, Container, Iterator, Optional, cast
 
 from .converter import XMLConverter, HTMLConverter, TextConverter, \
     PDFPageAggregator
 from .image import ImageWriter
-from .layout import LAParams
-from .pdfdevice import TagExtractor
+from .layout import LAParams, LTPage
+from .pdfdevice import PDFDevice, TagExtractor
 from .pdfinterp import PDFResourceManager, PDFPageInterpreter
 from .pdfpage import PDFPage
-from .utils import open_filename
-
-
-def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
-                       laparams=None, maxpages=0, page_numbers=None,
-                       password="", scale=1.0, rotation=0, layoutmode='normal',
-                       output_dir=None, strip_control=False, debug=False,
-                       disable_caching=False, **kwargs):
+from .utils import open_filename, FileOrName, AnyIO
+
+
+def extract_text_to_fp(
+    inf: BinaryIO,
+    outfp: AnyIO,
+    output_type: str = 'text',
+    codec: str = 'utf-8',
+    laparams: Optional[LAParams] = None,
+    maxpages: int = 0,
+    page_numbers: Optional[Container[int]] = None,
+    password: str = "",
+    scale: float = 1.0,
+    rotation: int = 0,
+    layoutmode: str = 'normal',
+    output_dir: Optional[str] = None,
+    strip_control: bool = False,
+    debug: bool = False,
+    disable_caching: bool = False,
+    **kwargs: Any
+) -> None:
     """Parses text from inf-file and writes to outfp file-like object.
 
     Takes loads of optional arguments but the defaults are somewhat sane.
@@ -56,7 +70,7 @@ def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
         imagewriter = ImageWriter(output_dir)
 
     rsrcmgr = PDFResourceManager(caching=not disable_caching)
-    device = None
+    device: Optional[PDFDevice] = None
 
     if output_type != 'text' and outfp == sys.stdout:
         outfp = sys.stdout.buffer
@@ -76,13 +90,15 @@ def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
                                imagewriter=imagewriter)
 
     elif output_type == 'tag':
-        device = TagExtractor(rsrcmgr, outfp, codec=codec)
+        # Binary I/O is required, but we have no good way to test it here.
+        device = TagExtractor(rsrcmgr, cast(BinaryIO, outfp), codec=codec)
 
     else:
         msg = f"Output type can be text, html, xml or tag but is " \
               f"{output_type}"
         raise ValueError(msg)
 
+    assert device is not None
     interpreter = PDFPageInterpreter(rsrcmgr, device)
     for page in PDFPage.get_pages(inf,
                                   page_numbers,
@@ -95,8 +111,15 @@ def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
     device.close()
 
 
-def extract_text(pdf_file, password='', page_numbers=None, maxpages=0,
-                 caching=True, codec='utf-8', laparams=None):
+def extract_text(
+    pdf_file: FileOrName,
+    password: str = '',
+    page_numbers: Optional[Container[int]] = None,
+    maxpages: int = 0,
+    caching: bool = True,
+    codec: str = 'utf-8',
+    laparams: Optional[LAParams] = None
+) -> str:
     """Parse and return the text contained in a PDF file.
 
     :param pdf_file: Either a file path or a file-like object for the PDF file
@@ -114,6 +137,7 @@ def extract_text(pdf_file, password='', page_numbers=None, maxpages=0,
         laparams = LAParams()
 
     with open_filename(pdf_file, "rb") as fp, StringIO() as output_string:
+        fp = cast(BinaryIO, fp)  # we opened in binary mode
         rsrcmgr = PDFResourceManager(caching=caching)
         device = TextConverter(rsrcmgr, output_string, codec=codec,
                                laparams=laparams)
@@ -131,8 +155,14 @@ def extract_text(pdf_file, password='', page_numbers=None, maxpages=0,
         return output_string.getvalue()
 
 
-def extract_pages(pdf_file, password='', page_numbers=None, maxpages=0,
-                  caching=True, laparams=None):
+def extract_pages(
+    pdf_file: FileOrName,
+    password: str = '',
+    page_numbers: Optional[Container[int]] = None,
+    maxpages: int = 0,
+    caching: bool = True,
+    laparams: Optional[LAParams] = None
+) -> Iterator[LTPage]:
     """Extract and yield LTPage objects
 
     :param pdf_file: Either a file path or a file-like object for the PDF file
@@ -149,6 +179,7 @@ def extract_pages(pdf_file, password='', page_numbers=None, maxpages=0,
         laparams = LAParams()
 
     with open_filename(pdf_file, "rb") as fp:
+        fp = cast(BinaryIO, fp)  # we opened in binary mode
         resource_manager = PDFResourceManager(caching=caching)
         device = PDFPageAggregator(resource_manager, laparams=laparams)
         interpreter = PDFPageInterpreter(resource_manager, device)
diff --git a/pdfminer/image.py b/pdfminer/image.py
index 77d14810..83f9a7aa 100644
--- a/pdfminer/image.py
+++ b/pdfminer/image.py
@@ -2,20 +2,28 @@
 import os.path
 import struct
 from io import BytesIO
+from typing import BinaryIO, Tuple
 
 from .jbig2 import JBIG2StreamReader, JBIG2StreamWriter
+from .layout import LTImage
 from .pdfcolor import LITERAL_DEVICE_CMYK
 from .pdfcolor import LITERAL_DEVICE_GRAY
 from .pdfcolor import LITERAL_DEVICE_RGB
 from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE
 
 
-def align32(x):
+def align32(x: int) -> int:
     return ((x+3)//4)*4
 
 
 class BMPWriter:
-    def __init__(self, fp, bits, width, height):
+    def __init__(
+        self,
+        fp: BinaryIO,
+        bits: int,
+        width: int,
+        height: int
+    ) -> None:
         self.fp = fp
         self.bits = bits
         self.width = width
@@ -51,7 +59,7 @@ def __init__(self, fp, bits, width, height):
         self.pos1 = self.pos0 + self.datasize
         return
 
-    def write_line(self, y, data):
+    def write_line(self, y: int, data: bytes) -> None:
         self.fp.seek(self.pos1 - (y+1)*self.linesize)
         self.fp.write(data)
         return
@@ -63,13 +71,13 @@ class ImageWriter:
     Supports various image types: JPEG, JBIG2 and bitmaps
     """
 
-    def __init__(self, outdir):
+    def __init__(self, outdir: str) -> None:
         self.outdir = outdir
         if not os.path.exists(self.outdir):
             os.makedirs(self.outdir)
         return
 
-    def export_image(self, image):
+    def export_image(self, image: LTImage) -> str:
         (width, height) = image.srcsize
 
         is_jbig2 = self.is_jbig2_image(image)
@@ -80,8 +88,9 @@ def export_image(self, image):
         fp = open(path, 'wb')
         if ext == '.jpg':
             raw_data = image.stream.get_rawdata()
+            assert raw_data is not None
             if LITERAL_DEVICE_CMYK in image.colorspace:
-                from PIL import Image
+                from PIL import Image  # type: ignore[import]
                 from PIL import ImageChops
                 ifp = BytesIO(raw_data)
                 i = Image.open(ifp)
@@ -128,7 +137,7 @@ def export_image(self, image):
         return name
 
     @staticmethod
-    def is_jbig2_image(image):
+    def is_jbig2_image(image: LTImage) -> bool:
         filters = image.stream.get_filters()
         is_jbig2 = False
         for filter_name, params in filters:
@@ -138,7 +147,12 @@ def is_jbig2_image(image):
         return is_jbig2
 
     @staticmethod
-    def _get_image_extension(image, width, height, is_jbig2):
+    def _get_image_extension(
+        image: LTImage,
+        width: int,
+        height: int,
+        is_jbig2: bool
+    ) -> str:
         filters = image.stream.get_filters()
         if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
             ext = '.jpg'
@@ -154,7 +168,11 @@ def _get_image_extension(image, width, height, is_jbig2):
         return ext
 
     @staticmethod
-    def _create_unique_image_name(dirname, image_name, ext):
+    def _create_unique_image_name(
+        dirname: str,
+        image_name: str,
+        ext: str
+    ) -> Tuple[str, str]:
         name = image_name + ext
         path = os.path.join(dirname, name)
         img_index = 0
diff --git a/pdfminer/jbig2.py b/pdfminer/jbig2.py
index 4299629b..10ee7e6f 100644
--- a/pdfminer/jbig2.py
+++ b/pdfminer/jbig2.py
@@ -1,6 +1,7 @@
 import math
 import os
 from struct import pack, unpack, calcsize
+from typing import BinaryIO, Dict, Iterable, List, Optional, Tuple, Union, cast
 
 # segment structure base
 SEG_STRUCT = [
@@ -34,15 +35,15 @@
 FILE_HEAD_FLAG_PAGES_UNKNOWN = 0b00000010
 
 
-def bit_set(bit_pos, value):
+def bit_set(bit_pos: int, value: int) -> bool:
     return bool((value >> bit_pos) & 1)
 
 
-def check_flag(flag, value):
+def check_flag(flag: int, value: int) -> bool:
     return bool(flag & value)
 
 
-def masked_value(mask, value):
+def masked_value(mask: int, value: int) -> int:
     for bit_pos in range(0, 31):
         if bit_set(bit_pos, mask):
             return (value & mask) >> bit_pos
@@ -50,7 +51,7 @@ def masked_value(mask, value):
     raise Exception("Invalid mask or value")
 
 
-def mask_value(mask, value):
+def mask_value(mask: int, value: int) -> int:
     for bit_pos in range(0, 31):
         if bit_set(bit_pos, mask):
             return (value & (mask >> bit_pos)) << bit_pos
@@ -58,25 +59,34 @@ def mask_value(mask, value):
     raise Exception("Invalid mask or value")
 
 
+def unpack_int(format: str, buffer: bytes) -> int:
+    assert format in {">B", ">I", ">L"}
+    [result] = cast(Tuple[int], unpack(format, buffer))
+    return result
+
+
+JBIG2SegmentFlags = Dict[str, Union[int, bool]]
+JBIG2RetentionFlags = Dict[str, Union[int, List[int], List[bool]]]
+JBIG2Segment = Dict[str, Union[bool, int, bytes, JBIG2SegmentFlags,
+                               JBIG2RetentionFlags]]
+
+
 class JBIG2StreamReader:
     """Read segments from a JBIG2 byte stream"""
-
-    def __init__(self, stream):
+    def __init__(self, stream: BinaryIO) -> None:
         self.stream = stream
 
-    def get_segments(self):
-        segments = []
+    def get_segments(self) -> List[JBIG2Segment]:
+        segments: List[JBIG2Segment] = []
         while not self.is_eof():
-            segment = {}
+            segment: JBIG2Segment = {}
             for field_format, name in SEG_STRUCT:
                 field_len = calcsize(field_format)
                 field = self.stream.read(field_len)
                 if len(field) < field_len:
                     segment["_error"] = True
                     break
-                value = unpack(field_format, field)
-                if len(value) == 1:
-                    [value] = value
+                value = unpack_int(field_format, field)
                 parser = getattr(self, "parse_%s" % name, None)
                 if callable(parser):
                     value = parser(segment, value, field)
@@ -86,21 +96,31 @@ def get_segments(self):
                 segments.append(segment)
         return segments
 
-    def is_eof(self):
+    def is_eof(self) -> bool:
         if self.stream.read(1) == b'':
             return True
         else:
             self.stream.seek(-1, os.SEEK_CUR)
             return False
 
-    def parse_flags(self, segment, flags, field):
+    def parse_flags(
+        self,
+        segment: JBIG2Segment,
+        flags: int,
+        field: bytes
+    ) -> JBIG2SegmentFlags:
         return {
             "deferred": check_flag(HEADER_FLAG_DEFERRED, flags),
             "page_assoc_long": check_flag(HEADER_FLAG_PAGE_ASSOC_LONG, flags),
             "type": masked_value(SEG_TYPE_MASK, flags)
         }
 
-    def parse_retention_flags(self, segment, flags, field):
+    def parse_retention_flags(
+        self,
+        segment: JBIG2Segment,
+        flags: int,
+        field: bytes
+    ) -> JBIG2RetentionFlags:
         ref_count = masked_value(REF_COUNT_SHORT_MASK, flags)
         retain_segments = []
         ref_segments = []
@@ -110,15 +130,16 @@ def parse_retention_flags(self, segment, flags, field):
                 retain_segments.append(bit_set(bit_pos, flags))
         else:
             field += self.stream.read(3)
-            [ref_count] = unpack(">L", field)
+            ref_count = unpack_int(">L", field)
             ref_count = masked_value(REF_COUNT_LONG_MASK, ref_count)
             ret_bytes_count = int(math.ceil((ref_count + 1) / 8))
             for ret_byte_index in range(ret_bytes_count):
-                [ret_byte] = unpack(">B", self.stream.read(1))
+                ret_byte = unpack_int(">B", self.stream.read(1))
                 for bit_pos in range(7):
                     retain_segments.append(bit_set(bit_pos, ret_byte))
 
         seg_num = segment["number"]
+        assert isinstance(seg_num, int)
         if seg_num <= 256:
             ref_format = ">B"
         elif seg_num <= 65536:
@@ -129,8 +150,8 @@ def parse_retention_flags(self, segment, flags, field):
         ref_size = calcsize(ref_format)
 
         for ref_index in range(ref_count):
-            ref = self.stream.read(ref_size)
-            [ref] = unpack(ref_format, ref)
+            ref_data = self.stream.read(ref_size)
+            ref = unpack_int(ref_format, ref_data)
             ref_segments.append(ref)
 
         return {
@@ -139,15 +160,26 @@ def parse_retention_flags(self, segment, flags, field):
             "ref_segments": ref_segments,
         }
 
-    def parse_page_assoc(self, segment, page, field):
-        if segment["flags"]["page_assoc_long"]:
+    def parse_page_assoc(
+        self,
+        segment: JBIG2Segment,
+        page: int,
+        field: bytes
+    ) -> int:
+        if cast(JBIG2SegmentFlags, segment["flags"])["page_assoc_long"]:
             field += self.stream.read(3)
-            [page] = unpack(">L", field)
+            page = unpack_int(">L", field)
         return page
 
-    def parse_data_length(self, segment, length, field):
+    def parse_data_length(
+        self,
+        segment: JBIG2Segment,
+        length: int,
+        field: bytes
+    ) -> int:
         if length:
-            if (segment["flags"]["type"] == SEG_TYPE_IMMEDIATE_GEN_REGION) \
+            if (cast(JBIG2SegmentFlags, segment["flags"])["type"] ==
+                    SEG_TYPE_IMMEDIATE_GEN_REGION) \
                     and (length == DATA_LEN_UNKNOWN):
 
                 raise NotImplementedError(
@@ -163,25 +195,36 @@ def parse_data_length(self, segment, length, field):
 class JBIG2StreamWriter:
     """Write JBIG2 segments to a file in JBIG2 format"""
 
-    def __init__(self, stream):
+    EMPTY_RETENTION_FLAGS: JBIG2RetentionFlags = {
+        'ref_count': 0,
+        'ref_segments': cast(List[int], []),
+        'retain_segments': cast(List[bool], [])
+    }
+
+    def __init__(self, stream: BinaryIO) -> None:
         self.stream = stream
 
-    def write_segments(self, segments, fix_last_page=True):
+    def write_segments(
+        self,
+        segments: Iterable[JBIG2Segment],
+        fix_last_page: bool = True
+    ) -> int:
         data_len = 0
-        current_page = None
-        seg_num = None
+        current_page: Optional[int] = None
+        seg_num: Optional[int] = None
 
         for segment in segments:
             data = self.encode_segment(segment)
             self.stream.write(data)
             data_len += len(data)
 
-            seg_num = segment["number"]
+            seg_num = cast(Optional[int], segment["number"])
 
             if fix_last_page:
-                seg_page = segment.get("page_assoc")
+                seg_page = cast(Optional[int], segment.get("page_assoc"))
 
-                if segment["flags"]["type"] == SEG_TYPE_END_OF_PAGE:
+                if cast(JBIG2SegmentFlags, segment["flags"])["type"] == \
+                        SEG_TYPE_END_OF_PAGE:
                     current_page = None
                 elif seg_page:
                     current_page = seg_page
@@ -194,7 +237,11 @@ def write_segments(self, segments, fix_last_page=True):
 
         return data_len
 
-    def write_file(self, segments, fix_last_page=True):
+    def write_file(
+        self,
+        segments: Iterable[JBIG2Segment],
+        fix_last_page: bool = True
+    ) -> int:
         header = FILE_HEADER_ID
         header_flags = FILE_HEAD_FLAG_SEQUENTIAL | FILE_HEAD_FLAG_PAGES_UNKNOWN
         header += pack(">B", header_flags)
@@ -205,7 +252,7 @@ def write_file(self, segments, fix_last_page=True):
 
         seg_num = 0
         for segment in segments:
-            seg_num = segment["number"]
+            seg_num = cast(int, segment["number"])
 
         eof_segment = self.get_eof_segment(seg_num + 1)
         data = self.encode_segment(eof_segment)
@@ -215,7 +262,7 @@ def write_file(self, segments, fix_last_page=True):
 
         return data_len
 
-    def encode_segment(self, segment):
+    def encode_segment(self, segment: JBIG2Segment) -> bytes:
         data = b''
         for field_format, name in SEG_STRUCT:
             value = segment.get(name)
@@ -227,7 +274,8 @@ def encode_segment(self, segment):
             data += field
         return data
 
-    def encode_flags(self, value, segment):
+    def encode_flags(self, value: JBIG2SegmentFlags, segment: JBIG2Segment
+                     ) -> bytes:
         flags = 0
         if value.get("deferred"):
             flags |= HEADER_FLAG_DEFERRED
@@ -237,17 +285,22 @@ def encode_flags(self, value, segment):
                 if value["page_assoc_long"] else flags
         else:
             flags |= HEADER_FLAG_PAGE_ASSOC_LONG \
-                if segment.get("page", 0) > 255 else flags
+                if cast(int, segment.get("page", 0)) > 255 else flags
 
         flags |= mask_value(SEG_TYPE_MASK, value["type"])
 
         return pack(">B", flags)
 
-    def encode_retention_flags(self, value, segment):
+    def encode_retention_flags(
+        self,
+        value: JBIG2RetentionFlags,
+        segment: JBIG2Segment
+    ) -> bytes:
         flags = []
         flags_format = ">B"
         ref_count = value["ref_count"]
-        retain_segments = value.get("retain_segments", [])
+        assert isinstance(ref_count, int)
+        retain_segments = cast(List[bool], value.get("retain_segments", []))
 
         if ref_count <= 4:
             flags_byte = mask_value(REF_COUNT_SHORT_MASK, ref_count)
@@ -271,9 +324,9 @@ def encode_retention_flags(self, value, segment):
 
                 flags.append(ret_byte)
 
-        ref_segments = value.get("ref_segments", [])
+        ref_segments = cast(List[int], value.get("ref_segments", []))
 
-        seg_num = segment["number"]
+        seg_num = cast(int, segment["number"])
         if seg_num <= 256:
             ref_format = "B"
         elif seg_num <= 65536:
@@ -287,35 +340,31 @@ def encode_retention_flags(self, value, segment):
 
         return pack(flags_format, *flags)
 
-    def encode_data_length(self, value, segment):
+    def encode_data_length(self, value: int, segment: JBIG2Segment) -> bytes:
         data = pack(">L", value)
-        data += segment["raw_data"]
+        data += cast(bytes, segment["raw_data"])
         return data
 
-    def get_eop_segment(self, seg_number, page_number):
+    def get_eop_segment(
+        self,
+        seg_number: int,
+        page_number: int
+    ) -> JBIG2Segment:
         return {
             'data_length': 0,
             'flags': {'deferred': False, 'type': SEG_TYPE_END_OF_PAGE},
             'number': seg_number,
             'page_assoc': page_number,
             'raw_data': b'',
-            'retention_flags': {
-                'ref_count': 0,
-                'ref_segments': [],
-                'retain_segments': []
-            }
+            'retention_flags': JBIG2StreamWriter.EMPTY_RETENTION_FLAGS
         }
 
-    def get_eof_segment(self, seg_number):
+    def get_eof_segment(self, seg_number: int) -> JBIG2Segment:
         return {
             'data_length': 0,
             'flags': {'deferred': False, 'type': SEG_TYPE_END_OF_FILE},
             'number': seg_number,
             'page_assoc': 0,
             'raw_data': b'',
-            'retention_flags': {
-                'ref_count': 0,
-                'ref_segments': [],
-                'retain_segments': []
-            }
+            'retention_flags': JBIG2StreamWriter.EMPTY_RETENTION_FLAGS
         }
diff --git a/pdfminer/latin_enc.py b/pdfminer/latin_enc.py
index fae26ff3..d579aea1 100644
--- a/pdfminer/latin_enc.py
+++ b/pdfminer/latin_enc.py
@@ -5,7 +5,12 @@
 
 """
 
-ENCODING = [
+from typing import List, Optional, Tuple
+
+EncodingRow = \
+    Tuple[str, Optional[int], Optional[int], Optional[int], Optional[int]]
+
+ENCODING: List[EncodingRow] = [
   # (name, std, mac, win, pdf)
   ('A', 65, 65, 65, 65),
   ('AE', 225, 174, 198, 198),
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index 8bce26bc..b9f3d105 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -1,25 +1,36 @@
 import heapq
 import logging
+from typing import (Dict, Generic, Iterable, Iterator, List, Optional,
+                    Sequence, Set, Tuple, TypeVar, Union, cast)
 
 from .utils import INF
+from .utils import LTComponentT
+from .utils import Matrix
 from .utils import Plane
+from .utils import Point
+from .utils import Rect
 from .utils import apply_matrix_pt
 from .utils import bbox2str
 from .utils import fsplit
 from .utils import get_bound
 from .utils import matrix2str
 from .utils import uniq
+from .pdfcolor import PDFColorSpace
+from .pdftypes import PDFStream
+from .pdfinterp import Color
+from .pdfinterp import PDFGraphicState
+from .pdffont import PDFFont
 
 logger = logging.getLogger(__name__)
 
 
 class IndexAssigner:
 
-    def __init__(self, index=0):
+    def __init__(self, index: int = 0) -> None:
         self.index = index
         return
 
-    def run(self, obj):
+    def run(self, obj: "LTItem") -> None:
         if isinstance(obj, LTTextBox):
             obj.index = self.index
             self.index += 1
@@ -57,14 +68,16 @@ class LAParams:
         figures.
     """
 
-    def __init__(self,
-                 line_overlap=0.5,
-                 char_margin=2.0,
-                 line_margin=0.5,
-                 word_margin=0.1,
-                 boxes_flow=0.5,
-                 detect_vertical=False,
-                 all_texts=False):
+    def __init__(
+        self,
+        line_overlap: float = 0.5,
+        char_margin: float = 2.0,
+        line_margin: float = 0.5,
+        word_margin: float = 0.1,
+        boxes_flow: Optional[float] = 0.5,
+        detect_vertical: bool = False,
+        all_texts: bool = False
+    ) -> None:
         self.line_overlap = line_overlap
         self.char_margin = char_margin
         self.line_margin = line_margin
@@ -76,7 +89,7 @@ def __init__(self,
         self._validate()
         return
 
-    def _validate(self):
+    def _validate(self) -> None:
         if self.boxes_flow is not None:
             boxes_flow_err_msg = ("LAParam boxes_flow should be None, or a "
                                   "number between -1 and +1")
@@ -86,7 +99,7 @@ def _validate(self):
             if not -1 <= self.boxes_flow <= 1:
                 raise ValueError(boxes_flow_err_msg)
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<LAParams: char_margin=%.1f, line_margin=%.1f, ' \
                'word_margin=%.1f all_texts=%r>' % \
                (self.char_margin, self.line_margin, self.word_margin,
@@ -96,7 +109,7 @@ def __repr__(self):
 class LTItem:
     """Interface for things that can be analyzed"""
 
-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
         """Perform the layout analysis."""
         return
 
@@ -104,11 +117,11 @@ def analyze(self, laparams):
 class LTText:
     """Interface for things that have text"""
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return ('<%s %r>' %
                 (self.__class__.__name__, self.get_text()))
 
-    def get_text(self):
+    def get_text(self) -> str:
         """Text contained in this object"""
         raise NotImplementedError
 
@@ -116,29 +129,29 @@ def get_text(self):
 class LTComponent(LTItem):
     """Object with a bounding box"""
 
-    def __init__(self, bbox):
+    def __init__(self, bbox: Rect) -> None:
         LTItem.__init__(self)
         self.set_bbox(bbox)
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return ('<%s %s>' %
                 (self.__class__.__name__, bbox2str(self.bbox)))
 
     # Disable comparison.
-    def __lt__(self, _):
+    def __lt__(self, _: object) -> bool:
         raise ValueError
 
-    def __le__(self, _):
+    def __le__(self, _: object) -> bool:
         raise ValueError
 
-    def __gt__(self, _):
+    def __gt__(self, _: object) -> bool:
         raise ValueError
 
-    def __ge__(self, _):
+    def __ge__(self, _: object) -> bool:
         raise ValueError
 
-    def set_bbox(self, bbox):
+    def set_bbox(self, bbox: Rect) -> None:
         (x0, y0, x1, y1) = bbox
         self.x0 = x0
         self.y0 = y0
@@ -149,39 +162,39 @@ def set_bbox(self, bbox):
         self.bbox = bbox
         return
 
-    def is_empty(self):
+    def is_empty(self) -> bool:
         return self.width <= 0 or self.height <= 0
 
-    def is_hoverlap(self, obj):
+    def is_hoverlap(self, obj: "LTComponent") -> bool:
         assert isinstance(obj, LTComponent), str(type(obj))
         return obj.x0 <= self.x1 and self.x0 <= obj.x1
 
-    def hdistance(self, obj):
+    def hdistance(self, obj: "LTComponent") -> float:
         assert isinstance(obj, LTComponent), str(type(obj))
         if self.is_hoverlap(obj):
             return 0
         else:
             return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
 
-    def hoverlap(self, obj):
+    def hoverlap(self, obj: "LTComponent") -> float:
         assert isinstance(obj, LTComponent), str(type(obj))
         if self.is_hoverlap(obj):
             return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
         else:
             return 0
 
-    def is_voverlap(self, obj):
+    def is_voverlap(self, obj: "LTComponent") -> bool:
         assert isinstance(obj, LTComponent), str(type(obj))
         return obj.y0 <= self.y1 and self.y0 <= obj.y1
 
-    def vdistance(self, obj):
+    def vdistance(self, obj: "LTComponent") -> float:
         assert isinstance(obj, LTComponent), str(type(obj))
         if self.is_voverlap(obj):
             return 0
         else:
             return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
 
-    def voverlap(self, obj):
+    def voverlap(self, obj: "LTComponent") -> float:
         assert isinstance(obj, LTComponent), str(type(obj))
         if self.is_voverlap(obj):
             return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
@@ -192,8 +205,16 @@ def voverlap(self, obj):
 class LTCurve(LTComponent):
     """A generic Bezier curve"""
 
-    def __init__(self, linewidth, pts, stroke=False, fill=False, evenodd=False,
-                 stroking_color=None, non_stroking_color=None):
+    def __init__(
+        self,
+        linewidth: float,
+        pts: List[Point],
+        stroke: bool = False,
+        fill: bool = False,
+        evenodd: bool = False,
+        stroking_color: Optional[Color] = None,
+        non_stroking_color: Optional[Color] = None
+    ) -> None:
         LTComponent.__init__(self, get_bound(pts))
         self.pts = pts
         self.linewidth = linewidth
@@ -204,7 +225,7 @@ def __init__(self, linewidth, pts, stroke=False, fill=False, evenodd=False,
         self.non_stroking_color = non_stroking_color
         return
 
-    def get_pts(self):
+    def get_pts(self) -> str:
         return ','.join('%.3f,%.3f' % p for p in self.pts)
 
 
@@ -214,8 +235,17 @@ class LTLine(LTCurve):
     Could be used for separating text or figures.
     """
 
-    def __init__(self, linewidth, p0, p1, stroke=False, fill=False,
-                 evenodd=False, stroking_color=None, non_stroking_color=None):
+    def __init__(
+        self,
+        linewidth: float,
+        p0: Point,
+        p1: Point,
+        stroke: bool = False,
+        fill: bool = False,
+        evenodd: bool = False,
+        stroking_color: Optional[Color] = None,
+        non_stroking_color: Optional[Color] = None
+    ) -> None:
         LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd,
                          stroking_color, non_stroking_color)
         return
@@ -227,8 +257,16 @@ class LTRect(LTCurve):
     Could be used for framing another pictures or figures.
     """
 
-    def __init__(self, linewidth, bbox, stroke=False, fill=False,
-                 evenodd=False, stroking_color=None,  non_stroking_color=None):
+    def __init__(
+        self,
+        linewidth: float,
+        bbox: Rect,
+        stroke: bool = False,
+        fill: bool = False,
+        evenodd: bool = False,
+        stroking_color: Optional[Color] = None,
+        non_stroking_color: Optional[Color] = None
+    ) -> None:
         (x0, y0, x1, y1) = bbox
         LTCurve.__init__(self, linewidth,
                          [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke,
@@ -242,7 +280,7 @@ class LTImage(LTComponent):
     Embedded images can be in JPEG, Bitmap or JBIG2.
     """
 
-    def __init__(self, name, stream, bbox):
+    def __init__(self, name: str, stream: PDFStream, bbox: Rect) -> None:
         LTComponent.__init__(self, bbox)
         self.name = name
         self.stream = stream
@@ -255,7 +293,7 @@ def __init__(self, name, stream, bbox):
             self.colorspace = [self.colorspace]
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return ('<%s(%s) %s %r>' %
                 (self.__class__.__name__, self.name,
                  bbox2str(self.bbox), self.srcsize))
@@ -269,19 +307,30 @@ class LTAnno(LTItem, LTText):
     according to the relationship between two characters (e.g. a space).
     """
 
-    def __init__(self, text):
+    def __init__(self, text: str) -> None:
         self._text = text
         return
 
-    def get_text(self):
+    def get_text(self) -> str:
         return self._text
 
 
 class LTChar(LTComponent, LTText):
     """Actual letter in the text as a Unicode string."""
 
-    def __init__(self, matrix, font, fontsize, scaling, rise,
-                 text, textwidth, textdisp, ncs, graphicstate):
+    def __init__(
+        self,
+        matrix: Matrix,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        rise: float,
+        text: str,
+        textwidth: float,
+        textdisp: Union[float, Tuple[Optional[float], float]],
+        ncs: PDFColorSpace,
+        graphicstate: PDFGraphicState
+    ) -> None:
         LTText.__init__(self)
         self._text = text
         self.matrix = matrix
@@ -292,6 +341,7 @@ def __init__(self, matrix, font, fontsize, scaling, rise,
         # compute the boundary rectangle.
         if font.is_vertical():
             # vertical
+            assert isinstance(textdisp, tuple)
             (vx, vy) = textdisp
             if vx is None:
                 vx = fontsize * 0.5
@@ -320,114 +370,129 @@ def __init__(self, matrix, font, fontsize, scaling, rise,
             self.size = self.height
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
                 (self.__class__.__name__, bbox2str(self.bbox),
                  matrix2str(self.matrix), self.fontname, self.adv,
                  self.get_text()))
 
-    def get_text(self):
+    def get_text(self) -> str:
         return self._text
 
-    def is_compatible(self, obj):
+    def is_compatible(self, obj: object) -> bool:
         """Returns True if two characters can coexist in the same line."""
         return True
 
 
-class LTContainer(LTComponent):
+LTItemT = TypeVar('LTItemT', bound=LTItem)
+
+
+class LTContainer(LTComponent, Generic[LTItemT]):
     """Object that can be extended and analyzed"""
 
-    def __init__(self, bbox):
+    def __init__(self, bbox: Rect) -> None:
         LTComponent.__init__(self, bbox)
-        self._objs = []
+        self._objs: List[LTItemT] = []
         return
 
-    def __iter__(self):
+    def __iter__(self) -> Iterator[LTItemT]:
         return iter(self._objs)
 
-    def __len__(self):
+    def __len__(self) -> int:
         return len(self._objs)
 
-    def add(self, obj):
+    def add(self, obj: LTItemT) -> None:
         self._objs.append(obj)
         return
 
-    def extend(self, objs):
+    def extend(self, objs: Iterable[LTItemT]) -> None:
         for obj in objs:
             self.add(obj)
         return
 
-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
         for obj in self._objs:
             obj.analyze(laparams)
         return
 
 
-class LTExpandableContainer(LTContainer):
-    def __init__(self):
+class LTExpandableContainer(LTContainer[LTItemT]):
+    def __init__(self) -> None:
         LTContainer.__init__(self, (+INF, +INF, -INF, -INF))
         return
 
-    def add(self, obj):
-        LTContainer.add(self, obj)
+    # Incompatible override: we take an LTComponent (with bounding box), but
+    # LTContainer only considers LTItem (no bounding box).
+    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
+        LTContainer.add(self, cast(LTItemT, obj))
         self.set_bbox((min(self.x0, obj.x0), min(self.y0, obj.y0),
                        max(self.x1, obj.x1), max(self.y1, obj.y1)))
         return
 
 
-class LTTextContainer(LTExpandableContainer, LTText):
-    def __init__(self):
+class LTTextContainer(LTExpandableContainer[LTItemT], LTText):
+    def __init__(self) -> None:
         LTText.__init__(self)
         LTExpandableContainer.__init__(self)
         return
 
-    def get_text(self):
-        return ''.join(obj.get_text() for obj in self
+    def get_text(self) -> str:
+        return ''.join(cast(LTText, obj).get_text() for obj in self
                        if isinstance(obj, LTText))
 
 
-class LTTextLine(LTTextContainer):
+TextLineElement = Union[LTChar, LTAnno]
+
+
+class LTTextLine(LTTextContainer[TextLineElement]):
     """Contains a list of LTChar objects that represent a single text line.
 
     The characters are aligned either horizontally or vertically, depending on
     the text's writing mode.
     """
 
-    def __init__(self, word_margin):
-        LTTextContainer.__init__(self)
+    def __init__(self, word_margin: float) -> None:
+        super().__init__()
         self.word_margin = word_margin
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return ('<%s %s %r>' %
                 (self.__class__.__name__, bbox2str(self.bbox),
                  self.get_text()))
 
-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
         LTTextContainer.analyze(self, laparams)
         LTContainer.add(self, LTAnno('\n'))
         return
 
-    def find_neighbors(self, plane, ratio):
+    def find_neighbors(self, plane: Plane[LTComponentT], ratio: float
+                       ) -> List["LTTextLine"]:
         raise NotImplementedError
 
 
 class LTTextLineHorizontal(LTTextLine):
-    def __init__(self, word_margin):
+    def __init__(self, word_margin: float) -> None:
         LTTextLine.__init__(self, word_margin)
-        self._x1 = +INF
+        self._x1: float = +INF
         return
 
-    def add(self, obj):
+    # Incompatible override: we take an LTComponent (with bounding box), but
+    # LTContainer only considers LTItem (no bounding box).
+    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
         if isinstance(obj, LTChar) and self.word_margin:
             margin = self.word_margin * max(obj.width, obj.height)
             if self._x1 < obj.x0 - margin:
                 LTContainer.add(self, LTAnno(' '))
         self._x1 = obj.x1
-        LTTextLine.add(self, obj)
+        super().add(obj)
         return
 
-    def find_neighbors(self, plane, ratio):
+    def find_neighbors(
+        self,
+        plane: Plane[LTComponentT],
+        ratio: float
+    ) -> List[LTTextLine]:
         """
         Finds neighboring LTTextLineHorizontals in the plane.
 
@@ -445,45 +510,67 @@ def find_neighbors(self, plane, ratio):
                      self._is_right_aligned_with(obj, tolerance=d) or
                      self._is_centrally_aligned_with(obj, tolerance=d)))]
 
-    def _is_left_aligned_with(self, other, tolerance=0):
+    def _is_left_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
         """
         Whether the left-hand edge of `other` is within `tolerance`.
         """
         return abs(other.x0 - self.x0) <= tolerance
 
-    def _is_right_aligned_with(self, other, tolerance=0):
+    def _is_right_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
         """
         Whether the right-hand edge of `other` is within `tolerance`.
         """
         return abs(other.x1 - self.x1) <= tolerance
 
-    def _is_centrally_aligned_with(self, other, tolerance=0):
+    def _is_centrally_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
         """
         Whether the horizontal center of `other` is within `tolerance`.
         """
         return abs(
             (other.x0 + other.x1) / 2 - (self.x0 + self.x1) / 2) <= tolerance
 
-    def _is_same_height_as(self, other, tolerance):
+    def _is_same_height_as(
+        self,
+        other: LTComponent,
+        tolerance: float
+    ) -> bool:
         return abs(other.height - self.height) <= tolerance
 
 
 class LTTextLineVertical(LTTextLine):
-    def __init__(self, word_margin):
+    def __init__(self, word_margin: float) -> None:
         LTTextLine.__init__(self, word_margin)
-        self._y0 = -INF
+        self._y0: float = -INF
         return
 
-    def add(self, obj):
+    # Incompatible override: we take an LTComponent (with bounding box), but
+    # LTContainer only considers LTItem (no bounding box).
+    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
         if isinstance(obj, LTChar) and self.word_margin:
             margin = self.word_margin * max(obj.width, obj.height)
             if obj.y1 + margin < self._y0:
                 LTContainer.add(self, LTAnno(' '))
         self._y0 = obj.y0
-        LTTextLine.add(self, obj)
+        super().add(obj)
         return
 
-    def find_neighbors(self, plane, ratio):
+    def find_neighbors(
+        self,
+        plane: Plane[LTComponentT],
+        ratio: float
+    ) -> List[LTTextLine]:
         """
         Finds neighboring LTTextLineVerticals in the plane.
 
@@ -501,30 +588,42 @@ def find_neighbors(self, plane, ratio):
                      self._is_upper_aligned_with(obj, tolerance=d) or
                      self._is_centrally_aligned_with(obj, tolerance=d)))]
 
-    def _is_lower_aligned_with(self, other, tolerance=0):
+    def _is_lower_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
         """
         Whether the lower edge of `other` is within `tolerance`.
         """
         return abs(other.y0 - self.y0) <= tolerance
 
-    def _is_upper_aligned_with(self, other, tolerance=0):
+    def _is_upper_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
         """
         Whether the upper edge of `other` is within `tolerance`.
         """
         return abs(other.y1 - self.y1) <= tolerance
 
-    def _is_centrally_aligned_with(self, other, tolerance=0):
+    def _is_centrally_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
         """
         Whether the vertical center of `other` is within `tolerance`.
         """
         return abs(
             (other.y0 + other.y1) / 2 - (self.y0 + self.y1) / 2) <= tolerance
 
-    def _is_same_width_as(self, other, tolerance):
+    def _is_same_width_as(self, other: LTComponent, tolerance: float) -> bool:
         return abs(other.width - self.width) <= tolerance
 
 
-class LTTextBox(LTTextContainer):
+class LTTextBox(LTTextContainer[LTTextLine]):
     """Represents a group of text chunks in a rectangular area.
 
     Note that this box is created by geometric analysis and does not
@@ -532,72 +631,86 @@ class LTTextBox(LTTextContainer):
     of LTTextLine objects.
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         LTTextContainer.__init__(self)
-        self.index = -1
+        self.index: int = -1
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return ('<%s(%s) %s %r>' %
                 (self.__class__.__name__,
                  self.index, bbox2str(self.bbox), self.get_text()))
 
+    def get_writing_mode(self) -> str:
+        raise NotImplementedError
+
 
 class LTTextBoxHorizontal(LTTextBox):
-    def analyze(self, laparams):
-        LTTextBox.analyze(self, laparams)
+    def analyze(self, laparams: LAParams) -> None:
+        super().analyze(laparams)
         self._objs.sort(key=lambda obj: -obj.y1)
         return
 
-    def get_writing_mode(self):
+    def get_writing_mode(self) -> str:
         return 'lr-tb'
 
 
 class LTTextBoxVertical(LTTextBox):
-    def analyze(self, laparams):
-        LTTextBox.analyze(self, laparams)
+    def analyze(self, laparams: LAParams) -> None:
+        super().analyze(laparams)
         self._objs.sort(key=lambda obj: -obj.x1)
         return
 
-    def get_writing_mode(self):
+    def get_writing_mode(self) -> str:
         return 'tb-rl'
 
 
-class LTTextGroup(LTTextContainer):
-    def __init__(self, objs):
-        LTTextContainer.__init__(self)
+TextGroupElement = Union[LTTextBox, "LTTextGroup"]
+
+
+class LTTextGroup(LTTextContainer[TextGroupElement]):
+    def __init__(self, objs: Iterable[TextGroupElement]) -> None:
+        super().__init__()
         self.extend(objs)
         return
 
 
 class LTTextGroupLRTB(LTTextGroup):
-    def analyze(self, laparams):
-        LTTextGroup.analyze(self, laparams)
+    def analyze(self, laparams: LAParams) -> None:
+        super().analyze(laparams)
+        assert laparams.boxes_flow is not None
+        boxes_flow = laparams.boxes_flow
         # reorder the objects from top-left to bottom-right.
         self._objs.sort(
-            key=lambda obj: (1 - laparams.boxes_flow) * obj.x0
-            - (1 + laparams.boxes_flow) * (obj.y0 + obj.y1))
+            key=lambda obj: (1 - boxes_flow) * obj.x0
+            - (1 + boxes_flow) * (obj.y0 + obj.y1))
         return
 
 
 class LTTextGroupTBRL(LTTextGroup):
-    def analyze(self, laparams):
-        LTTextGroup.analyze(self, laparams)
+    def analyze(self, laparams: LAParams) -> None:
+        super().analyze(laparams)
+        assert laparams.boxes_flow is not None
+        boxes_flow = laparams.boxes_flow
         # reorder the objects from top-right to bottom-left.
         self._objs.sort(
-            key=lambda obj: - (1 + laparams.boxes_flow) * (obj.x0 + obj.x1)
-                            - (1 - laparams.boxes_flow) * obj.y1)
+            key=lambda obj: - (1 + boxes_flow) * (obj.x0 + obj.x1)
+                            - (1 - boxes_flow) * obj.y1)
         return
 
 
-class LTLayoutContainer(LTContainer):
-    def __init__(self, bbox):
+class LTLayoutContainer(LTContainer[LTComponent]):
+    def __init__(self, bbox: Rect) -> None:
         LTContainer.__init__(self, bbox)
-        self.groups = None
+        self.groups: Optional[List[LTTextGroup]] = None
         return
 
     # group_objects: group text object to textlines.
-    def group_objects(self, laparams, objs):
+    def group_objects(
+        self,
+        laparams: LAParams,
+        objs: Iterable[LTComponent]
+    ) -> Iterator[LTTextLine]:
         obj0 = None
         line = None
         for obj1 in objs:
@@ -667,15 +780,20 @@ def group_objects(self, laparams, objs):
             obj0 = obj1
         if line is None:
             line = LTTextLineHorizontal(laparams.word_margin)
+            assert obj0 is not None
             line.add(obj0)
         yield line
         return
 
-    def group_textlines(self, laparams, lines):
+    def group_textlines(
+        self,
+        laparams: LAParams,
+        lines: Iterable[LTTextLine]
+    ) -> Iterator[LTTextBox]:
         """Group neighboring lines to textboxes"""
-        plane = Plane(self.bbox)
+        plane: Plane[LTTextLine] = Plane(self.bbox)
         plane.extend(lines)
-        boxes = {}
+        boxes: Dict[LTTextLine, LTTextBox] = {}
         for line in lines:
             neighbors = line.find_neighbors(plane, laparams.line_margin)
             members = [line]
@@ -684,7 +802,7 @@ def group_textlines(self, laparams, lines):
                 if obj1 in boxes:
                     members.extend(boxes.pop(obj1))
             if isinstance(line, LTTextLineHorizontal):
-                box = LTTextBoxHorizontal()
+                box: LTTextBox = LTTextBoxHorizontal()
             else:
                 box = LTTextBoxVertical()
             for obj in uniq(members):
@@ -702,7 +820,11 @@ def group_textlines(self, laparams, lines):
                 yield box
         return
 
-    def group_textboxes(self, laparams, boxes):
+    def group_textboxes(
+        self,
+        laparams: LAParams,
+        boxes: Sequence[LTTextBox]
+    ) -> List[LTTextGroup]:
         """Group textboxes hierarchically.
 
         Get pair-wise distances, via dist func defined below, and then merge
@@ -718,10 +840,13 @@ def group_textboxes(self, laparams, boxes):
 
         :param laparams: LAParams object.
         :param boxes: All textbox objects to be grouped.
-        :return: a list that has only one element, the final top level textbox.
+        :return: a list that has only one element, the final top level group.
         """
 
-        def dist(obj1, obj2):
+        ElementT = Union[LTTextBox, LTTextGroup]
+        plane: Plane[ElementT] = Plane(self.bbox)
+
+        def dist(obj1: LTComponent, obj2: LTComponent) -> float:
             """A distance function between two TextBoxes.
 
             Consider the bounding rectangle for obj1 and obj2.
@@ -740,7 +865,7 @@ def dist(obj1, obj2):
             return (x1 - x0) * (y1 - y0) \
                 - obj1.width*obj1.height - obj2.width*obj2.height
 
-        def isany(obj1, obj2):
+        def isany(obj1: ElementT, obj2: ElementT) -> Set[ElementT]:
             """Check if there's any other object between obj1 and obj2."""
             x0 = min(obj1.x0, obj2.x0)
             y0 = min(obj1.y0, obj2.y0)
@@ -749,16 +874,15 @@ def isany(obj1, obj2):
             objs = set(plane.find((x0, y0, x1, y1)))
             return objs.difference((obj1, obj2))
 
-        dists = []
+        dists: List[Tuple[bool, float, int, int, ElementT, ElementT]] = []
         for i in range(len(boxes)):
-            obj1 = boxes[i]
+            box1 = boxes[i]
             for j in range(i+1, len(boxes)):
-                obj2 = boxes[j]
-                dists.append((False, dist(obj1, obj2), id(obj1), id(obj2),
-                              obj1, obj2))
+                box2 = boxes[j]
+                dists.append((False, dist(box1, box2), id(box1), id(box2),
+                              box1, box2))
         heapq.heapify(dists)
 
-        plane = Plane(self.bbox)
         plane.extend(boxes)
         done = set()
         while len(dists) > 0:
@@ -770,7 +894,7 @@ def isany(obj1, obj2):
                     continue
                 if isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or \
                         isinstance(obj2, (LTTextBoxVertical, LTTextGroupTBRL)):
-                    group = LTTextGroupTBRL([obj1, obj2])
+                    group: LTTextGroup = LTTextGroupTBRL([obj1, obj2])
                 else:
                     group = LTTextGroupLRTB([obj1, obj2])
                 plane.remove(obj1)
@@ -781,9 +905,10 @@ def isany(obj1, obj2):
                     heapq.heappush(dists, (False, dist(group, other),
                                            id(group), id(other), group, other))
                 plane.add(group)
-        return list(plane)
+        # Only groups remain in the plane (or the lone box if given just one)
+        return [cast(LTTextGroup, g) for g in plane]
 
-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
         # textobjs is a list of LTChar objects, i.e.
         # it has all the individual characters in the page.
         (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar),
@@ -801,7 +926,7 @@ def analyze(self, laparams):
             for textbox in textboxes:
                 textbox.analyze(laparams)
 
-            def getkey(box):
+            def getkey(box: LTTextBox) -> Tuple[int, float, float]:
                 if isinstance(box, LTTextBoxVertical):
                     return (0, -box.x1, -box.y0)
                 else:
@@ -814,7 +939,8 @@ def getkey(box):
                 group.analyze(laparams)
                 assigner.run(group)
             textboxes.sort(key=lambda box: box.index)
-        self._objs = textboxes + otherobjs + empties
+        self._objs = (cast(List[LTComponent], textboxes) + otherobjs
+                      + cast(List[LTComponent], empties))
         return
 
 
@@ -826,7 +952,7 @@ class LTFigure(LTLayoutContainer):
     recursively.
     """
 
-    def __init__(self, name, bbox, matrix):
+    def __init__(self, name: str, bbox: Rect, matrix: Matrix) -> None:
         self.name = name
         self.matrix = matrix
         (x, y, w, h) = bbox
@@ -835,12 +961,12 @@ def __init__(self, name, bbox, matrix):
         LTLayoutContainer.__init__(self, bbox)
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return ('<%s(%s) %s matrix=%s>' %
                 (self.__class__.__name__, self.name,
                  bbox2str(self.bbox), matrix2str(self.matrix)))
 
-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
         if not laparams.all_texts:
             return
         LTLayoutContainer.analyze(self, laparams)
@@ -854,13 +980,13 @@ class LTPage(LTLayoutContainer):
     LTCurve and LTLine.
     """
 
-    def __init__(self, pageid, bbox, rotate=0):
+    def __init__(self, pageid: int, bbox: Rect, rotate: float = 0) -> None:
         LTLayoutContainer.__init__(self, bbox)
         self.pageid = pageid
         self.rotate = rotate
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return ('<%s(%r) %s rotate=%r>' %
                 (self.__class__.__name__, self.pageid,
                  bbox2str(self.bbox), self.rotate))
diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py
index f0ed8a87..31c085ed 100644
--- a/pdfminer/lzw.py
+++ b/pdfminer/lzw.py
@@ -1,5 +1,6 @@
 from io import BytesIO
 import logging
+from typing import BinaryIO, Iterator, List, Optional, cast
 
 
 logger = logging.getLogger(__name__)
@@ -11,16 +12,17 @@ class CorruptDataError(Exception):
 
 class LZWDecoder:
 
-    def __init__(self, fp):
+    def __init__(self, fp: BinaryIO) -> None:
         self.fp = fp
         self.buff = 0
         self.bpos = 8
         self.nbits = 9
-        self.table = None
-        self.prevbuf = None
+        # NB: self.table stores None only in indices 256 and 257
+        self.table: Optional[List[Optional[bytes]]] = None
+        self.prevbuf: Optional[bytes] = None
         return
 
-    def readbits(self, bits):
+    def readbits(self, bits: int) -> int:
         v = 0
         while 1:
             # the number of remaining bits we can get from the current buffer.
@@ -45,7 +47,7 @@ def readbits(self, bits):
                 self.bpos = 0
         return v
 
-    def feed(self, code):
+    def feed(self, code: int) -> bytes:
         x = b''
         if code == 256:
             self.table = [bytes((c,)) for c in range(256)]  # 0-255
@@ -56,14 +58,16 @@ def feed(self, code):
         elif code == 257:
             pass
         elif not self.prevbuf:
-            x = self.prevbuf = self.table[code]
+            assert self.table is not None
+            x = self.prevbuf = cast(bytes, self.table[code])  # assume not None
         else:
+            assert self.table is not None
             if code < len(self.table):
-                x = self.table[code]
+                x = cast(bytes, self.table[code])  # assume not None
                 self.table.append(self.prevbuf+x[:1])
             elif code == len(self.table):
                 self.table.append(self.prevbuf+self.prevbuf[:1])
-                x = self.table[code]
+                x = cast(bytes, self.table[code])
             else:
                 raise CorruptDataError
             table_length = len(self.table)
@@ -76,7 +80,7 @@ def feed(self, code):
             self.prevbuf = x
         return x
 
-    def run(self):
+    def run(self) -> Iterator[bytes]:
         while 1:
             try:
                 code = self.readbits(self.nbits)
@@ -88,12 +92,13 @@ def run(self):
                 # just ignore corrupt data and stop yielding there
                 break
             yield x
+            assert self.table is not None
             logger.debug('nbits=%d, code=%d, output=%r, table=%r'
                          % (self.nbits, code, x, self.table[258:]))
         return
 
 
-def lzwdecode(data):
+def lzwdecode(data: bytes) -> bytes:
     fp = BytesIO(data)
     s = LZWDecoder(fp).run()
     return b''.join(s)
diff --git a/pdfminer/pdfcolor.py b/pdfminer/pdfcolor.py
index ff28d54e..df685ed1 100644
--- a/pdfminer/pdfcolor.py
+++ b/pdfminer/pdfcolor.py
@@ -1,4 +1,5 @@
 import collections
+from typing import Dict
 from .psparser import LIT
 
 
@@ -9,17 +10,17 @@
 
 class PDFColorSpace:
 
-    def __init__(self, name, ncomponents):
+    def __init__(self, name: str, ncomponents: int) -> None:
         self.name = name
         self.ncomponents = ncomponents
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFColorSpace: %s, ncomponents=%d>' % \
                (self.name, self.ncomponents)
 
 
-PREDEFINED_COLORSPACE = collections.OrderedDict()
+PREDEFINED_COLORSPACE: Dict[str, PDFColorSpace] = collections.OrderedDict()
 
 for (name, n) in [
     ('DeviceGray', 1),  # default value first
diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py
index 82ede760..0a370633 100644
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@@ -1,66 +1,116 @@
+from pdfminer.psparser import PSLiteral
+from typing import (BinaryIO, Iterable, List, Optional, Sequence,
+                    TYPE_CHECKING, Union, cast)
 from . import utils
+from .utils import Matrix, Point, Rect, PathSegment
+from .pdfcolor import PDFColorSpace
+from .pdffont import PDFFont
 from .pdffont import PDFUnicodeNotDefined
+from .pdfpage import PDFPage
+from .pdftypes import PDFStream
+
+if TYPE_CHECKING:
+    from .pdfinterp import PDFGraphicState
+    from .pdfinterp import PDFResourceManager
+    from .pdfinterp import PDFTextState
+    from .pdfinterp import PDFStackT
+
+
+PDFTextSeq = Iterable[Union[int, float, bytes]]
 
 
 class PDFDevice:
     """Translate the output of PDFPageInterpreter to the output that is needed
     """
 
-    def __init__(self, rsrcmgr):
+    def __init__(self, rsrcmgr: "PDFResourceManager") -> None:
         self.rsrcmgr = rsrcmgr
-        self.ctm = None
+        self.ctm: Optional[Matrix] = None
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFDevice>'
 
-    def __enter__(self):
+    def __enter__(self) -> "PDFDevice":
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(
+        self,
+        exc_type: object,
+        exc_val: object,
+        exc_tb: object
+    ) -> None:
         self.close()
 
-    def close(self):
+    def close(self) -> None:
         return
 
-    def set_ctm(self, ctm):
+    def set_ctm(self, ctm: Matrix) -> None:
         self.ctm = ctm
         return
 
-    def begin_tag(self, tag, props=None):
+    def begin_tag(
+        self,
+        tag: PSLiteral,
+        props: Optional["PDFStackT"] = None
+    ) -> None:
         return
 
-    def end_tag(self):
+    def end_tag(self) -> None:
         return
 
-    def do_tag(self, tag, props=None):
+    def do_tag(
+        self,
+        tag: PSLiteral,
+        props: Optional["PDFStackT"] = None
+    ) -> None:
         return
 
-    def begin_page(self, page, ctm):
+    def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
         return
 
-    def end_page(self, page):
+    def end_page(self, page: PDFPage) -> None:
         return
 
-    def begin_figure(self, name, bbox, matrix):
+    def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
         return
 
-    def end_figure(self, name):
+    def end_figure(self, name: str) -> None:
         return
 
-    def paint_path(self, graphicstate, stroke, fill, evenodd, path):
+    def paint_path(
+        self,
+        graphicstate: "PDFGraphicState",
+        stroke: bool,
+        fill: bool,
+        evenodd: bool,
+        path: Sequence[PathSegment]
+    ) -> None:
         return
 
-    def render_image(self, name, stream):
+    def render_image(self, name: str, stream: PDFStream) -> None:
         return
 
-    def render_string(self, textstate, seq, ncs, graphicstate):
+    def render_string(
+        self,
+        textstate: "PDFTextState",
+        seq: PDFTextSeq,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> None:
         return
 
 
 class PDFTextDevice(PDFDevice):
 
-    def render_string(self, textstate, seq, ncs, graphicstate):
+    def render_string(
+        self,
+        textstate: "PDFTextState",
+        seq: PDFTextSeq,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> None:
+        assert self.ctm is not None
         matrix = utils.mult_matrix(textstate.matrix, self.ctm)
         font = textstate.font
         fontsize = textstate.fontsize
@@ -68,6 +118,7 @@ def render_string(self, textstate, seq, ncs, graphicstate):
         charspace = textstate.charspace * scaling
         wordspace = textstate.wordspace * scaling
         rise = textstate.rise
+        assert font is not None
         if font.is_multibyte():
             wordspace = 0
         dxscale = .001 * fontsize * scaling
@@ -83,13 +134,25 @@ def render_string(self, textstate, seq, ncs, graphicstate):
                 graphicstate)
         return
 
-    def render_string_horizontal(self, seq, matrix, pos,
-                                 font, fontsize, scaling, charspace, wordspace,
-                                 rise, dxscale, ncs, graphicstate):
+    def render_string_horizontal(
+        self,
+        seq: PDFTextSeq,
+        matrix: Matrix,
+        pos: Point,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        charspace: float,
+        wordspace: float,
+        rise: float,
+        dxscale: float,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> Point:
         (x, y) = pos
         needcharspace = False
         for obj in seq:
-            if utils.isnumber(obj):
+            if isinstance(obj, (int, float)):
                 x -= obj*dxscale
                 needcharspace = True
             else:
@@ -104,13 +167,25 @@ def render_string_horizontal(self, seq, matrix, pos,
                     needcharspace = True
         return (x, y)
 
-    def render_string_vertical(self, seq, matrix, pos,
-                               font, fontsize, scaling, charspace, wordspace,
-                               rise, dxscale, ncs, graphicstate):
+    def render_string_vertical(
+        self,
+        seq: PDFTextSeq,
+        matrix: Matrix,
+        pos: Point,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        charspace: float,
+        wordspace: float,
+        rise: float,
+        dxscale: float,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> Point:
         (x, y) = pos
         needcharspace = False
         for obj in seq:
-            if utils.isnumber(obj):
+            if isinstance(obj, (int, float)):
                 y -= obj*dxscale
                 needcharspace = True
             else:
@@ -125,23 +200,44 @@ def render_string_vertical(self, seq, matrix, pos,
                     needcharspace = True
         return (x, y)
 
-    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
-                    graphicstate):
+    def render_char(
+        self,
+        matrix: Matrix,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        rise: float,
+        cid: int,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> float:
         return 0
 
 
 class TagExtractor(PDFDevice):
 
-    def __init__(self, rsrcmgr, outfp, codec='utf-8'):
+    def __init__(
+        self,
+        rsrcmgr: "PDFResourceManager",
+        outfp: BinaryIO,
+        codec: str = 'utf-8'
+    ) -> None:
         PDFDevice.__init__(self, rsrcmgr)
         self.outfp = outfp
         self.codec = codec
         self.pageno = 0
-        self._stack = []
+        self._stack: List[PSLiteral] = []
         return
 
-    def render_string(self, textstate, seq, ncs, graphicstate):
+    def render_string(
+        self,
+        textstate: "PDFTextState",
+        seq: PDFTextSeq,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> None:
         font = textstate.font
+        assert font is not None
         text = ''
         for obj in seq:
             if isinstance(obj, str):
@@ -158,40 +254,42 @@ def render_string(self, textstate, seq, ncs, graphicstate):
         self._write(utils.enc(text))
         return
 
-    def begin_page(self, page, ctm):
+    def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
         output = '<page id="%s" bbox="%s" rotate="%d">' %\
                  (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
         self._write(output)
         return
 
-    def end_page(self, page):
+    def end_page(self, page: PDFPage) -> None:
         self._write('</page>\n')
         self.pageno += 1
         return
 
-    def begin_tag(self, tag, props=None):
+    def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
+                  ) -> None:
         s = ''
         if isinstance(props, dict):
             s = ''.join([
                 ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v))
                 for (k, v) in sorted(props.items())
             ])
-        out_s = '<{}{}>'.format(utils.enc(tag.name), s)
+        out_s = '<{}{}>'.format(utils.enc(cast(str, tag.name)), s)
         self._write(out_s)
         self._stack.append(tag)
         return
 
-    def end_tag(self):
+    def end_tag(self) -> None:
         assert self._stack, str(self.pageno)
         tag = self._stack.pop(-1)
-        out_s = '</%s>' % utils.enc(tag.name)
+        out_s = '</%s>' % utils.enc(cast(str, tag.name))
         self._write(out_s)
         return
 
-    def do_tag(self, tag, props=None):
+    def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
+               ) -> None:
         self.begin_tag(tag, props)
         self._stack.pop(-1)
         return
 
-    def _write(self, s: str):
+    def _write(self, s: str) -> None:
         self.outfp.write(s.encode(self.codec))
diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py
index 6a576f57..88589706 100644
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@@ -2,16 +2,18 @@
 import re
 import struct
 from hashlib import sha256, md5, sha384, sha512
+from typing import (Any, Callable, Dict, Iterable, Iterator, KeysView, List,
+                    Optional, Sequence, Tuple, Type, Union, cast)
 
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
 
 from . import settings
 from .arcfour import Arcfour
-from .pdfparser import PDFSyntaxError, PDFStreamParser
-from .pdftypes import PDFException, uint_value, PDFTypeError, PDFStream, \
+from .pdfparser import PDFSyntaxError, PDFParser, PDFStreamParser
+from .pdftypes import DecipherCallable, PDFException, PDFTypeError, PDFStream,\
     PDFObjectNotFound, decipher_all, int_value, str_value, list_value, \
-    dict_value, stream_value
+    uint_value, dict_value, stream_value
 from .psparser import PSEOF, literal_name, LIT, KWD
 from .utils import choplist, nunpack, decode_text
 
@@ -51,7 +53,7 @@ class PDFTextExtractionNotAllowed(PDFEncryptionError):
 
 
 class PDFTextExtractionNotAllowedError(PDFTextExtractionNotAllowed):
-    def __init__(self, *args):
+    def __init__(self, *args: object) -> None:
         from warnings import warn
         warn('PDFTextExtractionNotAllowedError will be removed in the future. '
              'Use PDFTextExtractionNotAllowed instead.', DeprecationWarning)
@@ -65,31 +67,33 @@ def __init__(self, *args):
 
 
 class PDFBaseXRef:
-
-    def get_trailer(self):
+    def get_trailer(self) -> Dict[str, Any]:
         raise NotImplementedError
 
-    def get_objids(self):
+    def get_objids(self) -> Iterable[int]:
         return []
 
     # Must return
     #     (strmid, index, genno)
     #  or (None, pos, genno)
-    def get_pos(self, objid):
+    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
         raise KeyError(objid)
 
+    def load(self, parser: PDFParser) -> None:
+        raise NotImplementedError
+
 
 class PDFXRef(PDFBaseXRef):
 
-    def __init__(self):
-        self.offsets = {}
-        self.trailer = {}
+    def __init__(self) -> None:
+        self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {}
+        self.trailer: Dict[str, Any] = {}
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFXRef: offsets=%r>' % (self.offsets.keys())
 
-    def load(self, parser):
+    def load(self, parser: PDFParser) -> None:
         while True:
             try:
                 (pos, line) = parser.nextline()
@@ -123,15 +127,15 @@ def load(self, parser):
                     error_msg = 'Invalid XRef format: {!r}, line={!r}'\
                         .format(parser, line)
                     raise PDFNoValidXRef(error_msg)
-                (pos, genno, use) = f
-                if use != b'n':
+                (pos_b, genno_b, use_b) = f
+                if use_b != b'n':
                     continue
-                self.offsets[objid] = (None, int(pos), int(genno))
+                self.offsets[objid] = (None, int(pos_b), int(genno_b))
         log.info('xref objects: %r', self.offsets)
         self.load_trailer(parser)
         return
 
-    def load_trailer(self, parser):
+    def load_trailer(self, parser: PDFParser) -> None:
         try:
             (_, kwd) = parser.nexttoken()
             assert kwd is KWD(b'trailer'), str(kwd)
@@ -145,13 +149,13 @@ def load_trailer(self, parser):
         log.debug('trailer=%r', self.trailer)
         return
 
-    def get_trailer(self):
+    def get_trailer(self) -> Dict[str, Any]:
         return self.trailer
 
-    def get_objids(self):
+    def get_objids(self) -> KeysView[int]:
         return self.offsets.keys()
 
-    def get_pos(self, objid):
+    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
         try:
             return self.offsets[objid]
         except KeyError:
@@ -160,30 +164,30 @@ def get_pos(self, objid):
 
 class PDFXRefFallback(PDFXRef):
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFXRefFallback: offsets=%r>' % (self.offsets.keys())
 
     PDFOBJ_CUE = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
 
-    def load(self, parser):
+    def load(self, parser: PDFParser) -> None:
         parser.seek(0)
         while 1:
             try:
-                (pos, line) = parser.nextline()
+                (pos, line_bytes) = parser.nextline()
             except PSEOF:
                 break
-            if line.startswith(b'trailer'):
+            if line_bytes.startswith(b'trailer'):
                 parser.seek(pos)
                 self.load_trailer(parser)
                 log.info('trailer: %r', self.trailer)
                 break
-            line = line.decode('latin-1')  # default pdf encoding
+            line = line_bytes.decode('latin-1')  # default pdf encoding
             m = self.PDFOBJ_CUE.match(line)
             if not m:
                 continue
-            (objid, genno) = m.groups()
-            objid = int(objid)
-            genno = int(genno)
+            (objid_s, genno_s) = m.groups()
+            objid = int(objid_s)
+            genno = int(genno_s)
             self.offsets[objid] = (None, pos, genno)
             # expand ObjStm.
             parser.seek(pos)
@@ -198,11 +202,11 @@ def load(self, parser):
                         raise PDFSyntaxError('N is not defined: %r' % stream)
                     n = 0
                 parser1 = PDFStreamParser(stream.get_data())
-                objs = []
+                objs: List[int] = []
                 try:
                     while 1:
                         (_, obj) = parser1.nextobject()
-                        objs.append(obj)
+                        objs.append(cast(int, obj))
                 except PSEOF:
                     pass
                 n = min(n, len(objs)//2)
@@ -214,17 +218,19 @@ def load(self, parser):
 
 class PDFXRefStream(PDFBaseXRef):
 
-    def __init__(self):
-        self.data = None
-        self.entlen = None
-        self.fl1 = self.fl2 = self.fl3 = None
-        self.ranges = []
+    def __init__(self) -> None:
+        self.data: Optional[bytes] = None
+        self.entlen: Optional[int] = None
+        self.fl1: Optional[int] = None
+        self.fl2: Optional[int] = None
+        self.fl3: Optional[int] = None
+        self.ranges: List[Tuple[int, int]] = []
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFXRefStream: ranges=%r>' % (self.ranges)
 
-    def load(self, parser):
+    def load(self, parser: PDFParser) -> None:
         (_, objid) = parser.nexttoken()  # ignored
         (_, genno) = parser.nexttoken()  # ignored
         (_, kwd) = parser.nexttoken()
@@ -236,8 +242,11 @@ def load(self, parser):
         index_array = stream.get('Index', (0, size))
         if len(index_array) % 2 != 0:
             raise PDFSyntaxError('Invalid index number')
-        self.ranges.extend(choplist(2, index_array))
+        self.ranges.extend(cast(Iterator[Tuple[int, int]],
+                                choplist(2, index_array)))
         (self.fl1, self.fl2, self.fl3) = stream['W']
+        assert (self.fl1 is not None and self.fl2 is not None
+                and self.fl3 is not None)
         self.data = stream.get_data()
         self.entlen = self.fl1+self.fl2+self.fl3
         self.trailer = stream.attrs
@@ -246,12 +255,14 @@ def load(self, parser):
                  self.fl1, self.fl2, self.fl3)
         return
 
-    def get_trailer(self):
+    def get_trailer(self) -> Dict[str, Any]:
         return self.trailer
 
-    def get_objids(self):
+    def get_objids(self) -> Iterator[int]:
         for (start, nobjs) in self.ranges:
             for i in range(nobjs):
+                assert self.entlen is not None
+                assert self.data is not None
                 offset = self.entlen * i
                 ent = self.data[offset:offset+self.entlen]
                 f1 = nunpack(ent[:self.fl1], 1)
@@ -259,7 +270,7 @@ def get_objids(self):
                     yield start+i
         return
 
-    def get_pos(self, objid):
+    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
         index = 0
         for (start, nobjs) in self.ranges:
             if start <= objid and objid < start+nobjs:
@@ -269,6 +280,10 @@ def get_pos(self, objid):
                 index += nobjs
         else:
             raise KeyError(objid)
+        assert self.entlen is not None
+        assert self.data is not None
+        assert (self.fl1 is not None and self.fl2 is not None
+                and self.fl3 is not None)
         offset = self.entlen * index
         ent = self.data[offset:offset+self.entlen]
         f1 = nunpack(ent[:self.fl1], 1)
@@ -287,16 +302,21 @@ class PDFStandardSecurityHandler:
 
     PASSWORD_PADDING = (b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08'
                         b'..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz')
-    supported_revisions = (2, 3)
-
-    def __init__(self, docid, param, password=''):
+    supported_revisions: Tuple[int, ...] = (2, 3)
+
+    def __init__(
+        self,
+        docid: Sequence[bytes],
+        param: Dict[str, Any],
+        password: str = ''
+    ) -> None:
         self.docid = docid
         self.param = param
         self.password = password
         self.init()
         return
 
-    def init(self):
+    def init(self) -> None:
         self.init_params()
         if self.r not in self.supported_revisions:
             error_msg = 'Unsupported revision: param=%r' % self.param
@@ -304,7 +324,7 @@ def init(self):
         self.init_key()
         return
 
-    def init_params(self):
+    def init_params(self) -> None:
         self.v = int_value(self.param.get('V', 0))
         self.r = int_value(self.param['R'])
         self.p = uint_value(self.param['P'], 32)
@@ -313,22 +333,22 @@ def init_params(self):
         self.length = int_value(self.param.get('Length', 40))
         return
 
-    def init_key(self):
+    def init_key(self) -> None:
         self.key = self.authenticate(self.password)
         if self.key is None:
             raise PDFPasswordIncorrect
         return
 
-    def is_printable(self):
+    def is_printable(self) -> bool:
         return bool(self.p & 4)
 
-    def is_modifiable(self):
+    def is_modifiable(self) -> bool:
         return bool(self.p & 8)
 
-    def is_extractable(self):
+    def is_extractable(self) -> bool:
         return bool(self.p & 16)
 
-    def compute_u(self, key):
+    def compute_u(self, key: bytes) -> bytes:
         if self.r == 2:
             # Algorithm 3.4
             return Arcfour(key).encrypt(self.PASSWORD_PADDING)  # 2
@@ -343,7 +363,7 @@ def compute_u(self, key):
             result += result  # 6
             return result
 
-    def compute_encryption_key(self, password):
+    def compute_encryption_key(self, password: bytes) -> bytes:
         # Algorithm 3.2
         password = (password + self.PASSWORD_PADDING)[:32]  # 1
         hash = md5(password)  # 2
@@ -352,7 +372,7 @@ def compute_encryption_key(self, password):
         hash.update(struct.pack('<L', self.p))  # 4
         hash.update(self.docid[0])  # 5
         if self.r >= 4:
-            if not self.encrypt_metadata:
+            if not cast(PDFStandardSecurityHandlerV4, self).encrypt_metadata:
                 hash.update(b'\xff\xff\xff\xff')
         result = hash.digest()
         n = 5
@@ -362,28 +382,28 @@ def compute_encryption_key(self, password):
                 result = md5(result[:n]).digest()
         return result[:n]
 
-    def authenticate(self, password):
-        password = password.encode("latin1")
-        key = self.authenticate_user_password(password)
+    def authenticate(self, password: str) -> Optional[bytes]:
+        password_bytes = password.encode("latin1")
+        key = self.authenticate_user_password(password_bytes)
         if key is None:
-            key = self.authenticate_owner_password(password)
+            key = self.authenticate_owner_password(password_bytes)
         return key
 
-    def authenticate_user_password(self, password):
+    def authenticate_user_password(self, password: bytes) -> Optional[bytes]:
         key = self.compute_encryption_key(password)
         if self.verify_encryption_key(key):
             return key
         else:
             return None
 
-    def verify_encryption_key(self, key):
+    def verify_encryption_key(self, key: bytes) -> bool:
         # Algorithm 3.6
         u = self.compute_u(key)
         if self.r == 2:
             return u == self.u
         return u[:16] == self.u[:16]
 
-    def authenticate_owner_password(self, password):
+    def authenticate_owner_password(self, password: bytes) -> Optional[bytes]:
         # Algorithm 3.7
         password = (password + self.PASSWORD_PADDING)[:32]
         hash = md5(password)
@@ -403,12 +423,19 @@ def authenticate_owner_password(self, password):
                 user_password = Arcfour(k).decrypt(user_password)
         return self.authenticate_user_password(user_password)
 
-    def decrypt(self, objid, genno, data, attrs=None):
+    def decrypt(
+        self,
+        objid: int,
+        genno: int,
+        data: bytes,
+        attrs: Optional[Dict[str, Any]] = None
+    ) -> bytes:
         return self.decrypt_rc4(objid, genno, data)
 
-    def decrypt_rc4(self, objid, genno, data):
+    def decrypt_rc4(self, objid: int, genno: int, data: bytes) -> bytes:
+        assert self.key is not None
         key = self.key + struct.pack('<L', objid)[:3] \
-              + struct.pack('<L', genno)[:2]
+            + struct.pack('<L', genno)[:2]
         hash = md5(key)
         key = hash.digest()[:min(len(key), 16)]
         return Arcfour(key).decrypt(data)
@@ -416,9 +443,9 @@ def decrypt_rc4(self, objid, genno, data):
 
 class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
 
-    supported_revisions = (4,)
+    supported_revisions: Tuple[int, ...] = (4,)
 
-    def init_params(self):
+    def init_params(self) -> None:
         super().init_params()
         self.length = 128
         self.cf = dict_value(self.param.get('CF'))
@@ -442,7 +469,10 @@ def init_params(self):
             raise PDFEncryptionError(error_msg)
         return
 
-    def get_cfm(self, name):
+    def get_cfm(
+        self,
+        name: str
+    ) -> Optional[Callable[[int, int, bytes], bytes]]:
         if name == 'V2':
             return self.decrypt_rc4
         elif name == 'AESV2':
@@ -450,7 +480,14 @@ def get_cfm(self, name):
         else:
             return None
 
-    def decrypt(self, objid, genno, data, attrs=None, name=None):
+    def decrypt(
+        self,
+        objid: int,
+        genno: int,
+        data: bytes,
+        attrs: Optional[Dict[str, Any]] = None,
+        name: Optional[str] = None
+    ) -> bytes:
         if not self.encrypt_metadata and attrs is not None:
             t = attrs.get('Type')
             if t is not None and literal_name(t) == 'Metadata':
@@ -459,27 +496,28 @@ def decrypt(self, objid, genno, data, attrs=None, name=None):
             name = self.strf
         return self.cfm[name](objid, genno, data)
 
-    def decrypt_identity(self, objid, genno, data):
+    def decrypt_identity(self, objid: int, genno: int, data: bytes) -> bytes:
         return data
 
-    def decrypt_aes128(self, objid, genno, data):
+    def decrypt_aes128(self, objid: int, genno: int, data: bytes) -> bytes:
+        assert self.key is not None
         key = self.key + struct.pack('<L', objid)[:3] \
-              + struct.pack('<L', genno)[:2] + b'sAlT'
+            + struct.pack('<L', genno)[:2] + b'sAlT'
         hash = md5(key)
         key = hash.digest()[:min(len(key), 16)]
         initialization_vector = data[:16]
         ciphertext = data[16:]
         cipher = Cipher(algorithms.AES(key),
                         modes.CBC(initialization_vector),
-                        backend=default_backend())
-        return cipher.decryptor().update(ciphertext)
+                        backend=default_backend())  # type: ignore
+        return cipher.decryptor().update(ciphertext)  # type: ignore
 
 
 class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
 
     supported_revisions = (5, 6)
 
-    def init_params(self):
+    def init_params(self) -> None:
         super().init_params()
         self.length = 256
         self.oe = str_value(self.param['OE'])
@@ -492,31 +530,34 @@ def init_params(self):
         self.u_key_salt = self.u[40:]
         return
 
-    def get_cfm(self, name):
+    def get_cfm(
+        self,
+        name: str
+    ) -> Optional[Callable[[int, int, bytes], bytes]]:
         if name == 'AESV3':
             return self.decrypt_aes256
         else:
             return None
 
-    def authenticate(self, password):
-        password = self._normalize_password(password)
-        hash = self._password_hash(password, self.o_validation_salt, self.u)
+    def authenticate(self, password: str) -> Optional[bytes]:
+        password_b = self._normalize_password(password)
+        hash = self._password_hash(password_b, self.o_validation_salt, self.u)
         if hash == self.o_hash:
-            hash = self._password_hash(password, self.o_key_salt, self.u)
+            hash = self._password_hash(password_b, self.o_key_salt, self.u)
             cipher = Cipher(algorithms.AES(hash),
                             modes.CBC(b'\0' * 16),
-                            backend=default_backend())
-            return cipher.decryptor().update(self.oe)
-        hash = self._password_hash(password, self.u_validation_salt)
+                            backend=default_backend())  # type: ignore
+            return cipher.decryptor().update(self.oe)  # type: ignore
+        hash = self._password_hash(password_b, self.u_validation_salt)
         if hash == self.u_hash:
-            hash = self._password_hash(password, self.u_key_salt)
+            hash = self._password_hash(password_b, self.u_key_salt)
             cipher = Cipher(algorithms.AES(hash),
                             modes.CBC(b'\0' * 16),
-                            backend=default_backend())
-            return cipher.decryptor().update(self.ue)
+                            backend=default_backend())  # type: ignore
+            return cipher.decryptor().update(self.ue)  # type: ignore
         return None
 
-    def _normalize_password(self, password):
+    def _normalize_password(self, password: str) -> bytes:
         if self.r == 6:
             # saslprep expects non-empty strings, apparently
             if not password:
@@ -525,7 +566,12 @@ def _normalize_password(self, password):
             password = saslprep(password)
         return password.encode('utf-8')[:127]
 
-    def _password_hash(self, password, salt, vector=None):
+    def _password_hash(
+        self,
+        password: bytes,
+        salt: bytes,
+        vector: Optional[bytes] = None
+    ) -> bytes:
         """
         Compute password hash depending on revision number
         """
@@ -533,7 +579,12 @@ def _password_hash(self, password, salt, vector=None):
             return self._r5_password(password, salt, vector)
         return self._r6_password(password, salt[0:8], vector)
 
-    def _r5_password(self, password, salt, vector):
+    def _r5_password(
+        self,
+        password: bytes,
+        salt: bytes,
+        vector: Optional[bytes] = None
+    ) -> bytes:
         """
         Compute the password for revision 5
         """
@@ -543,7 +594,12 @@ def _r5_password(self, password, salt, vector):
             hash.update(vector)
         return hash.digest()
 
-    def _r6_password(self, password, salt, vector):
+    def _r6_password(
+        self,
+        password: bytes,
+        salt: bytes,
+        vector: Optional[bytes] = None
+    ) -> bytes:
         """
         Compute the password for revision 6
         """
@@ -568,22 +624,28 @@ def _r6_password(self, password, salt, vector):
         return k[:32]
 
     @staticmethod
-    def _bytes_mod_3(input_bytes):
+    def _bytes_mod_3(input_bytes: bytes) -> int:
         # 256 is 1 mod 3, so we can just sum 'em
         return sum(b % 3 for b in input_bytes) % 3
 
-    def _aes_cbc_encrypt(self, key, iv, data):
+    def _aes_cbc_encrypt(
+        self,
+        key: bytes,
+        iv: bytes,
+        data: bytes
+    ) -> bytes:
         cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
-        encryptor = cipher.encryptor()
-        return encryptor.update(data) + encryptor.finalize()
+        encryptor = cipher.encryptor()  # type: ignore
+        return encryptor.update(data) + encryptor.finalize()  # type: ignore
 
-    def decrypt_aes256(self, objid, genno, data):
+    def decrypt_aes256(self, objid: int, genno: int, data: bytes) -> bytes:
         initialization_vector = data[:16]
         ciphertext = data[16:]
+        assert self.key is not None
         cipher = Cipher(algorithms.AES(self.key),
                         modes.CBC(initialization_vector),
-                        backend=default_backend())
-        return cipher.decryptor().update(ciphertext)
+                        backend=default_backend())  # type: ignore
+        return cipher.decryptor().update(ciphertext)  # type: ignore
 
 
 class PDFDocument:
@@ -599,24 +661,30 @@ class PDFDocument:
 
     """
 
-    security_handler_registry = {
+    security_handler_registry: Dict[int, Type[PDFStandardSecurityHandler]] = {
         1: PDFStandardSecurityHandler,
         2: PDFStandardSecurityHandler,
         4: PDFStandardSecurityHandlerV4,
         5: PDFStandardSecurityHandlerV5,
     }
 
-    def __init__(self, parser, password='', caching=True, fallback=True):
+    def __init__(
+        self,
+        parser: PDFParser,
+        password: str = '',
+        caching: bool = True,
+        fallback: bool = True
+    ) -> None:
         "Set the document to use a given PDFParser object."
         self.caching = caching
-        self.xrefs = []
+        self.xrefs: List[PDFBaseXRef] = []
         self.info = []
-        self.catalog = None
-        self.encryption = None
-        self.decipher = None
+        self.catalog: Dict[str, Any] = {}
+        self.encryption: Optional[Tuple[Any, Any]] = None
+        self.decipher: Optional[DecipherCallable] = None
         self._parser = None
-        self._cached_objs = {}
-        self._parsed_objs = {}
+        self._cached_objs: Dict[int, Tuple[object, int]] = {}
+        self._parsed_objs: Dict[int, Tuple[List[object], int]] = {}
         self._parser = parser
         self._parser.set_document(self)
         self.is_printable = self.is_modifiable = self.is_extractable = True
@@ -629,9 +697,9 @@ def __init__(self, parser, password='', caching=True, fallback=True):
             pass  # fallback = True
         if fallback:
             parser.fallback = True
-            xref = PDFXRefFallback()
-            xref.load(parser)
-            self.xrefs.append(xref)
+            newxref = PDFXRefFallback()
+            newxref.load(parser)
+            self.xrefs.append(newxref)
         for xref in self.xrefs:
             trailer = xref.get_trailer()
             if not trailer:
@@ -665,7 +733,8 @@ def __init__(self, parser, password='', caching=True, fallback=True):
 
     # _initialize_password(password=b'')
     #   Perform the initialization with a given password.
-    def _initialize_password(self, password=''):
+    def _initialize_password(self, password: str = '') -> None:
+        assert self.encryption is not None
         (docid, param) = self.encryption
         if literal_name(param.get('Filter')) != 'Standard':
             raise PDFEncryptionError('Unknown filter: param=%r' % param)
@@ -678,15 +747,22 @@ def _initialize_password(self, password=''):
         self.is_printable = handler.is_printable()
         self.is_modifiable = handler.is_modifiable()
         self.is_extractable = handler.is_extractable()
+        assert self._parser is not None
         self._parser.fallback = False  # need to read streams with exact length
         return
 
-    def _getobj_objstm(self, stream, index, objid):
+    def _getobj_objstm(
+        self,
+        stream: PDFStream,
+        index: int,
+        objid: int
+    ) -> object:
         if stream.objid in self._parsed_objs:
             (objs, n) = self._parsed_objs[stream.objid]
         else:
             (objs, n) = self._get_objects(stream)
             if self.caching:
+                assert stream.objid is not None
                 self._parsed_objs[stream.objid] = (objs, n)
         i = n*2+index
         try:
@@ -695,19 +771,19 @@ def _getobj_objstm(self, stream, index, objid):
             raise PDFSyntaxError('index too big: %r' % index)
         return obj
 
-    def _get_objects(self, stream):
+    def _get_objects(self, stream: PDFStream) -> Tuple[List[object], int]:
         if stream.get('Type') is not LITERAL_OBJSTM:
             if settings.STRICT:
                 raise PDFSyntaxError('Not a stream object: %r' % stream)
         try:
-            n = stream['N']
+            n = cast(int, stream['N'])
         except KeyError:
             if settings.STRICT:
                 raise PDFSyntaxError('N is not defined: %r' % stream)
             n = 0
         parser = PDFStreamParser(stream.get_data())
         parser.set_document(self)
-        objs = []
+        objs: List[object] = []
         try:
             while 1:
                 (_, obj) = parser.nextobject()
@@ -716,7 +792,8 @@ def _get_objects(self, stream):
             pass
         return (objs, n)
 
-    def _getobj_parse(self, pos, objid):
+    def _getobj_parse(self, pos: int, objid: int) -> object:
+        assert self._parser is not None
         self._parser.seek(pos)
         (_, objid1) = self._parser.nexttoken()  # objid
         (_, genno) = self._parser.nexttoken()  # genno
@@ -744,7 +821,7 @@ def _getobj_parse(self, pos, objid):
         return obj
 
     # can raise PDFObjectNotFound
-    def getobj(self, objid):
+    def getobj(self, objid: int) -> object:
         """Get object from PDF
 
         :raises PDFException if PDFDocument is not initialized
@@ -783,11 +860,14 @@ def getobj(self, objid):
                 self._cached_objs[objid] = (obj, genno)
         return obj
 
-    def get_outlines(self):
+    OutlineType = Tuple[Any, Any, Any, Any, Any]
+
+    def get_outlines(self) -> Iterator[OutlineType]:
         if 'Outlines' not in self.catalog:
             raise PDFNoOutlines
 
-        def search(entry, level):
+        def search(entry: object, level: int
+                   ) -> Iterator[PDFDocument.OutlineType]:
             entry = dict_value(entry)
             if 'Title' in entry:
                 if 'A' in entry or 'Dest' in entry:
@@ -803,7 +883,11 @@ def search(entry, level):
             return
         return search(self.catalog['Outlines'], 0)
 
-    def lookup_name(self, cat, key):
+    def lookup_name(
+        self,
+        cat: str,
+        key: Union[str, bytes]
+    ) -> Any:
         try:
             names = dict_value(self.catalog['Names'])
         except (PDFTypeError, KeyError):
@@ -811,14 +895,15 @@ def lookup_name(self, cat, key):
         # may raise KeyError
         d0 = dict_value(names[cat])
 
-        def lookup(d):
+        def lookup(d: Dict[str, Any]) -> Any:
             if 'Limits' in d:
                 (k1, k2) = list_value(d['Limits'])
                 if key < k1 or k2 < key:
                     return None
             if 'Names' in d:
                 objs = list_value(d['Names'])
-                names = dict(choplist(2, objs))
+                names = dict(cast(Iterator[Tuple[Union[str, bytes], Any]],
+                                  choplist(2, objs)))
                 return names[key]
             if 'Kids' in d:
                 for c in list_value(d['Kids']):
@@ -828,7 +913,7 @@ def lookup(d):
             raise KeyError((cat, key))
         return lookup(d0)
 
-    def get_dest(self, name):
+    def get_dest(self, name: Union[str, bytes]) -> Any:
         try:
             # PDF-1.2 or later
             obj = self.lookup_name('Dests', name)
@@ -843,7 +928,7 @@ def get_dest(self, name):
         return obj
 
     # find_xref
-    def find_xref(self, parser):
+    def find_xref(self, parser: PDFParser) -> int:
         """Internal function used to locate the first XRef."""
         # search the last xref table by scanning the file backwards.
         prev = None
@@ -857,10 +942,16 @@ def find_xref(self, parser):
         else:
             raise PDFNoValidXRef('Unexpected EOF')
         log.info('xref found: pos=%r', prev)
+        assert prev is not None
         return int(prev)
 
     # read xref table
-    def read_xref_from(self, parser, start, xrefs):
+    def read_xref_from(
+        self,
+        parser: PDFParser,
+        start: int,
+        xrefs: List[PDFBaseXRef]
+    ) -> None:
         """Reads XRefs from the given location."""
         parser.seek(start)
         parser.reset()
@@ -873,7 +964,7 @@ def read_xref_from(self, parser, start, xrefs):
             # XRefStream: PDF-1.5
             parser.seek(pos)
             parser.reset()
-            xref = PDFXRefStream()
+            xref: PDFBaseXRef = PDFXRefStream()
             xref.load(parser)
         else:
             if token is parser.KEYWORD_XREF:
diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py
index 74ad6a61..df0813d5 100644
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@@ -2,11 +2,15 @@
 import struct
 import sys
 from io import BytesIO
+from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, Mapping,
+                    Optional, Tuple, Union, cast, TYPE_CHECKING)
 
 from . import settings
 from .cmapdb import CMap
+from .cmapdb import CMapBase
 from .cmapdb import CMapDB
 from .cmapdb import CMapParser
+from .cmapdb import UnicodeMap
 from .cmapdb import FileUnicodeMap
 from .encodingdb import EncodingDB
 from .encodingdb import name2unicode
@@ -22,52 +26,59 @@
 from .psparser import KWD
 from .psparser import LIT
 from .psparser import PSEOF
+from .psparser import PSKeyword
 from .psparser import PSLiteral
 from .psparser import PSStackParser
 from .psparser import literal_name
+from .utils import Matrix, Point
+from .utils import Rect
 from .utils import apply_matrix_norm
 from .utils import choplist
-from .utils import isnumber
 from .utils import nunpack
 
+if TYPE_CHECKING:
+    from .pdfinterp import PDFResourceManager
+
 log = logging.getLogger(__name__)
 
 
-def get_widths(seq):
-    widths = {}
-    r = []
+def get_widths(seq: Iterable[object]) -> Dict[int, float]:
+    """Build a mapping of character widths for horizontal writing."""
+    widths: Dict[int, float] = {}
+    r: List[float] = []
     for v in seq:
         if isinstance(v, list):
             if r:
                 char1 = r[-1]
                 for (i, w) in enumerate(v):
-                    widths[char1+i] = w
+                    widths[cast(int, char1) + i] = w
                 r = []
-        elif isnumber(v):
+        elif isinstance(v, (int, float)):  # == utils.isnumber(v)
             r.append(v)
             if len(r) == 3:
                 (char1, char2, w) = r
-                for i in range(char1, char2+1):
+                for i in range(cast(int, char1), cast(int, char2) + 1):
                     widths[i] = w
                 r = []
     return widths
 
 
-def get_widths2(seq):
-    widths = {}
-    r = []
+def get_widths2(seq: Iterable[object]) -> Dict[int, Tuple[float, Point]]:
+    """Build a mapping of character widths for vertical writing."""
+    widths: Dict[int, Tuple[float, Point]] = {}
+    r: List[float] = []
     for v in seq:
         if isinstance(v, list):
             if r:
                 char1 = r[-1]
                 for (i, (w, vx, vy)) in enumerate(choplist(3, v)):
-                    widths[char1+i] = (w, (vx, vy))
+                    widths[cast(int, char1) + i] = (w, (vx, vy))
                 r = []
-        elif isnumber(v):
+        elif isinstance(v, (int, float)):  # == utils.isnumber(v)
             r.append(v)
             if len(r) == 5:
                 (char1, char2, w, vx, vy) = r
-                for i in range(char1, char2+1):
+                for i in range(cast(int, char1), cast(int, char2) + 1):
                     widths[i] = (w, (vx, vy))
                 r = []
     return widths
@@ -76,11 +87,13 @@ def get_widths2(seq):
 class FontMetricsDB:
 
     @classmethod
-    def get_metrics(cls, fontname):
+    def get_metrics(cls, fontname: str
+                    ) -> Tuple[Dict[str, object], Dict[str, int]]:
         return FONT_METRICS[fontname]
 
 
-class Type1FontHeaderParser(PSStackParser):
+# int here means that we're not extending PSStackParser with additional types.
+class Type1FontHeaderParser(PSStackParser[int]):
 
     KEYWORD_BEGIN = KWD(b'begin')
     KEYWORD_END = KWD(b'end')
@@ -91,12 +104,12 @@ class Type1FontHeaderParser(PSStackParser):
     KEYWORD_READONLY = KWD(b'readonly')
     KEYWORD_FOR = KWD(b'for')
 
-    def __init__(self, data):
+    def __init__(self, data: BinaryIO) -> None:
         PSStackParser.__init__(self, data)
-        self._cid2unicode = {}
+        self._cid2unicode: Dict[int, str] = {}
         return
 
-    def get_encoding(self):
+    def get_encoding(self) -> Dict[int, str]:
         """Parse the font encoding.
 
         The Type1 font encoding maps character codes to character names. These
@@ -116,12 +129,12 @@ def get_encoding(self):
             except PSEOF:
                 break
             try:
-                self._cid2unicode[cid] = name2unicode(name)
+                self._cid2unicode[cid] = name2unicode(cast(str, name))
             except KeyError as e:
                 log.debug(str(e))
         return self._cid2unicode
 
-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
         if token is self.KEYWORD_PUT:
             ((_, key), (_, value)) = self.pop(2)
             if (isinstance(key, int) and isinstance(value, PSLiteral)):
@@ -140,10 +153,10 @@ def do_keyword(self, pos, token):
 }
 
 
-def getdict(data):
-    d = {}
+def getdict(data: bytes) -> Dict[int, List[Union[float, int]]]:
+    d: Dict[int, List[Union[float, int]]] = {}
     fp = BytesIO(data)
-    stack = []
+    stack: List[Union[float, int]] = []
     while 1:
         c = fp.read(1)
         if not c:
@@ -162,7 +175,9 @@ def getdict(data):
                     if n == 15:
                         loop = False
                     else:
-                        s += NIBBLES[n]
+                        nibble = NIBBLES[n]
+                        assert nibble is not None
+                        s += nibble
             value = float(s)
         elif 32 <= b0 and b0 <= 246:
             value = b0-139
@@ -270,9 +285,9 @@ class CFFFont:
 
     class INDEX:
 
-        def __init__(self, fp):
+        def __init__(self, fp: BinaryIO) -> None:
             self.fp = fp
-            self.offsets = []
+            self.offsets: List[int] = []
             (count, offsize) = struct.unpack('>HB', self.fp.read(3))
             for i in range(count+1):
                 self.offsets.append(nunpack(self.fp.read(offsize)))
@@ -280,20 +295,20 @@ def __init__(self, fp):
             self.fp.seek(self.base+self.offsets[-1])
             return
 
-        def __repr__(self):
+        def __repr__(self) -> str:
             return '<INDEX: size=%d>' % len(self)
 
-        def __len__(self):
+        def __len__(self) -> int:
             return len(self.offsets)-1
 
-        def __getitem__(self, i):
+        def __getitem__(self, i: int) -> bytes:
             self.fp.seek(self.base+self.offsets[i])
             return self.fp.read(self.offsets[i+1]-self.offsets[i])
 
-        def __iter__(self):
+        def __iter__(self) -> Iterator[bytes]:
             return iter(self[i] for i in range(len(self)))
 
-    def __init__(self, name, fp):
+    def __init__(self, name: str, fp: BinaryIO) -> None:
         self.name = name
         self.fp = fp
         # Header
@@ -314,13 +329,13 @@ def __init__(self, name, fp):
         (encoding_pos,) = self.top_dict.get(16, [0])
         (charstring_pos,) = self.top_dict.get(17, [0])
         # CharStrings
-        self.fp.seek(charstring_pos)
+        self.fp.seek(cast(int, charstring_pos))
         self.charstring = self.INDEX(self.fp)
         self.nglyphs = len(self.charstring)
         # Encodings
         self.code2gid = {}
         self.gid2code = {}
-        self.fp.seek(encoding_pos)
+        self.fp.seek(cast(int, encoding_pos))
         format = self.fp.read(1)
         if format == b'\x00':
             # Format 0
@@ -344,17 +359,18 @@ def __init__(self, name, fp):
         # Charsets
         self.name2gid = {}
         self.gid2name = {}
-        self.fp.seek(charset_pos)
+        self.fp.seek(cast(int, charset_pos))
         format = self.fp.read(1)
         if format == b'\x00':
             # Format 0
             n = self.nglyphs-1
-            for (gid, sid) in enumerate(struct.unpack('>'+'H'*n,
-                                                      self.fp.read(2*n))):
+            for (gid, sid) in enumerate(
+                    cast(Tuple[int, ...],
+                         struct.unpack('>' + 'H' * n, self.fp.read(2 * n)))):
                 gid += 1
-                name = self.getstr(sid)
-                self.name2gid[name] = gid
-                self.gid2name[gid] = name
+                sidname = self.getstr(sid)
+                self.name2gid[sidname] = gid
+                self.gid2name[gid] = sidname
         elif format == b'\x01':
             # Format 1
             (n,) = struct.unpack('B', self.fp.read(1))
@@ -362,9 +378,9 @@ def __init__(self, name, fp):
             for i in range(n):
                 (first, nleft) = struct.unpack('BB', self.fp.read(2))
                 for gid in range(first, first+nleft+1):
-                    name = self.getstr(sid)
-                    self.name2gid[name] = gid
-                    self.gid2name[gid] = name
+                    sidname = self.getstr(sid)
+                    self.name2gid[sidname] = gid
+                    self.gid2name[gid] = sidname
                     sid += 1
         elif format == b'\x02':
             # Format 2
@@ -373,7 +389,9 @@ def __init__(self, name, fp):
             raise ValueError('unsupported charset format: %r' % format)
         return
 
-    def getstr(self, sid):
+    def getstr(self, sid: int) -> Union[str, bytes]:
+        # This returns str for one of the STANDARD_STRINGS but bytes otherwise,
+        # and appears to be a needless source of type complexity.
         if sid < len(self.STANDARD_STRINGS):
             return self.STANDARD_STRINGS[sid]
         return self.string_index[sid-len(self.STANDARD_STRINGS)]
@@ -384,17 +402,19 @@ class TrueTypeFont:
     class CMapNotFound(Exception):
         pass
 
-    def __init__(self, name, fp):
+    def __init__(self, name: str, fp: BinaryIO) -> None:
         self.name = name
         self.fp = fp
-        self.tables = {}
+        self.tables: Dict[bytes, Tuple[int, int]] = {}
         self.fonttype = fp.read(4)
         try:
-            (ntables, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
+            (ntables, _1, _2, _3) = cast(Tuple[int, int, int, int],
+                                         struct.unpack('>HHHH', fp.read(8)))
             for _ in range(ntables):
-                (name, tsum, offset, length) = struct.unpack('>4sLLL',
-                                                             fp.read(16))
-                self.tables[name] = (offset, length)
+                (name_bytes, tsum, offset, length) = \
+                    cast(Tuple[bytes, int, int, int],
+                         struct.unpack('>4sLLL', fp.read(16)))
+                self.tables[name_bytes] = (offset, length)
         except struct.error:
             # Do not fail if there are not enough bytes to read. Even for
             # corrupted PDFs we would like to get as much information as
@@ -402,34 +422,40 @@ def __init__(self, name, fp):
             pass
         return
 
-    def create_unicode_map(self):
+    def create_unicode_map(self) -> FileUnicodeMap:
         if b'cmap' not in self.tables:
             raise TrueTypeFont.CMapNotFound
         (base_offset, length) = self.tables[b'cmap']
         fp = self.fp
         fp.seek(base_offset)
-        (version, nsubtables) = struct.unpack('>HH', fp.read(4))
-        subtables = []
+        (version, nsubtables) = \
+            cast(Tuple[int, int], struct.unpack('>HH', fp.read(4)))
+        subtables: List[Tuple[int, int, int]] = []
         for i in range(nsubtables):
-            subtables.append(struct.unpack('>HHL', fp.read(8)))
-        char2gid = {}
+            subtables.append(
+                cast(Tuple[int, int, int], struct.unpack('>HHL', fp.read(8))))
+        char2gid: Dict[int, int] = {}
         # Only supports subtable type 0, 2 and 4.
         for (_1, _2, st_offset) in subtables:
             fp.seek(base_offset+st_offset)
-            (fmttype, fmtlen, fmtlang) = struct.unpack('>HHH', fp.read(6))
+            (fmttype, fmtlen, fmtlang) = \
+                cast(Tuple[int, int, int], struct.unpack('>HHH', fp.read(6)))
             if fmttype == 0:
-                char2gid.update(enumerate(struct.unpack('>256B',
-                                                        fp.read(256))))
+                char2gid.update(enumerate(
+                    cast(Tuple[int, ...],
+                         struct.unpack('>256B', fp.read(256)))))
             elif fmttype == 2:
-                subheaderkeys = struct.unpack('>256H', fp.read(512))
+                subheaderkeys = cast(Tuple[int, ...],
+                                     struct.unpack('>256H', fp.read(512)))
                 firstbytes = [0]*8192
                 for (i, k) in enumerate(subheaderkeys):
                     firstbytes[k//8] = i
                 nhdrs = max(subheaderkeys)//8 + 1
-                hdrs = []
+                hdrs: List[Tuple[int, int, int, int, int]] = []
                 for i in range(nhdrs):
                     (firstcode, entcount, delta, offset) = \
-                        struct.unpack('>HHhH', fp.read(8))
+                        cast(Tuple[int, int, int, int],
+                             struct.unpack('>HHhH', fp.read(8)))
                     hdrs.append((i, firstcode, entcount, delta,
                                  fp.tell()-2+offset))
                 for (i, firstcode, entcount, delta, pos) in hdrs:
@@ -438,24 +464,36 @@ def create_unicode_map(self):
                     first = firstcode + (firstbytes[i] << 8)
                     fp.seek(pos)
                     for c in range(entcount):
-                        gid = struct.unpack('>H', fp.read(2))
+                        gid = cast(Tuple[int],
+                                   struct.unpack('>H', fp.read(2)))[0]
                         if gid:
                             gid += delta
                         char2gid[first+c] = gid
             elif fmttype == 4:
-                (segcount, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
+                (segcount, _1, _2, _3) = \
+                    cast(Tuple[int, int, int, int],
+                         struct.unpack('>HHHH', fp.read(8)))
                 segcount //= 2
-                ecs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
+                ecs = cast(Tuple[int, ...],
+                           struct.unpack('>%dH' % segcount,
+                                         fp.read(2*segcount)))
                 fp.read(2)
-                scs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
-                idds = struct.unpack('>%dh' % segcount, fp.read(2*segcount))
+                scs = cast(Tuple[int, ...],
+                           struct.unpack('>%dH' % segcount,
+                                         fp.read(2*segcount)))
+                idds = cast(Tuple[int, ...],
+                            struct.unpack('>%dh' % segcount,
+                                          fp.read(2*segcount)))
                 pos = fp.tell()
-                idrs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
+                idrs = cast(Tuple[int, ...],
+                            struct.unpack('>%dH' % segcount,
+                                          fp.read(2*segcount)))
                 for (ec, sc, idd, idr) in zip(ecs, scs, idds, idrs):
                     if idr:
                         fp.seek(pos+idr)
                         for c in range(sc, ec+1):
-                            b = struct.unpack('>H', fp.read(2))[0]
+                            b = cast(Tuple[int],
+                                     struct.unpack('>H', fp.read(2)))[0]
                             char2gid[c] = (b + idd) & 0xffff
                     else:
                         for c in range(sc, ec+1):
@@ -480,12 +518,21 @@ class PDFUnicodeNotDefined(PDFFontError):
 LITERAL_STANDARD_ENCODING = LIT('StandardEncoding')
 LITERAL_TYPE1C = LIT('Type1C')
 
+# Font widths are maintained in a dict that maps *either* unicode chars
+# or integer character IDs to their widths.
+FontWidthDict = Union[Dict[int, float], Dict[str, float]]
+
 
 class PDFFont:
 
-    def __init__(self, descriptor, widths, default_width=None):
+    def __init__(
+        self,
+        descriptor: Mapping[str, Any],
+        widths: FontWidthDict,
+        default_width: Optional[float] = None
+    ) -> None:
         self.descriptor = descriptor
-        self.widths = resolve_all(widths)
+        self.widths: FontWidthDict = resolve_all(widths)
         self.fontname = resolve1(descriptor.get('FontName', 'unknown'))
         if isinstance(self.fontname, PSLiteral):
             self.fontname = literal_name(self.fontname)
@@ -498,8 +545,8 @@ def __init__(self, descriptor, widths, default_width=None):
         else:
             self.default_width = default_width
         self.leading = num_value(descriptor.get('Leading', 0))
-        self.bbox = list_value(resolve_all(descriptor.get('FontBBox',
-                                                          (0, 0, 0, 0))))
+        self.bbox = cast(Rect, list_value(
+            resolve_all(descriptor.get('FontBBox', (0, 0, 0, 0)))))
         self.hscale = self.vscale = .001
 
         # PDF RM 9.8.1 specifies /Descent should always be a negative number.
@@ -510,57 +557,72 @@ def __init__(self, descriptor, widths, default_width=None):
             self.descent = -self.descent
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFFont>'
 
-    def is_vertical(self):
+    def is_vertical(self) -> bool:
         return False
 
-    def is_multibyte(self):
+    def is_multibyte(self) -> bool:
         return False
 
-    def decode(self, bytes):
+    def decode(self, bytes: bytes) -> Iterable[int]:
         return bytearray(bytes)  # map(ord, bytes)
 
-    def get_ascent(self):
+    def get_ascent(self) -> float:
         """Ascent above the baseline, in text space units"""
         return self.ascent * self.vscale
 
-    def get_descent(self):
+    def get_descent(self) -> float:
         """Descent below the baseline, in text space units; always negative"""
         return self.descent * self.vscale
 
-    def get_width(self):
+    def get_width(self) -> float:
         w = self.bbox[2]-self.bbox[0]
         if w == 0:
             w = -self.default_width
         return w * self.hscale
 
-    def get_height(self):
+    def get_height(self) -> float:
         h = self.bbox[3]-self.bbox[1]
         if h == 0:
             h = self.ascent - self.descent
         return h * self.vscale
 
-    def char_width(self, cid):
+    def char_width(self, cid: int) -> float:
+        # Because the widths dict may be keyed by either integer character
+        # IDs or strings, look up the ID first, then its str equivalent.
         try:
-            return self.widths[cid] * self.hscale
+            return cast(Dict[int, float], self.widths)[cid] * self.hscale
         except KeyError:
+            str_widths = cast(Dict[str, float], self.widths)
             try:
-                return self.widths[self.to_unichr(cid)] * self.hscale
+                return str_widths[self.to_unichr(cid)] * self.hscale
             except (KeyError, PDFUnicodeNotDefined):
                 return self.default_width * self.hscale
 
-    def char_disp(self, cid):
+    def char_disp(
+        self,
+        cid: int
+    ) -> Union[float, Tuple[Optional[float], float]]:
+        "Returns an integer for horizontal fonts, a tuple for vertical fonts."
         return 0
 
-    def string_width(self, s):
+    def string_width(self, s: bytes) -> float:
         return sum(self.char_width(cid) for cid in self.decode(s))
 
+    def to_unichr(self, cid: int) -> str:
+        raise NotImplementedError
+
 
 class PDFSimpleFont(PDFFont):
 
-    def __init__(self, descriptor, widths, spec):
+    def __init__(
+        self,
+        descriptor: Mapping[str, Any],
+        widths: FontWidthDict,
+        spec: Mapping[str, Any]
+    ) -> None:
         # Font encoding is specified either by a name of
         # built-in encoding or a dictionary that describes
         # the differences.
@@ -575,7 +637,7 @@ def __init__(self, descriptor, widths, spec):
             self.cid2unicode = EncodingDB.get_encoding(name, diff)
         else:
             self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
-        self.unicode_map = None
+        self.unicode_map: Optional[UnicodeMap] = None
         if 'ToUnicode' in spec:
             strm = stream_value(spec['ToUnicode'])
             self.unicode_map = FileUnicodeMap()
@@ -583,7 +645,7 @@ def __init__(self, descriptor, widths, spec):
         PDFFont.__init__(self, descriptor, widths)
         return
 
-    def to_unichr(self, cid):
+    def to_unichr(self, cid: int) -> str:
         if self.unicode_map:
             try:
                 return self.unicode_map.get_unichr(cid)
@@ -597,21 +659,28 @@ def to_unichr(self, cid):
 
 class PDFType1Font(PDFSimpleFont):
 
-    def __init__(self, rsrcmgr, spec):
+    def __init__(
+        self,
+        rsrcmgr: "PDFResourceManager",
+        spec: Mapping[str, Any]
+    ) -> None:
         try:
             self.basefont = literal_name(spec['BaseFont'])
         except KeyError:
             if settings.STRICT:
                 raise PDFFontError('BaseFont is missing')
             self.basefont = 'unknown'
+
+        widths: FontWidthDict
         try:
-            (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
+            (descriptor, int_widths) = FontMetricsDB.get_metrics(self.basefont)
+            widths = cast(Dict[str, float], int_widths)  # implicit int->float
         except KeyError:
             descriptor = dict_value(spec.get('FontDescriptor', {}))
             firstchar = int_value(spec.get('FirstChar', 0))
             # lastchar = int_value(spec.get('LastChar', 255))
-            widths = list_value(spec.get('Widths', [0]*256))
-            widths = {i+firstchar: w for (i, w) in enumerate(widths)}
+            width_list = list_value(spec.get('Widths', [0]*256))
+            widths = {i+firstchar: w for (i, w) in enumerate(width_list)}
         PDFSimpleFont.__init__(self, descriptor, widths, spec)
         if 'Encoding' not in spec and 'FontFile' in descriptor:
             # try to recover the missing encoding info from the font file.
@@ -622,41 +691,51 @@ def __init__(self, rsrcmgr, spec):
             self.cid2unicode = parser.get_encoding()
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFType1Font: basefont=%r>' % self.basefont
 
 
 class PDFTrueTypeFont(PDFType1Font):
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFTrueTypeFont: basefont=%r>' % self.basefont
 
 
 class PDFType3Font(PDFSimpleFont):
 
-    def __init__(self, rsrcmgr, spec):
+    def __init__(
+        self,
+        rsrcmgr: "PDFResourceManager",
+        spec: Mapping[str, Any]
+    ) -> None:
         firstchar = int_value(spec.get('FirstChar', 0))
         # lastchar = int_value(spec.get('LastChar', 0))
-        widths = list_value(spec.get('Widths', [0]*256))
-        widths = {i+firstchar: w for (i, w) in enumerate(widths)}
+        width_list = list_value(spec.get('Widths', [0]*256))
+        widths = {i+firstchar: w for (i, w) in enumerate(width_list)}
         if 'FontDescriptor' in spec:
             descriptor = dict_value(spec['FontDescriptor'])
         else:
             descriptor = {'Ascent': 0, 'Descent': 0,
                           'FontBBox': spec['FontBBox']}
         PDFSimpleFont.__init__(self, descriptor, widths, spec)
-        self.matrix = tuple(list_value(spec.get('FontMatrix')))
+        self.matrix = cast(Matrix, tuple(list_value(spec.get('FontMatrix'))))
         (_, self.descent, _, self.ascent) = self.bbox
         (self.hscale, self.vscale) = apply_matrix_norm(self.matrix, (1, 1))
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFType3Font>'
 
 
 class PDFCIDFont(PDFFont):
-
-    def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
+    default_disp: Union[float, Tuple[Optional[float], float]]
+
+    def __init__(
+        self,
+        rsrcmgr: "PDFResourceManager",
+        spec: Mapping[str, Any],
+        strict: bool = settings.STRICT
+    ) -> None:
         try:
             self.basefont = literal_name(spec['BaseFont'])
         except KeyError:
@@ -669,7 +748,7 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
         cid_ordering = resolve1(
             self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1")
         self.cidcoding = '{}-{}'.format(cid_registry, cid_ordering)
-        self.cmap = self.get_cmap_from_spec(spec, strict)
+        self.cmap: CMapBase = self.get_cmap_from_spec(spec, strict)
 
         try:
             descriptor = dict_value(spec['FontDescriptor'])
@@ -682,7 +761,7 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
             self.fontfile = stream_value(descriptor.get('FontFile2'))
             ttf = TrueTypeFont(self.basefont,
                                BytesIO(self.fontfile.get_data()))
-        self.unicode_map = None
+        self.unicode_map: Optional[UnicodeMap] = None
         if 'ToUnicode' in spec:
             strm = stream_value(spec['ToUnicode'])
             self.unicode_map = FileUnicodeMap()
@@ -703,12 +782,12 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
         self.vertical = self.cmap.is_vertical()
         if self.vertical:
             # writing mode: vertical
-            widths = get_widths2(list_value(spec.get('W2', [])))
+            widths2 = get_widths2(list_value(spec.get('W2', [])))
             self.disps = {cid: (vx, vy)
-                          for (cid, (_, (vx, vy))) in widths.items()}
+                          for (cid, (_, (vx, vy))) in widths2.items()}
             (vy, w) = resolve1(spec.get('DW2', [880, -1000]))
             self.default_disp = (None, vy)
-            widths = {cid: w for (cid, (w, _)) in widths.items()}
+            widths = {cid: w for (cid, (w, _)) in widths2.items()}
             default_width = w
         else:
             # writing mode: horizontal
@@ -719,7 +798,11 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
         PDFFont.__init__(self, descriptor, widths, default_width=default_width)
         return
 
-    def get_cmap_from_spec(self, spec, strict):
+    def get_cmap_from_spec(
+        self,
+        spec: Mapping[str, Any],
+        strict: bool
+    ) -> CMapBase:
         """Get cmap from font specification
 
         For certain PDFs, Encoding Type isn't mentioned as an attribute of
@@ -738,7 +821,7 @@ def get_cmap_from_spec(self, spec, strict):
             return CMap()
 
     @staticmethod
-    def _get_cmap_name(spec, strict):
+    def _get_cmap_name(spec: Mapping[str, Any], strict: bool) -> str:
         """Get cmap name from font specification"""
         cmap_name = 'unknown'  # default value
 
@@ -752,34 +835,37 @@ def _get_cmap_name(spec, strict):
             if strict:
                 raise PDFFontError('Encoding is unspecified')
 
-        if type(cmap_name) is PDFStream:
-            if 'CMapName' in cmap_name:
-                cmap_name = cmap_name.get('CMapName').name
+        if type(cmap_name) is PDFStream:  # type: ignore[comparison-overlap]
+            cmap_name_stream: PDFStream = cast(PDFStream, cmap_name)
+            if 'CMapName' in cmap_name_stream:
+                cmap_name = cmap_name_stream.get('CMapName').name
             else:
                 if strict:
                     raise PDFFontError('CMapName unspecified for encoding')
 
-        cmap_name = IDENTITY_ENCODER.get(cmap_name, cmap_name)
-        return cmap_name
+        return IDENTITY_ENCODER.get(cmap_name, cmap_name)
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFCIDFont: basefont={!r}, cidcoding={!r}>'\
             .format(self.basefont, self.cidcoding)
 
-    def is_vertical(self):
+    def is_vertical(self) -> bool:
         return self.vertical
 
-    def is_multibyte(self):
+    def is_multibyte(self) -> bool:
         return True
 
-    def decode(self, bytes):
+    def decode(self, bytes: bytes) -> Iterable[int]:
         return self.cmap.decode(bytes)
 
-    def char_disp(self, cid):
+    def char_disp(
+        self,
+        cid: int
+    ) -> Union[float, Tuple[Optional[float], float]]:
         "Returns an integer for horizontal fonts, a tuple for vertical fonts."
         return self.disps.get(cid, self.default_disp)
 
-    def to_unichr(self, cid):
+    def to_unichr(self, cid: int) -> str:
         try:
             if not self.unicode_map:
                 raise KeyError(cid)
@@ -788,7 +874,7 @@ def to_unichr(self, cid):
             raise PDFUnicodeNotDefined(self.cidcoding, cid)
 
 
-def main(argv):
+def main(argv: List[str]) -> None:
     for fname in argv[1:]:
         fp = open(fname, 'rb')
         font = CFFFont(fname, fp)
@@ -798,4 +884,4 @@ def main(argv):
 
 
 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    main(sys.argv)
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index ef67947c..6387b42b 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -1,9 +1,12 @@
 import re
 import logging
+from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast
 from io import BytesIO
 from .cmapdb import CMapDB
 from .cmapdb import CMap
-from .psparser import PSTypeError
+from .cmapdb import CMapBase
+from .psparser import PSLiteral, PSTypeError
+from .psparser import PSStackType
 from .psparser import PSEOF
 from .psparser import PSKeyword
 from .psparser import literal_name
@@ -12,6 +15,9 @@
 from .psparser import LIT
 from .psparser import KWD
 from . import settings
+from .pdfdevice import PDFDevice
+from .pdfdevice import PDFTextSeq
+from .pdfpage import PDFPage
 from .pdftypes import PDFException
 from .pdftypes import PDFStream
 from .pdftypes import PDFObjRef
@@ -19,6 +25,7 @@
 from .pdftypes import list_value
 from .pdftypes import dict_value
 from .pdftypes import stream_value
+from .pdffont import PDFFont
 from .pdffont import PDFFontError
 from .pdffont import PDFType1Font
 from .pdffont import PDFTrueTypeFont
@@ -26,6 +33,7 @@
 from .pdffont import PDFCIDFont
 from .pdfcolor import PDFColorSpace
 from .pdfcolor import PREDEFINED_COLORSPACE
+from .utils import Matrix, Point, PathSegment, Rect
 from .utils import choplist
 from .utils import mult_matrix
 from .utils import MATRIX_IDENTITY
@@ -50,22 +58,24 @@ class PDFInterpreterError(PDFException):
 
 
 class PDFTextState:
-
-    def __init__(self):
-        self.font = None
-        self.fontsize = 0
-        self.charspace = 0
-        self.wordspace = 0
-        self.scaling = 100
-        self.leading = 0
-        self.render = 0
-        self.rise = 0
+    matrix: Matrix
+    linematrix: Point
+
+    def __init__(self) -> None:
+        self.font: Optional[PDFFont] = None
+        self.fontsize: float = 0
+        self.charspace: float = 0
+        self.wordspace: float = 0
+        self.scaling: float = 100
+        self.leading: float = 0
+        self.render: int = 0
+        self.rise: float = 0
         self.reset()
         # self.matrix is set
         # self.linematrix is set
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFTextState: font=%r, fontsize=%r, charspace=%r, ' \
                'wordspace=%r, scaling=%r, leading=%r, render=%r, rise=%r, ' \
                'matrix=%r, linematrix=%r>' \
@@ -73,7 +83,7 @@ def __repr__(self):
                   self.scaling, self.leading, self.render, self.rise,
                   self.matrix, self.linematrix)
 
-    def copy(self):
+    def copy(self) -> "PDFTextState":
         obj = PDFTextState()
         obj.font = self.font
         obj.fontsize = self.fontsize
@@ -87,31 +97,37 @@ def copy(self):
         obj.linematrix = self.linematrix
         return obj
 
-    def reset(self):
+    def reset(self) -> None:
         self.matrix = MATRIX_IDENTITY
         self.linematrix = (0, 0)
         return
 
 
+Color = Union[
+    float,                              # Greyscale
+    Tuple[float, float, float],         # R, G, B
+    Tuple[float, float, float, float]]  # C, M, Y, K
+
+
 class PDFGraphicState:
 
-    def __init__(self):
-        self.linewidth = 0
-        self.linecap = None
-        self.linejoin = None
-        self.miterlimit = None
-        self.dash = None
-        self.intent = None
-        self.flatness = None
+    def __init__(self) -> None:
+        self.linewidth: float = 0
+        self.linecap: Optional[object] = None
+        self.linejoin: Optional[object] = None
+        self.miterlimit: Optional[object] = None
+        self.dash: Optional[Tuple[object, object]] = None
+        self.intent: Optional[object] = None
+        self.flatness: Optional[object] = None
 
         # stroking color
-        self.scolor = None
+        self.scolor: Optional[Color] = None
 
         # non stroking color
-        self.ncolor = None
+        self.ncolor: Optional[Color] = None
         return
 
-    def copy(self):
+    def copy(self) -> "PDFGraphicState":
         obj = PDFGraphicState()
         obj.linewidth = self.linewidth
         obj.linecap = self.linecap
@@ -124,7 +140,7 @@ def copy(self):
         obj.ncolor = self.ncolor
         return obj
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
                 ' miterlimit=%r, dash=%r, intent=%r, flatness=%r, '
                 ' stroking color=%r, non stroking color=%r>' %
@@ -141,12 +157,12 @@ class PDFResourceManager:
     allocated multiple times.
     """
 
-    def __init__(self, caching=True):
+    def __init__(self, caching: bool = True) -> None:
         self.caching = caching
-        self._cached_fonts = {}
+        self._cached_fonts: Dict[object, PDFFont] = {}
         return
 
-    def get_procset(self, procs):
+    def get_procset(self, procs: Sequence[object]) -> None:
         for proc in procs:
             if proc is LITERAL_PDF:
                 pass
@@ -156,7 +172,7 @@ def get_procset(self, procs):
                 pass
         return
 
-    def get_cmap(self, cmapname, strict=False):
+    def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
         try:
             return CMapDB.get_cmap(cmapname)
         except CMapDB.CMapNotFound:
@@ -164,7 +180,7 @@ def get_cmap(self, cmapname, strict=False):
                 raise
             return CMap()
 
-    def get_font(self, objid, spec):
+    def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont:
         if objid and objid in self._cached_fonts:
             font = self._cached_fonts[objid]
         else:
@@ -209,15 +225,18 @@ def get_font(self, objid, spec):
         return font
 
 
-class PDFContentParser(PSStackParser):
+class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
 
-    def __init__(self, streams):
+    def __init__(self, streams: Sequence[object]) -> None:
         self.streams = streams
         self.istream = 0
-        PSStackParser.__init__(self, None)
+        # PSStackParser.__init__(fp=None) is safe only because we've overridden
+        # all the methods that would attempt to access self.fp without first
+        # calling self.fillfp().
+        PSStackParser.__init__(self, None)  # type: ignore[arg-type]
         return
 
-    def fillfp(self):
+    def fillfp(self) -> None:
         if not self.fp:
             if self.istream < len(self.streams):
                 strm = stream_value(self.streams[self.istream])
@@ -227,12 +246,12 @@ def fillfp(self):
             self.fp = BytesIO(strm.get_data())
         return
 
-    def seek(self, pos):
+    def seek(self, pos: int) -> None:
         self.fillfp()
         PSStackParser.seek(self, pos)
         return
 
-    def fillbuf(self):
+    def fillbuf(self) -> None:
         if self.charpos < len(self.buf):
             return
         while 1:
@@ -241,19 +260,23 @@ def fillbuf(self):
             self.buf = self.fp.read(self.BUFSIZ)
             if self.buf:
                 break
-            self.fp = None
+            self.fp = None  # type: ignore[assignment]
         self.charpos = 0
         return
 
-    def get_inline_data(self, pos, target=b'EI'):
+    def get_inline_data(
+        self,
+        pos: int,
+        target: bytes = b'EI'
+    ) -> Tuple[int, bytes]:
         self.seek(pos)
         i = 0
         data = b''
         while i <= len(target):
             self.fillbuf()
             if i:
-                c = self.buf[self.charpos]
-                c = bytes((c,))
+                ci = self.buf[self.charpos]
+                c = bytes((ci,))
                 data += c
                 self.charpos += 1
                 if len(target) <= i and c.isspace():
@@ -275,7 +298,7 @@ def get_inline_data(self, pos, target=b'EI'):
         data = re.sub(br'(\x0d\x0a|[\x0d\x0a])$', b'', data)
         return (pos, data)
 
-    def flush(self):
+    def flush(self) -> None:
         self.add_results(*self.popall())
         return
 
@@ -283,7 +306,7 @@ def flush(self):
     KEYWORD_ID = KWD(b'ID')
     KEYWORD_EI = KWD(b'EI')
 
-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
         if token is self.KEYWORD_BI:
             # inline image within a content stream
             self.start_type(pos, 'inline')
@@ -307,30 +330,34 @@ def do_keyword(self, pos, token):
         return
 
 
+PDFStackT = PSStackType[PDFStream]
+"""Types that may appear on the PDF argument stack."""
+
+
 class PDFPageInterpreter:
     """Processor for the content of a PDF page
 
     Reference: PDF Reference, Appendix A, Operator Summary
     """
 
-    def __init__(self, rsrcmgr, device):
+    def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice) -> None:
         self.rsrcmgr = rsrcmgr
         self.device = device
         return
 
-    def dup(self):
+    def dup(self) -> "PDFPageInterpreter":
         return self.__class__(self.rsrcmgr, self.device)
 
-    def init_resources(self, resources):
+    def init_resources(self, resources: Dict[object, object]) -> None:
         """Prepare the fonts and XObjects listed in the Resource attribute."""
         self.resources = resources
-        self.fontmap = {}
+        self.fontmap: Dict[object, PDFFont] = {}
         self.xobjmap = {}
-        self.csmap = PREDEFINED_COLORSPACE.copy()
+        self.csmap: Dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy()
         if not resources:
             return
 
-        def get_colorspace(spec):
+        def get_colorspace(spec: object) -> Optional[PDFColorSpace]:
             if isinstance(spec, list):
                 name = literal_name(spec[0])
             else:
@@ -343,6 +370,7 @@ def get_colorspace(spec):
                 return PDFColorSpace(name, len(list_value(spec[1])))
             else:
                 return PREDEFINED_COLORSPACE.get(name)
+
         for (k, v) in dict_value(resources).items():
             log.debug('Resource: %r: %r', k, v)
             if k == 'Font':
@@ -354,7 +382,9 @@ def get_colorspace(spec):
                     self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
             elif k == 'ColorSpace':
                 for (csid, spec) in dict_value(v).items():
-                    self.csmap[csid] = get_colorspace(resolve1(spec))
+                    colorspace = get_colorspace(resolve1(spec))
+                    if colorspace is not None:
+                        self.csmap[csid] = colorspace
             elif k == 'ProcSet':
                 self.rsrcmgr.get_procset(list_value(v))
             elif k == 'XObject':
@@ -362,130 +392,180 @@ def get_colorspace(spec):
                     self.xobjmap[xobjid] = xobjstrm
         return
 
-    def init_state(self, ctm):
+    def init_state(self, ctm: Matrix) -> None:
         """Initialize the text and graphic states for rendering a page."""
-        self.gstack = []  # stack for graphical states.
+        # gstack: stack for graphical states.
+        self.gstack: List[Tuple[Matrix, PDFTextState, PDFGraphicState]] = []
         self.ctm = ctm
         self.device.set_ctm(self.ctm)
         self.textstate = PDFTextState()
         self.graphicstate = PDFGraphicState()
-        self.curpath = []
+        self.curpath: List[PathSegment] = []
         # argstack: stack for command arguments.
-        self.argstack = []
+        self.argstack: List[PDFStackT] = []
         # set some global states.
-        self.scs = self.ncs = None
+        self.scs: Optional[PDFColorSpace] = None
+        self.ncs: Optional[PDFColorSpace] = None
         if self.csmap:
             self.scs = self.ncs = next(iter(self.csmap.values()))
         return
 
-    def push(self, obj):
+    def push(self, obj: PDFStackT) -> None:
         self.argstack.append(obj)
         return
 
-    def pop(self, n):
+    def pop(self, n: int) -> List[PDFStackT]:
         if n == 0:
             return []
         x = self.argstack[-n:]
         self.argstack = self.argstack[:-n]
         return x
 
-    def get_current_state(self):
+    def get_current_state(
+        self
+    ) -> Tuple[Matrix, PDFTextState, PDFGraphicState]:
         return (self.ctm, self.textstate.copy(), self.graphicstate.copy())
 
-    def set_current_state(self, state):
+    def set_current_state(
+        self,
+        state: Tuple[Matrix, PDFTextState, PDFGraphicState]
+    ) -> None:
         (self.ctm, self.textstate, self.graphicstate) = state
         self.device.set_ctm(self.ctm)
         return
 
-    def do_q(self):
+    def do_q(self) -> None:
         """Save graphics state"""
         self.gstack.append(self.get_current_state())
         return
 
-    def do_Q(self):
+    def do_Q(self) -> None:
         """Restore graphics state"""
         if self.gstack:
             self.set_current_state(self.gstack.pop())
         return
 
-    def do_cm(self, a1, b1, c1, d1, e1, f1):
+    def do_cm(
+        self,
+        a1: PDFStackT,
+        b1: PDFStackT,
+        c1: PDFStackT,
+        d1: PDFStackT,
+        e1: PDFStackT,
+        f1: PDFStackT
+    ) -> None:
         """Concatenate matrix to current transformation matrix"""
-        self.ctm = mult_matrix((a1, b1, c1, d1, e1, f1), self.ctm)
+        self.ctm = \
+            mult_matrix(cast(Matrix, (a1, b1, c1, d1, e1, f1)), self.ctm)
         self.device.set_ctm(self.ctm)
         return
 
-    def do_w(self, linewidth):
+    def do_w(self, linewidth: PDFStackT) -> None:
         """Set line width"""
-        self.graphicstate.linewidth = linewidth
+        self.graphicstate.linewidth = cast(float, linewidth)
         return
 
-    def do_J(self, linecap):
+    def do_J(self, linecap: PDFStackT) -> None:
         """Set line cap style"""
         self.graphicstate.linecap = linecap
         return
 
-    def do_j(self, linejoin):
+    def do_j(self, linejoin: PDFStackT) -> None:
         """Set line join style"""
         self.graphicstate.linejoin = linejoin
         return
 
-    def do_M(self, miterlimit):
+    def do_M(self, miterlimit: PDFStackT) -> None:
         """Set miter limit"""
         self.graphicstate.miterlimit = miterlimit
         return
 
-    def do_d(self, dash, phase):
+    def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None:
         """Set line dash pattern"""
         self.graphicstate.dash = (dash, phase)
         return
 
-    def do_ri(self, intent):
+    def do_ri(self, intent: PDFStackT) -> None:
         """Set color rendering intent"""
         self.graphicstate.intent = intent
         return
 
-    def do_i(self, flatness):
+    def do_i(self, flatness: PDFStackT) -> None:
         """Set flatness tolerance"""
         self.graphicstate.flatness = flatness
         return
 
-    def do_gs(self, name):
+    def do_gs(self, name: PDFStackT) -> None:
         """Set parameters from graphics state parameter dictionary"""
         # todo
         return
 
-    def do_m(self, x, y):
+    def do_m(self, x: PDFStackT, y: PDFStackT) -> None:
         """Begin new subpath"""
-        self.curpath.append(('m', x, y))
+        self.curpath.append(('m', cast(float, x), cast(float, y)))
         return
 
-    def do_l(self, x, y):
+    def do_l(self, x: PDFStackT, y: PDFStackT) -> None:
         """Append straight line segment to path"""
-        self.curpath.append(('l', x, y))
-        return
-
-    def do_c(self, x1, y1, x2, y2, x3, y3):
+        self.curpath.append(('l', cast(float, x), cast(float, y)))
+        return
+
+    def do_c(
+        self,
+        x1: PDFStackT,
+        y1: PDFStackT,
+        x2: PDFStackT,
+        y2: PDFStackT,
+        x3: PDFStackT,
+        y3: PDFStackT
+    ) -> None:
         """Append curved segment to path (three control points)"""
-        self.curpath.append(('c', x1, y1, x2, y2, x3, y3))
-        return
-
-    def do_v(self, x2, y2, x3, y3):
+        self.curpath.append(('c', cast(float, x1), cast(float, y1),
+                             cast(float, x2), cast(float, y2),
+                             cast(float, x3), cast(float, y3)))
+        return
+
+    def do_v(
+        self,
+        x2: PDFStackT,
+        y2: PDFStackT,
+        x3: PDFStackT,
+        y3: PDFStackT
+    ) -> None:
         """Append curved segment to path (initial point replicated)"""
-        self.curpath.append(('v', x2, y2, x3, y3))
+        self.curpath.append(('v', cast(float, x2), cast(float, y2),
+                             cast(float, x3), cast(float, y3)))
         return
 
-    def do_y(self, x1, y1, x3, y3):
+    def do_y(
+        self,
+        x1: PDFStackT,
+        y1: PDFStackT,
+        x3: PDFStackT,
+        y3: PDFStackT
+    ) -> None:
         """Append curved segment to path (final point replicated)"""
-        self.curpath.append(('y', x1, y1, x3, y3))
+        self.curpath.append(('y', cast(float, x1), cast(float, y1),
+                             cast(float, x3), cast(float, y3)))
         return
 
-    def do_h(self):
+    def do_h(self) -> None:
         """Close subpath"""
         self.curpath.append(('h',))
         return
 
-    def do_re(self, x, y, w, h):
+    def do_re(
+        self,
+        x: PDFStackT,
+        y: PDFStackT,
+        w: PDFStackT,
+        h: PDFStackT
+    ) -> None:
         """Append rectangle to path"""
+        x = cast(float, x)
+        y = cast(float, y)
+        w = cast(float, w)
+        h = cast(float, h)
         self.curpath.append(('m', x, y))
         self.curpath.append(('l', x+w, y))
         self.curpath.append(('l', x+w, y+h))
@@ -493,77 +573,77 @@ def do_re(self, x, y, w, h):
         self.curpath.append(('h',))
         return
 
-    def do_S(self):
+    def do_S(self) -> None:
         """Stroke path"""
         self.device.paint_path(self.graphicstate, True, False, False,
                                self.curpath)
         self.curpath = []
         return
 
-    def do_s(self):
+    def do_s(self) -> None:
         """Close and stroke path"""
         self.do_h()
         self.do_S()
         return
 
-    def do_f(self):
+    def do_f(self) -> None:
         """Fill path using nonzero winding number rule"""
         self.device.paint_path(self.graphicstate, False, True, False,
                                self.curpath)
         self.curpath = []
         return
 
-    def do_F(self):
+    def do_F(self) -> None:
         """Fill path using nonzero winding number rule (obsolete)"""
         return self.do_f()
 
-    def do_f_a(self):
+    def do_f_a(self) -> None:
         """Fill path using even-odd rule"""
         self.device.paint_path(self.graphicstate, False, True, True,
                                self.curpath)
         self.curpath = []
         return
 
-    def do_B(self):
+    def do_B(self) -> None:
         """Fill and stroke path using nonzero winding number rule"""
         self.device.paint_path(self.graphicstate, True, True, False,
                                self.curpath)
         self.curpath = []
         return
 
-    def do_B_a(self):
+    def do_B_a(self) -> None:
         """Fill and stroke path using even-odd rule"""
         self.device.paint_path(self.graphicstate, True, True, True,
                                self.curpath)
         self.curpath = []
         return
 
-    def do_b(self):
+    def do_b(self) -> None:
         """Close, fill, and stroke path using nonzero winding number rule"""
         self.do_h()
         self.do_B()
         return
 
-    def do_b_a(self):
+    def do_b_a(self) -> None:
         """Close, fill, and stroke path using even-odd rule"""
         self.do_h()
         self.do_B_a()
         return
 
-    def do_n(self):
+    def do_n(self) -> None:
         """End path without filling or stroking"""
         self.curpath = []
         return
 
-    def do_W(self):
+    def do_W(self) -> None:
         """Set clipping path using nonzero winding number rule"""
         return
 
-    def do_W_a(self):
+    def do_W_a(self) -> None:
         """Set clipping path using even-odd rule"""
         return
 
-    def do_CS(self, name):
+    def do_CS(self, name: PDFStackT) -> None:
         """Set color space for stroking operations
 
         Introduced in PDF 1.1
@@ -575,7 +655,7 @@ def do_CS(self, name):
                 raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
         return
 
-    def do_cs(self, name):
+    def do_cs(self, name: PDFStackT) -> None:
         """Set color space for nonstroking operations"""
         try:
             self.ncs = self.csmap[literal_name(name)]
@@ -584,37 +664,53 @@ def do_cs(self, name):
                 raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
         return
 
-    def do_G(self, gray):
+    def do_G(self, gray: PDFStackT) -> None:
         """Set gray level for stroking operations"""
-        self.graphicstate.scolor = gray
+        self.graphicstate.scolor = cast(float, gray)
         return
 
-    def do_g(self, gray):
+    def do_g(self, gray: PDFStackT) -> None:
         """Set gray level for nonstroking operations"""
-        self.graphicstate.ncolor = gray
+        self.graphicstate.ncolor = cast(float, gray)
         return
 
-    def do_RG(self, r, g, b):
+    def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
         """Set RGB color for stroking operations"""
-        self.graphicstate.scolor = (r, g, b)
+        self.graphicstate.scolor = \
+            (cast(float, r), cast(float, g), cast(float, b))
         return
 
-    def do_rg(self, r, g, b):
+    def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
         """Set RGB color for nonstroking operations"""
-        self.graphicstate.ncolor = (r, g, b)
+        self.graphicstate.ncolor = \
+            (cast(float, r), cast(float, g), cast(float, b))
         return
 
-    def do_K(self, c, m, y, k):
+    def do_K(
+        self,
+        c: PDFStackT,
+        m: PDFStackT,
+        y: PDFStackT,
+        k: PDFStackT
+    ) -> None:
         """Set CMYK color for stroking operations"""
-        self.graphicstate.scolor = (c, m, y, k)
+        self.graphicstate.scolor = \
+            (cast(float, c), cast(float, m), cast(float, y), cast(float, k))
         return
 
-    def do_k(self, c, m, y, k):
+    def do_k(
+        self,
+        c: PDFStackT,
+        m: PDFStackT,
+        y: PDFStackT,
+        k: PDFStackT
+    ) -> None:
         """Set CMYK color for nonstroking operations"""
-        self.graphicstate.ncolor = (c, m, y, k)
+        self.graphicstate.ncolor = \
+            (cast(float, c), cast(float, m), cast(float, y), cast(float, k))
         return
 
-    def do_SCN(self):
+    def do_SCN(self) -> None:
         """Set color for stroking operations."""
         if self.scs:
             n = self.scs.ncomponents
@@ -622,10 +718,10 @@ def do_SCN(self):
             if settings.STRICT:
                 raise PDFInterpreterError('No colorspace specified!')
             n = 1
-        self.graphicstate.scolor = self.pop(n)
+        self.graphicstate.scolor = cast(Color, self.pop(n))
         return
 
-    def do_scn(self):
+    def do_scn(self) -> None:
         """Set color for nonstroking operations"""
         if self.ncs:
             n = self.ncs.ncomponents
@@ -633,24 +729,24 @@ def do_scn(self):
             if settings.STRICT:
                 raise PDFInterpreterError('No colorspace specified!')
             n = 1
-        self.graphicstate.ncolor = self.pop(n)
+        self.graphicstate.ncolor = cast(Color, self.pop(n))
         return
 
-    def do_SC(self):
+    def do_SC(self) -> None:
         """Set color for stroking operations"""
         self.do_SCN()
         return
 
-    def do_sc(self):
+    def do_sc(self) -> None:
         """Set color for nonstroking operations"""
         self.do_scn()
         return
 
-    def do_sh(self, name):
+    def do_sh(self, name: object) -> None:
         """Paint area defined by shading pattern"""
         return
 
-    def do_BT(self):
+    def do_BT(self) -> None:
         """Begin text object
 
         Initializing the text matrix, Tm, and the text line matrix, Tlm, to
@@ -660,82 +756,82 @@ def do_BT(self):
         self.textstate.reset()
         return
 
-    def do_ET(self):
+    def do_ET(self) -> None:
         """End a text object"""
         return
 
-    def do_BX(self):
+    def do_BX(self) -> None:
         """Begin compatibility section"""
         return
 
-    def do_EX(self):
+    def do_EX(self) -> None:
         """End compatibility section"""
         return
 
-    def do_MP(self, tag):
+    def do_MP(self, tag: PDFStackT) -> None:
         """Define marked-content point"""
-        self.device.do_tag(tag)
+        self.device.do_tag(cast(PSLiteral, tag))
         return
 
-    def do_DP(self, tag, props):
+    def do_DP(self, tag: PDFStackT, props: PDFStackT) -> None:
         """Define marked-content point with property list"""
-        self.device.do_tag(tag, props)
+        self.device.do_tag(cast(PSLiteral, tag), props)
         return
 
-    def do_BMC(self, tag):
+    def do_BMC(self, tag: PDFStackT) -> None:
         """Begin marked-content sequence"""
-        self.device.begin_tag(tag)
+        self.device.begin_tag(cast(PSLiteral, tag))
         return
 
-    def do_BDC(self, tag, props):
+    def do_BDC(self, tag: PDFStackT, props: PDFStackT) -> None:
         """Begin marked-content sequence with property list"""
-        self.device.begin_tag(tag, props)
+        self.device.begin_tag(cast(PSLiteral, tag), props)
         return
 
-    def do_EMC(self):
+    def do_EMC(self) -> None:
         """End marked-content sequence"""
         self.device.end_tag()
         return
 
-    def do_Tc(self, space):
+    def do_Tc(self, space: PDFStackT) -> None:
         """Set character spacing.
 
         Character spacing is used by the Tj, TJ, and ' operators.
 
         :param space: a number expressed in unscaled text space units.
         """
-        self.textstate.charspace = space
+        self.textstate.charspace = cast(float, space)
         return
 
-    def do_Tw(self, space):
+    def do_Tw(self, space: PDFStackT) -> None:
         """Set the word spacing.
 
         Word spacing is used by the Tj, TJ, and ' operators.
 
         :param space: a number expressed in unscaled text space units
         """
-        self.textstate.wordspace = space
+        self.textstate.wordspace = cast(float, space)
         return
 
-    def do_Tz(self, scale):
+    def do_Tz(self, scale: PDFStackT) -> None:
         """Set the horizontal scaling.
 
         :param scale: is a number specifying the percentage of the normal width
         """
-        self.textstate.scaling = scale
+        self.textstate.scaling = cast(float, scale)
         return
 
-    def do_TL(self, leading):
+    def do_TL(self, leading: PDFStackT) -> None:
         """Set the text leading.
 
         Text leading is used only by the T*, ', and " operators.
 
         :param leading: a number expressed in unscaled text space units
         """
-        self.textstate.leading = -leading
+        self.textstate.leading = -cast(float, leading)
         return
 
-    def do_Tf(self, fontid, fontsize):
+    def do_Tf(self, fontid: PDFStackT, fontsize: PDFStackT) -> None:
         """Set the text font
 
         :param fontid: the name of a font resource in the Font subdictionary
@@ -748,44 +844,56 @@ def do_Tf(self, fontid, fontsize):
             if settings.STRICT:
                 raise PDFInterpreterError('Undefined Font id: %r' % fontid)
             self.textstate.font = self.rsrcmgr.get_font(None, {})
-        self.textstate.fontsize = fontsize
+        self.textstate.fontsize = cast(float, fontsize)
         return
 
-    def do_Tr(self, render):
+    def do_Tr(self, render: PDFStackT) -> None:
         """Set the text rendering mode"""
-        self.textstate.render = render
+        self.textstate.render = cast(int, render)
         return
 
-    def do_Ts(self, rise):
+    def do_Ts(self, rise: PDFStackT) -> None:
         """Set the text rise
 
         :param rise: a number expressed in unscaled text space units
         """
-        self.textstate.rise = rise
+        self.textstate.rise = cast(float, rise)
         return
 
-    def do_Td(self, tx, ty):
+    def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None:
         """Move text position"""
+        tx = cast(float, tx)
+        ty = cast(float, ty)
         (a, b, c, d, e, f) = self.textstate.matrix
         self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
         self.textstate.linematrix = (0, 0)
         return
 
-    def do_TD(self, tx, ty):
+    def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None:
         """Move text position and set leading"""
+        tx = cast(float, tx)
+        ty = cast(float, ty)
         (a, b, c, d, e, f) = self.textstate.matrix
         self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
         self.textstate.leading = ty
         self.textstate.linematrix = (0, 0)
         return
 
-    def do_Tm(self, a, b, c, d, e, f):
+    def do_Tm(
+        self,
+        a: PDFStackT,
+        b: PDFStackT,
+        c: PDFStackT,
+        d: PDFStackT,
+        e: PDFStackT,
+        f: PDFStackT
+    ) -> None:
         """Set text matrix and text line matrix"""
-        self.textstate.matrix = (a, b, c, d, e, f)
+        self.textstate.matrix = cast(Matrix, (a, b, c, d, e, f))
         self.textstate.linematrix = (0, 0)
         return
 
-    def do_T_a(self):
+    def do_T_a(self) -> None:
         """Move to start of next text line"""
         (a, b, c, d, e, f) = self.textstate.matrix
         self.textstate.matrix = (a, b, c, d, self.textstate.leading*c+e,
@@ -793,22 +901,23 @@ def do_T_a(self):
         self.textstate.linematrix = (0, 0)
         return
 
-    def do_TJ(self, seq):
+    def do_TJ(self, seq: PDFStackT) -> None:
         """Show text, allowing individual glyph positioning"""
         if self.textstate.font is None:
             if settings.STRICT:
                 raise PDFInterpreterError('No font specified!')
             return
-        self.device.render_string(self.textstate, seq, self.ncs,
-                                  self.graphicstate.copy())
+        assert self.ncs is not None
+        self.device.render_string(self.textstate, cast(PDFTextSeq, seq),
+                                  self.ncs, self.graphicstate.copy())
         return
 
-    def do_Tj(self, s):
+    def do_Tj(self, s: PDFStackT) -> None:
         """Show text"""
         self.do_TJ([s])
         return
 
-    def do__q(self, s):
+    def do__q(self, s: PDFStackT) -> None:
         """Move to next line and show text
 
         The ' (single quote) operator.
@@ -817,7 +926,7 @@ def do__q(self, s):
         self.do_TJ([s])
         return
 
-    def do__w(self, aw, ac, s):
+    def do__w(self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT) -> None:
         """Set word and character spacing, move to next line, and show text
 
         The " (double quote) operator.
@@ -827,15 +936,15 @@ def do__w(self, aw, ac, s):
         self.do_TJ([s])
         return
 
-    def do_BI(self):
+    def do_BI(self) -> None:
         """Begin inline image object"""
         return
 
-    def do_ID(self):
+    def do_ID(self) -> None:
         """Begin inline image data"""
         return
 
-    def do_EI(self, obj):
+    def do_EI(self, obj: PDFStackT) -> None:
         """End inline image object"""
         if isinstance(obj, PDFStream) and 'W' in obj and 'H' in obj:
             iobjid = str(id(obj))
@@ -844,9 +953,9 @@ def do_EI(self, obj):
             self.device.end_figure(iobjid)
         return
 
-    def do_Do(self, xobjid):
+    def do_Do(self, xobjid_arg: PDFStackT) -> None:
         """Invoke named XObject"""
-        xobjid = literal_name(xobjid)
+        xobjid = cast(str, literal_name(xobjid_arg))
         try:
             xobj = stream_value(self.xobjmap[xobjid])
         except KeyError:
@@ -857,8 +966,9 @@ def do_Do(self, xobjid):
         subtype = xobj.get('Subtype')
         if subtype is LITERAL_FORM and 'BBox' in xobj:
             interpreter = self.dup()
-            bbox = list_value(xobj['BBox'])
-            matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
+            bbox = cast(Rect, list_value(xobj['BBox']))
+            matrix = cast(Matrix, list_value(
+                xobj.get('Matrix', MATRIX_IDENTITY)))
             # According to PDF reference 1.7 section 4.9.1, XObjects in
             # earlier PDFs (prior to v1.2) use the page's Resources entry
             # instead of having their own Resources entry.
@@ -880,7 +990,7 @@ def do_Do(self, xobjid):
             pass
         return
 
-    def process_page(self, page):
+    def process_page(self, page: PDFPage) -> None:
         log.info('Processing page: %r', page)
         (x0, y0, x1, y1) = page.mediabox
         if page.rotate == 90:
@@ -896,7 +1006,12 @@ def process_page(self, page):
         self.device.end_page(page)
         return
 
-    def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
+    def render_contents(
+        self,
+        resources: Dict[object, object],
+        streams: Sequence[object],
+        ctm: Matrix = MATRIX_IDENTITY
+    ) -> None:
         """Render the content streams.
 
         This method may be called recursively.
@@ -908,7 +1023,7 @@ def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
         self.execute(list_value(streams))
         return
 
-    def execute(self, streams):
+    def execute(self, streams: Sequence[object]) -> None:
         try:
             parser = PDFContentParser(streams)
         except PSEOF:
diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py
index 48da18c9..8380c239 100644
--- a/pdfminer/pdfpage.py
+++ b/pdfminer/pdfpage.py
@@ -1,4 +1,6 @@
 import logging
+from pdfminer.utils import Rect
+from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
 import warnings
 from . import settings
 from .psparser import LIT
@@ -32,7 +34,7 @@ class PDFPage:
       attrs: a dictionary of page attributes.
       contents: a list of PDFStream objects that represents the page content.
       lastmod: the last modified time of the page.
-      resources: a list of resources used by the page.
+      resources: a dictionary of resources used by the page.
       mediabox: the physical size of the page.
       cropbox: the crop rectangle of the page.
       rotate: the page rotation (in degree).
@@ -40,7 +42,12 @@ class PDFPage:
       beads: a chain that represents natural reading order.
     """
 
-    def __init__(self, doc, pageid, attrs):
+    def __init__(
+        self,
+        doc: PDFDocument,
+        pageid: object,
+        attrs: object
+    ) -> None:
         """Initialize a page object.
 
         doc: a PDFDocument object.
@@ -51,10 +58,11 @@ def __init__(self, doc, pageid, attrs):
         self.pageid = pageid
         self.attrs = dict_value(attrs)
         self.lastmod = resolve1(self.attrs.get('LastModified'))
-        self.resources = resolve1(self.attrs.get('Resources', dict()))
-        self.mediabox = resolve1(self.attrs['MediaBox'])
+        self.resources: Dict[object, object] = \
+            resolve1(self.attrs.get('Resources', dict()))
+        self.mediabox: Rect = resolve1(self.attrs['MediaBox'])
         if 'CropBox' in self.attrs:
-            self.cropbox = resolve1(self.attrs['CropBox'])
+            self.cropbox: Rect = resolve1(self.attrs['CropBox'])
         else:
             self.cropbox = self.mediabox
         self.rotate = (int_value(self.attrs.get('Rotate', 0))+360) % 360
@@ -66,23 +74,28 @@ def __init__(self, doc, pageid, attrs):
             contents = []
         if not isinstance(contents, list):
             contents = [contents]
-        self.contents = contents
+        self.contents: List[object] = contents
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFPage: Resources={!r}, MediaBox={!r}>'\
             .format(self.resources, self.mediabox)
 
     INHERITABLE_ATTRS = {'Resources', 'MediaBox', 'CropBox', 'Rotate'}
 
     @classmethod
-    def create_pages(cls, document):
-        def search(obj, parent):
+    def create_pages(cls, document: PDFDocument) -> Iterator["PDFPage"]:
+        def search(
+            obj: object,
+            parent: Dict[str, object]
+        ) -> Iterator[Tuple[int, Dict[object, Dict[object, object]]]]:
             if isinstance(obj, int):
                 objid = obj
                 tree = dict_value(document.getobj(objid)).copy()
             else:
-                objid = obj.objid
+                # This looks broken. obj.objid means obj could be either
+                # PDFObjRef or PDFStream, but neither is valid for dict_value.
+                objid = obj.objid  # type: ignore[attr-defined]
                 tree = dict_value(obj).copy()
             for (k, v) in parent.items():
                 if k in cls.INHERITABLE_ATTRS and k not in tree:
@@ -119,9 +132,15 @@ def search(obj, parent):
         return
 
     @classmethod
-    def get_pages(cls, fp,
-                  pagenos=None, maxpages=0, password='',
-                  caching=True, check_extractable=False):
+    def get_pages(
+        cls,
+        fp: BinaryIO,
+        pagenos: Optional[Container[int]] = None,
+        maxpages: int = 0,
+        password: str = '',
+        caching: bool = True,
+        check_extractable: bool = False
+    ) -> Iterator["PDFPage"]:
         # Create a PDF parser object associated with the file object.
         parser = PDFParser(fp)
         # Create a PDF document object that stores the document structure.
diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py
index b604b9dd..18ad9ebd 100644
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@@ -1,6 +1,8 @@
 import logging
 from io import BytesIO
+from typing import BinaryIO, TYPE_CHECKING, Optional, Union
 from .psparser import PSStackParser
+from .psparser import PSKeyword
 from .psparser import PSSyntaxError
 from .psparser import PSEOF
 from .psparser import KWD
@@ -11,6 +13,9 @@
 from .pdftypes import int_value
 from .pdftypes import dict_value
 
+if TYPE_CHECKING:
+    from .pdfdocument import PDFDocument
+
 log = logging.getLogger(__name__)
 
 
@@ -18,7 +23,8 @@ class PDFSyntaxError(PDFException):
     pass
 
 
-class PDFParser(PSStackParser):
+# PDFParser stack holds all the base types plus PDFStream, PDFObjRef, and None
+class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
     """
     PDFParser fetch PDF objects from a file stream.
     It can handle indirect references by referring to
@@ -35,13 +41,13 @@ class PDFParser(PSStackParser):
 
     """
 
-    def __init__(self, fp):
+    def __init__(self, fp: BinaryIO) -> None:
         PSStackParser.__init__(self, fp)
-        self.doc = None
+        self.doc: Optional["PDFDocument"] = None
         self.fallback = False
         return
 
-    def set_document(self, doc):
+    def set_document(self, doc: "PDFDocument") -> None:
         """Associates the parser with a PDFDocument object."""
         self.doc = doc
         return
@@ -53,7 +59,7 @@ def set_document(self, doc):
     KEYWORD_XREF = KWD(b'xref')
     KEYWORD_STARTXREF = KWD(b'startxref')
 
-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
         """Handles PDF-related keywords."""
 
         if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
@@ -71,7 +77,9 @@ def do_keyword(self, pos, token):
             if len(self.curstack) >= 2:
                 try:
                     ((_, objid), (_, genno)) = self.pop(2)
-                    (objid, genno) = (int(objid), int(genno))
+                    (objid, genno) = (
+                        int(objid), int(genno))  # type: ignore[arg-type]
+                    assert self.doc is not None
                     obj = PDFObjRef(self.doc, objid, genno)
                     self.push((pos, obj))
                 except PSSyntaxError:
@@ -114,13 +122,13 @@ def do_keyword(self, pos, token):
                 objlen += len(line)
                 if self.fallback:
                     data += line
-            data = bytes(data)
             self.seek(pos+objlen)
             # XXX limit objlen not to exceed object boundary
             log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos,
                       objlen, dic, data[:10])
-            obj = PDFStream(dic, data, self.doc.decipher)
-            self.push((pos, obj))
+            assert self.doc is not None
+            stream = PDFStream(dic, bytes(data), self.doc.decipher)
+            self.push((pos, stream))
 
         else:
             # others
@@ -138,22 +146,23 @@ class PDFStreamParser(PDFParser):
     indirect references to other objects in the same document.
     """
 
-    def __init__(self, data):
+    def __init__(self, data: bytes) -> None:
         PDFParser.__init__(self, BytesIO(data))
         return
 
-    def flush(self):
+    def flush(self) -> None:
         self.add_results(*self.popall())
         return
 
     KEYWORD_OBJ = KWD(b'obj')
 
-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
         if token is self.KEYWORD_R:
             # reference to indirect object
             try:
                 ((_, objid), (_, genno)) = self.pop(2)
-                (objid, genno) = (int(objid), int(genno))
+                (objid, genno) = (
+                    int(objid), int(genno))  # type: ignore[arg-type]
                 obj = PDFObjRef(self.doc, objid, genno)
                 self.push((pos, obj))
             except PSSyntaxError:
diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py
index 14c729b8..6190ea99 100644
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@@ -1,5 +1,8 @@
 import zlib
 import logging
+import sys
+from typing import (TYPE_CHECKING, Any, Dict, Iterable, Optional, Union, List,
+                    Tuple, cast)
 from .lzw import lzwdecode
 from .ascii85 import ascii85decode
 from .ascii85 import asciihexdecode
@@ -10,7 +13,9 @@
 from .psparser import LIT
 from . import settings
 from .utils import apply_png_predictor
-from .utils import isnumber
+
+if TYPE_CHECKING:
+    from .pdfdocument import PDFDocument
 
 
 log = logging.getLogger(__name__)
@@ -28,6 +33,21 @@
 LITERALS_JBIG2_DECODE = (LIT('JBIG2Decode'),)
 
 
+if sys.version_info >= (3, 8):
+    from typing import Protocol
+
+    class DecipherCallable(Protocol):
+        """A fully typed decipher callback, with an optional parameter."""
+        def __call__(self, objid: int, genno: int, data: bytes,
+                     attrs: Optional[Dict[str, Any]] = None) -> bytes:
+            raise NotImplementedError
+
+else:  # Fallback for older Python
+    from typing import Callable
+
+    DecipherCallable = Callable[..., bytes]
+
+
 class PDFObject(PSObject):
     pass
 
@@ -54,7 +74,12 @@ class PDFNotImplementedError(PDFException):
 
 class PDFObjRef(PDFObject):
 
-    def __init__(self, doc, objid, _):
+    def __init__(
+        self,
+        doc: Optional["PDFDocument"],
+        objid: int,
+        _: object
+    ) -> None:
         if objid == 0:
             if settings.STRICT:
                 raise PDFValueError('PDF object id cannot be 0.')
@@ -62,17 +87,18 @@ def __init__(self, doc, objid, _):
         self.objid = objid
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<PDFObjRef:%d>' % (self.objid)
 
-    def resolve(self, default=None):
+    def resolve(self, default: object = None) -> Any:
+        assert self.doc is not None
         try:
             return self.doc.getobj(self.objid)
         except PDFObjectNotFound:
             return default
 
 
-def resolve1(x, default=None):
+def resolve1(x: object, default: object = None) -> Any:
     """Resolves an object.
 
     If this is an array or dictionary, it may still contains
@@ -83,7 +109,7 @@ def resolve1(x, default=None):
     return x
 
 
-def resolve_all(x, default=None):
+def resolve_all(x: object, default: object = None) -> Any:
     """Recursively resolves the given object and all the internals.
 
     Make sure there is no indirect reference within the nested object.
@@ -99,7 +125,12 @@ def resolve_all(x, default=None):
     return x
 
 
-def decipher_all(decipher, objid, genno, x):
+def decipher_all(
+    decipher: DecipherCallable,
+    objid: int,
+    genno: int,
+    x: object
+) -> Any:
     """Recursively deciphers the given object.
     """
     if isinstance(x, bytes):
@@ -112,7 +143,7 @@ def decipher_all(decipher, objid, genno, x):
     return x
 
 
-def int_value(x):
+def int_value(x: object) -> int:
     x = resolve1(x)
     if not isinstance(x, int):
         if settings.STRICT:
@@ -121,7 +152,7 @@ def int_value(x):
     return x
 
 
-def float_value(x):
+def float_value(x: object) -> float:
     x = resolve1(x)
     if not isinstance(x, float):
         if settings.STRICT:
@@ -130,34 +161,34 @@ def float_value(x):
     return x
 
 
-def num_value(x):
+def num_value(x: object) -> float:
     x = resolve1(x)
-    if not isnumber(x):
+    if not isinstance(x, (int, float)):  # == utils.isnumber(x)
         if settings.STRICT:
             raise PDFTypeError('Int or Float required: %r' % x)
         return 0
     return x
 
 
-def uint_value(x, n_bits):
+def uint_value(x: object, n_bits: int) -> int:
     """Resolve number and interpret it as a two's-complement unsigned number"""
-    x = int_value(x)
-    if x > 0:
-        return x
+    xi = int_value(x)
+    if xi > 0:
+        return xi
     else:
-        return x + 2**n_bits
+        return xi + cast(int, 2**n_bits)
 
 
-def str_value(x):
+def str_value(x: object) -> bytes:
     x = resolve1(x)
     if not isinstance(x, bytes):
         if settings.STRICT:
             raise PDFTypeError('String required: %r' % x)
-        return ''
+        return b''
     return x
 
 
-def list_value(x):
+def list_value(x: object) -> Union[List[Any], Tuple[Any, ...]]:
     x = resolve1(x)
     if not isinstance(x, (list, tuple)):
         if settings.STRICT:
@@ -166,7 +197,7 @@ def list_value(x):
     return x
 
 
-def dict_value(x):
+def dict_value(x: object) -> Dict[Any, Any]:
     x = resolve1(x)
     if not isinstance(x, dict):
         if settings.STRICT:
@@ -176,7 +207,7 @@ def dict_value(x):
     return x
 
 
-def stream_value(x):
+def stream_value(x: object) -> "PDFStream":
     x = resolve1(x)
     if not isinstance(x, PDFStream):
         if settings.STRICT:
@@ -187,22 +218,27 @@ def stream_value(x):
 
 class PDFStream(PDFObject):
 
-    def __init__(self, attrs, rawdata, decipher=None):
+    def __init__(
+        self,
+        attrs: Dict[str, Any],
+        rawdata: bytes,
+        decipher: Optional[DecipherCallable] = None
+    ) -> None:
         assert isinstance(attrs, dict), str(type(attrs))
         self.attrs = attrs
-        self.rawdata = rawdata
+        self.rawdata: Optional[bytes] = rawdata
         self.decipher = decipher
-        self.data = None
-        self.objid = None
-        self.genno = None
+        self.data: Optional[bytes] = None
+        self.objid: Optional[int] = None
+        self.genno: Optional[int] = None
         return
 
-    def set_objid(self, objid, genno):
+    def set_objid(self, objid: int, genno: int) -> None:
         self.objid = objid
         self.genno = genno
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         if self.data is None:
             assert self.rawdata is not None
             return '<PDFStream(%r): raw=%d, %r>' % \
@@ -212,22 +248,22 @@ def __repr__(self):
             return '<PDFStream(%r): len=%d, %r>' % \
                    (self.objid, len(self.data), self.attrs)
 
-    def __contains__(self, name):
+    def __contains__(self, name: object) -> bool:
         return name in self.attrs
 
-    def __getitem__(self, name):
+    def __getitem__(self, name: str) -> Any:
         return self.attrs[name]
 
-    def get(self, name, default=None):
+    def get(self, name: str, default: object = None) -> Any:
         return self.attrs.get(name, default)
 
-    def get_any(self, names, default=None):
+    def get_any(self, names: Iterable[str], default: object = None) -> Any:
         for name in names:
             if name in self.attrs:
                 return self.attrs[name]
         return default
 
-    def get_filters(self):
+    def get_filters(self) -> List[Tuple[Any, Any]]:
         filters = self.get_any(('F', 'Filter'))
         params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if not filters:
@@ -248,12 +284,14 @@ def get_filters(self):
         # return list solves https://github.com/pdfminer/pdfminer.six/issues/15
         return list(zip(_filters, params))
 
-    def decode(self):
+    def decode(self) -> None:
         assert self.data is None \
                and self.rawdata is not None, str((self.data, self.rawdata))
         data = self.rawdata
         if self.decipher:
             # Handle encryption
+            assert self.objid is not None
+            assert self.genno is not None
             data = self.decipher(self.objid, self.genno, data, self.attrs)
         filters = self.get_filters()
         if not filters:
@@ -314,10 +352,11 @@ def decode(self):
         self.rawdata = None
         return
 
-    def get_data(self):
+    def get_data(self) -> bytes:
         if self.data is None:
             self.decode()
+            assert self.data is not None
         return self.data
 
-    def get_rawdata(self):
+    def get_rawdata(self) -> Optional[bytes]:
         return self.rawdata
diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py
index 10cf05a7..a05009e4 100644
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@@ -4,7 +4,8 @@
 
 import re
 import logging
-
+from typing import (Any, BinaryIO, Dict, Generic, Iterator, List,
+                    Optional, Tuple, Type, TypeVar, Union)
 
 from . import settings
 from .utils import choplist
@@ -51,10 +52,12 @@ class PSLiteral(PSObject):
     Always use PSLiteralTable.intern().
     """
 
-    def __init__(self, name):
+    NameType = Union[str, bytes]
+
+    def __init__(self, name: NameType) -> None:
         self.name = name
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         name = self.name
         return '/%r' % name
 
@@ -71,31 +74,36 @@ class PSKeyword(PSObject):
     Always use PSKeywordTable.intern().
     """
 
-    def __init__(self, name):
+    def __init__(self, name: bytes) -> None:
         self.name = name
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         name = self.name
         return '/%r' % name
 
 
-class PSSymbolTable:
+_SymbolT = TypeVar('_SymbolT', PSLiteral, PSKeyword)
+
+
+class PSSymbolTable(Generic[_SymbolT]):
     """A utility class for storing PSLiteral/PSKeyword objects.
 
     Interned objects can be checked its identity with "is" operator.
     """
 
-    def __init__(self, klass):
-        self.dict = {}
-        self.klass = klass
+    def __init__(self, klass: Type[_SymbolT]) -> None:
+        self.dict: Dict[PSLiteral.NameType, _SymbolT] = {}
+        self.klass: Type[_SymbolT] = klass
         return
 
-    def intern(self, name):
+    def intern(self, name: PSLiteral.NameType) -> _SymbolT:
         if name in self.dict:
             lit = self.dict[name]
         else:
-            lit = self.klass(name)
+            # Type confusion issue: PSKeyword always takes bytes as name
+            #                       PSLiteral uses either str or bytes
+            lit = self.klass(name)  # type: ignore[arg-type]
             self.dict[name] = lit
         return lit
 
@@ -112,7 +120,7 @@ def intern(self, name):
 KEYWORD_DICT_END = KWD(b'>>')
 
 
-def literal_name(x):
+def literal_name(x: object) -> Any:
     if not isinstance(x, PSLiteral):
         if settings.STRICT:
             raise PSTypeError('Literal required: {!r}'.format(x))
@@ -120,14 +128,15 @@ def literal_name(x):
             name = x
     else:
         name = x.name
-        try:
-            name = str(name, 'utf-8')
-        except Exception:
-            pass
+        if not isinstance(name, str):
+            try:
+                name = str(name, 'utf-8')
+            except Exception:
+                pass
     return name
 
 
-def keyword_name(x):
+def keyword_name(x: object) -> Any:
     if not isinstance(x, PSKeyword):
         if settings.STRICT:
             raise PSTypeError('Keyword required: %r' % x)
@@ -161,32 +170,35 @@ def keyword_name(x):
 }
 
 
+PSBaseParserToken = Union[float, bool, PSLiteral, PSKeyword, bytes]
+
+
 class PSBaseParser:
 
     """Most basic PostScript parser that performs only tokenization.
     """
     BUFSIZ = 4096
 
-    def __init__(self, fp):
+    def __init__(self, fp: BinaryIO) -> None:
         self.fp = fp
         self.seek(0)
         return
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp,
                                         self.bufpos)
 
-    def flush(self):
+    def flush(self) -> None:
         return
 
-    def close(self):
+    def close(self) -> None:
         self.flush()
         return
 
-    def tell(self):
+    def tell(self) -> int:
         return self.bufpos+self.charpos
 
-    def poll(self, pos=None, n=80):
+    def poll(self, pos: Optional[int] = None, n: int = 80) -> None:
         pos0 = self.fp.tell()
         if not pos:
             pos = self.bufpos+self.charpos
@@ -195,7 +207,7 @@ def poll(self, pos=None, n=80):
         self.fp.seek(pos0)
         return
 
-    def seek(self, pos):
+    def seek(self, pos: int) -> None:
         """Seeks the parser to the given position.
         """
         log.debug('seek: %r', pos)
@@ -208,10 +220,10 @@ def seek(self, pos):
         self._parse1 = self._parse_main
         self._curtoken = b''
         self._curtokenpos = 0
-        self._tokens = []
+        self._tokens: List[Tuple[int, PSBaseParserToken]] = []
         return
 
-    def fillbuf(self):
+    def fillbuf(self) -> None:
         if self.charpos < len(self.buf):
             return
         # fetch next chunk.
@@ -222,7 +234,7 @@ def fillbuf(self):
         self.charpos = 0
         return
 
-    def nextline(self):
+    def nextline(self) -> Tuple[int, bytes]:
         """Fetches a next line that ends either with \\r or \\n.
         """
         linebuf = b''
@@ -252,7 +264,7 @@ def nextline(self):
 
         return (linepos, linebuf)
 
-    def revreadlines(self):
+    def revreadlines(self) -> Iterator[bytes]:
         """Fetches a next line backword.
 
         This is used to locate the trailers at the end of a file.
@@ -277,7 +289,7 @@ def revreadlines(self):
                 buf = b''
         return
 
-    def _parse_main(self, s, i):
+    def _parse_main(self, s: bytes, i: int) -> int:
         m = NONSPC.search(s, i)
         if not m:
             return len(s)
@@ -321,11 +333,11 @@ def _parse_main(self, s, i):
             self._add_token(KWD(c))
             return j+1
 
-    def _add_token(self, obj):
+    def _add_token(self, obj: PSBaseParserToken) -> None:
         self._tokens.append((self._curtokenpos, obj))
         return
 
-    def _parse_comment(self, s, i):
+    def _parse_comment(self, s: bytes, i: int) -> int:
         m = EOL.search(s, i)
         if not m:
             self._curtoken += s[i:]
@@ -337,7 +349,7 @@ def _parse_comment(self, s, i):
         # self._tokens.append(self._curtoken)
         return j
 
-    def _parse_literal(self, s, i):
+    def _parse_literal(self, s: bytes, i: int) -> int:
         m = END_LITERAL.search(s, i)
         if not m:
             self._curtoken += s[i:]
@@ -350,14 +362,14 @@ def _parse_literal(self, s, i):
             self._parse1 = self._parse_literal_hex
             return j+1
         try:
-            self._curtoken = str(self._curtoken, 'utf-8')
+            name: Union[str, bytes] = str(self._curtoken, 'utf-8')
         except Exception:
-            pass
-        self._add_token(LIT(self._curtoken))
+            name = self._curtoken
+        self._add_token(LIT(name))
         self._parse1 = self._parse_main
         return j
 
-    def _parse_literal_hex(self, s, i):
+    def _parse_literal_hex(self, s: bytes, i: int) -> int:
         c = s[i:i+1]
         if HEX.match(c) and len(self.hex) < 2:
             self.hex += c
@@ -367,7 +379,7 @@ def _parse_literal_hex(self, s, i):
         self._parse1 = self._parse_literal
         return i
 
-    def _parse_number(self, s, i):
+    def _parse_number(self, s: bytes, i: int) -> int:
         m = END_NUMBER.search(s, i)
         if not m:
             self._curtoken += s[i:]
@@ -386,7 +398,7 @@ def _parse_number(self, s, i):
         self._parse1 = self._parse_main
         return j
 
-    def _parse_float(self, s, i):
+    def _parse_float(self, s: bytes, i: int) -> int:
         m = END_NUMBER.search(s, i)
         if not m:
             self._curtoken += s[i:]
@@ -400,7 +412,7 @@ def _parse_float(self, s, i):
         self._parse1 = self._parse_main
         return j
 
-    def _parse_keyword(self, s, i):
+    def _parse_keyword(self, s: bytes, i: int) -> int:
         m = END_KEYWORD.search(s, i)
         if not m:
             self._curtoken += s[i:]
@@ -408,7 +420,7 @@ def _parse_keyword(self, s, i):
         j = m.start(0)
         self._curtoken += s[i:j]
         if self._curtoken == b'true':
-            token = True
+            token: Union[bool, PSKeyword] = True
         elif self._curtoken == b'false':
             token = False
         else:
@@ -417,7 +429,7 @@ def _parse_keyword(self, s, i):
         self._parse1 = self._parse_main
         return j
 
-    def _parse_string(self, s, i):
+    def _parse_string(self, s: bytes, i: int) -> int:
         m = END_STRING.search(s, i)
         if not m:
             self._curtoken += s[i:]
@@ -443,7 +455,7 @@ def _parse_string(self, s, i):
         self._parse1 = self._parse_main
         return j+1
 
-    def _parse_string_1(self, s, i):
+    def _parse_string_1(self, s: bytes, i: int) -> int:
         """Parse literal strings
 
         PDF Reference 3.2.3
@@ -470,7 +482,7 @@ def _parse_string_1(self, s, i):
         self._parse1 = self._parse_string
         return i+1
 
-    def _parse_wopen(self, s, i):
+    def _parse_wopen(self, s: bytes, i: int) -> int:
         c = s[i:i+1]
         if c == b'<':
             self._add_token(KEYWORD_DICT_BEGIN)
@@ -480,7 +492,7 @@ def _parse_wopen(self, s, i):
             self._parse1 = self._parse_hexstring
         return i
 
-    def _parse_wclose(self, s, i):
+    def _parse_wclose(self, s: bytes, i: int) -> int:
         c = s[i:i+1]
         if c == b'>':
             self._add_token(KEYWORD_DICT_END)
@@ -488,7 +500,7 @@ def _parse_wclose(self, s, i):
         self._parse1 = self._parse_main
         return i
 
-    def _parse_hexstring(self, s, i):
+    def _parse_hexstring(self, s: bytes, i: int) -> int:
         m = END_HEX_STRING.search(s, i)
         if not m:
             self._curtoken += s[i:]
@@ -501,7 +513,7 @@ def _parse_hexstring(self, s, i):
         self._parse1 = self._parse_main
         return j
 
-    def nexttoken(self):
+    def nexttoken(self) -> Tuple[int, PSBaseParserToken]:
         while not self._tokens:
             self.fillbuf()
             self.charpos = self._parse1(self.buf, self.charpos)
@@ -510,39 +522,51 @@ def nexttoken(self):
         return token
 
 
-class PSStackParser(PSBaseParser):
-    def __init__(self, fp):
+# Stack slots may be occupied by any of:
+#  * the PSBaseParserToken types
+#  * list (via KEYWORD_ARRAY)
+#  * dict (via KEYWORD_DICT)
+#  * subclass-specific extensions (e.g. PDFStream, PDFObjRef) via ExtraT
+ExtraT = TypeVar("ExtraT")
+PSStackType = Union[float, bool, PSLiteral, bytes, List, Dict, ExtraT]
+PSStackEntry = Tuple[int, PSStackType[ExtraT]]
+
+
+class PSStackParser(PSBaseParser, Generic[ExtraT]):
+
+    def __init__(self, fp: BinaryIO) -> None:
         PSBaseParser.__init__(self, fp)
         self.reset()
         return
 
-    def reset(self):
-        self.context = []
-        self.curtype = None
-        self.curstack = []
-        self.results = []
+    def reset(self) -> None:
+        self.context: List[Tuple[int, Optional[str],
+                           List[PSStackEntry[ExtraT]]]] = []
+        self.curtype: Optional[str] = None
+        self.curstack: List[PSStackEntry[ExtraT]] = []
+        self.results: List[PSStackEntry[ExtraT]] = []
         return
 
-    def seek(self, pos):
+    def seek(self, pos: int) -> None:
         PSBaseParser.seek(self, pos)
         self.reset()
         return
 
-    def push(self, *objs):
+    def push(self, *objs: PSStackEntry[ExtraT]) -> None:
         self.curstack.extend(objs)
         return
 
-    def pop(self, n):
+    def pop(self, n: int) -> List[PSStackEntry[ExtraT]]:
         objs = self.curstack[-n:]
         self.curstack[-n:] = []
         return objs
 
-    def popall(self):
+    def popall(self) -> List[PSStackEntry[ExtraT]]:
         objs = self.curstack
         self.curstack = []
         return objs
 
-    def add_results(self, *objs):
+    def add_results(self, *objs: PSStackEntry[ExtraT]) -> None:
         try:
             log.debug('add_results: %r', objs)
         except Exception:
@@ -550,13 +574,13 @@ def add_results(self, *objs):
         self.results.extend(objs)
         return
 
-    def start_type(self, pos, type):
+    def start_type(self, pos: int, type: str) -> None:
         self.context.append((pos, self.curtype, self.curstack))
         (self.curtype, self.curstack) = (type, [])
         log.debug('start_type: pos=%r, type=%r', pos, type)
         return
 
-    def end_type(self, type):
+    def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]:
         if self.curtype != type:
             raise PSTypeError('Type mismatch: {!r} != {!r}'
                               .format(self.curtype, type))
@@ -565,10 +589,10 @@ def end_type(self, type):
         log.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs)
         return (pos, objs)
 
-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
         return
 
-    def nextobject(self):
+    def nextobject(self) -> PSStackEntry[ExtraT]:
         """Yields a list of objects.
 
         Arrays and dictionaries are represented as Python lists and
diff --git a/pdfminer/runlength.py b/pdfminer/runlength.py
index f8ea228d..b79e18e6 100644
--- a/pdfminer/runlength.py
+++ b/pdfminer/runlength.py
@@ -6,7 +6,7 @@
 #
 
 
-def rldecode(data):
+def rldecode(data: bytes) -> bytes:
     """
     RunLength decoder (Adobe version) implementation based on PDF Reference
     version 1.4 section 3.3.4:
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
index 4aabb52d..a5cf0334 100644
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@@ -4,8 +4,15 @@
 import io
 import pathlib
 import struct
+from typing import (Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator,
+                    List, Optional, Set, TextIO, Tuple, TypeVar, Union,
+                    TYPE_CHECKING, cast)
+from typing_extensions import Literal
 from html import escape
 
+if TYPE_CHECKING:
+    from .layout import LTComponent
+
 import chardet  # For str encoding detection
 
 # from sys import maxint as INF doesn't work anymore under Python3, but PDF
@@ -13,40 +20,54 @@
 INF = (1 << 31) - 1
 
 
+FileOrName = Union[pathlib.PurePath, str, io.IOBase]
+AnyIO = Union[TextIO, BinaryIO]
+
+
 class open_filename(object):
     """
     Context manager that allows opening a filename
     (str or pathlib.PurePath type is supported) and closes it on exit,
     (just like `open`), but does nothing for file-like objects.
     """
-    def __init__(self, filename, *args, **kwargs):
+    def __init__(
+        self,
+        filename: FileOrName,
+        *args: Any,
+        **kwargs: Any
+    ) -> None:
         if isinstance(filename, pathlib.PurePath):
             filename = str(filename)
         if isinstance(filename, str):
-            self.file_handler = open(filename, *args, **kwargs)
+            self.file_handler: AnyIO = open(filename, *args, **kwargs)
             self.closing = True
         elif isinstance(filename, io.IOBase):
-            self.file_handler = filename
+            self.file_handler = cast(AnyIO, filename)
             self.closing = False
         else:
             raise TypeError('Unsupported input type: %s' % type(filename))
 
-    def __enter__(self):
+    def __enter__(self) -> AnyIO:
         return self.file_handler
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(
+        self,
+        exc_type: object,
+        exc_val: object,
+        exc_tb: object
+    ) -> Literal[False]:
         if self.closing:
             self.file_handler.close()
         return False
 
 
-def make_compat_bytes(in_str):
+def make_compat_bytes(in_str: str) -> bytes:
     "Converts to bytes, encoding to unicode."
     assert isinstance(in_str, str), str(type(in_str))
     return in_str.encode()
 
 
-def make_compat_str(o):
+def make_compat_str(o: object) -> str:
     """Converts everything to string, if bytes guessing the encoding."""
     if isinstance(o, bytes):
         enc = chardet.detect(o)
@@ -55,7 +76,7 @@ def make_compat_str(o):
         return str(o)
 
 
-def shorten_str(s, size):
+def shorten_str(s: str, size: int) -> str:
     if size < 7:
         return s[:size]
     if len(s) > size:
@@ -65,8 +86,11 @@ def shorten_str(s, size):
         return s
 
 
-def compatible_encode_method(bytesorstring, encoding='utf-8',
-                             erraction='ignore'):
+def compatible_encode_method(
+    bytesorstring: Union[bytes, str],
+    encoding: str = 'utf-8',
+    erraction: str = 'ignore'
+) -> str:
     """When Py2 str.encode is called, it often means bytes.encode in Py3.
 
      This does either.
@@ -77,7 +101,7 @@ def compatible_encode_method(bytesorstring, encoding='utf-8',
     return bytesorstring.decode(encoding, erraction)
 
 
-def paeth_predictor(left, above, upper_left):
+def paeth_predictor(left: int, above: int, upper_left: int) -> int:
     # From http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html
     # Initial estimate
     p = left + above - upper_left
@@ -95,7 +119,13 @@ def paeth_predictor(left, above, upper_left):
         return upper_left
 
 
-def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
+def apply_png_predictor(
+    pred: int,
+    colors: int,
+    columns: int,
+    bitspercomponent: int,
+    data: bytes
+) -> bytes:
     """Reverse the effect of the PNG predictor
 
     Documentation: http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html
@@ -190,11 +220,20 @@ def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
     return buf
 
 
+Point = Tuple[float, float]
+Rect = Tuple[float, float, float, float]
+Matrix = Tuple[float, float, float, float, float, float]
+PathSegment = Union[
+    Tuple[str],                                             # Literal['h']
+    Tuple[str, float, float],                               # Literal['m', 'l']
+    Tuple[str, float, float, float, float],                 # Literal['v', 'y']
+    Tuple[str, float, float, float, float, float, float]]   # Literal['c']
+
 #  Matrix operations
-MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0)
+MATRIX_IDENTITY: Matrix = (1, 0, 0, 1, 0, 0)
 
 
-def mult_matrix(m1, m0):
+def mult_matrix(m1: Matrix, m0: Matrix) -> Matrix:
     (a1, b1, c1, d1, e1, f1) = m1
     (a0, b0, c0, d0, e0, f0) = m0
     """Returns the multiplication of two matrices."""
@@ -203,21 +242,21 @@ def mult_matrix(m1, m0):
             a0 * e1 + c0 * f1 + e0, b0 * e1 + d0 * f1 + f0)
 
 
-def translate_matrix(m, v):
+def translate_matrix(m: Matrix, v: Point) -> Matrix:
     """Translates a matrix by (x, y)."""
     (a, b, c, d, e, f) = m
     (x, y) = v
     return a, b, c, d, x * a + y * c + e, x * b + y * d + f
 
 
-def apply_matrix_pt(m, v):
+def apply_matrix_pt(m: Matrix, v: Point) -> Point:
     (a, b, c, d, e, f) = m
     (x, y) = v
     """Applies a matrix to a point."""
     return a * x + c * y + e, b * x + d * y + f
 
 
-def apply_matrix_norm(m, v):
+def apply_matrix_norm(m: Matrix, v: Point) -> Point:
     """Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))"""
     (a, b, c, d, e, f) = m
     (p, q) = v
@@ -226,11 +265,14 @@ def apply_matrix_norm(m, v):
 
 #  Utility functions
 
-def isnumber(x):
+def isnumber(x: object) -> bool:
     return isinstance(x, (int, float))
 
 
-def uniq(objs):
+_T = TypeVar('_T')
+
+
+def uniq(objs: Iterable[_T]) -> Iterator[_T]:
     """Eliminates duplicated elements."""
     done = set()
     for obj in objs:
@@ -241,7 +283,10 @@ def uniq(objs):
     return
 
 
-def fsplit(pred, objs):
+def fsplit(
+    pred: Callable[[_T], bool],
+    objs: Iterable[_T]
+) -> Tuple[List[_T], List[_T]]:
     """Split a list into two classes according to the predicate."""
     t = []
     f = []
@@ -253,14 +298,15 @@ def fsplit(pred, objs):
     return t, f
 
 
-def drange(v0, v1, d):
+def drange(v0: float, v1: float, d: int) -> range:
     """Returns a discrete range."""
     return range(int(v0) // d, int(v1 + d) // d)
 
 
-def get_bound(pts):
+def get_bound(pts: Iterable[Point]) -> Rect:
     """Compute a minimal rectangle that covers all the points."""
-    (x0, y0, x1, y1) = (INF, INF, -INF, -INF)
+    limit: Rect = (INF, INF, -INF, -INF)
+    (x0, y0, x1, y1) = limit
     for (x, y) in pts:
         x0 = min(x0, x)
         y0 = min(y0, y)
@@ -269,7 +315,11 @@ def get_bound(pts):
     return x0, y0, x1, y1
 
 
-def pick(seq, func, maxobj=None):
+def pick(
+    seq: Iterable[_T],
+    func: Callable[[_T], float],
+    maxobj: Optional[_T] = None
+) -> Optional[_T]:
     """Picks the object obj where func(obj) has the highest value."""
     maxscore = None
     for obj in seq:
@@ -279,7 +329,7 @@ def pick(seq, func, maxobj=None):
     return maxobj
 
 
-def choplist(n, seq):
+def choplist(n: int, seq: Iterable[_T]) -> Iterator[Tuple[_T, ...]]:
     """Groups every n elements of the list."""
     r = []
     for x in seq:
@@ -290,7 +340,7 @@ def choplist(n, seq):
     return
 
 
-def nunpack(s, default=0):
+def nunpack(s: bytes, default: int = 0) -> int:
     """Unpacks 1 to 4 or 8 byte integers (big endian)."""
     length = len(s)
     if not length:
@@ -298,13 +348,13 @@ def nunpack(s, default=0):
     elif length == 1:
         return ord(s)
     elif length == 2:
-        return struct.unpack('>H', s)[0]
+        return cast(int, struct.unpack('>H', s)[0])
     elif length == 3:
-        return struct.unpack('>L', b'\x00' + s)[0]
+        return cast(int, struct.unpack('>L', b'\x00' + s)[0])
     elif length == 4:
-        return struct.unpack('>L', s)[0]
+        return cast(int, struct.unpack('>L', s)[0])
     elif length == 8:
-        return struct.unpack('>Q', s)[0]
+        return cast(int, struct.unpack('>Q', s)[0])
     else:
         raise TypeError('invalid length: %d' % length)
 
@@ -345,7 +395,7 @@ def nunpack(s, default=0):
 ))
 
 
-def decode_text(s):
+def decode_text(s: bytes) -> str:
     """Decodes a PDFDocEncoding string to Unicode."""
     if s.startswith(b'\xfe\xff'):
         return str(s[2:], 'utf-16be', 'ignore')
@@ -353,25 +403,25 @@ def decode_text(s):
         return ''.join(PDFDocEncoding[c] for c in s)
 
 
-def enc(x):
+def enc(x: str) -> str:
     """Encodes a string for SGML/XML/HTML"""
     if isinstance(x, bytes):
         return ''
     return escape(x)
 
 
-def bbox2str(bbox):
+def bbox2str(bbox: Rect) -> str:
     (x0, y0, x1, y1) = bbox
     return '{:.3f},{:.3f},{:.3f},{:.3f}'.format(x0, y0, x1, y1)
 
 
-def matrix2str(m):
+def matrix2str(m: Matrix) -> str:
     (a, b, c, d, e, f) = m
     return '[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]'\
         .format(a, b, c, d, e, f)
 
 
-def vecBetweenBoxes(obj1, obj2):
+def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
     """A distance function between two TextBoxes.
 
     Consider the bounding rectangle for obj1 and obj2.
@@ -397,7 +447,10 @@ def vecBetweenBoxes(obj1, obj2):
         return max(0, iw), max(0, ih)
 
 
-class Plane:
+LTComponentT = TypeVar('LTComponentT', bound='LTComponent')
+
+
+class Plane(Generic[LTComponentT]):
     """A set-like data structure for objects placed on a plane.
 
     Can efficiently find objects in a certain rectangular area.
@@ -405,26 +458,26 @@ class Plane:
     which is sorted by its x or y coordinate.
     """
 
-    def __init__(self, bbox, gridsize=50):
-        self._seq = []  # preserve the object order.
-        self._objs = set()
-        self._grid = {}
+    def __init__(self, bbox: Rect, gridsize: int = 50) -> None:
+        self._seq: List[LTComponentT] = []  # preserve the object order.
+        self._objs: Set[LTComponentT] = set()
+        self._grid: Dict[Point, List[LTComponentT]] = {}
         self.gridsize = gridsize
         (self.x0, self.y0, self.x1, self.y1) = bbox
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return '<Plane objs=%r>' % list(self)
 
-    def __iter__(self):
+    def __iter__(self) -> Iterator[LTComponentT]:
         return (obj for obj in self._seq if obj in self._objs)
 
-    def __len__(self):
+    def __len__(self) -> int:
         return len(self._objs)
 
-    def __contains__(self, obj):
+    def __contains__(self, obj: object) -> bool:
         return obj in self._objs
 
-    def _getrange(self, bbox):
+    def _getrange(self, bbox: Rect) -> Iterator[Point]:
         (x0, y0, x1, y1) = bbox
         if x1 <= self.x0 or self.x1 <= x0 or y1 <= self.y0 or self.y1 <= y0:
             return
@@ -436,15 +489,15 @@ def _getrange(self, bbox):
             for grid_x in drange(x0, x1, self.gridsize):
                 yield (grid_x, grid_y)
 
-    def extend(self, objs):
+    def extend(self, objs: Iterable[LTComponentT]) -> None:
         for obj in objs:
             self.add(obj)
 
-    def add(self, obj):
+    def add(self, obj: LTComponentT) -> None:
         """place an object."""
         for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
             if k not in self._grid:
-                r = []
+                r: List[LTComponentT] = []
                 self._grid[k] = r
             else:
                 r = self._grid[k]
@@ -452,7 +505,7 @@ def add(self, obj):
         self._seq.append(obj)
         self._objs.add(obj)
 
-    def remove(self, obj):
+    def remove(self, obj: LTComponentT) -> None:
         """displace an object."""
         for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
             try:
@@ -461,7 +514,7 @@ def remove(self, obj):
                 pass
         self._objs.remove(obj)
 
-    def find(self, bbox):
+    def find(self, bbox: Rect) -> Iterator[LTComponentT]:
         """finds objects that are in a certain area."""
         (x0, y0, x1, y1) = bbox
         done = set()
diff --git a/setup.py b/setup.py
index 941a3548..ce94b5f0 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
         'cryptography',
     ],
     extras_require={
-        "dev": ["nose", "tox"],
+        "dev": ["nose", "tox", "mypy == 0.910"],
         "docs": ["sphinx", "sphinx-argparse"],
     },
     description='PDF parser and analyzer',
diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py
index 8fcb7691..df1dc25e 100644
--- a/tests/test_tools_dumppdf.py
+++ b/tests/test_tools_dumppdf.py
@@ -1,5 +1,5 @@
 import warnings
-
+from nose.tools import raises
 from helpers import absolute_sample_path
 from tempfilepath import TemporaryFilePath
 from pdfminer.pdfdocument import PDFNoValidXRefWarning
@@ -51,3 +51,13 @@ def test_5(self):
 
     def test_6(self):
         run('nonfree/naacl06-shinyama.pdf', '-t -a')
+
+    @raises(TypeError)
+    def test_simple1_raw(self):
+        """Known issue: crash in dumpxml writing binary to text stream."""
+        run('simple1.pdf', '-r -a')
+
+    @raises(TypeError)
+    def test_simple1_binary(self):
+        """Known issue: crash in dumpxml writing binary to text stream."""
+        run('simple1.pdf', '-b -a')
diff --git a/tools/conv_afm.py b/tools/conv_afm.py
index 32cea90c..07f7ebfe 100755
--- a/tools/conv_afm.py
+++ b/tools/conv_afm.py
@@ -42,4 +42,4 @@ def main(argv):
 
 
 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
diff --git a/tools/conv_cmap.py b/tools/conv_cmap.py
index 3f782c8d..7ce0aef1 100755
--- a/tools/conv_cmap.py
+++ b/tools/conv_cmap.py
@@ -199,4 +199,4 @@ def usage():
 
 
 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
diff --git a/tools/conv_glyphlist.py b/tools/conv_glyphlist.py
index f94dcc8c..dc65f509 100755
--- a/tools/conv_glyphlist.py
+++ b/tools/conv_glyphlist.py
@@ -24,4 +24,4 @@ def main(argv):
 
 
 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
diff --git a/tools/dumppdf.py b/tools/dumppdf.py
index 8724c815..ffdf4241 100755
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@@ -4,6 +4,8 @@
 import os.path
 import re
 import sys
+from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
+    Union, cast
 import warnings
 from argparse import ArgumentParser
 
@@ -22,13 +24,15 @@
 ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
 
 
-def escape(s):
+def escape(s: Union[str, bytes]) -> str:
     if isinstance(s, bytes):
-        s = str(s, 'latin-1')
-    return ESC_PAT.sub(lambda m: '&#%d;' % ord(m.group(0)), s)
+        us = str(s, 'latin-1')
+    else:
+        us = s
+    return ESC_PAT.sub(lambda m: '&#%d;' % ord(m.group(0)), us)
 
 
-def dumpxml(out, obj, codec=None):
+def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None:
     if obj is None:
         out.write('<null />')
         return
@@ -51,15 +55,17 @@ def dumpxml(out, obj, codec=None):
         out.write('</list>')
         return
 
-    if isinstance(obj, ((str,), bytes)):
+    if isinstance(obj, (str, bytes)):
         out.write('<string size="%d">%s</string>' % (len(obj), escape(obj)))
         return
 
     if isinstance(obj, PDFStream):
         if codec == 'raw':
-            out.write(obj.get_rawdata())
+            # Bug: writing bytes to text I/O. This will raise TypeError.
+            out.write(obj.get_rawdata())  # type: ignore [arg-type]
         elif codec == 'binary':
-            out.write(obj.get_data())
+            # Bug: writing bytes to text I/O. This will raise TypeError.
+            out.write(obj.get_data())  # type: ignore [arg-type]
         else:
             out.write('<stream>\n<props>\n')
             dumpxml(out, obj.attrs)
@@ -76,11 +82,15 @@ def dumpxml(out, obj, codec=None):
         return
 
     if isinstance(obj, PSKeyword):
-        out.write('<keyword>%s</keyword>' % obj.name)
+        # Likely bug: obj.name is bytes, not str
+        out.write('<keyword>%s</keyword>'
+                  % obj.name)  # type: ignore [str-bytes-safe]
         return
 
     if isinstance(obj, PSLiteral):
-        out.write('<literal>%s</literal>' % obj.name)
+        # Likely bug: obj.name may be bytes, not str
+        out.write('<literal>%s</literal>'
+                  % obj.name)  # type: ignore [str-bytes-safe]
         return
 
     if isnumber(obj):
@@ -90,11 +100,15 @@ def dumpxml(out, obj, codec=None):
     raise TypeError(obj)
 
 
-def dumptrailers(out, doc, show_fallback_xref=False):
+def dumptrailers(
+    out: TextIO,
+    doc: PDFDocument,
+    show_fallback_xref: bool = False
+) -> None:
     for xref in doc.xrefs:
         if not isinstance(xref, PDFXRefFallback) or show_fallback_xref:
             out.write('<trailer>\n')
-            dumpxml(out, xref.trailer)
+            dumpxml(out, xref.get_trailer())
             out.write('\n</trailer>\n\n')
     no_xrefs = all(isinstance(xref, PDFXRefFallback) for xref in doc.xrefs)
     if no_xrefs and not show_fallback_xref:
@@ -105,7 +119,12 @@ def dumptrailers(out, doc, show_fallback_xref=False):
     return
 
 
-def dumpallobjs(out, doc, codec=None, show_fallback_xref=False):
+def dumpallobjs(
+    out: TextIO,
+    doc: PDFDocument,
+    codec: Optional[str] = None,
+    show_fallback_xref: bool = False
+) -> None:
     visited = set()
     out.write('<pdf>')
     for xref in doc.xrefs:
@@ -127,15 +146,23 @@ def dumpallobjs(out, doc, codec=None, show_fallback_xref=False):
     return
 
 
-def dumpoutline(outfp, fname, objids, pagenos, password='',
-                dumpall=False, codec=None, extractdir=None):
+def dumpoutline(
+    outfp: TextIO,
+    fname: str,
+    objids: Any,
+    pagenos: Container[int],
+    password: str = '',
+    dumpall: bool = False,
+    codec: Optional[str] = None,
+    extractdir: Optional[str] = None
+) -> None:
     fp = open(fname, 'rb')
     parser = PDFParser(fp)
     doc = PDFDocument(parser, password)
     pages = {page.pageid: pageno for (pageno, page)
              in enumerate(PDFPage.create_pages(doc), 1)}
 
-    def resolve_dest(dest):
+    def resolve_dest(dest: object) -> Any:
         if isinstance(dest, (str, bytes)):
             dest = resolve1(doc.get_dest(dest))
         elif isinstance(dest, PSLiteral):
@@ -183,10 +210,10 @@ def resolve_dest(dest):
 LITERAL_EMBEDDEDFILE = LIT('EmbeddedFile')
 
 
-def extractembedded(outfp, fname, objids, pagenos, password='',
-                    dumpall=False, codec=None, extractdir=None):
-    def extract1(objid, obj):
-        filename = os.path.basename(obj.get('UF') or obj.get('F').decode())
+def extractembedded(fname: str, password: str, extractdir: str) -> None:
+    def extract1(objid: int, obj: Dict[str, Any]) -> None:
+        filename = os.path.basename(obj.get('UF') or
+                                    cast(bytes, obj.get('F')).decode())
         fileref = obj['EF'].get('UF') or obj['EF'].get('F')
         fileobj = doc.getobj(fileref.objid)
         if not isinstance(fileobj, PDFStream):
@@ -221,8 +248,17 @@ def extract1(objid, obj):
     return
 
 
-def dumppdf(outfp, fname, objids, pagenos, password='', dumpall=False,
-            codec=None, extractdir=None, show_fallback_xref=False):
+def dumppdf(
+    outfp: TextIO,
+    fname: str,
+    objids: Iterable[int],
+    pagenos: Container[int],
+    password: str = '',
+    dumpall: bool = False,
+    codec: Optional[str] = None,
+    extractdir: Optional[str] = None,
+    show_fallback_xref: bool = False
+) -> None:
     fp = open(fname, 'rb')
     parser = PDFParser(fp)
     doc = PDFDocument(parser, password)
@@ -249,7 +285,7 @@ def dumppdf(outfp, fname, objids, pagenos, password='', dumpall=False,
     return
 
 
-def create_parser():
+def create_parser() -> ArgumentParser:
     parser = ArgumentParser(description=__doc__, add_help=True)
     parser.add_argument('files', type=str, default=None, nargs='+',
                         help='One or more paths to PDF files.')
@@ -313,7 +349,7 @@ def create_parser():
     return parser
 
 
-def main(argv=None):
+def main(argv: Optional[List[str]] = None) -> None:
     parser = create_parser()
     args = parser.parse_args(args=argv)
 
@@ -340,7 +376,7 @@ def main(argv=None):
     password = args.password
 
     if args.raw_stream:
-        codec = 'raw'
+        codec: Optional[str] = 'raw'
     elif args.binary_stream:
         codec = 'binary'
     elif args.text_stream:
@@ -356,8 +392,7 @@ def main(argv=None):
             )
         elif args.extract_embedded:
             extractembedded(
-                outfp, fname, objids, pagenos, password=password,
-                dumpall=args.all, codec=codec, extractdir=args.extract_embedded
+                fname, password=password, extractdir=args.extract_embedded
             )
         else:
             dumppdf(
@@ -370,4 +405,4 @@ def main(argv=None):
 
 
 if __name__ == '__main__':
-    sys.exit(main())
+    main()
diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py
index dcaef0e6..47e2c79d 100755
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@@ -4,9 +4,12 @@
 import argparse
 import logging
 import sys
+from typing import Any, Container, Iterable, List, Optional, Union
+from typing_extensions import Literal
 
 import pdfminer.high_level
-import pdfminer.layout
+from pdfminer.layout import LAParams
+from pdfminer.utils import AnyIO
 
 logging.basicConfig()
 
@@ -15,24 +18,42 @@
                 (".xml", "xml"),
                 (".tag", "tag"))
 
+FloatOrDisabled = Union[float, Literal["disabled"]]
 
-def float_or_disabled(x):
+
+def float_or_disabled(x: str) -> FloatOrDisabled:
     if x.lower().strip() == "disabled":
-        return x
+        return "disabled"
     try:
-        x = float(x)
+        return float(x)
     except ValueError:
         raise argparse.ArgumentTypeError("invalid float value: {}".format(x))
 
 
-def extract_text(files=[], outfile='-',
-                 no_laparams=False, all_texts=None, detect_vertical=None,
-                 word_margin=None, char_margin=None, line_margin=None,
-                 boxes_flow=None, output_type='text', codec='utf-8',
-                 strip_control=False, maxpages=0, page_numbers=None,
-                 password="", scale=1.0, rotation=0, layoutmode='normal',
-                 output_dir=None, debug=False, disable_caching=False,
-                 **kwargs):
+def extract_text(
+    files: Iterable[str] = [],
+    outfile: str = '-',
+    no_laparams: bool = False,
+    all_texts: Optional[bool] = None,
+    detect_vertical: Optional[bool] = None,
+    word_margin: Optional[float] = None,
+    char_margin: Optional[float] = None,
+    line_margin: Optional[float] = None,
+    boxes_flow: Optional[FloatOrDisabled] = None,
+    output_type: str = 'text',
+    codec: str = 'utf-8',
+    strip_control: bool = False,
+    maxpages: int = 0,
+    page_numbers: Optional[Container[int]] = None,
+    password: str = "",
+    scale: float = 1.0,
+    rotation: int = 0,
+    layoutmode: str = 'normal',
+    output_dir: Optional[str] = None,
+    debug: bool = False,
+    disable_caching: bool = False,
+    **kwargs: Any
+) -> AnyIO:
     if not files:
         raise ValueError("Must provide files to work upon!")
 
@@ -40,7 +61,7 @@ def extract_text(files=[], outfile='-',
     # create an LAParams object and
     # populate with given args. Otherwise, set it to None.
     if not no_laparams:
-        laparams = pdfminer.layout.LAParams()
+        laparams: Optional[LAParams] = LAParams()
         for param in ("all_texts", "detect_vertical", "word_margin",
                       "char_margin", "line_margin", "boxes_flow"):
             paramv = locals().get(param, None)
@@ -55,8 +76,8 @@ def extract_text(files=[], outfile='-',
                 output_type = alttype
 
     if outfile == "-":
-        outfp = sys.stdout
-        if outfp.encoding is not None:
+        outfp: AnyIO = sys.stdout
+        if sys.stdout.encoding is not None:
             codec = 'utf-8'
     else:
         outfp = open(outfile, "wb")
@@ -67,7 +88,7 @@ def extract_text(files=[], outfile='-',
     return outfp
 
 
-def maketheparser():
+def maketheparser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(description=__doc__, add_help=True)
     parser.add_argument(
         "files", type=str, default=None, nargs="+",
@@ -180,7 +201,7 @@ def maketheparser():
 # main
 
 
-def main(args=None):
+def main(args: Optional[List[str]] = None) -> int:
 
     P = maketheparser()
     A = P.parse_args(args=args)
diff --git a/tools/pdfdiff.py b/tools/pdfdiff.py
index 68478fb7..1be0723a 100644
--- a/tools/pdfdiff.py
+++ b/tools/pdfdiff.py
@@ -6,6 +6,7 @@
 import io
 import logging
 import sys
+from typing import Any, Iterable, List, Optional
 
 import pdfminer.settings
 from pdfminer import high_level, layout
@@ -16,7 +17,7 @@
 logging.basicConfig()
 
 
-def compare(file1, file2, **kwargs):
+def compare(file1: str, file2: str, **kwargs: Any) -> Iterable[str]:
     # If any LAParams group arguments were passed,
     # create an LAParams object and
     # populate with given args. Otherwise, set it to None.
@@ -26,7 +27,7 @@ def compare(file1, file2, **kwargs):
                       "char_margin", "line_margin", "boxes_flow"):
             paramv = kwargs.get(param, None)
             if paramv is not None:
-                laparams[param] = paramv
+                setattr(laparams, param, paramv)
         kwargs['laparams'] = laparams
 
     s1 = io.StringIO()
@@ -40,20 +41,20 @@ def compare(file1, file2, **kwargs):
     import difflib
     s1.seek(0)
     s2.seek(0)
-    s1, s2 = s1.readlines(), s2.readlines()
+    s1_lines, s2_lines = s1.readlines(), s2.readlines()
 
     import os.path
     try:
         extension = os.path.splitext(kwargs['outfile'])[1][1:4]
         if extension.lower() == 'htm':
-            return difflib.HtmlDiff().make_file(s1, s2)
+            return difflib.HtmlDiff().make_file(s1_lines, s2_lines)
     except KeyError:
         pass
-    return difflib.unified_diff(s1, s2, n=kwargs['context_lines'])
+    return difflib.unified_diff(s1_lines, s2_lines, n=kwargs['context_lines'])
 
 
 # main
-def main(args=None):
+def main(args: Optional[List[str]] = None) -> int:
     import argparse
     P = argparse.ArgumentParser(description=__doc__)
     P.add_argument("file1", type=str, default=None, help="File 1 to compare.")
diff --git a/tools/pdfstats.py b/tools/pdfstats.py
index 943574d8..9bf34720 100755
--- a/tools/pdfstats.py
+++ b/tools/pdfstats.py
@@ -7,10 +7,11 @@
 import sys
 import os
 import collections
+from typing import Any, Counter, Iterator, List
 
 from pdfminer.pdfparser import PDFParser
-from pdfminer.pdfdocument import PDFDocument
-from pdfminer.pdfpage import PDFPage, PDFTextExtractionNotAllowed
+from pdfminer.pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
+from pdfminer.pdfpage import PDFPage
 from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
 from pdfminer.converter import PDFPageAggregator
 from pdfminer.layout import LAParams, LTContainer
@@ -19,18 +20,18 @@
 _, SCRIPT = os.path.split(__file__)
 
 
-def msg(*args, **kwargs):
+def msg(*args: object, **kwargs: Any) -> None:
     print(' '.join(map(str, args)), **kwargs)  # noqa E999
 
 
-def flat_iter(obj):
+def flat_iter(obj: object) -> Iterator[object]:
     yield obj
     if isinstance(obj, LTContainer):
         for ob in obj:
             yield from flat_iter(ob)
 
 
-def main(args):
+def main(args: List[str]) -> int:
     msg(SCRIPT, args)
 
     if len(args) != 1:
@@ -40,7 +41,7 @@ def main(args):
 
     infilename, = args
 
-    lt_types = collections.Counter()
+    lt_types: Counter[str] = collections.Counter()
 
     with open(infilename, 'rb') as pdf_file:
 
@@ -77,6 +78,8 @@ def main(args):
     msg('page_count', page_count)
     msg('lt_types:', ' '.join('{}:{}'.format(*tc) for tc in lt_types.items()))
 
+    return 0
+
 
 if __name__ == '__main__':
     sys.exit(main(sys.argv[1:]))
diff --git a/tools/prof.py b/tools/prof.py
index 1654a985..18803a7c 100644
--- a/tools/prof.py
+++ b/tools/prof.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 import sys
+from typing import List
 
 
-def prof_main(argv):
-    import hotshot.stats
+def prof_main(argv: List[str]) -> int:
+    import hotshot.stats  # type: ignore[import]
 
-    def usage():
+    def usage() -> int:
         print('usage: %s module.function [args ...]' % argv[0])
         return 100
     args = argv[1:]
@@ -15,19 +16,24 @@ def usage():
     prof = name+'.prof'
     i = name.rindex('.')
     (modname, funcname) = (name[:i], name[i+1:])
-    module = __import__(modname, fromlist=1)
+
+    # Type error: fromlist expects a sequence of strings. Presumably the
+    # intent is to return the named module rather than its top-level package
+    # (__import__ docs: "when a non-empty fromlist argument is given...").
+    module = __import__(modname, fromlist=1)  # type: ignore[arg-type]
+
     func = getattr(module, funcname)
     if args:
         args.insert(0, argv[0])
-        prof = hotshot.Profile(prof)
-        prof.runcall(lambda: func(args))
-        prof.close()
+        profile = hotshot.Profile(prof)
+        profile.runcall(lambda: func(args))
+        profile.close()
     else:
         stats = hotshot.stats.load(prof)
         stats.strip_dirs()
         stats.sort_stats('time', 'calls')
         stats.print_stats(1000)
-    return
+    return 0
 
 
 if __name__ == '__main__':
diff --git a/tox.ini b/tox.ini
index 1908d96d..2a25d505 100644
--- a/tox.ini
+++ b/tox.ini
@@ -9,6 +9,7 @@ whitelist_externals =
     flake8
 commands =
     flake8 pdfminer/ tools/ tests/ --count --statistics
+    mypy --install-types --non-interactive --show-error-codes .
     nosetests --nologcapture
     python -m sphinx -b html docs/source docs/build/html
     python -m sphinx -b doctest docs/source docs/build/doctest