Skip to content

Commit

Permalink
WIP on type annotations
Browse files (browse the repository at this point in the history)
With the possible exception of psparser.py, the type annotations are far from complete.

$ mypy pdfminer
pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
  • Loading branch information
0xabu committed Aug 20, 2021
1 parent 19c1372 commit 17d59f4
Show file tree
Hide file tree
Showing 10 changed files with 270 additions and 221 deletions.
11 changes: 6 additions & 5 deletions pdfminer/cmapdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pickle as pickle
import struct
import logging
from typing import Any, Dict, List
from .psparser import PSStackParser
from .psparser import PSSyntaxError
from .psparser import PSEOF
Expand Down Expand Up @@ -209,14 +210,14 @@ def __init__(self, name, module, vertical):

class CMapDB:

_cmap_cache = {}
_umap_cache = {}
_cmap_cache: Dict[str, PyCMap] = {}
_umap_cache: Dict[str, List[PyUnicodeMap]] = {}

class CMapNotFound(CMapError):
pass

@classmethod
def _load_data(cls, name):
def _load_data(cls, name: str) -> Any:
name = name.replace("\0", "")
filename = '%s.pickle.gz' % name
log.info('loading: %r', name)
Expand All @@ -234,7 +235,7 @@ def _load_data(cls, name):
raise CMapDB.CMapNotFound(name)

@classmethod
def get_cmap(cls, name):
def get_cmap(cls, name: str) -> CMapBase:
if name == 'Identity-H':
return IdentityCMap(WMode=0)
elif name == 'Identity-V':
Expand All @@ -252,7 +253,7 @@ def get_cmap(cls, name):
return cmap

@classmethod
def get_unicode_map(cls, name, vertical=False):
def get_unicode_map(cls, name: str, vertical: bool = False) -> UnicodeMap:
try:
return cls._umap_cache[name][vertical]
except KeyError:
Expand Down
32 changes: 19 additions & 13 deletions pdfminer/converter.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import io
import logging
from pdfminer.pdftypes import PDFStream
from typing import List
from pdfminer.pdfpage import PDFPage
import re
import sys

from . import utils
from .layout import LTChar
from .layout import LTChar, LTLayoutContainer
from .layout import LTContainer
from .layout import LTCurve
from .layout import LTFigure
Expand All @@ -18,8 +21,9 @@
from .layout import LTTextGroup
from .layout import LTTextLine
from .pdfdevice import PDFTextDevice
from .pdffont import PDFUnicodeNotDefined
from .utils import apply_matrix_pt
from .pdffont import PDFFont, PDFUnicodeNotDefined
from .pdfinterp import PDFResourceManager
from .utils import Matrix, Rect, apply_matrix_pt
from .utils import bbox2str
from .utils import enc
from .utils import mult_matrix
Expand All @@ -28,23 +32,25 @@


class PDFLayoutAnalyzer(PDFTextDevice):
cur_item: LTLayoutContainer
ctm: Matrix

def __init__(self, rsrcmgr, pageno=1, laparams=None):
def __init__(self, rsrcmgr: PDFResourceManager, pageno=1, laparams=None):
PDFTextDevice.__init__(self, rsrcmgr)
self.pageno = pageno
self.laparams = laparams
self._stack = []
self._stack: List[LTLayoutContainer] = []
return

def begin_page(self, page, ctm):
def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
(x0, y0, x1, y1) = page.mediabox
(x0, y0) = apply_matrix_pt(ctm, (x0, y0))
(x1, y1) = apply_matrix_pt(ctm, (x1, y1))
mediabox = (0, 0, abs(x0-x1), abs(y0-y1))
self.cur_item = LTPage(self.pageno, mediabox)
return

def end_page(self, page):
def end_page(self, page: PDFPage) -> None:
assert not self._stack, str(len(self._stack))
assert isinstance(self.cur_item, LTPage), str(type(self.cur_item))
if self.laparams is not None:
Expand All @@ -53,19 +59,19 @@ def end_page(self, page):
self.receive_layout(self.cur_item)
return

def begin_figure(self, name, bbox, matrix):
def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
self._stack.append(self.cur_item)
self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
return

def end_figure(self, _):
def end_figure(self, _) -> None:
fig = self.cur_item
assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
self.cur_item = self._stack.pop()
self.cur_item.add(fig)
return

def render_image(self, name, stream):
def render_image(self, name: str, stream: PDFStream) -> None:
assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
item = LTImage(name, stream,
(self.cur_item.x0, self.cur_item.y0,
Expand Down Expand Up @@ -124,7 +130,7 @@ def paint_path(self, gstate, stroke, fill, evenodd, path):
gstate.scolor, gstate.ncolor)
self.cur_item.add(curve)

def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
def render_char(self, matrix: Matrix, font: PDFFont, fontsize, scaling, rise, cid, ncs,
graphicstate):
try:
text = font.to_unichr(cid)
Expand All @@ -138,11 +144,11 @@ def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
self.cur_item.add(item)
return item.adv

def handle_undefined_char(self, font, cid):
def handle_undefined_char(self, font: PDFFont, cid: int) -> str:
log.info('undefined: %r, %r', font, cid)
return '(cid:%d)' % cid

def receive_layout(self, ltpage):
def receive_layout(self, ltpage: LTPage) -> None:
return


Expand Down
Loading

0 comments on commit 17d59f4

Please sign in to comment.