Skip to content

Commit

Permalink
reformat all multi-line function defs to one-arg-per-line
Browse files Browse the repository at this point in the history
  • Loading branch information
0xabu committed Sep 7, 2021
1 parent 3fe2b69 commit eaab3c6
Show file tree
Hide file tree
Showing 15 changed files with 734 additions and 310 deletions.
186 changes: 126 additions & 60 deletions pdfminer/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,12 @@ class PDFLayoutAnalyzer(PDFTextDevice):
cur_item: LTLayoutContainer
ctm: Matrix

def __init__(self, rsrcmgr: PDFResourceManager, pageno: int = 1,
laparams: Optional[LAParams] = None) -> None:
def __init__(
self,
rsrcmgr: PDFResourceManager,
pageno: int = 1,
laparams: Optional[LAParams] = None
) -> None:
PDFTextDevice.__init__(self, rsrcmgr)
self.pageno = pageno
self.laparams = laparams
Expand Down Expand Up @@ -87,8 +91,14 @@ def render_image(self, name: str, stream: PDFStream) -> None:
self.cur_item.add(item)
return

def paint_path(self, gstate: PDFGraphicState, stroke: bool, fill: bool,
evenodd: bool, path: Sequence[PathSegment]) -> None:
def paint_path(
self,
gstate: PDFGraphicState,
stroke: bool,
fill: bool,
evenodd: bool,
path: Sequence[PathSegment]
) -> None:
"""Paint paths described in section 4.4 of the PDF reference manual"""
shape = ''.join(x[0] for x in path)

Expand Down Expand Up @@ -140,9 +150,17 @@ def paint_path(self, gstate: PDFGraphicState, stroke: bool, fill: bool,
gstate.scolor, gstate.ncolor)
self.cur_item.add(curve)

def render_char(self, matrix: Matrix, font: PDFFont, fontsize: float,
scaling: float, rise: float, cid: int, ncs: PDFColorSpace,
graphicstate: PDFGraphicState) -> float:
def render_char(
self,
matrix: Matrix,
font: PDFFont,
fontsize: float,
scaling: float,
rise: float,
cid: int,
ncs: PDFColorSpace,
graphicstate: PDFGraphicState
) -> float:
try:
text = font.to_unichr(cid)
assert isinstance(text, str), str(type(text))
Expand All @@ -164,10 +182,12 @@ def receive_layout(self, ltpage: LTPage) -> None:


class PDFPageAggregator(PDFLayoutAnalyzer):
def __init__(self,
rsrcmgr: PDFResourceManager,
pageno: int = 1,
laparams: Optional[LAParams] = None) -> None:
def __init__(
self,
rsrcmgr: PDFResourceManager,
pageno: int = 1,
laparams: Optional[LAParams] = None
) -> None:
PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
laparams=laparams)
self.result: Optional[LTPage] = None
Expand All @@ -187,12 +207,14 @@ def get_result(self) -> LTPage:


class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]):
def __init__(self,
rsrcmgr: PDFResourceManager,
outfp: IOType,
codec: str = 'utf-8',
pageno: int = 1,
laparams: Optional[LAParams] = None) -> None:
def __init__(
self,
rsrcmgr: PDFResourceManager,
outfp: IOType,
codec: str = 'utf-8',
pageno: int = 1,
laparams: Optional[LAParams] = None
) -> None:
PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
laparams=laparams)
self.outfp: IOType = outfp
Expand All @@ -216,14 +238,16 @@ def _is_binary_stream(outfp: AnyIO) -> bool:


class TextConverter(PDFConverter[AnyIO]):
def __init__(self,
rsrcmgr: PDFResourceManager,
outfp: AnyIO,
codec: str = 'utf-8',
pageno: int = 1,
laparams: Optional[LAParams] = None,
showpageno: bool = False,
imagewriter: Optional[ImageWriter] = None) -> None:
def __init__(
self,
rsrcmgr: PDFResourceManager,
outfp: AnyIO,
codec: str = 'utf-8',
pageno: int = 1,
laparams: Optional[LAParams] = None,
showpageno: bool = False,
imagewriter: Optional[ImageWriter] = None
) -> None:
super().__init__(rsrcmgr, outfp, codec=codec, pageno=pageno,
laparams=laparams)
self.showpageno = showpageno
Expand Down Expand Up @@ -265,8 +289,14 @@ def render_image(self, name: str, stream: PDFStream) -> None:
PDFConverter.render_image(self, name, stream)
return

def paint_path(self, gstate: PDFGraphicState, stroke: bool, fill: bool,
evenodd: bool, path: Sequence[PathSegment]) -> None:
def paint_path(
self,
gstate: PDFGraphicState,
stroke: bool,
fill: bool,
evenodd: bool,
path: Sequence[PathSegment]
) -> None:
return


Expand All @@ -285,21 +315,23 @@ class HTMLConverter(PDFConverter[AnyIO]):
'char': 'black',
}

def __init__(self,
rsrcmgr: PDFResourceManager,
outfp: AnyIO,
codec: str = 'utf-8',
pageno: int = 1,
laparams: Optional[LAParams] = None,
scale: float = 1,
fontscale: float = 1.0,
layoutmode: str = 'normal',
showpageno: bool = True,
pagemargin: int = 50,
imagewriter: Optional[ImageWriter] = None,
debug: int = 0,
rect_colors: Optional[Dict[str, str]] = None,
text_colors: Optional[Dict[str, str]] = None) -> None:
def __init__(
self,
rsrcmgr: PDFResourceManager,
outfp: AnyIO,
codec: str = 'utf-8',
pageno: int = 1,
laparams: Optional[LAParams] = None,
scale: float = 1,
fontscale: float = 1.0,
layoutmode: str = 'normal',
showpageno: bool = True,
pagemargin: int = 50,
imagewriter: Optional[ImageWriter] = None,
debug: int = 0,
rect_colors: Optional[Dict[str, str]] = None,
text_colors: Optional[Dict[str, str]] = None
) -> None:
PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
laparams=laparams)

Expand Down Expand Up @@ -360,8 +392,15 @@ def write_text(self, text: str) -> None:
self.write(enc(text))
return

def place_rect(self, color: str, borderwidth: int, x: float, y: float,
w: float, h: float) -> None:
def place_rect(
self,
color: str,
borderwidth: int,
x: float,
y: float,
w: float,
h: float
) -> None:
color2 = self.rect_colors.get(color)
if color2 is not None:
s = '<span style="position:absolute; border: %s %dpx solid; ' \
Expand All @@ -373,14 +412,25 @@ def place_rect(self, color: str, borderwidth: int, x: float, y: float,
s)
return

def place_border(self, color: str, borderwidth: int, item: LTComponent
) -> None:
def place_border(
self,
color: str,
borderwidth: int,
item: LTComponent
) -> None:
self.place_rect(color, borderwidth, item.x0, item.y1, item.width,
item.height)
return

def place_image(self, item: LTImage, borderwidth: int, x: float, y: float,
w: float, h: float) -> None:
def place_image(
self,
item: LTImage,
borderwidth: int,
x: float,
y: float,
w: float,
h: float
) -> None:
if self.imagewriter is not None:
name = self.imagewriter.export_image(item)
s = '<img src="%s" border="%d" style="position:absolute; ' \
Expand All @@ -391,8 +441,14 @@ def place_image(self, item: LTImage, borderwidth: int, x: float, y: float,
self.write(s)
return

def place_text(self, color: str, text: str, x: float, y: float, size: float
) -> None:
def place_text(
self,
color: str,
text: str,
x: float,
y: float,
size: float
) -> None:
color2 = self.text_colors.get(color)
if color2 is not None:
s = '<span style="position:absolute; color:%s; left:%dpx; ' \
Expand All @@ -404,8 +460,16 @@ def place_text(self, color: str, text: str, x: float, y: float, size: float
self.write('</span>\n')
return

def begin_div(self, color: str, borderwidth: int, x: float, y: float,
w: float, h: float, writing_mode: str = 'False') -> None:
def begin_div(
self,
color: str,
borderwidth: int,
x: float,
y: float,
w: float,
h: float,
writing_mode: str = 'False'
) -> None:
self._fontstack.append(self._font)
self._font = None
s = '<div style="position:absolute; border: %s %dpx solid; ' \
Expand Down Expand Up @@ -523,14 +587,16 @@ class XMLConverter(PDFConverter[AnyIO]):

CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')

def __init__(self,
rsrcmgr: PDFResourceManager,
outfp: AnyIO,
codec: str = 'utf-8',
pageno: int = 1,
laparams: Optional[LAParams] = None,
imagewriter: Optional[ImageWriter] = None,
stripcontrol: bool = False) -> None:
def __init__(
self,
rsrcmgr: PDFResourceManager,
outfp: AnyIO,
codec: str = 'utf-8',
pageno: int = 1,
laparams: Optional[LAParams] = None,
imagewriter: Optional[ImageWriter] = None,
stripcontrol: bool = False
) -> None:
PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
laparams=laparams)

Expand Down
7 changes: 5 additions & 2 deletions pdfminer/encodingdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,11 @@ class EncodingDB:
}

@classmethod
def get_encoding(cls, name: str, diff: Optional[Iterable[object]] = None
) -> Dict[int, str]:
def get_encoding(
cls,
name: str,
diff: Optional[Iterable[object]] = None
) -> Dict[int, str]:
cid2unicode = cls.encodings.get(name, cls.std2unicode)
if diff:
cid2unicode = cid2unicode.copy()
Expand Down
64 changes: 35 additions & 29 deletions pdfminer/high_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,24 @@
from .utils import open_filename, FileOrName, AnyIO


def extract_text_to_fp(inf: BinaryIO,
outfp: AnyIO,
output_type: str = 'text',
codec: str = 'utf-8',
laparams: Optional[LAParams] = None,
maxpages: int = 0,
page_numbers: Optional[Container[int]] = None,
password: str = "",
scale: float = 1.0,
rotation: int = 0,
layoutmode: str = 'normal',
output_dir: Optional[str] = None,
strip_control: bool = False,
debug: bool = False,
disable_caching: bool = False,
**kwargs: Any) -> None:
def extract_text_to_fp(
inf: BinaryIO,
outfp: AnyIO,
output_type: str = 'text',
codec: str = 'utf-8',
laparams: Optional[LAParams] = None,
maxpages: int = 0,
page_numbers: Optional[Container[int]] = None,
password: str = "",
scale: float = 1.0,
rotation: int = 0,
layoutmode: str = 'normal',
output_dir: Optional[str] = None,
strip_control: bool = False,
debug: bool = False,
disable_caching: bool = False,
**kwargs: Any
) -> None:
"""Parses text from inf-file and writes to outfp file-like object.
Takes loads of optional arguments but the defaults are somewhat sane.
Expand Down Expand Up @@ -109,13 +111,15 @@ def extract_text_to_fp(inf: BinaryIO,
device.close()


def extract_text(pdf_file: FileOrName,
password: str = '',
page_numbers: Optional[Container[int]] = None,
maxpages: int = 0,
caching: bool = True,
codec: str = 'utf-8',
laparams: Optional[LAParams] = None) -> str:
def extract_text(
pdf_file: FileOrName,
password: str = '',
page_numbers: Optional[Container[int]] = None,
maxpages: int = 0,
caching: bool = True,
codec: str = 'utf-8',
laparams: Optional[LAParams] = None
) -> str:
"""Parse and return the text contained in a PDF file.
:param pdf_file: Either a file path or a file-like object for the PDF file
Expand Down Expand Up @@ -151,12 +155,14 @@ def extract_text(pdf_file: FileOrName,
return output_string.getvalue()


def extract_pages(pdf_file: FileOrName,
password: str = '',
page_numbers: Optional[Container[int]] = None,
maxpages: int = 0,
caching: bool = True,
laparams: Optional[LAParams] = None) -> Iterator[LTPage]:
def extract_pages(
pdf_file: FileOrName,
password: str = '',
page_numbers: Optional[Container[int]] = None,
maxpages: int = 0,
caching: bool = True,
laparams: Optional[LAParams] = None
) -> Iterator[LTPage]:
"""Extract and yield LTPage objects
:param pdf_file: Either a file path or a file-like object for the PDF file
Expand Down
Loading

0 comments on commit eaab3c6

Please sign in to comment.