Skip to content

Commit

Permalink
some code cleaning
Browse files (browse the repository at this point in the history)
  • Loading branch information
JorjMcKie committed May 15, 2024
1 parent 7d8d41c commit fecacd3
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
14 changes: 10 additions & 4 deletions src/table.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1269,7 +1269,7 @@ def bbox_to_corners(bbox) -> tuple:
return _sorted


class CellGroup():
class CellGroup:
def __init__(self, cells):
self.cells = cells
self.bbox = (
Expand All @@ -1284,7 +1284,7 @@ class TableRow(CellGroup):
pass


class TableHeader():
class TableHeader:
"""PyMuPDF extension containing the identified table header."""

def __init__(self, bbox, cells, names, above):
Expand All @@ -1294,7 +1294,7 @@ def __init__(self, bbox, cells, names, above):
self.external = above


class Table():
class Table:
def __init__(self, page, cells):
self.page = page
self.cells = cells
Expand Down Expand Up @@ -1726,7 +1726,7 @@ def resolve(cls, settings=None):
raise ValueError(f"Cannot resolve settings: {settings}")


class TableFinder():
class TableFinder:
"""
Given a PDF page, find plausible table structures.
Expand Down Expand Up @@ -1772,6 +1772,8 @@ def get_edges(self) -> list:

if v_strat == "text" or h_strat == "text":
words = extract_words(CHARS, **(settings.text_settings or {}))
else:
words = []

v_explicit = []
for desc in settings.explicit_vertical_lines or []:
Expand Down Expand Up @@ -1799,6 +1801,8 @@ def get_edges(self) -> list:
v_base = words_to_edges_v(words, word_threshold=settings.min_words_vertical)
elif v_strat == "explicit":
v_base = []
else:
v_base = []

v = v_base + v_explicit

Expand Down Expand Up @@ -1830,6 +1834,8 @@ def get_edges(self) -> list:
)
elif h_strat == "explicit":
h_base = []
else:
h_base = []

h = h_base + h_explicit

Expand Down
1 change: 1 addition & 0 deletions src/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4034,6 +4034,7 @@ def pixlen(x):
text_t = text.splitlines() # split text in lines again
just_tab[-1] = False # never justify last line
for i, t in enumerate(text_t):
spacing = 0
pl = maxwidth - pixlen(t) # length of empty line part
pnt = point + c_pnt * (i * lheight_factor) # text start of line
if align == 1: # center: right shift by half width
Expand Down

0 comments on commit fecacd3

Please sign in to comment.