Skip to content

Commit

Permalink
initial support for parsing tables
Browse files Browse the repository at this point in the history
  • Loading branch information
karlicoss committed Nov 6, 2020
1 parent 3067189 commit 5e0921c
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 4 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ jobs:

- uses: actions/upload-artifact@v2
with:
name: .mypy-coverage_${{ matrix.platform }}_${{ matrix.python-version }}
path: .mypy-coverage/
name: .coverage.mypy_${{ matrix.platform }}_${{ matrix.python-version }}
path: .coverage.mypy/


pypi:
Expand Down
74 changes: 74 additions & 0 deletions orgparse/extra.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import re
from typing import List, Sequence, Dict, Iterator, Iterable, Union, Optional


# Matches a table separator (horizontal rule) line such as ``|---+---|``:
# optional leading whitespace, a pipe, runs of dashes joined by ``+``, and a
# closing pipe. Used with ``.match``, so it is anchored at the line start only.
RE_TABLE_SEPARATOR = re.compile(r'\s*\|(\-+\+)*\-+\|')
# When true, surrounding whitespace is stripped from every parsed cell.
STRIP_CELL_WHITESPACE = True


# A single table row: the texts of its cells, in column order.
Row = Sequence[str]

class Table:
    '''
    Lightweight view over the raw text lines of a pipe-delimited table.

    The lines are kept as given and parsed lazily every time rows or blocks
    are requested.
    '''

    def __init__(self, lines: List[str]) -> None:
        '''Store the raw table lines (one string per table line).'''
        self._lines = lines

    @property
    def blocks(self) -> Iterator[Sequence[Row]]:
        '''
        Yield groups of consecutive rows delimited by separator lines.

        Separators that occur before anything has been yielded and with no
        pending rows (e.g. a top border) produce no block. Once a block has
        been yielded, every further separator closes a block, even an empty
        one. Rows remaining after the last separator form a final block.
        '''
        group: List[Row] = []
        first = True
        for r in self._pre_rows():
            if r is None:
                if not first or group:
                    yield group
                    first = False
                group = []
            else:
                group.append(r)
        if group:
            yield group

    def __iter__(self) -> Iterator[Row]:
        '''Iterate over the data rows; same as ``rows``.'''
        return self.rows

    @property
    def rows(self) -> Iterator[Row]:
        '''Yield every data row in order, skipping separator lines.'''
        for r in self._pre_rows():
            if r is not None:
                yield r

    def _pre_rows(self) -> Iterator[Optional[Row]]:
        '''
        Yield one item per line: ``None`` for a separator, else the cell list.
        '''
        for line in self._lines:
            if RE_TABLE_SEPARATOR.match(line):
                yield None
            else:
                text = line.strip()
                # Drop exactly one bordering pipe on each side. Using
                # str.strip('|') here would also eat the delimiters of empty
                # edge cells (e.g. '||a|'), losing cells and shifting columns.
                if text.startswith('|'):
                    text = text[1:]
                if text.endswith('|'):
                    text = text[:-1]
                pr = text.split('|')
                if STRIP_CELL_WHITESPACE:
                    pr = [x.strip() for x in pr]
                yield pr
        # TODO use iparse helper?

    @property
    def as_dicts(self) -> 'AsDictHelper':
        '''
        Interpret the table as a one-row heading followed by one data block.

        Returns a helper exposing the column names and iterating over the
        data rows as ``{column: cell}`` dicts.

        Raises RuntimeError if the table does not consist of exactly two
        blocks or if the heading block is not a single row. Duplicate column
        names trigger an AssertionError.
        '''
        bl = list(self.blocks)
        if len(bl) != 2:
            raise RuntimeError('Need two-block table to non-ambiguously guess column names')
        hrows = bl[0]
        if len(hrows) != 1:
            raise RuntimeError(f'Need single row heading to guess column names, got: {hrows}')
        columns = hrows[0]
        # NOTE: kept as an assert so the exception type stays the same for
        # callers; be aware that this check is skipped under ``python -O``.
        assert len(set(columns)) == len(columns), f'Duplicate column names: {columns}'
        return AsDictHelper(
            columns=columns,
            rows=bl[1],
        )



class AsDictHelper:
    '''
    Presents table rows as dictionaries keyed by column name.

    ``columns`` is public; iterating the helper yields one dict per row,
    pairing column names with cells positionally.
    '''

    def __init__(self, columns: Sequence[str], rows: Sequence[Row]) -> None:
        '''Remember the column names and the data rows they describe.'''
        self.columns = columns
        self._rows = rows

    def __iter__(self) -> Iterator[Dict[str, str]]:
        '''Yield a ``{column: cell}`` mapping for each stored row.'''
        for cells in self._rows:
            # zip stops at the shorter of the two sequences, so surplus
            # cells or surplus column names are left out of the mapping.
            yield dict(zip(self.columns, cells))
63 changes: 63 additions & 0 deletions orgparse/tests/test_rich.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
'''
Tests for rich formatting: tables etc.
'''
from .. import load, loads
from ..extra import Table

import pytest # type: ignore


def test_table() -> None:
    '''
    Exercise ``Table`` block grouping, row iteration and ``as_dicts``.

    Three tables are cut out of one parsed document by hard-coded line
    indices and checked individually.
    '''
    root = loads('''
| | | |
| | "heading" | |
| | | |
|-------+-----------+-----|
| reiwf | fef | |
|-------+-----------+-----|
|-------+-----------+-----|
| aba | caba | 123 |
| yeah | | X |
|------------------------+-------|
| when | count |
| datetime | int |
|------------------------+-------|
| | -1 |
| [2020-11-05 Thu 23:44] | |
| [2020-11-06 Fri 01:00] | 1 |
|------------------------+-------|
some irrelevant text
| simple |
|--------|
| value1 |
| value2 |
''')

    # FIXME need to parse properly
    # NOTE(review): the slice bounds below do not line up with the fixture as
    # it is rendered here (e.g. [11:19] would take in the 'some irrelevant
    # text' line). Presumably blank lines between the tables were lost from
    # this rendering -- confirm against the repository copy.
    t1 = Table(root._lines[1:10])
    t2 = Table(root._lines[11:19])
    t3 = Table(root._lines[22:26])

    assert ilen(t1.blocks) == 4
    # two consecutive separator lines produce an empty block between them
    assert list(t1.blocks)[2] == []
    assert ilen(t1.rows) == 6

    with pytest.raises(RuntimeError):
        list(t1.as_dicts) # not sure what it should be

    assert ilen(t2.blocks) == 2
    assert ilen(t2.rows) == 5
    # an empty cell comes back as an empty string
    assert list(t2.rows)[3] == ['[2020-11-05 Thu 23:44]', '']


    # single heading row followed by a single data block: as_dicts applies
    assert ilen(t3.blocks) == 2
    assert list(t3.rows) == [['simple'], ['value1'], ['value2']]
    assert t3.as_dicts.columns == ['simple']
    assert list(t3.as_dicts) == [{'simple': 'value1'}, {'simple': 'value2'}]


def ilen(x) -> int:
    '''Return how many items iterating over ``x`` produces (consumes iterators).'''
    count = 0
    for _ in x:
        count += 1
    return count
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ commands =
pip install -e .[linting]
python -m mypy orgparse \
# txt report is a bit more convenient to view on CI
--txt-report .mypy-coverage \
--html-report .mypy-coverage \
--txt-report .coverage.mypy \
--html-report .coverage.mypy \
{posargs}

0 comments on commit 5e0921c

Please sign in to comment.