Skip to content

Commit

Permalink
feat(flatten): implement only option to specify list of output cols
Browse files Browse the repository at this point in the history
  • Loading branch information
yshalenyk committed Apr 22, 2021
1 parent 2c76ac7 commit a57200b
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
24 changes: 22 additions & 2 deletions spoonbill/flatten.py
Expand Up @@ -29,6 +29,7 @@ class TableFlattenConfig:
only: List[str] = field(default_factory=list)
repeat: List[str] = field(default_factory=list)
unnest: List[str] = field(default_factory=list)
only: List[str] = field(default_factory=list)
name: str = ""


Expand Down Expand Up @@ -58,7 +59,7 @@ class Flattener:
In order to export data correctly Flattener requires previously analyzed tables data.
During the process flattener could add columns not based on schema analysis, such as
`itemsCount`.
In every generated row, depending on table type, flattener will always few add augenerated columns.
In every generated row, depending on table type, flattener will always add a few autogenerated columns.
For root table:
* rowID
* id
Expand Down Expand Up @@ -100,12 +101,15 @@ def _init_table_cache(self, tables, table):

def _init_options(self, tables):
for table in tables.values():

name = table.name
count = self.options.count
options = self.options.selection[name]
unnest = options.unnest
split = options.split
repeat = options.repeat
only = options.only

if count:
for array in table.arrays:
parts = array.split("/")
Expand Down Expand Up @@ -142,15 +146,31 @@ def _init_options(self, tables):
child_table = self.tables.get(c_name)
child_table.columns[col_id] = col
child_table.titles[col_id] = title
if only:
if split:
table.columns = {c_id: c for c_id, c in table.columns.items() if c_id in only}
else:
table.combined_columns = {c_id: c for c_id, c in table.combined_columns.items() if c_id in only}

def _only(self, table, only, split):
table.types = {c_id: c for c_id, c in table.types.items() if c_id in only}

if split:
table.columns = {c_id: c for c_id, c in table.columns.items() if c_id in only}
return
table.combined_columns = {c_id: c for c_id, c in table.combined_columns.items() if c_id in only}

def _init(self):
# init cache and filter only selected tables
tables = {}
for name, table in self.tables.items():
if name not in self.options.selection:
continue
options = self.options.selection[name]
split = options.split
if options.only:
self._only(table, options.only, split)
self._init_table_cache(tables, table)
split = self.options.selection[name].split
if split:
for c_name in table.child_tables:
if c_name in self.options.exclude:
Expand Down
13 changes: 11 additions & 2 deletions tests/test_flatten.py
Expand Up @@ -120,11 +120,20 @@ def test_flatten_with_exclude(spec_analyzed, releases):


def test_flatten_with_only(spec_analyzed, releases):
options = FlattenOptions(**{"selection": {"tenders": {"split": True}}, "exclude": "tender_items"})
options = FlattenOptions(**{"selection": {"tenders": {"split": True, "only": ["/tender/id"]}}})
flattener = Flattener(options, spec_analyzed.tables)
all_rows = defaultdict(list)
for count, flat in flattener.flatten(releases):
for name, rows in flat.items():
all_rows[name].extend(rows)
for row in all_rows["tenders"]:
assert row == ["/tender/id"]

assert "tender_items" not in all_rows
options = FlattenOptions(**{"selection": {"tenders": {"split": False, "only": ["/tender/id"]}}})
flattener = Flattener(options, spec_analyzed.tables)
all_rows = defaultdict(list)
for count, flat in flattener.flatten(releases):
for name, rows in flat.items():
all_rows[name].extend(rows)
for row in all_rows["tenders"]:
assert row == ["/tender/id"]

0 comments on commit a57200b

Please sign in to comment.