Skip to content

Commit

Permalink
Allow the **data_loader** access to **extras**.
Browse files Browse the repository at this point in the history
The extras could contain information on how to parse the data
file, e.g. which machine generated the data, what file format was used,
etc.
  • Loading branch information
kalekundert committed May 29, 2020
1 parent 2dab75d commit 5706864
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 9 deletions.
14 changes: 11 additions & 3 deletions tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ def test_one_well():
]
)
def test_one_well_with_extras(extras_arg, expected):

def data_loader(path, extras):
assert extras == expected
return pd.read_csv(path)

# No data
labels, extras = wellmap.load(
DIR/'one_well_xy_extras.toml',
extras=extras_arg,
Expand All @@ -118,9 +124,10 @@ def test_one_well_with_extras(extras_arg, expected):
y=1,
)

# Load labels and data, but don't merge.
labels, data, extras = wellmap.load(
DIR/'one_well_xy_extras.toml',
data_loader=pd.read_csv,
data_loader=data_loader,
path_guess='{0.stem}.csv',
extras=extras_arg,
)
Expand All @@ -140,7 +147,7 @@ def test_one_well_with_extras(extras_arg, expected):
Data='xy',
)

# Merged data
# Automatic merge
a1_expected = dict(
path=DIR/'one_well_xy_extras.csv',
well='A1',
Expand All @@ -155,7 +162,7 @@ def test_one_well_with_extras(extras_arg, expected):

df, extras = wellmap.load(
DIR/'one_well_xy_extras.toml',
data_loader=pd.read_csv,
data_loader=data_loader,
merge_cols={'well': 'Well'},
path_guess='{0.stem}.csv',
extras=extras_arg,
Expand Down Expand Up @@ -277,3 +284,4 @@ def test_bad_args():
merge_cols={'well': 'xxx'},
)


32 changes: 26 additions & 6 deletions wellmap/load.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3

import toml
import re, itertools
import re, itertools, inspect
import pandas as pd
from pathlib import Path
from inform import plural
Expand Down Expand Up @@ -45,7 +45,9 @@ def load(toml_path, data_loader=None, merge_cols=None,
associated with the plate layout, in addition to loading the layout
itself. The argument should be a function that takes a `pathlib.Path`
to a data file, parses it, and returns a `pandas.DataFrame` containing
the parsed data. Note that specifying a data loader implies that
the parsed data. The function may also take an argument named
"extras", in which case the **extras** return value will be provided to
the data loader. Note that specifying a data loader implies that
**path_required** is True.
:param bool,dict merge_cols:
Expand Down Expand Up @@ -200,12 +202,30 @@ def add_extras(*args):
in the TOML file that wouldn't otherwise be parsed).
"""
if len(extras) == 1:
args += extras.popitem()[1],
args += list(extras.values())[0],
if len(extras) > 1:
args += extras,

return args if len(args) != 1 else args[0]

def get_extras_kwarg():
    """
    Helper function to determine whether or not to pass any "extras"
    (i.e. key/value pairs in the TOML file requested by the caller) to
    the **data_loader** function.

    Returns a keyword-argument dict: either ``{'extras': ...}`` when the
    data loader accepts an ``extras`` argument, or ``{}`` otherwise, so
    the result can be splatted directly into the **data_loader** call.
    """
    # No extras were requested by the caller, so there is nothing to
    # forward to the data loader.
    if not extras:
        return {}

    # Some callables (e.g. builtins or C-implemented functions) cannot
    # be introspected; treat those conservatively as not accepting an
    # `extras` argument rather than letting the exception escape.
    try:
        sig = inspect.signature(data_loader)
    except (TypeError, ValueError):
        return {}

    if 'extras' not in sig.parameters:
        return {}

    # Accept both `def f(path, extras)` and `def f(path, *, extras)`;
    # the argument is always passed by keyword, so a keyword-only
    # parameter works just as well as a positional-or-keyword one.
    allowed_kinds = (
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
            inspect.Parameter.KEYWORD_ONLY,
    )
    if sig.parameters['extras'].kind not in allowed_kinds:
        return {}

    return {'extras': add_extras()}

layout = table_from_config(config, paths)
layout = pd.concat([layout, *concats], sort=False)

Expand All @@ -225,9 +245,9 @@ def add_extras(*args):
data = pd.DataFrame()

for path in layout['path'].unique():
df = data_loader(path)
df['path'] = path
data = data.append(df, sort=False)
df = data_loader(path, **get_extras_kwarg())
df['path'] = path
data = data.append(df, sort=False)

## Merge the layout and the data into a single data frame:
if not merge_cols:
Expand Down

0 comments on commit 5706864

Please sign in to comment.