Skip to content

Commit

Permalink
Allow the **data_loader** access to **extras**.
Browse files Browse the repository at this point in the history
The extras could contain information on how to parse the data
file, e.g. which machine generated the data, what file format was used,
etc.
  • Loading branch information
kalekundert committed May 29, 2020
1 parent 2dab75d commit 5706864
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 9 deletions.
14 changes: 11 additions & 3 deletions tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ def test_one_well():
]
)
def test_one_well_with_extras(extras_arg, expected):

def data_loader(path, extras):
assert extras == expected
return pd.read_csv(path)

# No data
labels, extras = wellmap.load(
DIR/'one_well_xy_extras.toml',
extras=extras_arg,
Expand All @@ -118,9 +124,10 @@ def test_one_well_with_extras(extras_arg, expected):
y=1,
)

# Load labels and data, but don't merge.
labels, data, extras = wellmap.load(
DIR/'one_well_xy_extras.toml',
data_loader=pd.read_csv,
data_loader=data_loader,
path_guess='{0.stem}.csv',
extras=extras_arg,
)
Expand All @@ -140,7 +147,7 @@ def test_one_well_with_extras(extras_arg, expected):
Data='xy',
)

# Merged data
# Automatic merge
a1_expected = dict(
path=DIR/'one_well_xy_extras.csv',
well='A1',
Expand All @@ -155,7 +162,7 @@ def test_one_well_with_extras(extras_arg, expected):

df, extras = wellmap.load(
DIR/'one_well_xy_extras.toml',
data_loader=pd.read_csv,
data_loader=data_loader,
merge_cols={'well': 'Well'},
path_guess='{0.stem}.csv',
extras=extras_arg,
Expand Down Expand Up @@ -277,3 +284,4 @@ def test_bad_args():
merge_cols={'well': 'xxx'},
)


32 changes: 26 additions & 6 deletions wellmap/load.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3

import toml
import re, itertools
import re, itertools, inspect
import pandas as pd
from pathlib import Path
from inform import plural
Expand Down Expand Up @@ -45,7 +45,9 @@ def load(toml_path, data_loader=None, merge_cols=None,
associated with the plate layout, in addition to loading the layout
itself. The argument should be a function that takes a `pathlib.Path`
to a data file, parses it, and returns a `pandas.DataFrame` containing
the parsed data. Note that specifying a data loader implies that
the parsed data. The function may also take an argument named
"extras", in which case the **extras** return value will be provided to
the data loader. Note that specifying a data loader implies that
**path_required** is True.
:param bool,dict merge_cols:
Expand Down Expand Up @@ -200,12 +202,30 @@ def add_extras(*args):
in the TOML file that wouldn't otherwise be parsed).
"""
if len(extras) == 1:
args += extras.popitem()[1],
args += list(extras.values())[0],
if len(extras) > 1:
args += extras,

return args if len(args) != 1 else args[0]

def get_extras_kwarg():
    """
    Helper function to determine whether or not to pass any "extras"
    (i.e. key/value pairs in the TOML file requested by the caller) to
    the **data_loader** function.

    Returns a keyword-argument dict: either ``{'extras': ...}`` when the
    data loader accepts an ``extras`` argument, or ``{}`` otherwise, so
    the result can be splatted directly into the **data_loader** call.
    """
    # No extras were requested by the caller, so there is nothing to
    # forward to the data loader.
    if not extras:
        return {}

    # Some callables (e.g. builtins or C-implemented functions) cannot
    # be introspected; treat those conservatively as not accepting an
    # `extras` argument rather than letting the exception escape.
    try:
        sig = inspect.signature(data_loader)
    except (TypeError, ValueError):
        return {}

    if 'extras' not in sig.parameters:
        return {}

    # Accept both `def f(path, extras)` and `def f(path, *, extras)`;
    # the argument is always passed by keyword, so a keyword-only
    # parameter works just as well as a positional-or-keyword one.
    allowed_kinds = (
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
            inspect.Parameter.KEYWORD_ONLY,
    )
    if sig.parameters['extras'].kind not in allowed_kinds:
        return {}

    return {'extras': add_extras()}

layout = table_from_config(config, paths)
layout = pd.concat([layout, *concats], sort=False)

Expand All @@ -225,9 +245,9 @@ def add_extras(*args):
data = pd.DataFrame()

for path in layout['path'].unique():
df = data_loader(path)
df['path'] = path
data = data.append(df, sort=False)
df = data_loader(path, **get_extras_kwarg())
df['path'] = path
data = data.append(df, sort=False)

## Merge the layout and the data into a single data frame:
if not merge_cols:
Expand Down

0 comments on commit 5706864

Please sign in to comment.