Merge pull request #70 from oscarbranson/dev

0.3.25 - filtering and export fixes
oscarbranson · Jul 30, 2021 · ccdaa9b · ccdaa9b
2 parents d6125df + 85a0810
commit ccdaa9b
Show file tree

Hide file tree

Showing 6 changed files with 128 additions and 29 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,14 @@
 # Changelog
 All significant changes to the software will be documented here.
 
+## [0.3.25] - 30/07/2021
+
+### Changed
+- Improvements to analyte handling in filt_obj to play nicely with custom denominators.
+- Improved robustness of `sample_stats` and `export_traces` at all processing stages.
+- Added tests for `sample_stats`, `export_traces` and `minimal_export`.
+
+
 ## [0.3.24] 29/07/2021
 
 ### Changed
@@ -13,6 +21,7 @@ All significant changes to the software will be documented here.
 
 ### Changed
 - Fixes to handling of SRMs with missing analytes.
+- Moved tests to GitHub Actions workflows.
 
 ## [0.3.22] 27/05/2021
 

diff --git a/latools/D_obj.py b/latools/D_obj.py
@@ -166,9 +166,9 @@ def __init__(self, data_file=None, dataformat=None, errorhunt=False, cmap=None,
         self.bkgrng = np.array([]).reshape(0, 2)
         self.sigrng = np.array([]).reshape(0, 2)
 
-        # set up filtering environment
+        # set up blank filtering object
+        self._init_filts()
         # self.filt = filt(self.Time.size, self.analytes)
-        self.filt = None
 
         if errorhunt:
             print('   -> OK')
@@ -184,7 +184,7 @@ def _analyte_checker(self, analytes=None, check_ratios=True, single=False, focus
     def analytes_sorted(self, analytes=None, check_ratios=True, single=False, focus_stage=None):
         return sorted(self._analyte_checker(analytes=analytes, check_ratios=check_ratios, single=single, focus_stage=focus_stage), key=analyte_sort_fn)
 
-    def _init_filts(self, analytes):
+    def _init_filts(self, analytes=None):
         self.filt = filt(self.Time.size, analytes)
 
     @_log
@@ -592,9 +592,10 @@ def calibrate(self, calib_ps, analyte_ratios=None):
                 c = 0
 
             self.data['calibrated'][a] = self.data['ratios'][a] * m + c
-
+            self.filt.add_to_table(a)
+
         # initialise filtering framework
-        self._init_filts(self.analyte_ratios)
+        # self._init_filts(self.analyte_ratios)
 
         self.setfocus('calibrated')
         return
@@ -666,6 +667,8 @@ def sample_stats(self, analytes=None, filt=True,
             for n, f in stat_fns.items():
                 self.stats[n] = []
                 for a in analytes:
+                    if a not in self.data[focus_stage]:
+                        continue 
                     ind = self.filt.grab_filt(filt, a)
                     dat = nominal_values(self.data[focus_stage][a])
                     if eachtrace:

diff --git a/latools/__init__.py b/latools/__init__.py
@@ -17,7 +17,7 @@
 from .helpers import chemistry
 from . import preprocessing
 
-__version__ = '0.3.24'
+__version__ = '0.3.25'
 
 def cite(output='text'):
     """

diff --git a/latools/filtering/filt_obj.py b/latools/filtering/filt_obj.py
@@ -53,12 +53,12 @@ class filt(object):
 
     def __init__(self, size, analytes):
         self.size = size
-        self.analytes = analytes
         self.maxset = -1
 
         findex = pd.MultiIndex(levels=[[], []], codes=[[], []], names=['N', 'filter'])
         self.fnames = []
-        self.filter_table = pd.DataFrame(index=findex, columns=self.analytes)
+        self.filter_table = pd.DataFrame(index=findex, columns=analytes)
+        self.analytes = self.filter_table.columns
         self.filter_components = pd.DataFrame(index=np.arange(size), columns=findex)
 
         self.param = Bunch()
@@ -67,6 +67,58 @@ def __init__(self, size, analytes):
 
         self.N = 0
 
+    def check_analytes(self, analytes=None, single=False, allow_multiples=False):
+        """
+        Match analyte name(s) against the analytes in the filter table.
+
+        Necessary because of the distinction between analyte and ratio
+        names: a name is accepted if it is an exact entry of
+        `self.analytes`, or if it contains no '_' and is a substring of
+        at least one entry of `self.analytes`.
+
+        Parameters
+        ----------
+        analytes : str or array-like
+            The analyte(s) to check. If None, every analyte in the
+            filter table is returned as a set (regardless of `single`).
+        single : bool
+            If True, a single analyte is returned as a string. When
+            nothing matched, the first requested analyte is returned
+            unchanged.
+        allow_multiples : bool
+            If True, analytes that link to multiple possible filter
+            analytes are allowed.
+
+        Returns
+        -------
+        set or str
+            A set containing the valid analytes, or a single analyte
+            name if `single` is True.
+
+        Raises
+        ------
+        ValueError
+            If a name links to more than one candidate and
+            `allow_multiples` is False.
+        """
+        if analytes is None:
+            return set(self.analytes.values)
+
+        if isinstance(analytes, str):
+            analytes = [analytes]
+
+        valid = set()
+
+        for analyte in analytes:
+            # exact match against the filter table
+            if analyte in self.analytes:
+                valid.update([analyte])
+
+            # names containing '_' are only ever matched exactly
+            if '_' not in analyte:
+                candidates = set()
+                for a in self.analytes:
+                    if analyte in a:
+                        # NOTE(review): this records the requested name, not
+                        # the matching table entry `a`, so `candidates` never
+                        # holds more than one item and the multiple-match
+                        # branch below cannot trigger. Confirm whether `a`
+                        # was intended before changing it: grab_filt relies
+                        # on the current result.
+                        candidates.update([analyte])
+                if len(candidates) == 1:
+                    valid.update([candidates.pop()])
+                elif len(candidates) > 1:
+                    if allow_multiples:
+                        valid.update(candidates)
+                    else:
+                        raise ValueError(f'{analyte} matches more than one analyte name: {candidates}. Please be more specific.')
+        if single:
+            if len(valid) == 0:
+                # nothing matched: hand back the first requested name as-is
+                return analytes[0]
+            # if several names are valid, which one is returned is arbitrary
+            return valid.pop()
+        else:
+            return valid
+
     def add(self, name, filt, info='', params=(), setn=None):
         """
         Add filter.
@@ -129,6 +181,7 @@ def add_to_table(self, analyte, mode='all'):
             self.filter_table.loc[:, analyte] = True
         else:
             self.filter_table.loc[:, analyte] = False
+        self.analytes = self.filter_table.columns
 
     def clear(self):
         """
@@ -155,10 +208,7 @@ def on(self, analyte=None, filt=None):
         -------
         None
         """
-        if isinstance(analyte, str):
-            analyte = [analyte]
-        if analyte is None:
-            analyte = self.analytes
+        analyte = self.check_analytes(analyte)
 
         if isinstance(filt, str):
             # find filter name
@@ -182,10 +232,7 @@ def off(self, analyte=None, filt=None):
         -------
         None
         """
-        if isinstance(analyte, str):
-            analyte = [analyte]
-        if analyte is None:
-            analyte = self.analytes
+        analyte = self.check_analytes(analyte)
 
         if isinstance(filt, str):
             # find filter name
@@ -238,10 +285,7 @@ def make_analyte(self, analyte):
         array_like
             boolean filter
         """
-        if isinstance(analyte, str):
-            analyte = [analyte]
-        elif analyte is None:
-            analyte = self.analytes
+        analyte = self.check_analytes(analyte)
 
         key = []
         for n, f in self.filter_table[analyte].index[self.filter_table[analyte].any(1)]:
@@ -296,24 +340,21 @@ def make_keydict(self, analyte=None):
         dict
             containing the logical filter expression for each analyte.
         """
-        if isinstance(analyte, str):
-            analyte = [analyte]
-        elif analyte is None:
-            analyte = self.analytes
+        analyte = self.check_analytes(analyte)
 
         for a in analyte:
             key = []
             for n, f in self.filter_table[a].index[self.filter_table[a]]:
                 key.append(f'{n}:{f}')
             self.keydict[a] = ' & '.join(key)
 
-    def grab_filt(self, filt, analyte=None):
+    def grab_filt(self, filt, analyte=None, allow_multiples=True):
         """
         Flexible access to specific filter using any key format.
 
         Parameters
         ----------
-        f : str, dict or bool
+        filt : str, dict or bool
             either logical filter expression, dict of expressions,
             or a boolean
         analyte : str
@@ -324,6 +365,15 @@ def grab_filt(self, filt, analyte=None):
         array_like
             boolean filter
         """
+        analyte = self.check_analytes(analyte, single=True)
+
+        if len(analyte) == 0:
+            return np.ones(self.size, dtype=bool)
+
+        if analyte not in self.analytes:
+            return np.ones(self.size, dtype=bool)
+            # print(f'Warning: {analyte} is not in filter table. No filters applied.')
+
         if isinstance(filt, str):
             if filt in self.fnames:
                 fkey = self.fuzzmatch(filt)
@@ -348,7 +398,7 @@ def grab_filt(self, filt, analyte=None):
         elif filt:
             ind = self.make_analyte(analyte)
         else:
-            ind = ~np.zeros(self.size, dtype=bool)
+            ind = np.ones(self.size, dtype=bool)
         return ind
 
     def get_components(self, analyte):

diff --git a/latools/latools.py b/latools/latools.py
@@ -3833,6 +3833,9 @@ def sample_stats(self, analytes=None, filt=True,
             Adds dict to analyse object containing samples, analytes and
             functions and data.
         """
+        if 'autorange' not in self.stages_complete:
+            raise RuntimeError('Cannot calculate statistics until autorange has been run.')
+
         analytes = self.analytes_sorted(analytes, focus_stage=focus_stage)
 
         if focus_stage is None:
@@ -4154,17 +4157,20 @@ def export_traces(self, outdir=None, focus_stage=None, analytes=None,
               'despiked': 'counts',
               'bkgsub': 'background corrected counts',
               'ratios': 'counts/count',
-              'calibrated': 'mol/mol'}
+              'calibrated': 'mol/mol',
+              'mass_fraction': 'mass fraction'}
 
         if not os.path.isdir(outdir):
             os.mkdir(outdir)
 
         for s in samples:
             d = self.data[s].data[focus_stage]
-            ind = self.data[s].filt.grab_filt(filt)
             out = Bunch()
 
             for a in analytes:
+                if a not in d:
+                    continue
+                ind = self.data[s].filt.grab_filt(filt, a)
                 out[a] = nominal_values(d[a][ind])
                 if focus_stage not in ['rawdata', 'despiked']:
                     out[a + '_std'] = std_devs(d[a][ind])

diff --git a/tests/test_latools.py b/tests/test_latools.py
@@ -3,6 +3,16 @@
 import pandas as pd
 import latools as la
 
+def test_export_fns(d, stage=''):
+    """
+    Run the statistics and export functions at every completed focus stage.
+
+    Parameters
+    ----------
+    d : object
+        Analysis object providing `stages_complete`, `sample_stats`,
+        `export_traces` and `minimal_export`.
+    stage : str
+        Description of the processing step that has just been run. Only
+        used to label the error raised if one of the calls fails.
+
+    Raises
+    ------
+    Exception
+        If any of the calls fails. The error is chained to the original
+        exception and names the failing function, `stage` and (where
+        relevant) the focus stage.
+    """
+    # NOTE(review): because of the `test_` prefix, pytest (if used as the
+    # runner) would collect this helper as a test and error on the missing
+    # `d` fixture. Consider renaming it together with its call sites.
+    for fs in d.stages_complete:
+        # presumably 'autorange' marks a completed step rather than a data
+        # stage that can be exported - TODO confirm
+        if fs == 'autorange':
+            continue
+        for name, fn in (('sample_stats', d.sample_stats),
+                         ('export_traces', d.export_traces)):
+            try:
+                fn(focus_stage=fs)
+            except Exception as e:
+                raise Exception(f"{name} failed after {stage} at focus_stage={fs}") from e
+    try:
+        d.minimal_export()
+    except Exception as e:
+        raise Exception(f"minimal_export failed after {stage}") from e
 
 class test_latools(unittest.TestCase):
     """
@@ -20,6 +30,9 @@ class test_latools(unittest.TestCase):
     d.autorange(on_mult=[1.5, 0.8],
                 off_mult=[0.8, 1.5])
 
+    # test export functions
+    test_export_fns(d, 'applying autorange')
+
     # trace plotting
     d.trace_plots(ranges=True)
 
@@ -32,15 +45,24 @@ class test_latools(unittest.TestCase):
     # subtract background
     d.bkg_subtract()
 
+    # test export functions
+    test_export_fns(d, 'background subtraction')
+
     # ratio
     d.ratio()
 
+    # test export functions
+    test_export_fns(d, 'calculating ratios')
+
     # calibrate
     d.calibrate(drift_correct=False, n_min=10,
                 srms_used=['NIST610', 'NIST612', 'NIST614'])
     # calibration plot
     fig, axs = d.calibration_plot()
 
+    # test export functions
+    test_export_fns(d, 'calibration')
+
     # crossplot
     fig, axs = d.crossplot(save=True)
 
@@ -55,9 +77,15 @@ class test_latools(unittest.TestCase):
 
     d.filter_on('Albelow')
 
+    # test export functions
+    test_export_fns(d, 'applying filters')
+
     # test custom denominator
     d.ratio('Ba137', 'Ba138')
 
+    # test export functions
+    test_export_fns(d, 'calculating a custom ratio')
+
     # calculate stats
     d.sample_stats(stats=['mean', 'std', 'se', 'H15_mean', 'H15_std', 'H15_se'], filt=True)
     s = d.getstats()
@@ -67,6 +95,9 @@ class test_latools(unittest.TestCase):
     d.internal_standard_concs = pd.DataFrame(0.6, index=d.samples, columns=['int_stand_massfrac'])
     d.calculate_mass_fraction()
 
+    # test export functions
+    test_export_fns(d, 'calculating mass fractions')
+
     # minimal export
     d.minimal_export()