Merge pull request #188 from pysat/bug/186_ace

BUG/ENH: ACE Meta data and unused time dimensions
pysat · Jun 13, 2023 · 77fa1c1 · 77fa1c1
2 parents 9e0930c + 9fbecf7
commit 77fa1c1
Show file tree

Hide file tree

Showing 7 changed files with 103 additions and 37 deletions.
diff --git a/pysatNASA/instruments/ace_epam_l2.py b/pysatNASA/instruments/ace_epam_l2.py
@@ -51,7 +51,6 @@
 inst_ids = {'12sec': ['base'],
             '5min': ['key', 'base'],
             '1hr': ['key', 'base']}
-pandas_format = False
 
 # ----------------------------------------------------------------------------
 # Instrument test attributes
@@ -88,11 +87,7 @@
                                supported_tags=supported_tags)
 
 # Set the load routine
-meta_translation = {'CATDESC': 'desc', 'FILLVAL': 'fill',
-                    'LABLAXIS': 'plot_label', 'VALIDMAX': 'value_max',
-                    'VALIDMIN': 'value_min', 'VAR_NOTES': 'notes'}
-load = functools.partial(cdw.load, pandas_format=pandas_format,
-                         meta_translation=meta_translation, use_cdflib=True)
+load = functools.partial(mm_ace.load, to_pandas=True)
 
 # Set the download routine
 download_tags = {'12sec': {'base': 'AC_H3_EPM'},

diff --git a/pysatNASA/instruments/ace_mag_l2.py b/pysatNASA/instruments/ace_mag_l2.py
@@ -67,6 +67,27 @@
 # Use standard init routine
 init = functools.partial(mm_nasa.init, module=mm_ace, name=name)
 
+
+def preprocess(self):
+    """Adjust dimensionality of metadata."""
+
+    # TODO(https://github.com/pysat/pysat/issues/1078): Update the metadata by
+    # removing value_min0, value_min1, etc, once possible
+    self.meta['BGSEc'] = {'value_min': min([self.meta['BGSEc']['value_min0'],
+                                            self.meta['BGSEc']['value_min1'],
+                                            self.meta['BGSEc']['value_min2']]),
+                          'value_max': max([self.meta['BGSEc']['value_max0'],
+                                            self.meta['BGSEc']['value_max1'],
+                                            self.meta['BGSEc']['value_max2']]),
+                          'SCALEMIN': min([self.meta['BGSEc']['SCALEMIN0'],
+                                           self.meta['BGSEc']['SCALEMIN1'],
+                                           self.meta['BGSEc']['SCALEMIN2']]),
+                          'SCALEMAX': max([self.meta['BGSEc']['SCALEMAX0'],
+                                           self.meta['BGSEc']['SCALEMAX1'],
+                                           self.meta['BGSEc']['SCALEMAX2']])}
+    return
+
+
 # Use default ace clean
 clean = mm_ace.clean
 
@@ -90,11 +111,7 @@
                                supported_tags=supported_tags)
 
 # Set the load routine
-meta_translation = {'CATDESC': 'desc', 'FILLVAL': 'fill',
-                    'LABLAXIS': 'plot_label', 'VALIDMAX': 'value_max',
-                    'VALIDMIN': 'value_min', 'VAR_NOTES': 'notes'}
-load = functools.partial(cdw.load, pandas_format=pandas_format,
-                         meta_translation=meta_translation, use_cdflib=True)
+load = functools.partial(mm_ace.load, to_pandas=False)
 
 # Set the download routine
 download_tags = {'1sec': {'base': 'AC_H3_MFI'},

diff --git a/pysatNASA/instruments/ace_sis_l2.py b/pysatNASA/instruments/ace_sis_l2.py
@@ -83,11 +83,7 @@
                                supported_tags=supported_tags)
 
 # Set the load routine
-meta_translation = {'CATDESC': 'desc', 'FILLVAL': 'fill',
-                    'LABLAXIS': 'plot_label', 'VALIDMAX': 'value_max',
-                    'VALIDMIN': 'value_min', 'VAR_NOTES': 'notes'}
-load = functools.partial(cdw.load, pandas_format=pandas_format,
-                         meta_translation=meta_translation, use_cdflib=True)
+load = functools.partial(mm_ace.load, to_pandas=False)
 
 # Set the download routine
 download_tags = {'256sec': {'base': 'AC_H1_SIS'},

diff --git a/pysatNASA/instruments/ace_swepam_l2.py b/pysatNASA/instruments/ace_swepam_l2.py
@@ -51,7 +51,6 @@
 inst_ids = {'64sec': ['base'],
             '5min': ['key'],
             '1hr': ['key', 'base']}
-pandas_format = False
 
 # ----------------------------------------------------------------------------
 # Instrument test attributes
@@ -85,12 +84,9 @@
 list_files = functools.partial(mm_gen.list_files,
                                supported_tags=supported_tags)
 
+
 # Set the load routine
-meta_translation = {'CATDESC': 'desc', 'FILLVAL': 'fill',
-                    'LABLAXIS': 'plot_label', 'VALIDMAX': 'value_max',
-                    'VALIDMIN': 'value_min', 'VAR_NOTES': 'notes'}
-load = functools.partial(cdw.load, pandas_format=pandas_format,
-                         meta_translation=meta_translation, use_cdflib=True)
+load = functools.partial(mm_ace.load, to_pandas=True)
 
 # Set the download routine
 download_tags = {'64sec': {'base': 'AC_H0_SWE'},

diff --git a/pysatNASA/instruments/methods/_cdf.py b/pysatNASA/instruments/methods/_cdf.py
@@ -376,9 +376,9 @@ def load_variables(self):
     def to_pysat(self, flatten_twod=True,
                  labels={'units': ('Units', str), 'name': ('Long_Name', str),
                          'notes': ('Var_Notes', str), 'desc': ('CatDesc', str),
-                         'min_val': ('ValidMin', (int, float)),
-                         'max_val': ('ValidMax', (int, float)),
-                         'fill_val': ('FillVal', (int, float))}):
+                         'min_val': ('ValidMin', (float, int, str)),
+                         'max_val': ('ValidMax', (float, int, str)),
+                         'fill_val': ('FillVal', (float, int, str))}):
         """Export loaded CDF data into data, meta for pysat module.
 
         Parameters
@@ -398,9 +398,9 @@ def to_pysat(self, flatten_twod=True,
             that order.
             (default={'units': ('units', str), 'name': ('long_name', str),
                       'notes': ('notes', str), 'desc': ('desc', str),
-                      'min_val': ('value_min', (int, float)),
-                      'max_val': ('value_max', (int, float))
-                      'fill_val': ('fill', (int, float))})
+                      'min_val': ('value_min', (float, int, str)),
+                      'max_val': ('value_max', (float, int, str)),
+                      'fill_val': ('fill', (float, int, str))})
 
         Returns
         -------

diff --git a/pysatNASA/instruments/methods/ace.py b/pysatNASA/instruments/methods/ace.py
@@ -3,6 +3,8 @@
 
 import numpy as np
 
+from pysatNASA.instruments.methods import cdaweb as cdw
+
 ackn_str = ' '.join(("Please acknowledge the NASA National Space Science Data",
                      "Center, the Space Physics Data Facility, and the ACE",
                      "Principal Investigator, Edward C. Stone of the",
@@ -34,6 +36,59 @@
         }
 
 
+def load(fnames, tag='', inst_id='', to_pandas=False):
+    """Load ACE data via xarray and convert to pandas if needed.
+
+    This routine is called as needed by pysat. It is not intended
+    for direct user interaction.
+
+    Parameters
+    ----------
+    fnames : array-like
+        Iterable of filename strings, full path, to data files to be loaded.
+        This input is nominally provided by pysat itself.
+    tag : str
+        Tag name used to identify particular data set to be loaded.
+        This input is nominally provided by pysat itself. (default='')
+    inst_id : str
+        Instrument ID used to identify particular data set to be loaded.
+        This input is nominally provided by pysat itself. (default='')
+    to_pandas : bool
+        If True, convert to pandas. If False, leave as xarray. (default=False)
+
+    Returns
+    -------
+    data : pds.DataFrame or xr.Dataset
+        A pandas DataFrame or xarray Dataset with data prepared for the
+        `pysat.Instrument`.
+    meta : pysat.Meta
+        Metadata formatted for a pysat.Instrument object.
+
+    Note
+    ----
+    Several variables relating to time stored in different formats are dropped.
+    These are redundant and complicate the load procedure.
+
+    """
+
+    meta_translation = {'CATDESC': 'desc', 'FILLVAL': 'fill',
+                        'LABLAXIS': 'plot_label', 'VALIDMAX': 'value_max',
+                        'VALIDMIN': 'value_min', 'VAR_NOTES': 'notes'}
+    data, meta = cdw.load(fnames, tag=tag, inst_id=inst_id, pandas_format=False,
+                          meta_translation=meta_translation,
+                          drop_dims=['dim_empty', 'dim0', 'unit_time'],
+                          use_cdflib=True)
+
+    if to_pandas:
+        if hasattr(data, 'to_pandas'):
+            data = data.to_pandas()
+        else:
+            # xarray 0.16 support required for operational server
+            data = data.to_dataframe()
+
+    return data, meta
+
+
 def clean(self):
     """Clean ACE data to the specified level.
 
@@ -43,8 +98,15 @@ def clean(self):
 
     """
 
+    # Get a list of coords for the data
+    if self.pandas_format:
+        coords = [self.data.index.name]
+    else:
+        coords = [key for key in self.data.coords.keys()]
+
     for key in self.variables:
-        if key != 'time':
+        # Skip over the coordinates when cleaning
+        if key not in coords:
             fill = self.meta[key, self.meta.labels.fill_val]
 
             # Replace fill with nan

diff --git a/pysatNASA/instruments/methods/cdaweb.py b/pysatNASA/instruments/methods/cdaweb.py
@@ -276,9 +276,9 @@ def load_xarray(fnames, tag='', inst_id='',
                 file_cadence=dt.timedelta(days=1),
                 labels={'units': ('Units', str), 'name': ('Long_Name', str),
                         'notes': ('Var_Notes', str), 'desc': ('CatDesc', str),
-                        'min_val': ('ValidMin', (float, int)),
-                        'max_val': ('ValidMax', (float, int)),
-                        'fill_val': ('FillVal', (float, int))},
+                        'min_val': ('ValidMin', (float, int, str)),
+                        'max_val': ('ValidMax', (float, int, str)),
+                        'fill_val': ('FillVal', (float, int, str))},
                 epoch_name='Epoch', drop_dims=None, var_translation=None,
                 meta_processor=None, meta_translation=None,
                 drop_meta_labels=None):
@@ -300,11 +300,11 @@ def load_xarray(fnames, tag='', inst_id='',
     labels : dict
         Dict where keys are the label attribute names and the values are tuples
         that have the label values and value types in that order.
-        (default={'units': ('units', str), 'name': ('long_name', str),
-        'notes': ('notes', str), 'desc': ('desc', str),
-        'min_val': ('value_min', np.float64),
-        'max_val': ('value_max', np.float64),
-        'fill_val': ('fill', np.float64)})
+        (default={'units': ('Units', str), 'name': ('Long_Name', str),
+        'notes': ('Var_Notes', str), 'desc': ('CatDesc', str),
+        'min_val': ('ValidMin', (float, int, str)),
+        'max_val': ('ValidMax', (float, int, str)),
+        'fill_val': ('FillVal', (float, int, str))})
     epoch_name : str
         Data key for epoch variable.  The epoch variable is expected to be an
         array of integer or float values denoting time elapsed from an origin