Merge pull request #198 from pysat/guvi_imaging_fix
GUVI imaging updates
aburrell committed Mar 7, 2024
2 parents 0b7d9b9 + 483c8e1 commit 2d33371
Showing 5 changed files with 212 additions and 46 deletions.
17 changes: 15 additions & 2 deletions .github/workflows/pysat_rc.yml
@@ -40,9 +40,22 @@ jobs:
python -c "import pysat; pysat.params['data_dirs'] = 'pysatData'"
- name: Test with pytest
run: pytest -vs --cov=pysatNASA/
run: pytest

- name: Publish results to coveralls
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: coveralls --rcfile=setup.cfg --service=github
COVERALLS_PARALLEL: true
run: coveralls --rcfile=pyproject.toml --service=github

finish:
name: Finish Coverage Analysis
needs: build
runs-on: ubuntu-latest
steps:
- name: Coveralls Finished
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
pip install --upgrade coveralls
coveralls --service=github --finish
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -11,11 +11,15 @@ This project adheres to [Semantic Versioning](https://semver.org/).
* REACH Dosimeter
* New Features
* Allow files to be unzipped after download
* Added custom `concat_data` method to TIMED-GUVI data
* Added cleaning to TIMED-GUVI SDR imaging data
* Bug Fixes
* Fix general clean routine to skip transformation matrices
* New window needs to be integer for calculate_imf_steadiness
* Fixed version import
* Fixed a bug when data fails to load for CDF pandas objects
* Allow graceful failure with no files in jhuapl load functions
* New window needs to be integer for calculate_imf_steadiness
* Fixed a bug where cdas_download may drop the requested end date file
* Documentation
* Added example of how to export data for archival
3 changes: 3 additions & 0 deletions docs/conf.py
@@ -175,3 +175,6 @@

# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}

# Links to ignore, as they require human interaction
linkcheck_ignore = [r'https://saber.gats-inc.com/temp_errors.php']
94 changes: 57 additions & 37 deletions pysatNASA/instruments/methods/jhuapl.py
@@ -6,6 +6,7 @@
import pandas as pds
import xarray as xr

import pysat
from pysat.utils.coords import expand_xarray_dims
from pysat.utils.io import load_netcdf

@@ -42,14 +43,15 @@ def build_dtimes(data, var, epoch=None, epoch_var='time'):
for i, sec in enumerate(data[skey].values)]
secs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60)))
for i, sec in enumerate(data[skey].values)]
microsecs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60
- secs[i]) * 1.0e6))
for i, sec in enumerate(data[skey].values)]
dtimes = [
dt.datetime.strptime(
"{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06.0f}".format(
"{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06d}".format(
int(data[ykey].values[i]), int(data[dkey].values[i]),
hours[i], mins[i], secs[i],
(sec - hours[i] * 3600 - mins[i] * 60 - secs[i]) * 1.0e6),
'%Y-%j-%H-%M-%S-%f')
for i, sec in enumerate(data[skey].values)]
hours[i], mins[i], secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
for i, microsec in enumerate(microsecs)]
else:
dtimes = [
dt.datetime.strptime("{:4d}-{:03d}".format(
@@ -100,8 +102,13 @@ def load_edr_aurora(fnames, tag='', inst_id='', pandas_format=False,
inst.load(2003, 1)
"""
# Initialize the output
mdata = pysat.Meta()
data = xr.Dataset()

# Define the input variables
labels = {'units': ('UNITS', str), 'desc': ('TITLE', str)}
labels = {mdata.labels.units: ('UNITS', str),
mdata.labels.desc: ('TITLE', str)}

# CDAWeb stores these files in the NetCDF format instead of the CDF format
single_data = list()
@@ -140,12 +147,13 @@ def load_edr_aurora(fnames, tag='', inst_id='', pandas_format=False,
# Update the fill value, using information from the global header
mdata[var] = {mdata.labels.fill_val: mdata.header.NO_DATA_IN_BIN_VALUE}

# After loading all the data, determine which dimensions need to be
# expanded. Pad the data so that all dimensions are the same shape.
single_data = expand_xarray_dims(single_data, mdata, dims_equal=False)
if len(single_data) > 0:
# After loading all the data, determine which dimensions need to be
# expanded. Pad the data so that all dimensions are the same shape.
single_data = expand_xarray_dims(single_data, mdata, dims_equal=False)

# Combine all the data, indexing along time
data = xr.combine_by_coords(single_data)
# Combine all the data, indexing along time
data = xr.combine_by_coords(single_data)

return data, mdata
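A minimal sketch (not the package's actual loader) of the graceful-failure pattern introduced in this hunk: initialize an empty `pysat.Meta` and `xarray.Dataset` up front, skip files that fail to read, and only expand and combine dimensions when at least one file loaded. The `xr.open_dataset` call and the `load_with_fallback` name are stand-ins for illustration, not the functions used in the real code.

```python
import xarray as xr

import pysat
from pysat.utils.coords import expand_xarray_dims


def load_with_fallback(fnames):
    """Load NetCDF files, returning empty data and metadata if nothing loads."""
    # Initialize the output so an empty Dataset/Meta pair is returned
    # when no files can be read
    mdata = pysat.Meta()
    data = xr.Dataset()
    single_data = list()

    for fname in fnames:
        try:
            # Stand-in for the package's NetCDF loading helper
            single_data.append(xr.open_dataset(fname))
        except (OSError, ValueError):
            continue

    if len(single_data) > 0:
        # Pad the data so all Datasets share the same dimension shapes,
        # then combine everything, indexing along time
        single_data = expand_xarray_dims(single_data, mdata, dims_equal=False)
        data = xr.combine_by_coords(single_data)

    return data, mdata
```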

@@ -168,7 +176,7 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False,
strict_dim_check : bool
Used for xarray data (`pandas_format` is False). If True, warn the user
that the desired epoch, 'TIME_DAY', is not present as a dimension in the
NetCDF file. If False, no warning is raised. (default=True)```
NetCDF file. If False, no warning is raised. (default=True)
combine_times : bool
For SDR data, optionally combine the different datetime coordinates
into a single time coordinate (default=False)
@@ -193,8 +201,13 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False,
inst.load(2003, 1)
"""
# Initialize the output
mdata = pysat.Meta()
data = xr.Dataset()

# Define the input variables and working variables
labels = {'units': ('UNITS', str), 'desc': ('TITLE', str)}
labels = {mdata.labels.units: ('UNITS', str),
mdata.labels.desc: ('TITLE', str)}
load_time = 'TIME_DAY'
time_vars = ['YEAR_DAY', 'DOY_DAY', 'TIME_EPOCH_DAY', 'YEAR_NIGHT',
'DOY_NIGHT', 'TIME_NIGHT', 'TIME_EPOCH_NIGHT']
@@ -304,32 +317,39 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False,
# Update the fill value, using information from the global header
mdata[var] = {mdata.labels.fill_val: mdata.header.NO_DATA_IN_BIN_VALUE}

# Combine all time dimensions
if combine_times:
data_list = expand_xarray_dims([inners[dim] if dim == 'time' else
inners[dim].rename_dims({dim: 'time'})
for dim in time_dims], mdata,
dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]
# Add metadata for 'time_auroral' and 'nCross' variables
mdata['time_auroral'] = {'desc': 'Auroral time index'}
mdata['nCross'] = {'desc': 'Number of cross-track observations'}

# Combine all the data, indexing along time
data = xr.merge(data_list)
# Combine all time dimensions
if inners is not None:
if combine_times:
data_list = expand_xarray_dims(
[inners[dim] if dim == 'time' else
inners[dim].rename_dims({dim: 'time'})
for dim in time_dims], mdata, dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]

# Set additional coordinates
data = data.set_coords(coords).assign_coords({'time': data['time']})
if tag == 'sdr-imaging':
data = data.assign_coords(
{'nchan': ["121.6nm", "130.4nm", "135.6nm", "LBHshort", "LBHlong"],
"nchanAur": ["121.6nm", "130.4nm", "135.6nm", "LBHshort",
"LBHlong"],
"nCross": sdata.nCross.data,
"nCrossDayAur": sdata.nCrossDayAur.data})
elif tag == 'sdr-spectrograph':
data = data.assign_coords({"nchan": ["121.6nm", "130.4nm", "135.6nm",
"LBHshort", "LBHlong", "?"]})
# Combine all the data, indexing along time
data = xr.merge(data_list)

# Ensure the data is ordered correctly
data = data.sortby('time')
# Set additional coordinates
data = data.set_coords(coords).assign_coords({'time': data['time']})
if tag == 'sdr-imaging':
data = data.assign_coords(
{'nchan': ["121.6nm", "130.4nm", "135.6nm", "LBHshort",
"LBHlong"],
"nchanAur": ["121.6nm", "130.4nm", "135.6nm", "LBHshort",
"LBHlong"],
"nCross": sdata.nCross.data,
"nCrossDayAur": sdata.nCrossDayAur.data})
elif tag == 'sdr-spectrograph':
data = data.assign_coords({"nchan": ["121.6nm", "130.4nm",
"135.6nm", "LBHshort",
"LBHlong", "?"]})

# Ensure the data is ordered correctly
data = data.sortby('time')

return data, mdata
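The `build_dtimes` change earlier in this file replaces a float microsecond field (`{:06.0f}`) with a floored integer value (`{:06d}`), presumably to keep the `%f` field a valid six-digit integer. A small, self-contained sketch of the same conversion, using made-up year, day-of-year, and seconds-of-day values:

```python
# Sketch of the datetime construction pattern above: split seconds-of-day into
# hour/minute/second, then floor the remainder to integer microseconds so the
# "%f" field is always a six-digit integer.
import datetime as dt

import numpy as np

year, doy = 2005, 347        # hypothetical year and day of year
sec_of_day = 30610.1234567   # hypothetical seconds of day

hour = int(np.floor(sec_of_day / 3600.0))
minute = int(np.floor((sec_of_day - hour * 3600) / 60.0))
second = int(np.floor(sec_of_day - hour * 3600 - minute * 60))
microsec = int(np.floor((sec_of_day - hour * 3600 - minute * 60 - second)
                        * 1.0e6))

dtime = dt.datetime.strptime(
    "{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06d}".format(
        year, doy, hour, minute, second, microsec), '%Y-%j-%H-%M-%S-%f')
print(dtime)  # 2005-12-13 08:30:10.123456
```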
140 changes: 133 additions & 7 deletions pysatNASA/instruments/timed_guvi.py
@@ -60,7 +60,9 @@

import datetime as dt
import functools
import xarray as xr

import pysat
from pysat.instruments.methods import general as mm_gen

from pysatNASA.instruments.methods import cdaweb as cdw
@@ -86,26 +88,150 @@
# ----------------------------------------------------------------------------
# Instrument test attributes

_test_dates = {iid: {tag: dt.datetime(2005, 6, 28) for tag in inst_ids[iid]}
for iid in inst_ids.keys()}
_test_dates = {
iid: {tag: dt.datetime(2007 if tag.find('spectrograph') > 0 else 2005, 12,
13) for tag in inst_ids[iid]}
for iid in inst_ids.keys()}
_test_load_opt = {iid: {tag: {'combine_times': True}
for tag in inst_ids[iid]}
for iid in ['high_res', 'low_res']}
# TODO(#218): Remove when compliant with multi-day load tests
_new_tests = {iid: {tag: False for tag in inst_ids[iid]}
for iid in ['high_res', 'low_res']}
_new_tests = {'high_res': {tag: False for tag in inst_ids['high_res']}}
_clean_warn = {inst_id: {tag: mm_nasa.clean_warnings
for tag in inst_ids[inst_id]}
for tag in inst_ids[inst_id] if tag != 'sdr-imaging'}
for inst_id in inst_ids.keys()}
for inst_id in ['high_res', 'low_res']:
_clean_warn[inst_id]['sdr-imaging'] = {'dirty': mm_nasa.clean_warnings[
'dirty']}

# ----------------------------------------------------------------------------
# Instrument methods

# Use standard init routine
init = functools.partial(mm_nasa.init, module=mm_timed, name=name)

# No cleaning, use standard warning function instead
clean = mm_nasa.clean_warn

def clean(self):
"""Clean TIMED GUVI imaging data.
Note
----
Supports 'clean', 'dusty', 'dirty', 'none'. Method is
not called by pysat if clean_level is None or 'none'.
"""
if self.tag == "sdr-imaging" and self.clean_level in ['clean', 'dusty']:
# Find the flag variables
dqi_vars = [var for var in self.variables if var.find('DQI') == 0]

# Find the variables affected by each flag
dat_vars = {dqi: [var for var in self.variables if var.find(dqi) > 0]
if dqi.find('AURORAL') >= 0 else
[var for var in self.variables if var.find('AURORAL') < 0
and var.find(dqi) > 0] for dqi in dqi_vars}

for dqi in dqi_vars:
if self.clean_level == 'clean':
# For clean, require DQI of zero (MeV noise only)
dqi_bad = self.data[dqi].values > 0
else:
# For dusty, allow the SAA region as well
dqi_bad = self.data[dqi].values > 1

# Apply the DQI mask to the data, replacing bad values with
# appropriate fill values
for dat_var in dat_vars[dqi]:
if self.data[dat_var].shape == dqi_bad.shape or self.data[
dat_var].shape[:-1] == dqi_bad.shape:
# Only apply to data with the correct dimensions
fill_val = self.meta[dat_var, self.meta.labels.fill_val]
self.data[dat_var].values[dqi_bad] = fill_val
else:
# Follow the same warning format as the general clean warning, but
# with additional information.
pysat.logger.warning(' '.join(['No cleaning routines available for',
self.platform, self.name, self.tag,
self.inst_id, 'at clean level',
self.clean_level]))
return


def concat_data(self, new_data, combine_times=False, **kwargs):
"""Concatonate data to self.data for TIMED GUVI data.
Parameters
----------
new_data : xarray.Dataset or list of such objects
New data objects to be concatenated
combine_times : bool
For SDR data, optionally combine the different datetime coordinates
into a single time coordinate (default=False)
**kwargs : dict
Optional keyword arguments passed to xr.concat
Note
----
For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
except if the user includes a value for dim as a keyword argument.
"""
# Establish the time dimensions by data type
time_dims = [self.index.name]

if self.tag == 'sdr-imaging':
time_dims.append('time_auroral')
elif self.tag == 'sdr-spectrograph':
time_dims.extend(['time_gaim_day', 'time_gaim_night'])

# Concatenate using the appropriate method for the number of time
# dimensions
if len(time_dims) == 1:
# There is only one time dimension, but other dimensions may
# need to be adjusted
new_data = pysat.utils.coords.expand_xarray_dims(
new_data, self.meta, exclude_dims=time_dims)

# Combine the data
self.data = xr.combine_by_coords(new_data, **kwargs)
else:
inners = None
for ndata in new_data:
# Separate into inner datasets
inner_keys = {dim: [key for key in ndata.keys()
if dim in ndata[key].dims] for dim in time_dims}
inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}

# Add 'single_var's into 'time' dataset to keep track
sv_keys = [val.name for val in ndata.values()
if 'single_var' in val.dims]
singlevar_set = ndata.get(sv_keys)
inner_dat[self.index.name] = xr.merge([inner_dat[self.index.name],
singlevar_set])

# Concatenate along desired dimension with previous data
if inners is None:
# No previous data, assign the data separated by dimension
inners = dict(inner_dat)
else:
# Concatenate with existing data
inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
dim=dim) for dim in time_dims}

# Combine all time dimensions
if inners is not None:
if combine_times:
data_list = pysat.utils.coords.expand_xarray_dims(
[inners[dim] if dim == self.index.name else
inners[dim].rename_dims({dim: self.index.name})
for dim in time_dims if len(inners[dim].dims) > 0],
self.meta, dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]

# Combine all the data, indexing along time
self.data = xr.merge(data_list)
return


# ----------------------------------------------------------------------------
# Instrument functions
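A hedged usage sketch of the updated instrument (the date, tag, and `inst_id` below are illustrative, not taken from this PR): load high-resolution SDR imaging data at the 'dusty' clean level and pass `combine_times` so the day, night, and auroral time coordinates are merged onto a single 'time' axis, as the new `concat_data` method and load options support.

```python
import datetime as dt

import pysat
from pysatNASA.instruments import timed_guvi

# Illustrative instantiation; clean_level='dusty' exercises the new DQI-based
# cleaning for the 'sdr-imaging' tag
inst = pysat.Instrument(inst_module=timed_guvi, tag='sdr-imaging',
                        inst_id='high_res', clean_level='dusty')

# Downloading requires network access; the date is illustrative
inst.download(start=dt.datetime(2005, 12, 13))

# Load by year and day of year, combining the time coordinates
inst.load(2005, 347, combine_times=True)
print(inst.data)
```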
