Merge pull request #136 from pysat/14-enh-timed-see-and-xarray

ENH: timed see and xarray
pysat · Apr 3, 2023 · 1bd714c · 1bd714c
2 parents cd01061 + a44cdda
commit 1bd714c
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 18 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,7 @@ This project adheres to [Semantic Versioning](https://semver.org/).
   * Updated platform methods to follow a consistent style and work with the
     general `init` function
   * Added unit tests for the different platform method attributes
+  * xarray support for TIMED SEE
 * Maintenance
   * Added a version cap for numpy (required for cdf interface, revisit before
     release)

diff --git a/pysatNASA/instruments/methods/cdaweb.py b/pysatNASA/instruments/methods/cdaweb.py
@@ -9,6 +9,7 @@
 
 import cdflib
 import datetime as dt
+import numpy as np
 import os
 import pandas as pds
 import requests
@@ -140,6 +141,7 @@ def load(fnames, tag='', inst_id='', file_cadence=dt.timedelta(days=1),
 
         data, meta = load_xarray(fnames, tag=tag, inst_id=inst_id,
                                  epoch_name=epoch_name,
+                                 file_cadence=file_cadence,
                                  meta_processor=meta_processor,
                                  meta_translation=meta_translation,
                                  drop_meta_labels=drop_meta_labels)
@@ -254,13 +256,12 @@ def load_pandas(fnames, tag='', inst_id='', file_cadence=dt.timedelta(days=1),
 
 
 def load_xarray(fnames, tag='', inst_id='',
-                labels={'units': ('units', str), 'name': ('long_name', str),
-                        'notes': ('notes', str), 'desc': ('desc', str),
-                        'plot': ('plot_label', str), 'axis': ('axis', str),
-                        'scale': ('scale', str),
-                        'min_val': ('value_min', float),
-                        'max_val': ('value_max', float),
-                        'fill_val': ('fill', float)},
+                file_cadence=dt.timedelta(days=1),
+                labels={'units': ('Units', str), 'name': ('Long_Name', str),
+                        'notes': ('Var_Notes', str), 'desc': ('CatDesc', str),
+                        'min_val': ('ValidMin', float),
+                        'max_val': ('ValidMax', float),
+                        'fill_val': ('FillVal', float)},
                 epoch_name='Epoch', meta_processor=None,
                 meta_translation=None, drop_meta_labels=None):
     """Load NASA CDAWeb CDF files into an xarray Dataset.
@@ -273,6 +274,11 @@ def load_xarray(fnames, tag='', inst_id='',
         Data product tag (default='')
     inst_id : str
         Instrument ID (default='')
+    file_cadence : dt.timedelta or pds.DateOffset
+        pysat assumes a daily file cadence, but some instrument data files
+        contain longer periods of time.  This parameter allows the specification
+        of regular file cadences greater than or equal to a day (e.g., weekly,
+        monthly, or yearly). (default=dt.timedelta(days=1))
     labels : dict
         Dict where keys are the label attribute names and the values are tuples
         that have the label values and value types in that order.
@@ -338,7 +344,16 @@ def load_xarray(fnames, tag='', inst_id='',
         # metadata for pysat using some assumptions. Depending upon your needs
         # the resulting xarray Dataset may need modification.
         ldata = []
-        for lfname in fnames:
+
+        # Find unique files for monthly / yearly cadence. Adding each cadence
+        # to an arbitrary timestamp lets timedelta and DateOffset be compared.
+        t0 = dt.datetime(2009, 1, 1)
+        if (t0 + file_cadence) > (t0 + dt.timedelta(days=1)):
+            lfnames = list(np.unique([fname[:-11] for fname in fnames]))
+        else:
+            lfnames = fnames
+
+        for lfname in lfnames:
             temp_data = cdflib.cdf_to_xarray(lfname, to_datetime=True)
             ldata.append(temp_data)
 
@@ -426,6 +441,7 @@ def load_xarray(fnames, tag='', inst_id='',
     return data, meta
 
 
+# TODO(#103): Include support to unzip / untar files after download.
 def download(date_array, tag='', inst_id='', supported_tags=None,
              remote_url='https://cdaweb.gsfc.nasa.gov', data_path=None):
     """Download NASA CDAWeb data.

diff --git a/pysatNASA/instruments/timed_saber.py b/pysatNASA/instruments/timed_saber.py
@@ -88,6 +88,8 @@
 #
 # Use the default CDAWeb and pysat methods
 
+# TODO(#104): Switch to netCDF4 files once unzip (#103) is supported.
+
 # Set the list_files routine
 fname = ''.join(('timed_l2a_saber_{year:04d}{month:02d}{day:02d}',
                  '{hour:02d}{minute:02d}_v{version:02d}-{revision:02d}-',

diff --git a/pysatNASA/instruments/timed_see.py b/pysatNASA/instruments/timed_see.py
@@ -17,19 +17,12 @@
     None
 inst_id
     None supported
-flatten_twod
-    If True, then two dimensional data is flattened across
-    columns. Name mangling is used to group data, first column
-    is 'name', last column is 'name_end'. In between numbers are
-    appended 'name_1', 'name_2', etc. All data for a given 2D array
-    may be accessed via, data.loc[:, 'item':'item_end']
-    If False, then 2D data is stored as a series of DataFrames,
-    indexed by Epoch. data.loc[0, 'item']
-    (default=True)
 
 Note
 ----
 - no tag required
+- cdflib load routine raises ISTP Compliance Warnings for several variables.
+  This is due to how the Epoch is listed in the original files.
 
 Warnings
 --------
@@ -54,6 +47,7 @@
 name = 'see'
 tags = {'': ''}
 inst_ids = {'': [tag for tag in tags.keys()]}
+pandas_format = False
 
 # ----------------------------------------------------------------------------
 # Instrument test attributes
@@ -73,6 +67,8 @@
 #
 # Use the default CDAWeb and pysat methods
 
+# TODO(#104): Switch to netCDF4 files once unzip (#103) is supported.
+
 # Set the list_files routine
 fname = 'timed_l3a_see_{year:04d}{month:02d}{day:02d}_v{version:02d}.cdf'
 supported_tags = {'': {'': fname}}
@@ -81,7 +77,8 @@
                                file_cadence=pds.DateOffset(months=1))
 
 # Set the load routine
-load = functools.partial(cdw.load, file_cadence=pds.DateOffset(months=1))
+load = functools.partial(cdw.load, pandas_format=pandas_format,
+                         file_cadence=pds.DateOffset(months=1))
 
 # Set the download routine
 download_tags = {'': {'': 'TIMED_L3A_SEE'}}