Merge pull request #198 from pysat/guvi_imaging_fix
GUVI imaging updates
aburrell committed Mar 7, 2024
2 parents 0b7d9b9 + 483c8e1 commit 2d33371
Showing 5 changed files with 212 additions and 46 deletions.
17 changes: 15 additions & 2 deletions .github/workflows/pysat_rc.yml
@@ -40,9 +40,22 @@ jobs:
python -c "import pysat; pysat.params['data_dirs'] = 'pysatData'"
- name: Test with pytest
run: pytest -vs --cov=pysatNASA/
run: pytest

- name: Publish results to coveralls
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: coveralls --rcfile=setup.cfg --service=github
COVERALLS_PARALLEL: true
run: coveralls --rcfile=pyproject.toml --service=github

finish:
name: Finish Coverage Analysis
needs: build
runs-on: ubuntu-latest
steps:
- name: Coveralls Finished
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
pip install --upgrade coveralls
coveralls --service=github --finish
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -11,11 +11,15 @@ This project adheres to [Semantic Versioning](https://semver.org/).
* REACH Dosimeter
* New Features
* Allow files to be unzipped after download
* Added custom `concat_data` method to TIMED-GUVI data
* Added cleaning to TIMED-GUVI SDR imaging data
* Bug Fixes
* Fix general clean routine to skip transformation matrices
* New window needs to be integer for calculate_imf_steadiness
* Fixed version import
* Fixed a bug when data fails to load for CDF pandas objects
* Allow graceful failure with no files in jhuapl load functions
* New window needs to be integer for calculate_imf_steadiness
* Fixed a bug where cdas_download may drop the requested end date file
* Documentation
* Added example of how to export data for archival
3 changes: 3 additions & 0 deletions docs/conf.py
@@ -175,3 +175,6 @@

# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}

# Links to ignore, as they require human interaction
linkcheck_ignore = [r'https://saber.gats-inc.com/temp_errors.php']
94 changes: 57 additions & 37 deletions pysatNASA/instruments/methods/jhuapl.py
@@ -6,6 +6,7 @@
import pandas as pds
import xarray as xr

import pysat
from pysat.utils.coords import expand_xarray_dims
from pysat.utils.io import load_netcdf

@@ -42,14 +43,15 @@ def build_dtimes(data, var, epoch=None, epoch_var='time'):
for i, sec in enumerate(data[skey].values)]
secs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60)))
for i, sec in enumerate(data[skey].values)]
microsecs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60
- secs[i]) * 1.0e6))
for i, sec in enumerate(data[skey].values)]
dtimes = [
dt.datetime.strptime(
"{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06.0f}".format(
"{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06d}".format(
int(data[ykey].values[i]), int(data[dkey].values[i]),
hours[i], mins[i], secs[i],
(sec - hours[i] * 3600 - mins[i] * 60 - secs[i]) * 1.0e6),
'%Y-%j-%H-%M-%S-%f')
for i, sec in enumerate(data[skey].values)]
hours[i], mins[i], secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
for i, microsec in enumerate(microsecs)]
else:
dtimes = [
dt.datetime.strptime("{:4d}-{:03d}".format(
@@ -100,8 +102,13 @@ def load_edr_aurora(fnames, tag='', inst_id='', pandas_format=False,
inst.load(2003, 1)
"""
# Initialize the output
mdata = pysat.Meta()
data = xr.Dataset()

# Define the input variables
labels = {'units': ('UNITS', str), 'desc': ('TITLE', str)}
labels = {mdata.labels.units: ('UNITS', str),
mdata.labels.desc: ('TITLE', str)}

# CDAWeb stores these files in the NetCDF format instead of the CDF format
single_data = list()
@@ -140,12 +147,13 @@ def load_edr_aurora(fnames, tag='', inst_id='', pandas_format=False,
# Update the fill value, using information from the global header
mdata[var] = {mdata.labels.fill_val: mdata.header.NO_DATA_IN_BIN_VALUE}

# After loading all the data, determine which dimensions need to be
# expanded. Pad the data so that all dimensions are the same shape.
single_data = expand_xarray_dims(single_data, mdata, dims_equal=False)
if len(single_data) > 0:
# After loading all the data, determine which dimensions need to be
# expanded. Pad the data so that all dimensions are the same shape.
single_data = expand_xarray_dims(single_data, mdata, dims_equal=False)

# Combine all the data, indexing along time
data = xr.combine_by_coords(single_data)
# Combine all the data, indexing along time
data = xr.combine_by_coords(single_data)

return data, mdata
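A minimal sketch (not the package's actual loader) of the graceful-failure pattern introduced in this hunk: initialize an empty `pysat.Meta` and `xarray.Dataset` up front, skip files that fail to read, and only expand and combine dimensions when at least one file loaded. The `xr.open_dataset` call and the `load_with_fallback` name are stand-ins for illustration, not the functions used in the real code.

```python
import xarray as xr

import pysat
from pysat.utils.coords import expand_xarray_dims


def load_with_fallback(fnames):
    """Load NetCDF files, returning empty data and metadata if nothing loads."""
    # Initialize the output so an empty Dataset/Meta pair is returned
    # when no files can be read
    mdata = pysat.Meta()
    data = xr.Dataset()
    single_data = list()

    for fname in fnames:
        try:
            # Stand-in for the package's NetCDF loading helper
            single_data.append(xr.open_dataset(fname))
        except (OSError, ValueError):
            continue

    if len(single_data) > 0:
        # Pad the data so all Datasets share the same dimension shapes,
        # then combine everything, indexing along time
        single_data = expand_xarray_dims(single_data, mdata, dims_equal=False)
        data = xr.combine_by_coords(single_data)

    return data, mdata
```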

@@ -168,7 +176,7 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False,
strict_dim_check : bool
Used for xarray data (`pandas_format` is False). If True, warn the user
that the desired epoch, 'TIME_DAY', is not present as a dimension in the
NetCDF file. If False, no warning is raised. (default=True)```
NetCDF file. If False, no warning is raised. (default=True)
combine_times : bool
For SDR data, optionally combine the different datetime coordinates
into a single time coordinate (default=False)
@@ -193,8 +201,13 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False,
inst.load(2003, 1)
"""
# Initialize the output
mdata = pysat.Meta()
data = xr.Dataset()

# Define the input variables and working variables
labels = {'units': ('UNITS', str), 'desc': ('TITLE', str)}
labels = {mdata.labels.units: ('UNITS', str),
mdata.labels.desc: ('TITLE', str)}
load_time = 'TIME_DAY'
time_vars = ['YEAR_DAY', 'DOY_DAY', 'TIME_EPOCH_DAY', 'YEAR_NIGHT',
'DOY_NIGHT', 'TIME_NIGHT', 'TIME_EPOCH_NIGHT']
@@ -304,32 +317,39 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False,
# Update the fill value, using information from the global header
mdata[var] = {mdata.labels.fill_val: mdata.header.NO_DATA_IN_BIN_VALUE}

# Combine all time dimensions
if combine_times:
data_list = expand_xarray_dims([inners[dim] if dim == 'time' else
inners[dim].rename_dims({dim: 'time'})
for dim in time_dims], mdata,
dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]
# Add metadata for 'time_auroral' and 'nCross' variables
mdata['time_auroral'] = {'desc': 'Auroral time index'}
mdata['nCross'] = {'desc': 'Number of cross-track observations'}

# Combine all the data, indexing along time
data = xr.merge(data_list)
# Combine all time dimensions
if inners is not None:
if combine_times:
data_list = expand_xarray_dims(
[inners[dim] if dim == 'time' else
inners[dim].rename_dims({dim: 'time'})
for dim in time_dims], mdata, dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]

# Set additional coordinates
data = data.set_coords(coords).assign_coords({'time': data['time']})
if tag == 'sdr-imaging':
data = data.assign_coords(
{'nchan': ["121.6nm", "130.4nm", "135.6nm", "LBHshort", "LBHlong"],
"nchanAur": ["121.6nm", "130.4nm", "135.6nm", "LBHshort",
"LBHlong"],
"nCross": sdata.nCross.data,
"nCrossDayAur": sdata.nCrossDayAur.data})
elif tag == 'sdr-spectrograph':
data = data.assign_coords({"nchan": ["121.6nm", "130.4nm", "135.6nm",
"LBHshort", "LBHlong", "?"]})
# Combine all the data, indexing along time
data = xr.merge(data_list)

# Ensure the data is ordered correctly
data = data.sortby('time')
# Set additional coordinates
data = data.set_coords(coords).assign_coords({'time': data['time']})
if tag == 'sdr-imaging':
data = data.assign_coords(
{'nchan': ["121.6nm", "130.4nm", "135.6nm", "LBHshort",
"LBHlong"],
"nchanAur": ["121.6nm", "130.4nm", "135.6nm", "LBHshort",
"LBHlong"],
"nCross": sdata.nCross.data,
"nCrossDayAur": sdata.nCrossDayAur.data})
elif tag == 'sdr-spectrograph':
data = data.assign_coords({"nchan": ["121.6nm", "130.4nm",
"135.6nm", "LBHshort",
"LBHlong", "?"]})

# Ensure the data is ordered correctly
data = data.sortby('time')

return data, mdata
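The `build_dtimes` change earlier in this file replaces a float microsecond field (`{:06.0f}`) with a floored integer value (`{:06d}`), presumably to keep the `%f` field a valid six-digit integer. A small, self-contained sketch of the same conversion, using made-up year, day-of-year, and seconds-of-day values:

```python
# Sketch of the datetime construction pattern above: split seconds-of-day into
# hour/minute/second, then floor the remainder to integer microseconds so the
# "%f" field is always a six-digit integer.
import datetime as dt

import numpy as np

year, doy = 2005, 347        # hypothetical year and day of year
sec_of_day = 30610.1234567   # hypothetical seconds of day

hour = int(np.floor(sec_of_day / 3600.0))
minute = int(np.floor((sec_of_day - hour * 3600) / 60.0))
second = int(np.floor(sec_of_day - hour * 3600 - minute * 60))
microsec = int(np.floor((sec_of_day - hour * 3600 - minute * 60 - second)
                        * 1.0e6))

dtime = dt.datetime.strptime(
    "{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06d}".format(
        year, doy, hour, minute, second, microsec), '%Y-%j-%H-%M-%S-%f')
print(dtime)  # 2005-12-13 08:30:10.123456
```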
140 changes: 133 additions & 7 deletions pysatNASA/instruments/timed_guvi.py
@@ -60,7 +60,9 @@

import datetime as dt
import functools
import xarray as xr

import pysat
from pysat.instruments.methods import general as mm_gen

from pysatNASA.instruments.methods import cdaweb as cdw
@@ -86,26 +88,150 @@
# ----------------------------------------------------------------------------
# Instrument test attributes

_test_dates = {iid: {tag: dt.datetime(2005, 6, 28) for tag in inst_ids[iid]}
for iid in inst_ids.keys()}
_test_dates = {
iid: {tag: dt.datetime(2007 if tag.find('spectrograph') > 0 else 2005, 12,
13) for tag in inst_ids[iid]}
for iid in inst_ids.keys()}
_test_load_opt = {iid: {tag: {'combine_times': True}
for tag in inst_ids[iid]}
for iid in ['high_res', 'low_res']}
# TODO(#218): Remove when compliant with multi-day load tests
_new_tests = {iid: {tag: False for tag in inst_ids[iid]}
for iid in ['high_res', 'low_res']}
_new_tests = {'high_res': {tag: False for tag in inst_ids['high_res']}}
_clean_warn = {inst_id: {tag: mm_nasa.clean_warnings
for tag in inst_ids[inst_id]}
for tag in inst_ids[inst_id] if tag != 'sdr-imaging'}
for inst_id in inst_ids.keys()}
for inst_id in ['high_res', 'low_res']:
_clean_warn[inst_id]['sdr-imaging'] = {'dirty': mm_nasa.clean_warnings[
'dirty']}

# ----------------------------------------------------------------------------
# Instrument methods

# Use standard init routine
init = functools.partial(mm_nasa.init, module=mm_timed, name=name)

# No cleaning, use standard warning function instead
clean = mm_nasa.clean_warn

def clean(self):
"""Clean TIMED GUVI imaging data.
Note
----
Supports 'clean', 'dusty', 'dirty', 'none'. Method is
not called by pysat if clean_level is None or 'none'.
"""
if self.tag == "sdr-imaging" and self.clean_level in ['clean', 'dusty']:
# Find the flag variables
dqi_vars = [var for var in self.variables if var.find('DQI') == 0]

# Find the variables affected by each flag
dat_vars = {dqi: [var for var in self.variables if var.find(dqi) > 0]
if dqi.find('AURORAL') >= 0 else
[var for var in self.variables if var.find('AURORAL') < 0
and var.find(dqi) > 0] for dqi in dqi_vars}

for dqi in dqi_vars:
if self.clean_level == 'clean':
# For clean, require DQI of zero (MeV noise only)
dqi_bad = self.data[dqi].values > 0
else:
# For dusty, allow the SAA region as well
dqi_bad = self.data[dqi].values > 1

# Apply the DQI mask to the data, replacing bad values with
# appropriate fill values
for dat_var in dat_vars[dqi]:
if self.data[dat_var].shape == dqi_bad.shape or self.data[
dat_var].shape[:-1] == dqi_bad.shape:
# Only apply to data with the correct dimensions
fill_val = self.meta[dat_var, self.meta.labels.fill_val]
self.data[dat_var].values[dqi_bad] = fill_val
else:
# Follow the same warning format as the general clean warning, but
# with additional information.
pysat.logger.warning(' '.join(['No cleaning routines available for',
self.platform, self.name, self.tag,
self.inst_id, 'at clean level',
self.clean_level]))
return


def concat_data(self, new_data, combine_times=False, **kwargs):
"""Concatonate data to self.data for TIMED GUVI data.
Parameters
----------
new_data : xarray.Dataset or list of such objects
New data objects to be concatenated
combine_times : bool
For SDR data, optionally combine the different datetime coordinates
into a single time coordinate (default=False)
**kwargs : dict
Optional keyword arguments passed to xr.concat
Note
----
For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
except if the user includes a value for dim as a keyword argument.
"""
# Establish the time dimensions by data type
time_dims = [self.index.name]

if self.tag == 'sdr-imaging':
time_dims.append('time_auroral')
elif self.tag == 'sdr-spectrograph':
time_dims.extend(['time_gaim_day', 'time_gaim_night'])

# Concatenate using the appropriate method for the number of time
# dimensions
if len(time_dims) == 1:
# There is only one time dimension, but other dimensions may
# need to be adjusted
new_data = pysat.utils.coords.expand_xarray_dims(
new_data, self.meta, exclude_dims=time_dims)

# Combine the data
self.data = xr.combine_by_coords(new_data, **kwargs)
else:
inners = None
for ndata in new_data:
# Separate into inner datasets
inner_keys = {dim: [key for key in ndata.keys()
if dim in ndata[key].dims] for dim in time_dims}
inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}

# Add 'single_var's into 'time' dataset to keep track
sv_keys = [val.name for val in ndata.values()
if 'single_var' in val.dims]
singlevar_set = ndata.get(sv_keys)
inner_dat[self.index.name] = xr.merge([inner_dat[self.index.name],
singlevar_set])

# Concatenate along desired dimension with previous data
if inners is None:
# No previous data, assign the data separated by dimension
inners = dict(inner_dat)
else:
# Concatenate with existing data
inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
dim=dim) for dim in time_dims}

# Combine all time dimensions
if inners is not None:
if combine_times:
data_list = pysat.utils.coords.expand_xarray_dims(
[inners[dim] if dim == self.index.name else
inners[dim].rename_dims({dim: self.index.name})
for dim in time_dims if len(inners[dim].dims) > 0],
self.meta, dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]

# Combine all the data, indexing along time
self.data = xr.merge(data_list)
return


# ----------------------------------------------------------------------------
# Instrument functions
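A hedged usage sketch of the updated instrument (the date, tag, and `inst_id` below are illustrative, not taken from this PR): load high-resolution SDR imaging data at the 'dusty' clean level and pass `combine_times` so the day, night, and auroral time coordinates are merged onto a single 'time' axis, as the new `concat_data` method and load options support.

```python
import datetime as dt

import pysat
from pysatNASA.instruments import timed_guvi

# Illustrative instantiation; clean_level='dusty' exercises the new DQI-based
# cleaning for the 'sdr-imaging' tag
inst = pysat.Instrument(inst_module=timed_guvi, tag='sdr-imaging',
                        inst_id='high_res', clean_level='dusty')

# Downloading requires network access; the date is illustrative
inst.download(start=dt.datetime(2005, 12, 13))

# Load by year and day of year, combining the time coordinates
inst.load(2005, 347, combine_times=True)
print(inst.data)
```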
