Skip to content

Commit

Permalink
Merge pull request #1422 from djhoese/bugfix-hy2-reader
Browse files Browse the repository at this point in the history
Fix HDF5 utility file handler not decoding byte arrays consistently
  • Loading branch information
mraspaud committed Nov 16, 2020
2 parents 01c27e4 + 3765b95 commit 1922d97
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 48 deletions.
16 changes: 10 additions & 6 deletions satpy/readers/hdf5_utils.py
Expand Up @@ -38,6 +38,7 @@ def __init__(self, filename, filename_info, filetype_info):
super(HDF5FileHandler, self).__init__(
filename, filename_info, filetype_info)
self.file_content = {}
self._attrs_cache = {}

try:
file_handle = h5py.File(self.filename, 'r')
Expand All @@ -51,20 +52,22 @@ def __init__(self, filename, filename_info, filetype_info):
file_handle.close()

def _collect_attrs(self, name, attrs):
    """Collect attributes of HDF5 item *name* into ``self.file_content``.

    Each attribute value is squeezed and, when it is a byte string,
    decoded to ``str`` via ``np2str``.  The (possibly decoded) value is
    stored twice: under the flat key ``"<name>/attr/<key>"`` in
    ``self.file_content`` and in the per-item ``self._attrs_cache`` so
    that ``__getitem__`` can attach the same decoded attributes to the
    DataArrays it returns, keeping both access paths consistent.
    """
    attrs_cache = self._attrs_cache.setdefault(name, {})
    for key, value in attrs.items():
        value = np.squeeze(value)
        fc_key = "{}/attr/{}".format(name, key)
        try:
            value = np2str(value)
        except ValueError:
            # not a byte-string scalar - use the original value
            pass
        except AttributeError:
            # A HDF5 reference ?
            value = self.get_reference(name, key)
            if value is None:
                LOG.warning("Value cannot be converted - skip setting attribute %s", fc_key)
                continue
        self.file_content[fc_key] = attrs_cache[key] = value

def get_reference(self, name, key):
"""Get reference."""
Expand Down Expand Up @@ -94,9 +97,10 @@ def __getitem__(self, key):
# these datasets are closed and inaccessible when the file is closed, need to reopen
dset = h5py.File(self.filename, 'r')[key]
dset_data = da.from_array(dset, chunks=CHUNK_SIZE)
attrs = self._attrs_cache.get(key, dset.attrs)
if dset.ndim == 2:
return xr.DataArray(dset_data, dims=['y', 'x'], attrs=dset.attrs)
return xr.DataArray(dset_data, attrs=dset.attrs)
return xr.DataArray(dset_data, dims=['y', 'x'], attrs=attrs)
return xr.DataArray(dset_data, attrs=attrs)

return val

Expand Down
14 changes: 4 additions & 10 deletions satpy/readers/hy2_scat_l2b_h5.py
Expand Up @@ -18,8 +18,6 @@

import numpy as np
import xarray as xr
import dask.array as da
from satpy import CHUNK_SIZE
from datetime import datetime

from satpy.readers.hdf5_utils import HDF5FileHandler
Expand Down Expand Up @@ -92,16 +90,12 @@ def get_dataset(self, key, info):
dims = ['y', 'x']
if self[key['name']].ndim == 3:
dims = ['y', 'x', 'selection']
data = self[key['name']]
if key['name'] in 'wvc_row_time':
data = xr.DataArray(da.from_array(self[key['name']][:]),
attrs={'fill_value': self[key['name']].attrs['fill_value']},
name=key['name'],
dims=['y', ])
data = data.rename({data.dims[0]: 'y'})
else:
data = xr.DataArray(da.from_array(self[key['name']][:],
chunks=CHUNK_SIZE),
name=key['name'], dims=dims)

dim_map = {curr_dim: new_dim for curr_dim, new_dim in zip(data.dims, dims)}
data = data.rename(dim_map)
data = self._mask_data(key['name'], data)
data = self._scale_data(key['name'], data)

Expand Down
18 changes: 11 additions & 7 deletions satpy/readers/mersi2_l1b.py
Expand Up @@ -61,18 +61,22 @@ def sensor_name(self):
}.get(file_sensor, file_sensor)
return sensor

def _get_single_slope_intercept(self, slope, intercept, cal_index):
try:
# convert scalar arrays to scalar
return slope.item(), intercept.item()
except ValueError:
# numpy array but has more than one element
return slope[cal_index], intercept[cal_index]
return slope, intercept

def _get_coefficients(self, cal_key, cal_index):
    """Load calibration coefficients and apply their Slope/Intercept scaling.

    Reads the coefficient array at ``self[cal_key][cal_index]``, pops the
    ``Slope``/``Intercept`` attributes (so they are not propagated on the
    returned array), normalizes them to scalars for this calibration index
    via ``_get_single_slope_intercept``, and returns
    ``coeffs * slope + intercept``.  When no ``Slope`` attribute is
    present the coefficients are returned unscaled.
    """
    coeffs = self[cal_key][cal_index]
    slope = coeffs.attrs.pop('Slope', None)
    intercept = coeffs.attrs.pop('Intercept', None)
    if slope is not None:
        slope, intercept = self._get_single_slope_intercept(
            slope, intercept, cal_index)
        coeffs = coeffs * slope + intercept
    return coeffs

Expand Down
24 changes: 18 additions & 6 deletions satpy/tests/reader_tests/test_hdf5_utils.py
Expand Up @@ -91,18 +91,22 @@ def setUp(self):
# Add attributes
# shows up as a scalar array of bytes (shape=(), size=1)
h.attrs['test_attr_str'] = 'test_string'
h.attrs['test_attr_byte'] = b'test_byte'
h.attrs['test_attr_int'] = 0
h.attrs['test_attr_float'] = 1.2
# shows up as a numpy bytes object
h.attrs['test_attr_str_arr'] = np.array(b"test_string2")
g1.attrs['test_attr_str'] = 'test_string'
g1.attrs['test_attr_byte'] = b'test_byte'
g1.attrs['test_attr_int'] = 0
g1.attrs['test_attr_float'] = 1.2
for d in [ds1_f, ds1_i, ds2_f, ds2_i]:
d.attrs['test_attr_str'] = 'test_string'
d.attrs['test_attr_byte'] = b'test_byte'
d.attrs['test_attr_int'] = 0
d.attrs['test_attr_float'] = 1.2
d.attrs['test_ref'] = d.ref
self.var_attrs = list(d.attrs.keys())

h.close()

Expand All @@ -116,14 +120,22 @@ def test_all_basic(self):
import xarray as xr
file_handler = HDF5FileHandler('test.h5', {}, {})

for ds in ('test_group/ds1_f', 'test_group/ds1_i', 'ds2_f', 'ds2_i'):
self.assertEqual(file_handler[ds].dtype, np.float32 if ds.endswith('f') else np.int32)
self.assertTupleEqual(file_handler[ds + '/shape'], (10, 100))
self.assertEqual(file_handler[ds + '/attr/test_attr_str'], 'test_string')
self.assertEqual(file_handler[ds + '/attr/test_attr_int'], 0)
self.assertEqual(file_handler[ds + '/attr/test_attr_float'], 1.2)
for ds_name in ('test_group/ds1_f', 'test_group/ds1_i', 'ds2_f', 'ds2_i'):
ds = file_handler[ds_name]
attrs = ds.attrs
self.assertEqual(ds.dtype, np.float32 if ds_name.endswith('f') else np.int32)
self.assertTupleEqual(file_handler[ds_name + '/shape'], (10, 100))
self.assertEqual(attrs['test_attr_str'], 'test_string')
self.assertEqual(attrs['test_attr_byte'], 'test_byte')
self.assertEqual(attrs['test_attr_int'], 0)
self.assertEqual(attrs['test_attr_float'], 1.2)
self.assertEqual(file_handler[ds_name + '/attr/test_attr_str'], 'test_string')
self.assertEqual(file_handler[ds_name + '/attr/test_attr_byte'], 'test_byte')
self.assertEqual(file_handler[ds_name + '/attr/test_attr_int'], 0)
self.assertEqual(file_handler[ds_name + '/attr/test_attr_float'], 1.2)

self.assertEqual(file_handler['/attr/test_attr_str'], 'test_string')
self.assertEqual(file_handler['/attr/test_attr_byte'], 'test_byte')
self.assertEqual(file_handler['/attr/test_attr_str_arr'], 'test_string2')
self.assertEqual(file_handler['/attr/test_attr_int'], 0)
self.assertEqual(file_handler['/attr/test_attr_float'], 1.2)
Expand Down
38 changes: 19 additions & 19 deletions satpy/tests/reader_tests/test_mersi2_l1b.py
Expand Up @@ -39,12 +39,12 @@ def _get_calibration(self, num_scans, rows_per_scan):
'Calibration/VIS_Cal_Coeff':
xr.DataArray(
da.ones((19, 3), chunks=1024),
attrs={'Slope': [1.] * 19, 'Intercept': [0.] * 19},
attrs={'Slope': np.array([1.] * 19), 'Intercept': np.array([0.] * 19)},
dims=('_bands', '_coeffs')),
'Calibration/IR_Cal_Coeff':
xr.DataArray(
da.ones((6, 4, num_scans), chunks=1024),
attrs={'Slope': [1.] * 6, 'Intercept': [0.] * 6},
attrs={'Slope': np.array([1.] * 6), 'Intercept': np.array([0.] * 6)},
dims=('_bands', '_coeffs', '_scans')),
}
return calibration
Expand All @@ -56,7 +56,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((15, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 15, 'Intercept': [0.] * 15,
'Slope': np.array([1.] * 15), 'Intercept': np.array([0.] * 15),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
Expand All @@ -68,7 +68,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((4, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 4, 'Intercept': [0.] * 4,
'Slope': np.array([1.] * 4), 'Intercept': np.array([0.] * 4),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 25000],
Expand All @@ -81,7 +81,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((4, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 4, 'Intercept': [0.] * 4,
'Slope': np.array([1.] * 4), 'Intercept': np.array([0.] * 4),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
Expand All @@ -94,7 +94,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((2, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 2, 'Intercept': [0.] * 2,
'Slope': np.array([1.] * 2), 'Intercept': np.array([0.] * 2),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 4095],
Expand All @@ -112,7 +112,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
Expand All @@ -123,7 +123,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
Expand All @@ -134,7 +134,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
Expand All @@ -145,7 +145,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
Expand All @@ -156,7 +156,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 4095],
Expand All @@ -167,7 +167,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 4095],
Expand All @@ -182,7 +182,7 @@ def _get_geo_data(self, num_scans, rows_per_scan, num_cols, prefix='Geolocation/
xr.DataArray(
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'units': 'degree',
'valid_range': [-90, 90],
},
Expand All @@ -191,7 +191,7 @@ def _get_geo_data(self, num_scans, rows_per_scan, num_cols, prefix='Geolocation/
xr.DataArray(
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'units': 'degree',
'valid_range': [-180, 180],
},
Expand All @@ -200,7 +200,7 @@ def _get_geo_data(self, num_scans, rows_per_scan, num_cols, prefix='Geolocation/
xr.DataArray(
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024),
attrs={
'Slope': [.01] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([.01] * 1), 'Intercept': np.array([0.] * 1),
'units': 'degree',
'valid_range': [0, 28000],
},
Expand All @@ -225,12 +225,12 @@ def get_test_content(self, filename, filename_info, filetype_info):
data = {}
if self.filetype_info['file_type'] == 'mersi2_l1b_1000':
data = self._get_1km_data(num_scans, rows_per_scan, num_cols)
global_attrs['/attr/TBB_Trans_Coefficient_A'] = [1.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_B'] = [0.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_A'] = np.array([1.0] * 6)
global_attrs['/attr/TBB_Trans_Coefficient_B'] = np.array([0.0] * 6)
elif self.filetype_info['file_type'] == 'mersi2_l1b_250':
data = self._get_250m_data(num_scans, rows_per_scan, num_cols * 2)
global_attrs['/attr/TBB_Trans_Coefficient_A'] = [0.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_B'] = [0.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_A'] = np.array([0.0] * 6)
global_attrs['/attr/TBB_Trans_Coefficient_B'] = np.array([0.0] * 6)
elif self.filetype_info['file_type'] == 'mersi2_l1b_1000_geo':
data = self._get_geo_data(num_scans, rows_per_scan, num_cols)
elif self.filetype_info['file_type'] == 'mersi2_l1b_250_geo':
Expand Down

0 comments on commit 1922d97

Please sign in to comment.