Fix HDF5 utility file handler not decoding byte arrays consistently #1422

Merged 5 commits on Nov 16, 2020
Changes from all commits
16 changes: 10 additions & 6 deletions satpy/readers/hdf5_utils.py
@@ -38,6 +38,7 @@ def __init__(self, filename, filename_info, filetype_info):
super(HDF5FileHandler, self).__init__(
filename, filename_info, filetype_info)
self.file_content = {}
self._attrs_cache = {}

try:
file_handle = h5py.File(self.filename, 'r')
@@ -51,20 +52,22 @@ def __init__(self, filename, filename_info, filetype_info):
file_handle.close()

def _collect_attrs(self, name, attrs):
attrs_cache = self._attrs_cache.setdefault(name, {})
for key, value in attrs.items():
value = np.squeeze(value)
fc_key = "{}/attr/{}".format(name, key)
try:
self.file_content[fc_key] = np2str(value)
value = np2str(value)
except ValueError:
self.file_content[fc_key] = value
# use the original value
pass
except AttributeError:
# A HDF5 reference ?
value = self.get_reference(name, key)
if value is None:
LOG.warning("Value cannot be converted - skip setting attribute %s", fc_key)
else:
self.file_content[fc_key] = value
continue
self.file_content[fc_key] = attrs_cache[key] = value

def get_reference(self, name, key):
"""Get reference."""
@@ -94,9 +97,10 @@ def __getitem__(self, key):
# these datasets are closed and inaccessible when the file is closed, need to reopen
dset = h5py.File(self.filename, 'r')[key]
dset_data = da.from_array(dset, chunks=CHUNK_SIZE)
attrs = self._attrs_cache.get(key, dset.attrs)
if dset.ndim == 2:
return xr.DataArray(dset_data, dims=['y', 'x'], attrs=dset.attrs)
return xr.DataArray(dset_data, attrs=dset.attrs)
return xr.DataArray(dset_data, dims=['y', 'x'], attrs=attrs)
return xr.DataArray(dset_data, attrs=attrs)

return val

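The gist of the change: np2str decoding is applied to each attribute value once, and the decoded result lands both in file_content and in the new _attrs_cache, so __getitem__ attaches decoded attributes to the returned DataArray instead of the raw h5py attrs, whose string values are undecoded bytes. A minimal sketch of the resulting behavior, assuming a test.h5 file laid out like the fixture in the tests below:

from satpy.readers.hdf5_utils import HDF5FileHandler

file_handler = HDF5FileHandler('test.h5', {}, {})

# Flat key access: bytes attributes come back decoded to str
assert file_handler['ds2_f/attr/test_attr_byte'] == 'test_byte'

# DataArray access: attrs now come from the decoded cache, so both
# access paths agree instead of one returning bytes
assert file_handler['ds2_f'].attrs['test_attr_byte'] == 'test_byte'
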
14 changes: 4 additions & 10 deletions satpy/readers/hy2_scat_l2b_h5.py
@@ -18,8 +18,6 @@

import numpy as np
import xarray as xr
import dask.array as da
from satpy import CHUNK_SIZE
from datetime import datetime

from satpy.readers.hdf5_utils import HDF5FileHandler
@@ -92,16 +90,12 @@ def get_dataset(self, key, info):
dims = ['y', 'x']
if self[key['name']].ndim == 3:
dims = ['y', 'x', 'selection']
data = self[key['name']]
if key['name'] in 'wvc_row_time':
data = xr.DataArray(da.from_array(self[key['name']][:]),
attrs={'fill_value': self[key['name']].attrs['fill_value']},
name=key['name'],
dims=['y', ])
data = data.rename({data.dims[0]: 'y'})
else:
data = xr.DataArray(da.from_array(self[key['name']][:],
chunks=CHUNK_SIZE),
name=key['name'], dims=dims)

dim_map = {curr_dim: new_dim for curr_dim, new_dim in zip(data.dims, dims)}
data = data.rename(dim_map)
data = self._mask_data(key['name'], data)
data = self._scale_data(key['name'], data)

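Because HDF5FileHandler.__getitem__ already returns dask-backed DataArrays with the cached attributes, the HY-2 reader no longer builds arrays with da.from_array itself and only renames the generic dimensions. A short sketch of the renaming pattern used above, with illustrative shapes:

import dask.array as da
import xarray as xr

data = xr.DataArray(da.ones((5, 10, 2)))   # default dims: dim_0, dim_1, dim_2
dims = ['y', 'x', 'selection']
dim_map = {curr_dim: new_dim for curr_dim, new_dim in zip(data.dims, dims)}
data = data.rename(dim_map)
print(data.dims)                           # ('y', 'x', 'selection')
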
18 changes: 11 additions & 7 deletions satpy/readers/mersi2_l1b.py
@@ -61,18 +61,22 @@ def sensor_name(self):
}.get(file_sensor, file_sensor)
return sensor

def _get_single_slope_intercept(self, slope, intercept, cal_index):
try:
# convert scalar arrays to scalar
return slope.item(), intercept.item()
except ValueError:
# numpy array but has more than one element
return slope[cal_index], intercept[cal_index]
return slope, intercept

def _get_coefficients(self, cal_key, cal_index):
coeffs = self[cal_key][cal_index]
slope = coeffs.attrs.pop('Slope', None)
intercept = coeffs.attrs.pop('Intercept', None)
if slope is not None:
# sometimes slope has multiple elements
if hasattr(slope, '__len__') and len(slope) == 1:
slope = slope[0]
intercept = intercept[0]
elif hasattr(slope, '__len__'):
slope = slope[cal_index]
intercept = intercept[cal_index]
slope, intercept = self._get_single_slope_intercept(
slope, intercept, cal_index)
coeffs = coeffs * slope + intercept
return coeffs

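The new helper leans on numpy's .item(), which succeeds for any size-1 array, 0-d included, and raises ValueError otherwise. This matters because DataArray attrs now come from the squeezed attribute cache, so a 1-element Slope plausibly arrives as a 0-d array, and the old hasattr(slope, '__len__') branch would call len() on it and fail with TypeError. A standalone sketch of the dispatch, with the function name and values illustrative:

import numpy as np

def get_single_slope_intercept(slope, intercept, cal_index):
    try:
        # .item() converts any size-1 array (0-d included) to a scalar
        return slope.item(), intercept.item()
    except ValueError:
        # more than one element: pick this band's coefficient
        return slope[cal_index], intercept[cal_index]

print(get_single_slope_intercept(np.array(2.0), np.array(0.5), 0))  # (2.0, 0.5)
print(get_single_slope_intercept(np.ones(6), np.zeros(6), 3))       # (1.0, 0.0)
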
24 changes: 18 additions & 6 deletions satpy/tests/reader_tests/test_hdf5_utils.py
@@ -91,18 +91,22 @@ def setUp(self):
# Add attributes
# shows up as a scalar array of bytes (shape=(), size=1)
h.attrs['test_attr_str'] = 'test_string'
h.attrs['test_attr_byte'] = b'test_byte'
h.attrs['test_attr_int'] = 0
h.attrs['test_attr_float'] = 1.2
# shows up as a numpy bytes object
h.attrs['test_attr_str_arr'] = np.array(b"test_string2")
g1.attrs['test_attr_str'] = 'test_string'
g1.attrs['test_attr_byte'] = b'test_byte'
g1.attrs['test_attr_int'] = 0
g1.attrs['test_attr_float'] = 1.2
for d in [ds1_f, ds1_i, ds2_f, ds2_i]:
d.attrs['test_attr_str'] = 'test_string'
d.attrs['test_attr_byte'] = b'test_byte'
d.attrs['test_attr_int'] = 0
d.attrs['test_attr_float'] = 1.2
d.attrs['test_ref'] = d.ref
self.var_attrs = list(d.attrs.keys())

h.close()

@@ -116,14 +120,22 @@ def test_all_basic(self):
import xarray as xr
file_handler = HDF5FileHandler('test.h5', {}, {})

for ds in ('test_group/ds1_f', 'test_group/ds1_i', 'ds2_f', 'ds2_i'):
self.assertEqual(file_handler[ds].dtype, np.float32 if ds.endswith('f') else np.int32)
self.assertTupleEqual(file_handler[ds + '/shape'], (10, 100))
self.assertEqual(file_handler[ds + '/attr/test_attr_str'], 'test_string')
self.assertEqual(file_handler[ds + '/attr/test_attr_int'], 0)
self.assertEqual(file_handler[ds + '/attr/test_attr_float'], 1.2)
for ds_name in ('test_group/ds1_f', 'test_group/ds1_i', 'ds2_f', 'ds2_i'):
ds = file_handler[ds_name]
attrs = ds.attrs
self.assertEqual(ds.dtype, np.float32 if ds_name.endswith('f') else np.int32)
self.assertTupleEqual(file_handler[ds_name + '/shape'], (10, 100))
self.assertEqual(attrs['test_attr_str'], 'test_string')
self.assertEqual(attrs['test_attr_byte'], 'test_byte')
self.assertEqual(attrs['test_attr_int'], 0)
self.assertEqual(attrs['test_attr_float'], 1.2)
self.assertEqual(file_handler[ds_name + '/attr/test_attr_str'], 'test_string')
self.assertEqual(file_handler[ds_name + '/attr/test_attr_byte'], 'test_byte')
self.assertEqual(file_handler[ds_name + '/attr/test_attr_int'], 0)
self.assertEqual(file_handler[ds_name + '/attr/test_attr_float'], 1.2)

self.assertEqual(file_handler['/attr/test_attr_str'], 'test_string')
self.assertEqual(file_handler['/attr/test_attr_byte'], 'test_byte')
self.assertEqual(file_handler['/attr/test_attr_str_arr'], 'test_string2')
self.assertEqual(file_handler['/attr/test_attr_int'], 0)
self.assertEqual(file_handler['/attr/test_attr_float'], 1.2)
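The new test_attr_byte entries exercise the decoding path directly: h5py hands such attributes back as NumPy bytes scalars, and the handler is now expected to return Python str on every access path. A sketch of what satpy's np2str helper does with these types; it raises ValueError for non-string or multi-element input, which is the branch the handler uses to leave values untouched:

import numpy as np
from satpy.readers.utils import np2str

print(np2str(np.bytes_(b'test_byte')))    # 'test_byte'
print(np2str(np.array(b'test_string2')))  # 'test_string2'
try:
    np2str(np.array(1.2))                 # not a string type
except ValueError:
    print('non-string attribute left as-is')
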
38 changes: 19 additions & 19 deletions satpy/tests/reader_tests/test_mersi2_l1b.py
@@ -39,12 +39,12 @@ def _get_calibration(self, num_scans, rows_per_scan):
'Calibration/VIS_Cal_Coeff':
xr.DataArray(
da.ones((19, 3), chunks=1024),
attrs={'Slope': [1.] * 19, 'Intercept': [0.] * 19},
attrs={'Slope': np.array([1.] * 19), 'Intercept': np.array([0.] * 19)},
dims=('_bands', '_coeffs')),
'Calibration/IR_Cal_Coeff':
xr.DataArray(
da.ones((6, 4, num_scans), chunks=1024),
attrs={'Slope': [1.] * 6, 'Intercept': [0.] * 6},
attrs={'Slope': np.array([1.] * 6), 'Intercept': np.array([0.] * 6)},
dims=('_bands', '_coeffs', '_scans')),
}
return calibration
@@ -56,7 +56,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((15, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 15, 'Intercept': [0.] * 15,
'Slope': np.array([1.] * 15), 'Intercept': np.array([0.] * 15),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -68,7 +68,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((4, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 4, 'Intercept': [0.] * 4,
'Slope': np.array([1.] * 4), 'Intercept': np.array([0.] * 4),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 25000],
@@ -81,7 +81,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((4, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 4, 'Intercept': [0.] * 4,
'Slope': np.array([1.] * 4), 'Intercept': np.array([0.] * 4),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -94,7 +94,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((2, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 2, 'Intercept': [0.] * 2,
'Slope': np.array([1.] * 2), 'Intercept': np.array([0.] * 2),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 4095],
@@ -112,7 +112,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -123,7 +123,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -134,7 +134,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -145,7 +145,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -156,7 +156,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 4095],
@@ -167,7 +167,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 4095],
@@ -182,7 +182,7 @@ def _get_geo_data(self, num_scans, rows_per_scan, num_cols, prefix='Geolocation/
xr.DataArray(
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'units': 'degree',
'valid_range': [-90, 90],
},
@@ -191,7 +191,7 @@ def _get_geo_data(self, num_scans, rows_per_scan, num_cols, prefix='Geolocation/
xr.DataArray(
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'units': 'degree',
'valid_range': [-180, 180],
},
@@ -200,7 +200,7 @@ def _get_geo_data(self, num_scans, rows_per_scan, num_cols, prefix='Geolocation/
xr.DataArray(
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024),
attrs={
'Slope': [.01] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([.01] * 1), 'Intercept': np.array([0.] * 1),
'units': 'degree',
'valid_range': [0, 28000],
},
@@ -225,12 +225,12 @@ def get_test_content(self, filename, filename_info, filetype_info):
data = {}
if self.filetype_info['file_type'] == 'mersi2_l1b_1000':
data = self._get_1km_data(num_scans, rows_per_scan, num_cols)
global_attrs['/attr/TBB_Trans_Coefficient_A'] = [1.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_B'] = [0.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_A'] = np.array([1.0] * 6)
global_attrs['/attr/TBB_Trans_Coefficient_B'] = np.array([0.0] * 6)
elif self.filetype_info['file_type'] == 'mersi2_l1b_250':
data = self._get_250m_data(num_scans, rows_per_scan, num_cols * 2)
global_attrs['/attr/TBB_Trans_Coefficient_A'] = [0.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_B'] = [0.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_A'] = np.array([0.0] * 6)
global_attrs['/attr/TBB_Trans_Coefficient_B'] = np.array([0.0] * 6)
elif self.filetype_info['file_type'] == 'mersi2_l1b_1000_geo':
data = self._get_geo_data(num_scans, rows_per_scan, num_cols)
elif self.filetype_info['file_type'] == 'mersi2_l1b_250_geo':
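The Slope and Intercept test attributes switch from Python lists to NumPy arrays so the fake file matches what h5py actually yields for HDF5 attributes; a plain list would not survive the reader's .item()-based coefficient handling. A tiny illustration:

import numpy as np

slope_list = [1.] * 6
slope_arr = np.array(slope_list)
print(hasattr(slope_list, 'item'), hasattr(slope_arr, 'item'))  # False True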