Fix HDF5 utility file handler not decoding byte arrays consistently #1422

Merged 5 commits on Nov 16, 2020
Changes from all commits
16 changes: 10 additions & 6 deletions satpy/readers/hdf5_utils.py
@@ -38,6 +38,7 @@ def __init__(self, filename, filename_info, filetype_info):
super(HDF5FileHandler, self).__init__(
filename, filename_info, filetype_info)
self.file_content = {}
self._attrs_cache = {}

try:
file_handle = h5py.File(self.filename, 'r')
@@ -51,20 +52,22 @@ def __init__(self, filename, filename_info, filetype_info):
file_handle.close()

def _collect_attrs(self, name, attrs):
attrs_cache = self._attrs_cache.setdefault(name, {})
for key, value in attrs.items():
value = np.squeeze(value)
fc_key = "{}/attr/{}".format(name, key)
try:
self.file_content[fc_key] = np2str(value)
value = np2str(value)
except ValueError:
self.file_content[fc_key] = value
# use the original value
pass
except AttributeError:
# A HDF5 reference ?
value = self.get_reference(name, key)
if value is None:
LOG.warning("Value cannot be converted - skip setting attribute %s", fc_key)
else:
self.file_content[fc_key] = value
continue
self.file_content[fc_key] = attrs_cache[key] = value

def get_reference(self, name, key):
"""Get reference."""
@@ -94,9 +97,10 @@ def __getitem__(self, key):
# these datasets are closed and inaccessible when the file is closed, need to reopen
dset = h5py.File(self.filename, 'r')[key]
dset_data = da.from_array(dset, chunks=CHUNK_SIZE)
attrs = self._attrs_cache.get(key, dset.attrs)
if dset.ndim == 2:
return xr.DataArray(dset_data, dims=['y', 'x'], attrs=dset.attrs)
return xr.DataArray(dset_data, attrs=dset.attrs)
return xr.DataArray(dset_data, dims=['y', 'x'], attrs=attrs)
return xr.DataArray(dset_data, attrs=attrs)

return val

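The gist of the change: np2str decoding is applied to each attribute value once, and the decoded result lands both in file_content and in the new _attrs_cache, so __getitem__ attaches decoded attributes to the returned DataArray instead of the raw h5py attrs, whose string values are undecoded bytes. A minimal sketch of the resulting behavior, assuming a test.h5 file laid out like the fixture in the tests below:

from satpy.readers.hdf5_utils import HDF5FileHandler

file_handler = HDF5FileHandler('test.h5', {}, {})

# Flat key access: bytes attributes come back decoded to str
assert file_handler['ds2_f/attr/test_attr_byte'] == 'test_byte'

# DataArray access: attrs now come from the decoded cache, so both
# access paths agree instead of one returning bytes
assert file_handler['ds2_f'].attrs['test_attr_byte'] == 'test_byte'
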
14 changes: 4 additions & 10 deletions satpy/readers/hy2_scat_l2b_h5.py
@@ -18,8 +18,6 @@

import numpy as np
import xarray as xr
import dask.array as da
from satpy import CHUNK_SIZE
from datetime import datetime

from satpy.readers.hdf5_utils import HDF5FileHandler
@@ -92,16 +90,12 @@ def get_dataset(self, key, info):
dims = ['y', 'x']
if self[key['name']].ndim == 3:
dims = ['y', 'x', 'selection']
data = self[key['name']]
if key['name'] in 'wvc_row_time':
data = xr.DataArray(da.from_array(self[key['name']][:]),
attrs={'fill_value': self[key['name']].attrs['fill_value']},
name=key['name'],
dims=['y', ])
data = data.rename({data.dims[0]: 'y'})
else:
data = xr.DataArray(da.from_array(self[key['name']][:],
chunks=CHUNK_SIZE),
name=key['name'], dims=dims)

dim_map = {curr_dim: new_dim for curr_dim, new_dim in zip(data.dims, dims)}
data = data.rename(dim_map)
data = self._mask_data(key['name'], data)
data = self._scale_data(key['name'], data)

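Because HDF5FileHandler.__getitem__ already returns dask-backed DataArrays with the cached attributes, the HY-2 reader no longer builds arrays with da.from_array itself and only renames the generic dimensions. A short sketch of the renaming pattern used above, with illustrative shapes:

import dask.array as da
import xarray as xr

data = xr.DataArray(da.ones((5, 10, 2)))   # default dims: dim_0, dim_1, dim_2
dims = ['y', 'x', 'selection']
dim_map = {curr_dim: new_dim for curr_dim, new_dim in zip(data.dims, dims)}
data = data.rename(dim_map)
print(data.dims)                           # ('y', 'x', 'selection')
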
18 changes: 11 additions & 7 deletions satpy/readers/mersi2_l1b.py
@@ -61,18 +61,22 @@ def sensor_name(self):
}.get(file_sensor, file_sensor)
return sensor

def _get_single_slope_intercept(self, slope, intercept, cal_index):
try:
# convert scalar arrays to scalar
return slope.item(), intercept.item()
except ValueError:
# numpy array but has more than one element
return slope[cal_index], intercept[cal_index]
return slope, intercept

def _get_coefficients(self, cal_key, cal_index):
coeffs = self[cal_key][cal_index]
slope = coeffs.attrs.pop('Slope', None)
intercept = coeffs.attrs.pop('Intercept', None)
if slope is not None:
# sometimes slope has multiple elements
if hasattr(slope, '__len__') and len(slope) == 1:
slope = slope[0]
intercept = intercept[0]
elif hasattr(slope, '__len__'):
slope = slope[cal_index]
intercept = intercept[cal_index]
slope, intercept = self._get_single_slope_intercept(
slope, intercept, cal_index)
coeffs = coeffs * slope + intercept
return coeffs

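The new helper leans on numpy's .item(), which succeeds for any size-1 array, 0-d included, and raises ValueError otherwise. This matters because DataArray attrs now come from the squeezed attribute cache, so a 1-element Slope plausibly arrives as a 0-d array, and the old hasattr(slope, '__len__') branch would call len() on it and fail with TypeError. A standalone sketch of the dispatch, with the function name and values illustrative:

import numpy as np

def get_single_slope_intercept(slope, intercept, cal_index):
    try:
        # .item() converts any size-1 array (0-d included) to a scalar
        return slope.item(), intercept.item()
    except ValueError:
        # more than one element: pick this band's coefficient
        return slope[cal_index], intercept[cal_index]

print(get_single_slope_intercept(np.array(2.0), np.array(0.5), 0))  # (2.0, 0.5)
print(get_single_slope_intercept(np.ones(6), np.zeros(6), 3))       # (1.0, 0.0)
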
24 changes: 18 additions & 6 deletions satpy/tests/reader_tests/test_hdf5_utils.py
@@ -91,18 +91,22 @@ def setUp(self):
# Add attributes
# shows up as a scalar array of bytes (shape=(), size=1)
h.attrs['test_attr_str'] = 'test_string'
h.attrs['test_attr_byte'] = b'test_byte'
h.attrs['test_attr_int'] = 0
h.attrs['test_attr_float'] = 1.2
# shows up as a numpy bytes object
h.attrs['test_attr_str_arr'] = np.array(b"test_string2")
g1.attrs['test_attr_str'] = 'test_string'
g1.attrs['test_attr_byte'] = b'test_byte'
g1.attrs['test_attr_int'] = 0
g1.attrs['test_attr_float'] = 1.2
for d in [ds1_f, ds1_i, ds2_f, ds2_i]:
d.attrs['test_attr_str'] = 'test_string'
d.attrs['test_attr_byte'] = b'test_byte'
d.attrs['test_attr_int'] = 0
d.attrs['test_attr_float'] = 1.2
d.attrs['test_ref'] = d.ref
self.var_attrs = list(d.attrs.keys())

h.close()

@@ -116,14 +120,22 @@ def test_all_basic(self):
import xarray as xr
file_handler = HDF5FileHandler('test.h5', {}, {})

for ds in ('test_group/ds1_f', 'test_group/ds1_i', 'ds2_f', 'ds2_i'):
self.assertEqual(file_handler[ds].dtype, np.float32 if ds.endswith('f') else np.int32)
self.assertTupleEqual(file_handler[ds + '/shape'], (10, 100))
self.assertEqual(file_handler[ds + '/attr/test_attr_str'], 'test_string')
self.assertEqual(file_handler[ds + '/attr/test_attr_int'], 0)
self.assertEqual(file_handler[ds + '/attr/test_attr_float'], 1.2)
for ds_name in ('test_group/ds1_f', 'test_group/ds1_i', 'ds2_f', 'ds2_i'):
ds = file_handler[ds_name]
attrs = ds.attrs
self.assertEqual(ds.dtype, np.float32 if ds_name.endswith('f') else np.int32)
self.assertTupleEqual(file_handler[ds_name + '/shape'], (10, 100))
self.assertEqual(attrs['test_attr_str'], 'test_string')
self.assertEqual(attrs['test_attr_byte'], 'test_byte')
self.assertEqual(attrs['test_attr_int'], 0)
self.assertEqual(attrs['test_attr_float'], 1.2)
self.assertEqual(file_handler[ds_name + '/attr/test_attr_str'], 'test_string')
self.assertEqual(file_handler[ds_name + '/attr/test_attr_byte'], 'test_byte')
self.assertEqual(file_handler[ds_name + '/attr/test_attr_int'], 0)
self.assertEqual(file_handler[ds_name + '/attr/test_attr_float'], 1.2)

self.assertEqual(file_handler['/attr/test_attr_str'], 'test_string')
self.assertEqual(file_handler['/attr/test_attr_byte'], 'test_byte')
self.assertEqual(file_handler['/attr/test_attr_str_arr'], 'test_string2')
self.assertEqual(file_handler['/attr/test_attr_int'], 0)
self.assertEqual(file_handler['/attr/test_attr_float'], 1.2)
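The new test_attr_byte entries exercise the decoding path directly: h5py hands such attributes back as NumPy bytes scalars, and the handler is now expected to return Python str on every access path. A sketch of what satpy's np2str helper does with these types; it raises ValueError for non-string or multi-element input, which is the branch the handler uses to leave values untouched:

import numpy as np
from satpy.readers.utils import np2str

print(np2str(np.bytes_(b'test_byte')))    # 'test_byte'
print(np2str(np.array(b'test_string2')))  # 'test_string2'
try:
    np2str(np.array(1.2))                 # not a string type
except ValueError:
    print('non-string attribute left as-is')
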
38 changes: 19 additions & 19 deletions satpy/tests/reader_tests/test_mersi2_l1b.py
@@ -39,12 +39,12 @@ def _get_calibration(self, num_scans, rows_per_scan):
'Calibration/VIS_Cal_Coeff':
xr.DataArray(
da.ones((19, 3), chunks=1024),
attrs={'Slope': [1.] * 19, 'Intercept': [0.] * 19},
attrs={'Slope': np.array([1.] * 19), 'Intercept': np.array([0.] * 19)},
dims=('_bands', '_coeffs')),
'Calibration/IR_Cal_Coeff':
xr.DataArray(
da.ones((6, 4, num_scans), chunks=1024),
attrs={'Slope': [1.] * 6, 'Intercept': [0.] * 6},
attrs={'Slope': np.array([1.] * 6), 'Intercept': np.array([0.] * 6)},
dims=('_bands', '_coeffs', '_scans')),
}
return calibration
@@ -56,7 +56,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((15, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 15, 'Intercept': [0.] * 15,
'Slope': np.array([1.] * 15), 'Intercept': np.array([0.] * 15),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -68,7 +68,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((4, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 4, 'Intercept': [0.] * 4,
'Slope': np.array([1.] * 4), 'Intercept': np.array([0.] * 4),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 25000],
@@ -81,7 +81,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((4, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 4, 'Intercept': [0.] * 4,
'Slope': np.array([1.] * 4), 'Intercept': np.array([0.] * 4),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -94,7 +94,7 @@ def _get_1km_data(self, num_scans, rows_per_scan, num_cols):
da.ones((2, num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 2, 'Intercept': [0.] * 2,
'Slope': np.array([1.] * 2), 'Intercept': np.array([0.] * 2),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 4095],
@@ -112,7 +112,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -123,7 +123,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -134,7 +134,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -145,7 +145,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'NO',
'valid_range': [0, 4095],
@@ -156,7 +156,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 4095],
@@ -167,7 +167,7 @@ def _get_250m_data(self, num_scans, rows_per_scan, num_cols):
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024,
dtype=np.uint16),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'FillValue': 65535,
'units': 'mW/ (m2 cm-1 sr)',
'valid_range': [0, 4095],
@@ -182,7 +182,7 @@ def _get_geo_data(self, num_scans, rows_per_scan, num_cols, prefix='Geolocation/
xr.DataArray(
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'units': 'degree',
'valid_range': [-90, 90],
},
@@ -191,7 +191,7 @@ def _get_geo_data(self, num_scans, rows_per_scan, num_cols, prefix='Geolocation/
xr.DataArray(
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024),
attrs={
'Slope': [1.] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([1.] * 1), 'Intercept': np.array([0.] * 1),
'units': 'degree',
'valid_range': [-180, 180],
},
@@ -200,7 +200,7 @@ def _get_geo_data(self, num_scans, rows_per_scan, num_cols, prefix='Geolocation/
xr.DataArray(
da.ones((num_scans * rows_per_scan, num_cols), chunks=1024),
attrs={
'Slope': [.01] * 1, 'Intercept': [0.] * 1,
'Slope': np.array([.01] * 1), 'Intercept': np.array([0.] * 1),
'units': 'degree',
'valid_range': [0, 28000],
},
@@ -225,12 +225,12 @@ def get_test_content(self, filename, filename_info, filetype_info):
data = {}
if self.filetype_info['file_type'] == 'mersi2_l1b_1000':
data = self._get_1km_data(num_scans, rows_per_scan, num_cols)
global_attrs['/attr/TBB_Trans_Coefficient_A'] = [1.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_B'] = [0.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_A'] = np.array([1.0] * 6)
global_attrs['/attr/TBB_Trans_Coefficient_B'] = np.array([0.0] * 6)
elif self.filetype_info['file_type'] == 'mersi2_l1b_250':
data = self._get_250m_data(num_scans, rows_per_scan, num_cols * 2)
global_attrs['/attr/TBB_Trans_Coefficient_A'] = [0.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_B'] = [0.0] * 6
global_attrs['/attr/TBB_Trans_Coefficient_A'] = np.array([0.0] * 6)
global_attrs['/attr/TBB_Trans_Coefficient_B'] = np.array([0.0] * 6)
elif self.filetype_info['file_type'] == 'mersi2_l1b_1000_geo':
data = self._get_geo_data(num_scans, rows_per_scan, num_cols)
elif self.filetype_info['file_type'] == 'mersi2_l1b_250_geo':
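The Slope and Intercept test attributes switch from Python lists to NumPy arrays so the fake file matches what h5py actually yields for HDF5 attributes; a plain list would not survive the reader's .item()-based coefficient handling. A tiny illustration:

import numpy as np

slope_list = [1.] * 6
slope_arr = np.array(slope_list)
print(hasattr(slope_list, 'item'), hasattr(slope_arr, 'item'))  # False True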