Skip to content

Commit

Permalink
Fix-uniqueness-of-hdf5-based-dask-arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
mraspaud committed Jun 13, 2024
1 parent 6ed8698 commit b31209e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
12 changes: 11 additions & 1 deletion satpy/readers/hdf5_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""Helpers for reading hdf5-based files."""

import hashlib
import logging
import os

import dask.array as da
import h5py
Expand Down Expand Up @@ -102,7 +104,7 @@ def __getitem__(self, key):
# these datasets are closed and inaccessible when the file is closed, need to reopen
f_obj = open_file_or_filename(self.filename)
dset = h5py.File(f_obj, "r")[key]
dset_data = da.from_array(dset, chunks=CHUNK_SIZE)
dset_data = from_h5_array(dset)
attrs = self._attrs_cache.get(key, dset.attrs)
if dset.ndim == 2:
return xr.DataArray(dset_data, dims=["y", "x"], attrs=attrs)
Expand All @@ -120,3 +122,11 @@ def get(self, item, default=None):
return self[item]
else:
return default


def from_h5_array(h5dset):
    """Create a dask array from an h5py dataset with a deterministic dask name.

    The dask name is the MD5 hex digest of ``"<file name>-<dataset name>"``,
    so wrapping the same dataset of the same file again yields the same name,
    while a different file name or dataset name yields a different one.

    Args:
        h5dset: The h5py dataset to wrap. Its ``file.filename`` and ``name``
            attributes are used to build the dask array name.

    Returns:
        A dask array over ``h5dset``, chunked with ``CHUNK_SIZE``.
    """
    # NOTE(review): when the file was opened from a file-like object rather
    # than a path, the file name reported by h5py may not be stable between
    # opens -- confirm if such inputs need constant names too.
    name_str = os.fspath(h5dset.file.filename) + "-" + h5dset.name
    # MD5 only serves as a cheap, stable fingerprint for the dask name here,
    # not as a security measure, hence ``usedforsecurity=False`` (static
    # analysers may still report it as an insecure hash, e.g. B303).
    name = hashlib.md5(name_str.encode(), usedforsecurity=False).hexdigest()
    dset_data = da.from_array(h5dset, chunks=CHUNK_SIZE, name=name)
    return dset_data
9 changes: 9 additions & 0 deletions satpy/tests/reader_tests/test_hdf5_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,12 @@ def test_all_basic(self):
assert "fake_ds" not in file_handler

assert isinstance(file_handler["ds2_f/attr/test_ref"], np.ndarray)

def test_array_name_uniqueness(self):
    """Test that dask array names from hdf5 datasets stay constant and are unique."""
    from satpy.readers.hdf5_utils import HDF5FileHandler
    file_handler = HDF5FileHandler("test.h5", {}, {})

    dsname = "test_group/ds1_f"
    # NOTE(review): assumes "ds2_f" is a second dataset in the test.h5
    # fixture -- confirm against the test setup.
    other_dsname = "ds2_f"

    # Constancy: two separate lookups of the same dataset must produce
    # dask arrays carrying the same name.
    assert file_handler[dsname].data.name == file_handler[dsname].data.name
    # Uniqueness: a different dataset of the same file must get a different name.
    assert file_handler[dsname].data.name != file_handler[other_dsname].data.name

0 comments on commit b31209e

Please sign in to comment.