Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [vX.Y.Z] - Unreleased
## [v1.2.0] - 2025-10-10

### Added

- `matches_reference_hash_file` has been added to the top-level API. This function
will try to infer the file type from the path of the file under comparison and
then use the relevant comparison functionality for HDF-5, netCDF4 or GeoTIFF
files.

### Changed

Expand Down
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,38 @@ assert geotiff_matches_reference_hash_file(
)
```

### A single entry point for comparison

For convenience, you can use the `matches_reference_hash_file` function for all
of the file types previously discussed. Each call will accept the paths to the
binary file and JSON hash file, along with appropriate optional kwargs relevant
to the file type.

```python
from earthdata_hashdiff import matches_reference_hash_file

assert matches_reference_hash_file(
'path/to/netcdf/file.nc4',
'path/to/json/with/hashes.json',
)

assert matches_reference_hash_file(
'path/to/netcdf/file.nc4',
'path/to/json/with/hashes.json',
skipped_metadata_attributes={'attribute_name_one', 'attribute_name_two'},
)

assert matches_reference_hash_file(
    'path/to/geotiff/file.tif',
    'path/to/json/with/hash.json',
)

assert matches_reference_hash_file(
    'path/to/geotiff/file.tif',
    'path/to/json/with/hash.json',
    skipped_metadata_tags={'tag_name_one'},
)
```

## Installing

### Using pip
Expand Down
39 changes: 39 additions & 0 deletions docs/Using_earthdata-hashdiff.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,45 @@
")"
]
},
{
"cell_type": "markdown",
"id": "dda44bf9-1a3e-4702-b10b-f6b401e2217a",
"metadata": {},
"source": [
"## A single comparison entry point\n",
"\n",
    "For convenience, you can use the `matches_reference_hash_file` function for all of the file types previously discussed. Each call will accept the paths to the binary file and JSON hash file, along with appropriate optional kwargs relevant to the file type."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba1196f0-1dc6-4c17-97d8-7b7b8f047c73",
"metadata": {},
"outputs": [],
"source": [
"from earthdata_hashdiff import matches_reference_hash_file\n",
"\n",
"# GeoTIFF example\n",
"assert matches_reference_hash_file(\n",
" ecostress_granule,\n",
" f'{ecostress_granule}.json',\n",
")\n",
"\n",
"# HDF-5 example\n",
"assert matches_reference_hash_file(\n",
" gpm_3imerghh_granule_one,\n",
" f'{gpm_3imerghh_granule_one}.json',\n",
"), 'Binary file did not match previously generated hashes.'\n",
"\n",
"# HDF-5 example with kwargs\n",
"assert matches_reference_hash_file(\n",
" gpm_3imerghh_granule_one,\n",
" f'{gpm_3imerghh_granule_one}.decode.json',\n",
" skipped_variables_or_groups={'/Grid/time', '/Grid/time_bnds'},\n",
"), 'Binary file did not match previously generated hashes.'"
]
},
{
"cell_type": "markdown",
"id": "c61a5f43-2bf2-42f6-8c39-abeef381816f",
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# These packages are required to run the documentation Jupyter notebook.
earthdata-hashdiff ~= 1.1.0
earthdata-hashdiff ~= 1.2.0
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is in anticipation of the release from this PR merging.

notebook ~= 7.4.5
requests ~= 2.32.4
2 changes: 1 addition & 1 deletion earthdata_hashdiff/__about__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Version for the package - only edit when intending to release."""

version = '1.1.0'
version = '1.2.0'
2 changes: 2 additions & 0 deletions earthdata_hashdiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from earthdata_hashdiff.compare import (
geotiff_matches_reference_hash_file,
h5_matches_reference_hash_file,
matches_reference_hash_file,
nc4_matches_reference_hash_file,
)
from earthdata_hashdiff.generate import (
Expand All @@ -26,5 +27,6 @@
'get_hashes_from_nc4_file',
'geotiff_matches_reference_hash_file',
'h5_matches_reference_hash_file',
'matches_reference_hash_file',
'nc4_matches_reference_hash_file',
]
86 changes: 86 additions & 0 deletions earthdata_hashdiff/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
"""

import json
from collections.abc import Callable
from os.path import splitext
from typing import Literal, TypedDict, overload

from earthdata_hashdiff.generate import (
GEOTIFF_HASH_KEY,
Expand All @@ -14,6 +17,89 @@
get_hashes_from_xarray_input,
)

HashedFileTypes = Literal['GeoTIFF', 'HDF-5', 'netCDF4']


class XarrayParams(TypedDict, total=False):
    """Typing for unique inputs to matches_reference_hash_file_using_xarray.

    `total=False` marks every key optional, matching how callers of
    `matches_reference_hash_file` supply only the kwargs they need for
    netCDF4 or HDF-5 comparisons.

    """

    skipped_variables_or_groups: set[str]
    skipped_metadata_attributes: set[str]
    xarray_kwargs: dict


class GeoTIFFParams(TypedDict, total=False):
    """Typing for unique inputs to geotiff_matches_reference_hash_file.

    `total=False` marks the key optional, matching how callers of
    `matches_reference_hash_file` supply kwargs only when needed.

    """

    skipped_metadata_tags: set[str]


# Overload: netCDF4/HDF-5 comparison kwargs.
# NOTE(review): annotating `**kwargs` directly with a TypedDict types each
# keyword VALUE as XarrayParams; the intent appears to be
# `**kwargs: Unpack[XarrayParams]` (PEP 692, typing.Unpack) — confirm the
# minimum supported Python version before changing.
@overload
def matches_reference_hash_file(
    binary_file_path: str,
    reference_file_path: str,
    **kwargs: XarrayParams,
) -> bool: ...


# Overload: GeoTIFF comparison kwargs.
# NOTE(review): same as the xarray overload — `**kwargs: GeoTIFFParams` likely
# intends `Unpack[GeoTIFFParams]` (PEP 692); confirm before changing.
@overload
def matches_reference_hash_file(
    binary_file_path: str,
    reference_file_path: str,
    **kwargs: GeoTIFFParams,
) -> bool: ...


def matches_reference_hash_file(
    binary_file_path: str,
    reference_file_path: str,
    **kwargs: XarrayParams | GeoTIFFParams,
) -> bool:
    """Generate hashes for request output and compare to reference file.

    The file type is inferred from the extension of `binary_file_path` (see
    `guess_file_type`) and the matching comparison function for GeoTIFF,
    HDF-5 or netCDF4 files is invoked.

    Possible kwargs:

    * skipped_variables_or_groups - For netCDF4 or HDF-5 files.
    * skipped_metadata_attributes - For netCDF4 or HDF-5 files.
    * xarray_kwargs - For netCDF4 or HDF-5 files.
    * skipped_metadata_tags - For GeoTIFF files.

    Raises:
        ValueError: If the file extension is unrecognised (raised by
            `guess_file_type`), or if no comparison function is registered
            for the guessed file type.

    """
    file_type_comparisons: dict[HashedFileTypes, Callable[..., bool]] = {
        'GeoTIFF': geotiff_matches_reference_hash_file,
        'HDF-5': h5_matches_reference_hash_file,
        'netCDF4': nc4_matches_reference_hash_file,
    }

    file_type = guess_file_type(binary_file_path)

    comparison_function = file_type_comparisons.get(file_type)

    if comparison_function is None:
        # Defensive check: guess_file_type should only return keys present
        # in file_type_comparisons. Bug fix: the original message lacked the
        # f-string prefix, so "{file_type}" was never interpolated.
        raise ValueError(f'file_type not recognised: {file_type}')

    return comparison_function(binary_file_path, reference_file_path, **kwargs)


def guess_file_type(file_path: str) -> HashedFileTypes:
    """Infer the hashed file type from the extension of `file_path`.

    Extensions are matched case-insensitively against a lookup table; a
    `ValueError` is raised for any extension not in the table.

    """
    # Lookup table replacing an if/elif chain over extension groups.
    extension_to_type: dict[str, HashedFileTypes] = {
        '.tif': 'GeoTIFF',
        '.tiff': 'GeoTIFF',
        '.h5': 'HDF-5',
        '.hdf': 'HDF-5',
        '.hdf5': 'HDF-5',
        '.nc': 'netCDF4',
        '.nc4': 'netCDF4',
    }

    file_extension = splitext(file_path)[-1].lower()
    guessed_type = extension_to_type.get(file_extension)

    if guessed_type is None:
        raise ValueError(f'File extension not recognised: "{file_extension}"')

    return guessed_type


def matches_reference_hash_file_using_xarray(
binary_file_path: str,
Expand Down
Loading