Add option to replace saturated MODIS L1b values with max valid value #2057

Merged: 14 commits, Apr 8, 2022
Changes from 9 commits
6 changes: 3 additions & 3 deletions satpy/readers/hdfeos_base.py
@@ -91,7 +91,7 @@ def _find_and_run_interpolation(interpolation_functions, src_resolution, dst_res
class HDFEOSBaseFileReader(BaseFileHandler):
"""Base file handler for HDF EOS data for both L1b and L2 products."""

def __init__(self, filename, filename_info, filetype_info):
def __init__(self, filename, filename_info, filetype_info, **kwargs):
"""Initialize the base reader."""
BaseFileHandler.__init__(self, filename, filename_info, filetype_info)
try:
@@ -291,9 +291,9 @@ class HDFEOSGeoReader(HDFEOSBaseFileReader):
'solar_zenith_angle': ('SolarZenith', 'Solar_Zenith'),
}

def __init__(self, filename, filename_info, filetype_info):
def __init__(self, filename, filename_info, filetype_info, **kwargs):
"""Initialize the geographical reader."""
HDFEOSBaseFileReader.__init__(self, filename, filename_info, filetype_info)
HDFEOSBaseFileReader.__init__(self, filename, filename_info, filetype_info, **kwargs)
self.cache = {}

@staticmethod
266 changes: 164 additions & 102 deletions satpy/readers/modis_l1b.py
@@ -25,11 +25,39 @@
a pattern similar to the following one:

.. parsed-literal::

M[O/Y]D02[1/H/Q]KM.A[date].[time].[collection].[processing_time].hdf

Other patterns where "collection" and/or "processing_time" are missing might also work
(see the readers yaml file for details). Geolocation files (MOD03) are also supported.

The IMAPP direct broadcast naming format is also supported with names like:
``a1.12226.1846.1000m.hdf``.

Saturation Handling
-------------------

Band 2 of the MODIS sensor is available in 250m, 500m, and 1km resolutions.
The band data may include a special fill value to indicate when the detector
was saturated in the 250m version of the data. When the data is aggregated to
coarser resolutions this saturation fill value is converted to a
"can't aggregate" fill value. By default, Satpy will replace these fill values
with NaN to indicate they are invalid. This is typically undesirable when
generating images from the data, as the invalid pixels appear as "holes" in
bright clouds. To control this, the keyword argument ``mask_saturated`` can be
set to ``False``, which replaces these two fill values with the maximum valid
value.

.. code-block:: python

    import satpy

    scene = satpy.Scene(filenames=filenames,
                        reader='modis_l1b',
                        reader_kwargs={'mask_saturated': False})
    scene.load(['2'])

Note that the saturation fill value can appear in other bands (e.g. bands 7-19)
in addition to band 2. Also, the "can't aggregate" fill value is a generic
"catch-all" for any problems encountered when aggregating high-resolution bands
to lower resolutions. Filling this with the max valid value could therefore
replace non-saturated invalid pixels with valid values.
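
For illustration, the replacement described above boils down to the following
standalone sketch (hypothetical values; the actual logic lives in
``HDFEOSBandReader._mask_invalid_or_fill_saturated`` below):

.. code-block:: python

    import numpy as np
    import xarray as xr

    # Hypothetical raw counts containing a saturation fill (65533), a
    # "can't aggregate" fill (65528), and a generic fill (65535).
    counts = xr.DataArray(
        np.array([[100., 65533.], [65528., 65535.]], dtype=np.float32))
    valid_min, valid_max = np.float32(0), np.float32(32767)

    counts = counts.where(counts >= valid_min)  # mask low fill values
    # With mask_saturated=False, the two special fills become the max
    # valid value instead of NaN...
    counts = counts.where((counts != 65533) & (counts != 65528), valid_max)
    counts = counts.where(counts <= valid_max)  # ...all other fills -> NaN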

Geolocation files
-----------------
@@ -62,130 +90,164 @@ class HDFEOSBandReader(HDFEOSBaseFileReader):
"Q": 250,
"H": 500}

def __init__(self, filename, filename_info, filetype_info):
res_to_possible_variable_names = {
1000: ['EV_250_Aggr1km_RefSB',
'EV_500_Aggr1km_RefSB',
'EV_1KM_RefSB',
'EV_1KM_Emissive'],
500: ['EV_250_Aggr500_RefSB',
'EV_500_RefSB'],
250: ['EV_250_RefSB'],
}

def __init__(self, filename, filename_info, filetype_info, mask_saturated=True, **kwargs):
Member: What are the kwargs for?

Member Author: The kwargs are actually needed if this reader (file handler) is loaded
at the same time as other readers. This is an unfortunate side effect of how the Scene
handles reader keyword arguments, and we've done it in other readers. The Scene, when
given a dictionary of keyword arguments, will pass them to all readers being loaded. If
a file handler/reader doesn't have **kwargs, then any unrecognized keyword arguments
will raise an exception.

Recently @gerritholl added the ability to specify the exact reader you want to pass
keyword arguments to from the Scene __init__, so maybe this isn't required anymore, but
I'd be nervous about deprecating it too fast. I could remove it for this reader with
the understanding that I only need to handle mask_saturated and the assumption that the
base HDFEOS file handler has no other keyword arguments. I could then remove the
**kwargs that I added to the base HDFEOS file handler above.

Member Author: So I looked at this more and I think we need this in the other file
handlers. The base and geo file handlers in hdfeos_base.py will also both receive the
mask_saturated=True keyword argument and won't know what to do with it. The easiest way
to capture that and ignore it is to use **kwargs.
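
For illustration, that per-reader form might look like the following minimal
sketch (assuming the nested-dict reader_kwargs form described above; filenames
is a placeholder):

    import satpy

    # Hypothetical: only the 'modis_l1b' reader receives mask_saturated;
    # other readers loaded at the same time see no unrecognized kwargs.
    scene = satpy.Scene(
        filenames=filenames,  # placeholder: list of MODIS L1b file paths
        reader_kwargs={'modis_l1b': {'mask_saturated': False}},
    )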

"""Init the file handler."""
HDFEOSBaseFileReader.__init__(self, filename, filename_info, filetype_info)
HDFEOSBaseFileReader.__init__(self, filename, filename_info, filetype_info, **kwargs)
self._mask_saturated = mask_saturated

ds = self.metadata['INVENTORYMETADATA'][
'COLLECTIONDESCRIPTIONCLASS']['SHORTNAME']['VALUE']
self.resolution = self.res[ds[-3]]

def get_dataset(self, key, info):
"""Read data from file and return the corresponding projectables."""
datadict = {
1000: ['EV_250_Aggr1km_RefSB',
'EV_500_Aggr1km_RefSB',
'EV_1KM_RefSB',
'EV_1KM_Emissive'],
500: ['EV_250_Aggr500_RefSB',
'EV_500_RefSB'],
250: ['EV_250_RefSB']}

if self.resolution != key['resolution']:
return

datasets = datadict[self.resolution]
subdata, uncertainty, var_attrs, band_index = self._get_band_data_uncertainty_attrs_index(key["name"])
array = xr.DataArray(from_sds(subdata, chunks=CHUNK_SIZE)[band_index, :, :],
dims=['y', 'x']).astype(np.float32)
valid_range = var_attrs['valid_range']
array = self._mask_invalid_or_fill_saturated(array, valid_range)
Member: I don't really like having one function doing two things here, could we move
the if self._mask_saturated logic here and split that function?

Member Author: I could, but it would result in duplicated code as the two options both
check valid_min... nevermind, re-reading it is clear that I could do the masking first
as a separate method. I'll see what I can do.

Member Author: I separated this out. I'm not sure if one placement of the if statement
is better than another. Let me know what you think.

array = self._mask_uncertain_pixels(array, uncertainty, band_index)
projectable = self._calibrate_data(key, info, array, var_attrs, band_index)

# if ((platform_name == 'Aqua' and key['name'] in ["6", "27", "36"]) or
# (platform_name == 'Terra' and key['name'] in ["29"])):
# height, width = projectable.shape
# row_indices = projectable.mask.sum(1) == width
# if row_indices.sum() != height:
# projectable.mask[row_indices, :] = True

# Get the orbit number
# if not satscene.orbit:
# mda = self.data.attributes()["CoreMetadata.0"]
# orbit_idx = mda.index("ORBITNUMBER")
# satscene.orbit = mda[orbit_idx + 111:orbit_idx + 116]

# Trimming out dead sensor lines (detectors) on terra:
# (in addition channel 27, 30, 34, 35, and 36 are noisy)
# if satscene.satname == "terra":
# for band in ["29"]:
# if not satscene[band].is_loaded() or satscene[band].data.mask.all():
# continue
# width = satscene[band].data.shape[1]
# height = satscene[band].data.shape[0]
# indices = satscene[band].data.mask.sum(1) < width
# if indices.sum() == height:
# continue
# satscene[band] = satscene[band].data[indices, :]
# satscene[band].area = geometry.SwathDefinition(
# lons=satscene[band].area.lons[indices, :],
# lats=satscene[band].area.lats[indices, :])
self._add_satpy_metadata(key, projectable)
return projectable

def _get_band_data_uncertainty_attrs_index(self, band_name):
datasets = self.res_to_possible_variable_names[self.resolution]
for dataset in datasets:
subdata = self.sd.select(dataset)
var_attrs = subdata.attributes()
band_names = var_attrs["band_names"].split(",")

# get the relative indices of the desired channel
try:
index = band_names.index(key['name'])
band_index = self._get_band_index(var_attrs, band_name)
except ValueError:
# can't find band in list of bands
continue
uncertainty = self.sd.select(dataset + "_Uncert_Indexes")
array = xr.DataArray(from_sds(subdata, chunks=CHUNK_SIZE)[index, :, :],
dims=['y', 'x']).astype(np.float32)
valid_range = var_attrs['valid_range']

# Fill values:
# Data Value Meaning
# 65535 Fill Value (includes reflective band data at night mode
# and completely missing L1A scans)
# 65534 L1A DN is missing within a scan
# 65533 Detector is saturated
# 65532 Cannot compute zero point DN, e.g., SV is saturated
# 65531 Detector is dead (see comments below)
# 65530 RSB dn** below the minimum of the scaling range
# 65529 TEB radiance or RSB dn** exceeds the maximum of the
# scaling range
# 65528 Aggregation algorithm failure
# 65527 Rotation of Earth view Sector from nominal science
# collection position
# 65526 Calibration coefficient b1 could not be computed
# 65525 Subframe is dead
# 65524 Both sides of the PCLW electronics on simultaneously
# 65501 - 65523 (reserved for future use)
# 65500 NAD closed upper limit

array = array.where(array >= np.float32(valid_range[0]))
array = array.where(array <= np.float32(valid_range[1]))
array = array.where(from_sds(uncertainty, chunks=CHUNK_SIZE)[index, :, :] < 15)

if key['calibration'] == 'brightness_temperature':
projectable = calibrate_bt(array, var_attrs, index, key['name'])
info.setdefault('units', 'K')
info.setdefault('standard_name', 'toa_brightness_temperature')
elif key['calibration'] == 'reflectance':
projectable = calibrate_refl(array, var_attrs, index)
info.setdefault('units', '%')
info.setdefault('standard_name',
'toa_bidirectional_reflectance')
elif key['calibration'] == 'radiance':
projectable = calibrate_radiance(array, var_attrs, index)
info.setdefault('units', var_attrs.get('radiance_units'))
info.setdefault('standard_name',
'toa_outgoing_radiance_per_unit_wavelength')
elif key['calibration'] == 'counts':
projectable = calibrate_counts(array, var_attrs, index)
info.setdefault('units', 'counts')
info.setdefault('standard_name', 'counts') # made up
else:
raise ValueError("Unknown calibration for "
"key: {}".format(key))
projectable.attrs = info

# if ((platform_name == 'Aqua' and key['name'] in ["6", "27", "36"]) or
# (platform_name == 'Terra' and key['name'] in ["29"])):
# height, width = projectable.shape
# row_indices = projectable.mask.sum(1) == width
# if row_indices.sum() != height:
# projectable.mask[row_indices, :] = True

# Get the orbit number
# if not satscene.orbit:
# mda = self.data.attributes()["CoreMetadata.0"]
# orbit_idx = mda.index("ORBITNUMBER")
# satscene.orbit = mda[orbit_idx + 111:orbit_idx + 116]

# Trimming out dead sensor lines (detectors) on terra:
# (in addition channel 27, 30, 34, 35, and 36 are noisy)
# if satscene.satname == "terra":
# for band in ["29"]:
# if not satscene[band].is_loaded() or satscene[band].data.mask.all():
# continue
# width = satscene[band].data.shape[1]
# height = satscene[band].data.shape[0]
# indices = satscene[band].data.mask.sum(1) < width
# if indices.sum() == height:
# continue
# satscene[band] = satscene[band].data[indices, :]
# satscene[band].area = geometry.SwathDefinition(
# lons=satscene[band].area.lons[indices, :],
# lats=satscene[band].area.lats[indices, :])
self._add_satpy_metadata(key, projectable)
return projectable
return subdata, uncertainty, var_attrs, band_index
Member: This looks like a lot of return values :) Would it be worth having a class for
this?

Member: Or build the DataArray here and put, e.g., uncertainty[band_index] as an
ancillary dataset in the attrs?

Member Author: Yeah, I didn't like this much either, but it wasn't obvious how to do it
any other way. It overall seemed better than the huge method that was here before. My
goal was to avoid letting datasets or dataset (the name of the variable in the file to
use) leak to the outer scope, but maybe I make one method that returns dataset and
band_index... nah, because then I still have to self.sd.select the dataset again and
get the var_attrs.

The problem with making the DataArray here is that it actually doesn't get me much
since var_attrs isn't actually applied to the DataArray at all. I'm not sure how I feel
about the uncertainty indexes going in the attrs either. I'm leaning towards don't like
it 😉

I'll think about doing a separate helper class or something while I work on other stuff
today.

Member Author: Ok, so it required a tiny bit of duplicated logic (.select), BUT I think
it is overall cleaner. Check it out.
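
For reference, the "class for this" alternative floated above might have looked
something like this hypothetical sketch (not what the PR adopted):

    from dataclasses import dataclass
    from typing import Any

    @dataclass
    class _BandSelection:
        """Hypothetical bundle for the four values returned above."""

        subdata: Any        # pyhdf SDS handle for the EV variable
        uncertainty: Any    # matching *_Uncert_Indexes SDS handle
        var_attrs: dict
        band_index: int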


def _get_band_index(self, var_attrs, band_name):
"""Get the relative indices of the desired channel."""
band_names = var_attrs["band_names"].split(",")
index = band_names.index(band_name)
return index

def _mask_invalid_or_fill_saturated(self, array, valid_range):
"""Replace fill values with NaN or replace saturation with max reflectance.

If the file handler was created with ``mask_saturated`` set to
``True`` then all invalid/fill values are set to NaN. If ``False``
then the fill values 65528 and 65533 are set to the maximum valid
value. These values correspond to "can't aggregate" and "saturation".

Fill values:

* 65535 Fill Value (includes reflective band data at night mode
and completely missing L1A scans)
* 65534 L1A DN is missing within a scan
* 65533 Detector is saturated
* 65532 Cannot compute zero point DN, e.g., SV is saturated
* 65531 Detector is dead (see comments below)
* 65530 RSB dn** below the minimum of the scaling range
* 65529 TEB radiance or RSB dn exceeds the maximum of the scaling range
* 65528 Aggregation algorithm failure
* 65527 Rotation of Earth view Sector from nominal science collection position
* 65526 Calibration coefficient b1 could not be computed
* 65525 Subframe is dead
* 65524 Both sides of the PCLW electronics on simultaneously
* 65501 - 65523 (reserved for future use)
* 65500 NAD closed upper limit

"""
valid_min = np.float32(valid_range[0])
valid_max = np.float32(valid_range[1])
array = array.where(array >= valid_min)
if self._mask_saturated:
array = array.where(array <= valid_max)
else:
array = array.where((array != 65533) & (array != 65528), valid_max)
array = array.where(array <= valid_max)
return array

def _mask_uncertain_pixels(self, array, uncertainty, band_index):
if not self._mask_saturated:
return array
band_uncertainty = from_sds(uncertainty, chunks=CHUNK_SIZE)[band_index, :, :]
array = array.where(band_uncertainty < 15)
return array

def _calibrate_data(self, key, info, array, var_attrs, index):
if key['calibration'] == 'brightness_temperature':
projectable = calibrate_bt(array, var_attrs, index, key['name'])
info.setdefault('units', 'K')
info.setdefault('standard_name', 'toa_brightness_temperature')
elif key['calibration'] == 'reflectance':
projectable = calibrate_refl(array, var_attrs, index)
info.setdefault('units', '%')
info.setdefault('standard_name',
'toa_bidirectional_reflectance')
elif key['calibration'] == 'radiance':
projectable = calibrate_radiance(array, var_attrs, index)
info.setdefault('units', var_attrs.get('radiance_units'))
info.setdefault('standard_name',
'toa_outgoing_radiance_per_unit_wavelength')
elif key['calibration'] == 'counts':
projectable = calibrate_counts(array, var_attrs, index)
info.setdefault('units', 'counts')
info.setdefault('standard_name', 'counts') # made up
else:
raise ValueError("Unknown calibration for "
"key: {}".format(key))
projectable.attrs = info
return projectable


class MixedHDFEOSReader(HDFEOSGeoReader, HDFEOSBandReader):
"""A file handler for the files that have both regular bands and geographical information in them."""

def __init__(self, filename, filename_info, filetype_info):
def __init__(self, filename, filename_info, filetype_info, **kwargs):
"""Init the file handler."""
HDFEOSGeoReader.__init__(self, filename, filename_info, filetype_info)
HDFEOSBandReader.__init__(self, filename, filename_info, filetype_info)
HDFEOSGeoReader.__init__(self, filename, filename_info, filetype_info, **kwargs)
HDFEOSBandReader.__init__(self, filename, filename_info, filetype_info, **kwargs)

def get_dataset(self, key, info):
"""Get the dataset."""
18 changes: 17 additions & 1 deletion satpy/tests/reader_tests/_modis_fixtures.py
@@ -75,9 +75,24 @@ def _generate_angle_data(resolution: int) -> np.ndarray:
def _generate_visible_data(resolution: int, num_bands: int, dtype=np.uint16) -> np.ndarray:
shape = _shape_for_resolution(resolution)
data = np.zeros((num_bands, shape[0], shape[1]), dtype=dtype)

# add fill value to every band
data[:, -1, -1] = 65535

# add band 2 saturation and can't aggregate fill values
data[1, -1, -2] = 65533
data[1, -1, -3] = 65528
return data


def _generate_visible_uncertainty_data(shape: tuple) -> np.ndarray:
uncertainty = np.zeros(shape, dtype=np.uint8)
uncertainty[:, -1, -1] = 15 # fill value
uncertainty[:, -1, -2] = 15 # saturated
uncertainty[:, -1, -3] = 15 # can't aggregate
return uncertainty


def _get_lonlat_variable_info(resolution: int) -> dict:
lon_5km, lat_5km = _generate_lonlat_data(resolution)
return {
@@ -115,6 +130,7 @@ def _get_angles_variable_info(resolution: int) -> dict:
def _get_visible_variable_info(var_name: str, resolution: int, bands: list[str]):
num_bands = len(bands)
data = _generate_visible_data(resolution, len(bands))
uncertainty = _generate_visible_uncertainty_data(data.shape)
dim_factor = RES_TO_REPEAT_FACTOR[resolution] * 2
band_dim_name = f"Band_{resolution}_{num_bands}_RefSB:MODIS_SWATH_Type_L1B"
row_dim_name = f'{dim_factor}*nscans:MODIS_SWATH_Type_L1B'
@@ -136,7 +152,7 @@ def _get_visible_variable_info(var_name: str, resolution: int, bands: list[str])
},
},
var_name + '_Uncert_Indexes': {
'data': np.zeros(data.shape, dtype=np.uint8),
'data': uncertainty,
'type': SDC.UINT8,
'fill_value': 255,
'attrs': {
24 changes: 24 additions & 0 deletions satpy/tests/reader_tests/test_modis_l1b.py
@@ -162,3 +162,27 @@ def test_load_vis(self, modis_l1b_nasa_mod021km_file):
assert dataset.shape == _shape_for_resolution(1000)
assert dataset.attrs['resolution'] == 1000
_check_shared_metadata(dataset)

@pytest.mark.parametrize("mask_saturated", [False, True])
def test_load_vis_saturation(self, mask_saturated, modis_l1b_nasa_mod021km_file):
"""Test loading the visible band with and without saturation handling."""
scene = Scene(reader='modis_l1b', filenames=modis_l1b_nasa_mod021km_file,
reader_kwargs={"mask_saturated": mask_saturated})
dataset_name = '2'
scene.load([dataset_name])
dataset = scene[dataset_name]
assert dataset.shape == _shape_for_resolution(1000)
assert dataset.attrs['resolution'] == 1000
_check_shared_metadata(dataset)

# check saturation fill values
data = dataset.values
assert np.isnan(data[-1, -1]) # normal fill value
if mask_saturated:
assert np.isnan(data[-1, -2]) # saturation
assert np.isnan(data[-1, -3]) # can't aggregate
else:
# test data factor/offset are 1/0
# albedos are converted to %
assert data[-1, -2] >= 32767 * 100.0 # saturation
assert data[-1, -3] >= 32767 * 100.0 # can't aggregate
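
For reference, these parametrized cases can be run on their own with a standard
pytest keyword filter, for example:

    pytest satpy/tests/reader_tests/test_modis_l1b.py -k test_load_vis_saturation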