From 76f94f981d6f73c3cd5f28e825763212da4db423 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sun, 10 Jul 2022 21:47:10 -0500 Subject: [PATCH 1/3] Fix MODIS readers applying add_offset incorrectly --- satpy/readers/hdfeos_base.py | 4 +- satpy/readers/modis_l2.py | 44 ++++++++++++++------- satpy/tests/reader_tests/_modis_fixtures.py | 35 ++++++++-------- satpy/tests/reader_tests/test_modis_l1b.py | 2 + satpy/tests/reader_tests/test_modis_l2.py | 21 +++++++--- 5 files changed, 69 insertions(+), 37 deletions(-) diff --git a/satpy/readers/hdfeos_base.py b/satpy/readers/hdfeos_base.py index cd47daa71f..f6a4bad566 100644 --- a/satpy/readers/hdfeos_base.py +++ b/satpy/readers/hdfeos_base.py @@ -231,9 +231,9 @@ def _scale_and_mask_data_array(self, data, is_category=False): # don't scale category products, even though scale_factor may equal 1 # we still need to convert integers to floats if scale_factor is not None and not is_category: - data = data * np.float32(scale_factor) if add_offset is not None and add_offset != 0: - data = data + add_offset + data = data - add_offset + data = data * np.float32(scale_factor) if good_mask is not None: data = data.where(good_mask, new_fill) diff --git a/satpy/readers/modis_l2.py b/satpy/readers/modis_l2.py index dd93da0806..2b595642cc 100644 --- a/satpy/readers/modis_l2.py +++ b/satpy/readers/modis_l2.py @@ -46,6 +46,7 @@ """ import logging +import dask.array as da import numpy as np import xarray as xr @@ -188,35 +189,50 @@ def _mask_with_quality_assurance_if_needed(self, dataset, dataset_info, dataset_ in zip(dataset.shape, quality_assurance.shape)] quality_assurance = np.tile(quality_assurance, duplication_factor) # Replace unassured data by NaN value - dataset[np.where(quality_assurance == 0)] = dataset.attrs["_FillValue"] + # dataset[np.where(quality_assurance == 0)] = dataset.attrs["_FillValue"] + dataset = dataset.where(quality_assurance != 0, dataset.attrs["_FillValue"]) return dataset def _extract_byte_mask(dataset, byte_information, bit_start, bit_count): + attrs = dataset.attrs.copy() + if isinstance(byte_information, int): # Only one byte: select the byte information byte_dataset = dataset[byte_information, :, :] + dataset = _bits_strip(bit_start, bit_count, byte_dataset) elif isinstance(byte_information, (list, tuple)) and len(byte_information) == 2: # Two bytes: recombine the two bytes - dataset_a = dataset[byte_information[0], :, :] - dataset_b = dataset[byte_information[1], :, :] - dataset_a = np.uint16(dataset_a) - dataset_a = np.left_shift(dataset_a, 8) # dataset_a << 8 - byte_dataset = np.bitwise_or(dataset_a, dataset_b).astype(np.uint16) - shape = byte_dataset.shape - # We replicate the concatenated byte with the right shape - byte_dataset = np.repeat(np.repeat(byte_dataset, 4, axis=0), 4, axis=1) - # All bits carry information, we update bit_start consequently - bit_start = np.arange(16, dtype=np.uint16).reshape((4, 4)) - bit_start = np.tile(bit_start, (shape[0], shape[1])) + byte_mask = da.map_blocks( + _extract_two_byte_mask, + dataset.data[byte_information[0]], + dataset.data[byte_information[1]], + bit_start=bit_start, + bit_count=bit_count, + dtype=np.uint16, + meta=np.array((), dtype=np.uint16), + chunks=tuple(tuple(chunk_size * 4 for chunk_size in dim_chunks) for dim_chunks in dataset.chunks[1:]), + ) + dataset = xr.DataArray(byte_mask, dims=dataset.dims[1:]) # Compute the final bit mask - attrs = dataset.attrs.copy() - dataset = _bits_strip(bit_start, bit_count, byte_dataset) dataset.attrs = attrs return dataset +def _extract_two_byte_mask(data_a: np.ndarray, data_b: np.ndarray, bit_start: int, bit_count: int) -> np.ndarray: + data_a = data_a.astype(np.uint16, copy=False) + data_a = np.left_shift(data_a, 8) # dataset_a << 8 + byte_dataset = np.bitwise_or(data_a, data_b).astype(np.uint16) + shape = byte_dataset.shape + # We replicate the concatenated byte with the right shape + byte_dataset = np.repeat(np.repeat(byte_dataset, 4, axis=0), 4, axis=1) + # All bits carry information, we update bit_start consequently + bit_start = np.arange(16, dtype=np.uint16).reshape((4, 4)) + bit_start = np.tile(bit_start, (shape[0], shape[1])) + return _bits_strip(bit_start, bit_count, byte_dataset) + + def _bits_strip(bit_start, bit_count, value): """Extract specified bit from bit representation of integer value. diff --git a/satpy/tests/reader_tests/_modis_fixtures.py b/satpy/tests/reader_tests/_modis_fixtures.py index ab09635d45..34cb6a7aab 100644 --- a/satpy/tests/reader_tests/_modis_fixtures.py +++ b/satpy/tests/reader_tests/_modis_fixtures.py @@ -36,7 +36,8 @@ AVAILABLE_QKM_PRODUCT_NAMES = ['1', '2'] SCAN_LEN_5KM = 6 # 3 scans of 5km data SCAN_WIDTH_5KM = 270 -SCALE_FACTOR = 1 +SCALE_FACTOR = 0.5 +ADD_OFFSET = -0.5 RES_TO_REPEAT_FACTOR = { 250: 20, 500: 10, @@ -74,7 +75,7 @@ def _generate_angle_data(resolution: int) -> np.ndarray: def _generate_visible_data(resolution: int, num_bands: int, dtype=np.uint16) -> np.ndarray: shape = _shape_for_resolution(resolution) - data = np.zeros((num_bands, shape[0], shape[1]), dtype=dtype) + data = np.ones((num_bands, shape[0], shape[1]), dtype=dtype) # add fill value to every band data[:, -1, -1] = 65535 @@ -118,7 +119,8 @@ def _get_angles_variable_info(resolution: int) -> dict: 'dim_labels': [ f'{dim_factor}*nscans:MODIS_SWATH_Type_L1B', '1KM_geo_dim:MODIS_SWATH_Type_L1B'], - 'scale_factor': 0.01 + 'scale_factor': 0.01, + 'add_offset': -0.01, }, } angles_info = {} @@ -146,8 +148,8 @@ def _get_visible_variable_info(var_name: str, resolution: int, bands: list[str]) row_dim_name, col_dim_name], 'valid_range': (0, 32767), - 'reflectance_scales': (1,) * num_bands, - 'reflectance_offsets': (0,) * num_bands, + 'reflectance_scales': (2.0,) * num_bands, + 'reflectance_offsets': (-0.5,) * num_bands, 'band_names': ",".join(bands), }, }, @@ -264,6 +266,7 @@ def _add_variable_to_file(h, var_name, var_info): dim_count += 1 v.setfillvalue(var_info['fill_value']) v.scale_factor = var_info['attrs'].get('scale_factor', SCALE_FACTOR) + v.add_offset = var_info['attrs'].get('add_offset', ADD_OFFSET) for attr_key, attr_val in var_info['attrs'].items(): if attr_key == 'dim_labels': continue @@ -405,7 +408,7 @@ def modis_l1b_nasa_1km_mod03_files(modis_l1b_nasa_mod021km_file, modis_l1b_nasa_ def _get_basic_variable_info(var_name: str, resolution: int) -> dict: shape = _shape_for_resolution(resolution) - data = np.zeros((shape[0], shape[1]), dtype=np.uint16) + data = np.ones((shape[0], shape[1]), dtype=np.uint16) row_dim_name = f'Cell_Along_Swath_{resolution}m:modl2' col_dim_name = f'Cell_Across_Swath_{resolution}m:modl2' return { @@ -418,8 +421,8 @@ def _get_basic_variable_info(var_name: str, resolution: int) -> dict: 'dim_labels': [row_dim_name, col_dim_name], 'valid_range': (0, 32767), - 'scale_factor': 1., - 'add_offset': 0., + 'scale_factor': 2.0, + 'add_offset': -1.0, }, }, } @@ -457,8 +460,8 @@ def _get_cloud_mask_variable_info(var_name: str, resolution: int) -> dict: col_dim_name, 'Quality_Dimension:mod35'], 'valid_range': (0, -1), - 'scale_factor': 1., - 'add_offset': 0., + 'scale_factor': 2., + 'add_offset': -0.5, }, }, } @@ -479,8 +482,8 @@ def _get_mask_byte1_variable_info() -> dict: 'dim_labels': [row_dim_name, col_dim_name], 'valid_range': (0, 4), - 'scale_factor': 1., - 'add_offset': 0., + 'scale_factor': 2, + 'add_offset': -1, }, }, @@ -493,8 +496,8 @@ def _get_mask_byte1_variable_info() -> dict: 'dim_labels': [row_dim_name, col_dim_name], 'valid_range': (0, 4), - 'scale_factor': 1., - 'add_offset': 0., + 'scale_factor': 2, + 'add_offset': -1, }, }, "MODIS_Snow_Ice_Flag": { @@ -506,8 +509,8 @@ def _get_mask_byte1_variable_info() -> dict: 'dim_labels': [row_dim_name, col_dim_name], 'valid_range': (0, 2), - 'scale_factor': 1., - 'add_offset': 0., + 'scale_factor': 2, + 'add_offset': -1, }, }, } diff --git a/satpy/tests/reader_tests/test_modis_l1b.py b/satpy/tests/reader_tests/test_modis_l1b.py index 245eb91395..c91c68336a 100644 --- a/satpy/tests/reader_tests/test_modis_l1b.py +++ b/satpy/tests/reader_tests/test_modis_l1b.py @@ -159,6 +159,7 @@ def test_load_vis(self, modis_l1b_nasa_mod021km_file): dataset_name = '1' scene.load([dataset_name]) dataset = scene[dataset_name] + assert dataset[0, 0] == 300.0 assert dataset.shape == _shape_for_resolution(1000) assert dataset.attrs['resolution'] == 1000 _check_shared_metadata(dataset) @@ -177,6 +178,7 @@ def test_load_vis_saturation(self, mask_saturated, modis_l1b_nasa_mod021km_file) # check saturation fill values data = dataset.values + assert dataset[0, 0] == 300.0 assert np.isnan(data[-1, -1]) # normal fill value if mask_saturated: assert np.isnan(data[-1, -2]) # saturation diff --git a/satpy/tests/reader_tests/test_modis_l2.py b/satpy/tests/reader_tests/test_modis_l2.py index d8acf7caca..848bd1bf05 100644 --- a/satpy/tests/reader_tests/test_modis_l2.py +++ b/satpy/tests/reader_tests/test_modis_l2.py @@ -20,6 +20,7 @@ from __future__ import annotations import dask +import dask.array as da import numpy as np import pytest from pytest_lazyfixture import lazy_fixture @@ -114,7 +115,10 @@ def test_load_category_dataset(self, input_files, loadables, request_resolution, cat_id = make_dataid(name=ds_name, resolution=exp_resolution) assert cat_id in scene cat_data_arr = scene[cat_id] + assert isinstance(cat_data_arr.data, da.Array) + cat_data_arr = cat_data_arr.compute() assert cat_data_arr.shape == _shape_for_resolution(exp_resolution) + assert cat_data_arr.values[0, 0] == 0.0 assert cat_data_arr.attrs.get('resolution') == exp_resolution # mask variables should be integers assert np.issubdtype(cat_data_arr.dtype, np.integer) @@ -136,27 +140,34 @@ def test_load_250m_cloud_mask_dataset(self, input_files, exp_area): cloud_mask_id = make_dataid(name=dataset_name, resolution=250) assert cloud_mask_id in scene cloud_mask = scene[cloud_mask_id] + assert isinstance(cloud_mask.data, da.Array) + cloud_mask = cloud_mask.compute() assert cloud_mask.shape == _shape_for_resolution(250) + assert cloud_mask.values[0, 0] == 0.0 # mask variables should be integers assert np.issubdtype(cloud_mask.dtype, np.integer) assert cloud_mask.attrs.get('_FillValue') is not None _check_shared_metadata(cloud_mask, expect_area=exp_area) @pytest.mark.parametrize( - ('input_files', 'loadables', 'exp_resolution', 'exp_area'), + ('input_files', 'loadables', 'exp_resolution', 'exp_area', 'exp_value'), [ - [lazy_fixture('modis_l2_nasa_mod06_file'), ["surface_pressure"], 5000, True], - [lazy_fixture('modis_l2_imapp_snowmask_file'), ["snow_mask"], 1000, False], - [lazy_fixture('modis_l2_imapp_snowmask_geo_files'), ["snow_mask"], 1000, True], + [lazy_fixture('modis_l2_nasa_mod06_file'), ["surface_pressure"], 5000, True, 4.0], + # snow mask is considered a category product, factor/offset ignored + [lazy_fixture('modis_l2_imapp_snowmask_file'), ["snow_mask"], 1000, False, 1.0], + [lazy_fixture('modis_l2_imapp_snowmask_geo_files'), ["snow_mask"], 1000, True, 1.0], ] ) - def test_load_l2_dataset(self, input_files, loadables, exp_resolution, exp_area): + def test_load_l2_dataset(self, input_files, loadables, exp_resolution, exp_area, exp_value): """Load and check an L2 variable.""" scene = Scene(reader='modis_l2', filenames=input_files) scene.load(loadables) for ds_name in loadables: assert ds_name in scene data_arr = scene[ds_name] + assert isinstance(data_arr.data, da.Array) + data_arr = data_arr.compute() + assert data_arr.values[0, 0] == exp_value assert data_arr.shape == _shape_for_resolution(exp_resolution) assert data_arr.attrs.get('resolution') == exp_resolution _check_shared_metadata(data_arr, expect_area=exp_area) From cdf5122de6e24aaae0cc62521da4ddae2bc59dc3 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Mon, 11 Jul 2022 07:17:02 -0500 Subject: [PATCH 2/3] Remove old commented code --- satpy/readers/modis_l2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/satpy/readers/modis_l2.py b/satpy/readers/modis_l2.py index 2b595642cc..108921e2b5 100644 --- a/satpy/readers/modis_l2.py +++ b/satpy/readers/modis_l2.py @@ -189,7 +189,6 @@ def _mask_with_quality_assurance_if_needed(self, dataset, dataset_info, dataset_ in zip(dataset.shape, quality_assurance.shape)] quality_assurance = np.tile(quality_assurance, duplication_factor) # Replace unassured data by NaN value - # dataset[np.where(quality_assurance == 0)] = dataset.attrs["_FillValue"] dataset = dataset.where(quality_assurance != 0, dataset.attrs["_FillValue"]) return dataset From 442cccd9851c53d2236bc6805ba4c1f36c6c0970 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Mon, 11 Jul 2022 07:17:21 -0500 Subject: [PATCH 3/3] Add description of odd add_offset usage in MODIS --- satpy/readers/hdfeos_base.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/satpy/readers/hdfeos_base.py b/satpy/readers/hdfeos_base.py index f6a4bad566..c5b3852b58 100644 --- a/satpy/readers/hdfeos_base.py +++ b/satpy/readers/hdfeos_base.py @@ -225,6 +225,17 @@ def load_dataset(self, dataset_name, is_category=False): return data def _scale_and_mask_data_array(self, data, is_category=False): + """Unscale byte data and mask invalid/fill values. + + MODIS requires unscaling the in-file bytes in an unexpected way:: + + data = (byte_value - add_offset) * scale_factor + + See the below L1B User's Guide Appendix C for more information: + + https://mcst.gsfc.nasa.gov/sites/default/files/file_attachments/M1054E_PUG_2017_0901_V6.2.2_Terra_V6.2.1_Aqua.pdf + + """ good_mask, new_fill = self._get_good_data_mask(data, is_category=is_category) scale_factor = data.attrs.pop('scale_factor', None) add_offset = data.attrs.pop('add_offset', None)