Fix MODIS readers applying add_offset incorrectly #2142

Merged
merged 3 commits on Jul 11, 2022
15 changes: 13 additions & 2 deletions satpy/readers/hdfeos_base.py
@@ -225,15 +225,26 @@ def load_dataset(self, dataset_name, is_category=False):
        return data

    def _scale_and_mask_data_array(self, data, is_category=False):
+        """Unscale byte data and mask invalid/fill values.
+
+        MODIS requires unscaling the in-file bytes in an unexpected way::
+
+            data = (byte_value - add_offset) * scale_factor
+
+        See Appendix C of the L1B User's Guide for more information:
+
+        https://mcst.gsfc.nasa.gov/sites/default/files/file_attachments/M1054E_PUG_2017_0901_V6.2.2_Terra_V6.2.1_Aqua.pdf
+
+        """
        good_mask, new_fill = self._get_good_data_mask(data, is_category=is_category)
        scale_factor = data.attrs.pop('scale_factor', None)
        add_offset = data.attrs.pop('add_offset', None)
        # don't scale category products, even though scale_factor may equal 1
        # we still need to convert integers to floats
        if scale_factor is not None and not is_category:
-            data = data * np.float32(scale_factor)
            if add_offset is not None and add_offset != 0:
-                data = data + add_offset
+                data = data - add_offset
+            data = data * np.float32(scale_factor)

        if good_mask is not None:
            data = data.where(good_mask, new_fill)
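A note on the fix above: the CF/netCDF convention unscales stored values as scale_factor * value + add_offset, while the MODIS L1B PUG (Appendix C) defines value = (stored - add_offset) * scale_factor, which is what the reordered code implements. A minimal sketch of how far apart the two conventions land (hypothetical factor/offset values, not from any real granule):

```python
import numpy as np

raw = np.array([1, 2, 3], dtype=np.uint16)       # packed counts as stored in the file
scale_factor, add_offset = np.float32(0.5), np.float32(-1.0)

cf_style = raw * scale_factor + add_offset       # netCDF/CF order: [-0.5, 0.0, 0.5]
modis_style = (raw - add_offset) * scale_factor  # MODIS PUG order: [1.0, 1.5, 2.0]

# the two agree only when add_offset == 0, which is why the bug went unnoticed
assert not np.allclose(cf_style, modis_style)
```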
43 changes: 29 additions & 14 deletions satpy/readers/modis_l2.py
@@ -46,6 +46,7 @@
"""
import logging

+import dask.array as da
import numpy as np
import xarray as xr

@@ -188,35 +189,49 @@ def _mask_with_quality_assurance_if_needed(self, dataset, dataset_info, dataset_
                                  in zip(dataset.shape, quality_assurance.shape)]
        quality_assurance = np.tile(quality_assurance, duplication_factor)
        # Replace unassured data by NaN value
-        dataset[np.where(quality_assurance == 0)] = dataset.attrs["_FillValue"]
+        dataset = dataset.where(quality_assurance != 0, dataset.attrs["_FillValue"])
        return dataset


def _extract_byte_mask(dataset, byte_information, bit_start, bit_count):
+    attrs = dataset.attrs.copy()
+
    if isinstance(byte_information, int):
        # Only one byte: select the byte information
        byte_dataset = dataset[byte_information, :, :]
+        dataset = _bits_strip(bit_start, bit_count, byte_dataset)
    elif isinstance(byte_information, (list, tuple)) and len(byte_information) == 2:
        # Two bytes: recombine the two bytes
-        dataset_a = dataset[byte_information[0], :, :]
-        dataset_b = dataset[byte_information[1], :, :]
-        dataset_a = np.uint16(dataset_a)
-        dataset_a = np.left_shift(dataset_a, 8)  # dataset_a << 8
-        byte_dataset = np.bitwise_or(dataset_a, dataset_b).astype(np.uint16)
-        shape = byte_dataset.shape
-        # We replicate the concatenated byte with the right shape
-        byte_dataset = np.repeat(np.repeat(byte_dataset, 4, axis=0), 4, axis=1)
-        # All bits carry information, we update bit_start consequently
-        bit_start = np.arange(16, dtype=np.uint16).reshape((4, 4))
-        bit_start = np.tile(bit_start, (shape[0], shape[1]))
+        byte_mask = da.map_blocks(
+            _extract_two_byte_mask,
+            dataset.data[byte_information[0]],
+            dataset.data[byte_information[1]],
+            bit_start=bit_start,
+            bit_count=bit_count,
+            dtype=np.uint16,
+            meta=np.array((), dtype=np.uint16),
+            chunks=tuple(tuple(chunk_size * 4 for chunk_size in dim_chunks) for dim_chunks in dataset.chunks[1:]),
+        )
+        dataset = xr.DataArray(byte_mask, dims=dataset.dims[1:])

-    # Compute the final bit mask
-    attrs = dataset.attrs.copy()
-    dataset = _bits_strip(bit_start, bit_count, byte_dataset)
    dataset.attrs = attrs
    return dataset


+def _extract_two_byte_mask(data_a: np.ndarray, data_b: np.ndarray, bit_start: int, bit_count: int) -> np.ndarray:
+    data_a = data_a.astype(np.uint16, copy=False)
+    data_a = np.left_shift(data_a, 8)  # dataset_a << 8
+    byte_dataset = np.bitwise_or(data_a, data_b).astype(np.uint16)
+    shape = byte_dataset.shape
+    # We replicate the concatenated byte with the right shape
+    byte_dataset = np.repeat(np.repeat(byte_dataset, 4, axis=0), 4, axis=1)
+    # All bits carry information, we update bit_start consequently
+    bit_start = np.arange(16, dtype=np.uint16).reshape((4, 4))
+    bit_start = np.tile(bit_start, (shape[0], shape[1]))
+    return _bits_strip(bit_start, bit_count, byte_dataset)


def _bits_strip(bit_start, bit_count, value):
    """Extract specified bit from bit representation of integer value.

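A side note on the `dataset.where(...)` change in `_mask_with_quality_assurance_if_needed`: xarray's `.where(cond, other)` keeps values where `cond` is true and substitutes `other` elsewhere, and unlike the fancy-indexed in-place assignment it replaces, it works lazily on dask-backed arrays. A small sketch with made-up QA values:

```python
import numpy as np
import xarray as xr

data = xr.DataArray(np.array([[10.0, 20.0], [30.0, 40.0]]),
                    attrs={"_FillValue": -999.0})
qa = xr.DataArray(np.array([[1, 0], [0, 1]]))

# keep pixels whose QA flag is nonzero, fill the rest
masked = data.where(qa != 0, data.attrs["_FillValue"])
print(masked.values)  # [[  10. -999.]
                      #  [-999.   40.]]
```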
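And on the `da.map_blocks` rewrite in `_extract_byte_mask`: the wrapped NumPy function runs block-by-block over the two aligned byte planes, and because it upsamples each block 4x along both axes (the `np.repeat` calls), the output `chunks` must be declared 4x larger or dask would mis-infer the result's geometry. A self-contained toy version of the pattern (toy shapes and a simplified block function, not the reader's actual bit logic):

```python
import dask.array as da
import numpy as np

def _combine_and_upsample(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    # stand-in for _extract_two_byte_mask: fuse two byte planes into
    # uint16 values, then grow the block 4x along each axis
    combined = (a.astype(np.uint16) << 8) | b
    return np.repeat(np.repeat(combined, 4, axis=0), 4, axis=1)

a = da.zeros((6, 8), dtype=np.uint8, chunks=(3, 4))
b = da.ones((6, 8), dtype=np.uint8, chunks=(3, 4))
out = da.map_blocks(
    _combine_and_upsample, a, b,
    dtype=np.uint16,
    meta=np.array((), dtype=np.uint16),
    # each block grows 4x per axis, so the declared chunks must as well
    chunks=tuple(tuple(size * 4 for size in dim) for dim in a.chunks),
)
assert out.shape == (24, 32)
assert int(out[0, 0].compute()) == 1  # (0 << 8) | 1
```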
35 changes: 19 additions & 16 deletions satpy/tests/reader_tests/_modis_fixtures.py
@@ -36,7 +36,8 @@
AVAILABLE_QKM_PRODUCT_NAMES = ['1', '2']
SCAN_LEN_5KM = 6  # 3 scans of 5km data
SCAN_WIDTH_5KM = 270
-SCALE_FACTOR = 1
+SCALE_FACTOR = 0.5
+ADD_OFFSET = -0.5
RES_TO_REPEAT_FACTOR = {
    250: 20,
    500: 10,
@@ -74,7 +75,7 @@ def _generate_angle_data(resolution: int) -> np.ndarray:

def _generate_visible_data(resolution: int, num_bands: int, dtype=np.uint16) -> np.ndarray:
    shape = _shape_for_resolution(resolution)
-    data = np.zeros((num_bands, shape[0], shape[1]), dtype=dtype)
+    data = np.ones((num_bands, shape[0], shape[1]), dtype=dtype)

    # add fill value to every band
    data[:, -1, -1] = 65535
@@ -118,7 +119,8 @@ def _get_angles_variable_info(resolution: int) -> dict:
            'dim_labels': [
                f'{dim_factor}*nscans:MODIS_SWATH_Type_L1B',
                '1KM_geo_dim:MODIS_SWATH_Type_L1B'],
-            'scale_factor': 0.01
+            'scale_factor': 0.01,
+            'add_offset': -0.01,
        },
    }
    angles_info = {}
@@ -146,8 +148,8 @@ def _get_visible_variable_info(var_name: str, resolution: int, bands: list[str])
                           row_dim_name,
                           col_dim_name],
            'valid_range': (0, 32767),
-            'reflectance_scales': (1,) * num_bands,
-            'reflectance_offsets': (0,) * num_bands,
+            'reflectance_scales': (2.0,) * num_bands,
+            'reflectance_offsets': (-0.5,) * num_bands,
            'band_names': ",".join(bands),
        },
    },
@@ -264,6 +266,7 @@ def _add_variable_to_file(h, var_name, var_info):
        dim_count += 1
    v.setfillvalue(var_info['fill_value'])
    v.scale_factor = var_info['attrs'].get('scale_factor', SCALE_FACTOR)
+    v.add_offset = var_info['attrs'].get('add_offset', ADD_OFFSET)
    for attr_key, attr_val in var_info['attrs'].items():
        if attr_key == 'dim_labels':
            continue
@@ -405,7 +408,7 @@ def modis_l1b_nasa_1km_mod03_files(modis_l1b_nasa_mod021km_file, modis_l1b_nasa_

def _get_basic_variable_info(var_name: str, resolution: int) -> dict:
    shape = _shape_for_resolution(resolution)
-    data = np.zeros((shape[0], shape[1]), dtype=np.uint16)
+    data = np.ones((shape[0], shape[1]), dtype=np.uint16)
    row_dim_name = f'Cell_Along_Swath_{resolution}m:modl2'
    col_dim_name = f'Cell_Across_Swath_{resolution}m:modl2'
    return {
@@ -418,8 +421,8 @@ def _get_basic_variable_info(var_name: str, resolution: int) -> dict:
            'dim_labels': [row_dim_name,
                           col_dim_name],
            'valid_range': (0, 32767),
-            'scale_factor': 1.,
-            'add_offset': 0.,
+            'scale_factor': 2.0,
+            'add_offset': -1.0,
        },
    },
}
@@ -457,8 +460,8 @@ def _get_cloud_mask_variable_info(var_name: str, resolution: int) -> dict:
                           col_dim_name,
                           'Quality_Dimension:mod35'],
            'valid_range': (0, -1),
-            'scale_factor': 1.,
-            'add_offset': 0.,
+            'scale_factor': 2.,
+            'add_offset': -0.5,
        },
    },
}
@@ -479,8 +482,8 @@ def _get_mask_byte1_variable_info() -> dict:
            'dim_labels': [row_dim_name,
                           col_dim_name],
            'valid_range': (0, 4),
-            'scale_factor': 1.,
-            'add_offset': 0.,
+            'scale_factor': 2,
+            'add_offset': -1,
        },

    },
@@ -493,8 +496,8 @@ def _get_mask_byte1_variable_info() -> dict:
            'dim_labels': [row_dim_name,
                           col_dim_name],
            'valid_range': (0, 4),
-            'scale_factor': 1.,
-            'add_offset': 0.,
+            'scale_factor': 2,
+            'add_offset': -1,
        },
    },
    "MODIS_Snow_Ice_Flag": {
@@ -506,8 +509,8 @@ def _get_mask_byte1_variable_info() -> dict:
            'dim_labels': [row_dim_name,
                           col_dim_name],
            'valid_range': (0, 2),
-            'scale_factor': 1.,
-            'add_offset': 0.,
+            'scale_factor': 2,
+            'add_offset': -1,
        },
    },
}
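The move away from identity scaling in these fixtures (previously factor 1, offset 0, data all zeros) is what gives the new test assertions teeth: with a zero offset, the buggy CF-order arithmetic and the correct MODIS-order arithmetic agree, so the old tests could never catch the bug. A quick check with the L2 fixture values (scale 2.0, offset -1.0, raw count 1):

```python
raw, scale, offset = 1, 2.0, -1.0

correct = (raw - offset) * scale  # MODIS order: (1 - -1.0) * 2.0 == 4.0
buggy = raw * scale + offset      # old CF-order code: 1 * 2.0 + -1.0 == 1.0
assert (correct, buggy) == (4.0, 1.0)
```

The same arithmetic explains the 300.0 expected in the L1B tests below: (1 - -0.5) * 2.0 = 3.0, which satpy reports as a percentage reflectance. Category products such as the snow mask skip factor/offset entirely, so their raw count of 1 loads unchanged.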
2 changes: 2 additions & 0 deletions satpy/tests/reader_tests/test_modis_l1b.py
@@ -159,6 +159,7 @@ def test_load_vis(self, modis_l1b_nasa_mod021km_file):
        dataset_name = '1'
        scene.load([dataset_name])
        dataset = scene[dataset_name]
+        assert dataset[0, 0] == 300.0
        assert dataset.shape == _shape_for_resolution(1000)
        assert dataset.attrs['resolution'] == 1000
        _check_shared_metadata(dataset)
@@ -177,6 +178,7 @@ def test_load_vis_saturation(self, mask_saturated, modis_l1b_nasa_mod021km_file)

        # check saturation fill values
        data = dataset.values
+        assert dataset[0, 0] == 300.0
        assert np.isnan(data[-1, -1])  # normal fill value
        if mask_saturated:
            assert np.isnan(data[-1, -2])  # saturation
21 changes: 16 additions & 5 deletions satpy/tests/reader_tests/test_modis_l2.py
@@ -20,6 +20,7 @@
from __future__ import annotations

import dask
+import dask.array as da
import numpy as np
import pytest
from pytest_lazyfixture import lazy_fixture
@@ -114,7 +115,10 @@ def test_load_category_dataset(self, input_files, loadables, request_resolution,
            cat_id = make_dataid(name=ds_name, resolution=exp_resolution)
            assert cat_id in scene
            cat_data_arr = scene[cat_id]
+            assert isinstance(cat_data_arr.data, da.Array)
+            cat_data_arr = cat_data_arr.compute()
            assert cat_data_arr.shape == _shape_for_resolution(exp_resolution)
+            assert cat_data_arr.values[0, 0] == 0.0
            assert cat_data_arr.attrs.get('resolution') == exp_resolution
            # mask variables should be integers
            assert np.issubdtype(cat_data_arr.dtype, np.integer)
@@ -136,27 +140,34 @@ def test_load_250m_cloud_mask_dataset(self, input_files, exp_area):
        cloud_mask_id = make_dataid(name=dataset_name, resolution=250)
        assert cloud_mask_id in scene
        cloud_mask = scene[cloud_mask_id]
+        assert isinstance(cloud_mask.data, da.Array)
+        cloud_mask = cloud_mask.compute()
        assert cloud_mask.shape == _shape_for_resolution(250)
+        assert cloud_mask.values[0, 0] == 0.0
        # mask variables should be integers
        assert np.issubdtype(cloud_mask.dtype, np.integer)
        assert cloud_mask.attrs.get('_FillValue') is not None
        _check_shared_metadata(cloud_mask, expect_area=exp_area)

    @pytest.mark.parametrize(
-        ('input_files', 'loadables', 'exp_resolution', 'exp_area'),
+        ('input_files', 'loadables', 'exp_resolution', 'exp_area', 'exp_value'),
        [
-            [lazy_fixture('modis_l2_nasa_mod06_file'), ["surface_pressure"], 5000, True],
-            [lazy_fixture('modis_l2_imapp_snowmask_file'), ["snow_mask"], 1000, False],
-            [lazy_fixture('modis_l2_imapp_snowmask_geo_files'), ["snow_mask"], 1000, True],
+            [lazy_fixture('modis_l2_nasa_mod06_file'), ["surface_pressure"], 5000, True, 4.0],
+            # snow mask is considered a category product, factor/offset ignored
+            [lazy_fixture('modis_l2_imapp_snowmask_file'), ["snow_mask"], 1000, False, 1.0],
+            [lazy_fixture('modis_l2_imapp_snowmask_geo_files'), ["snow_mask"], 1000, True, 1.0],
        ]
    )
-    def test_load_l2_dataset(self, input_files, loadables, exp_resolution, exp_area):
+    def test_load_l2_dataset(self, input_files, loadables, exp_resolution, exp_area, exp_value):
        """Load and check an L2 variable."""
        scene = Scene(reader='modis_l2', filenames=input_files)
        scene.load(loadables)
        for ds_name in loadables:
            assert ds_name in scene
            data_arr = scene[ds_name]
+            assert isinstance(data_arr.data, da.Array)
+            data_arr = data_arr.compute()
+            assert data_arr.values[0, 0] == exp_value
            assert data_arr.shape == _shape_for_resolution(exp_resolution)
            assert data_arr.attrs.get('resolution') == exp_resolution
            _check_shared_metadata(data_arr, expect_area=exp_area)