Skip to content

Commit

Permalink
Merge pull request #2142 from djhoese/bugfix-modis-scale-offset
Browse files Browse the repository at this point in the history
Fix MODIS readers applying add_offset incorrectly
  • Loading branch information
mraspaud committed Jul 11, 2022
2 parents 607c25b + 442cccd commit 15dbf3d
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 37 deletions.
15 changes: 13 additions & 2 deletions satpy/readers/hdfeos_base.py
Expand Up @@ -225,15 +225,26 @@ def load_dataset(self, dataset_name, is_category=False):
return data

def _scale_and_mask_data_array(self, data, is_category=False):
"""Unscale byte data and mask invalid/fill values.
MODIS requires unscaling the in-file bytes in an unexpected way::
data = (byte_value - add_offset) * scale_factor
See the below L1B User's Guide Appendix C for more information:
https://mcst.gsfc.nasa.gov/sites/default/files/file_attachments/M1054E_PUG_2017_0901_V6.2.2_Terra_V6.2.1_Aqua.pdf
"""
good_mask, new_fill = self._get_good_data_mask(data, is_category=is_category)
scale_factor = data.attrs.pop('scale_factor', None)
add_offset = data.attrs.pop('add_offset', None)
# don't scale category products, even though scale_factor may equal 1
# we still need to convert integers to floats
if scale_factor is not None and not is_category:
data = data * np.float32(scale_factor)
if add_offset is not None and add_offset != 0:
data = data + add_offset
data = data - add_offset
data = data * np.float32(scale_factor)

if good_mask is not None:
data = data.where(good_mask, new_fill)
Expand Down
43 changes: 29 additions & 14 deletions satpy/readers/modis_l2.py
Expand Up @@ -46,6 +46,7 @@
"""
import logging

import dask.array as da
import numpy as np
import xarray as xr

Expand Down Expand Up @@ -188,35 +189,49 @@ def _mask_with_quality_assurance_if_needed(self, dataset, dataset_info, dataset_
in zip(dataset.shape, quality_assurance.shape)]
quality_assurance = np.tile(quality_assurance, duplication_factor)
# Replace unassured data by NaN value
dataset[np.where(quality_assurance == 0)] = dataset.attrs["_FillValue"]
dataset = dataset.where(quality_assurance != 0, dataset.attrs["_FillValue"])
return dataset


def _extract_byte_mask(dataset, byte_information, bit_start, bit_count):
    """Extract a bit field from one or two byte planes of ``dataset``.

    Args:
        dataset: 3D DataArray whose first dimension indexes byte planes.
        byte_information: Either a single plane index (``int``) or a pair of
            plane indices (list/tuple of length 2) whose planes are
            recombined into 16-bit values (first element is the high byte).
        bit_start: First bit of the field to extract (used in the
            single-byte case; the two-byte helper derives its own offsets).
        bit_count: Number of bits in the field.

    Returns:
        2D DataArray holding the extracted bit field with the original
        dataset attributes preserved.
    """
    # Preserve attributes up front; slicing/rebuilding below drops them.
    attrs = dataset.attrs.copy()

    if isinstance(byte_information, int):
        # Only one byte: select the byte information
        byte_dataset = dataset[byte_information, :, :]
        dataset = _bits_strip(bit_start, bit_count, byte_dataset)
    elif isinstance(byte_information, (list, tuple)) and len(byte_information) == 2:
        # Two bytes: recombine them lazily, block by block, to stay
        # dask-friendly. Each input pixel expands to a 4x4 block of output
        # pixels in the helper, hence the "* 4" on the output chunk sizes.
        byte_mask = da.map_blocks(
            _extract_two_byte_mask,
            dataset.data[byte_information[0]],
            dataset.data[byte_information[1]],
            bit_start=bit_start,
            bit_count=bit_count,
            dtype=np.uint16,
            meta=np.array((), dtype=np.uint16),
            chunks=tuple(tuple(chunk_size * 4 for chunk_size in dim_chunks)
                         for dim_chunks in dataset.chunks[1:]),
        )
        dataset = xr.DataArray(byte_mask, dims=dataset.dims[1:])

    dataset.attrs = attrs
    return dataset


def _extract_two_byte_mask(data_a: np.ndarray, data_b: np.ndarray, bit_start: int, bit_count: int) -> np.ndarray:
    """Recombine two byte planes into 16-bit values and strip the bit field.

    ``data_a`` holds the high byte and ``data_b`` the low byte. Each
    combined 16-bit pixel is expanded into a 4x4 block of output pixels,
    one per bit-group position.

    NOTE(review): the incoming ``bit_start`` is deliberately replaced by a
    per-pixel array of offsets (0..15) since every bit of the combined
    value carries information.
    """
    high = np.left_shift(data_a.astype(np.uint16, copy=False), 8)
    combined = np.bitwise_or(high, data_b).astype(np.uint16)
    rows, cols = combined.shape
    # Replicate each combined value into a 4x4 block of output pixels.
    expanded = np.repeat(np.repeat(combined, 4, axis=0), 4, axis=1)
    # Per-pixel bit offsets 0..15 tiled over the expanded grid.
    per_pixel_start = np.tile(np.arange(16, dtype=np.uint16).reshape((4, 4)), (rows, cols))
    return _bits_strip(per_pixel_start, bit_count, expanded)


def _bits_strip(bit_start, bit_count, value):
"""Extract specified bit from bit representation of integer value.
Expand Down
35 changes: 19 additions & 16 deletions satpy/tests/reader_tests/_modis_fixtures.py
Expand Up @@ -36,7 +36,8 @@
AVAILABLE_QKM_PRODUCT_NAMES = ['1', '2']
SCAN_LEN_5KM = 6 # 3 scans of 5km data
SCAN_WIDTH_5KM = 270
SCALE_FACTOR = 1
SCALE_FACTOR = 0.5
ADD_OFFSET = -0.5
RES_TO_REPEAT_FACTOR = {
250: 20,
500: 10,
Expand Down Expand Up @@ -74,7 +75,7 @@ def _generate_angle_data(resolution: int) -> np.ndarray:

def _generate_visible_data(resolution: int, num_bands: int, dtype=np.uint16) -> np.ndarray:
shape = _shape_for_resolution(resolution)
data = np.zeros((num_bands, shape[0], shape[1]), dtype=dtype)
data = np.ones((num_bands, shape[0], shape[1]), dtype=dtype)

# add fill value to every band
data[:, -1, -1] = 65535
Expand Down Expand Up @@ -118,7 +119,8 @@ def _get_angles_variable_info(resolution: int) -> dict:
'dim_labels': [
f'{dim_factor}*nscans:MODIS_SWATH_Type_L1B',
'1KM_geo_dim:MODIS_SWATH_Type_L1B'],
'scale_factor': 0.01
'scale_factor': 0.01,
'add_offset': -0.01,
},
}
angles_info = {}
Expand Down Expand Up @@ -146,8 +148,8 @@ def _get_visible_variable_info(var_name: str, resolution: int, bands: list[str])
row_dim_name,
col_dim_name],
'valid_range': (0, 32767),
'reflectance_scales': (1,) * num_bands,
'reflectance_offsets': (0,) * num_bands,
'reflectance_scales': (2.0,) * num_bands,
'reflectance_offsets': (-0.5,) * num_bands,
'band_names': ",".join(bands),
},
},
Expand Down Expand Up @@ -264,6 +266,7 @@ def _add_variable_to_file(h, var_name, var_info):
dim_count += 1
v.setfillvalue(var_info['fill_value'])
v.scale_factor = var_info['attrs'].get('scale_factor', SCALE_FACTOR)
v.add_offset = var_info['attrs'].get('add_offset', ADD_OFFSET)
for attr_key, attr_val in var_info['attrs'].items():
if attr_key == 'dim_labels':
continue
Expand Down Expand Up @@ -405,7 +408,7 @@ def modis_l1b_nasa_1km_mod03_files(modis_l1b_nasa_mod021km_file, modis_l1b_nasa_

def _get_basic_variable_info(var_name: str, resolution: int) -> dict:
shape = _shape_for_resolution(resolution)
data = np.zeros((shape[0], shape[1]), dtype=np.uint16)
data = np.ones((shape[0], shape[1]), dtype=np.uint16)
row_dim_name = f'Cell_Along_Swath_{resolution}m:modl2'
col_dim_name = f'Cell_Across_Swath_{resolution}m:modl2'
return {
Expand All @@ -418,8 +421,8 @@ def _get_basic_variable_info(var_name: str, resolution: int) -> dict:
'dim_labels': [row_dim_name,
col_dim_name],
'valid_range': (0, 32767),
'scale_factor': 1.,
'add_offset': 0.,
'scale_factor': 2.0,
'add_offset': -1.0,
},
},
}
Expand Down Expand Up @@ -457,8 +460,8 @@ def _get_cloud_mask_variable_info(var_name: str, resolution: int) -> dict:
col_dim_name,
'Quality_Dimension:mod35'],
'valid_range': (0, -1),
'scale_factor': 1.,
'add_offset': 0.,
'scale_factor': 2.,
'add_offset': -0.5,
},
},
}
Expand All @@ -479,8 +482,8 @@ def _get_mask_byte1_variable_info() -> dict:
'dim_labels': [row_dim_name,
col_dim_name],
'valid_range': (0, 4),
'scale_factor': 1.,
'add_offset': 0.,
'scale_factor': 2,
'add_offset': -1,
},

},
Expand All @@ -493,8 +496,8 @@ def _get_mask_byte1_variable_info() -> dict:
'dim_labels': [row_dim_name,
col_dim_name],
'valid_range': (0, 4),
'scale_factor': 1.,
'add_offset': 0.,
'scale_factor': 2,
'add_offset': -1,
},
},
"MODIS_Snow_Ice_Flag": {
Expand All @@ -506,8 +509,8 @@ def _get_mask_byte1_variable_info() -> dict:
'dim_labels': [row_dim_name,
col_dim_name],
'valid_range': (0, 2),
'scale_factor': 1.,
'add_offset': 0.,
'scale_factor': 2,
'add_offset': -1,
},
},
}
Expand Down
2 changes: 2 additions & 0 deletions satpy/tests/reader_tests/test_modis_l1b.py
Expand Up @@ -159,6 +159,7 @@ def test_load_vis(self, modis_l1b_nasa_mod021km_file):
dataset_name = '1'
scene.load([dataset_name])
dataset = scene[dataset_name]
assert dataset[0, 0] == 300.0
assert dataset.shape == _shape_for_resolution(1000)
assert dataset.attrs['resolution'] == 1000
_check_shared_metadata(dataset)
Expand All @@ -177,6 +178,7 @@ def test_load_vis_saturation(self, mask_saturated, modis_l1b_nasa_mod021km_file)

# check saturation fill values
data = dataset.values
assert dataset[0, 0] == 300.0
assert np.isnan(data[-1, -1]) # normal fill value
if mask_saturated:
assert np.isnan(data[-1, -2]) # saturation
Expand Down
21 changes: 16 additions & 5 deletions satpy/tests/reader_tests/test_modis_l2.py
Expand Up @@ -20,6 +20,7 @@
from __future__ import annotations

import dask
import dask.array as da
import numpy as np
import pytest
from pytest_lazyfixture import lazy_fixture
Expand Down Expand Up @@ -114,7 +115,10 @@ def test_load_category_dataset(self, input_files, loadables, request_resolution,
cat_id = make_dataid(name=ds_name, resolution=exp_resolution)
assert cat_id in scene
cat_data_arr = scene[cat_id]
assert isinstance(cat_data_arr.data, da.Array)
cat_data_arr = cat_data_arr.compute()
assert cat_data_arr.shape == _shape_for_resolution(exp_resolution)
assert cat_data_arr.values[0, 0] == 0.0
assert cat_data_arr.attrs.get('resolution') == exp_resolution
# mask variables should be integers
assert np.issubdtype(cat_data_arr.dtype, np.integer)
Expand All @@ -136,27 +140,34 @@ def test_load_250m_cloud_mask_dataset(self, input_files, exp_area):
cloud_mask_id = make_dataid(name=dataset_name, resolution=250)
assert cloud_mask_id in scene
cloud_mask = scene[cloud_mask_id]
assert isinstance(cloud_mask.data, da.Array)
cloud_mask = cloud_mask.compute()
assert cloud_mask.shape == _shape_for_resolution(250)
assert cloud_mask.values[0, 0] == 0.0
# mask variables should be integers
assert np.issubdtype(cloud_mask.dtype, np.integer)
assert cloud_mask.attrs.get('_FillValue') is not None
_check_shared_metadata(cloud_mask, expect_area=exp_area)

@pytest.mark.parametrize(
    ('input_files', 'loadables', 'exp_resolution', 'exp_area', 'exp_value'),
    [
        [lazy_fixture('modis_l2_nasa_mod06_file'), ["surface_pressure"], 5000, True, 4.0],
        # snow mask is considered a category product, factor/offset ignored
        [lazy_fixture('modis_l2_imapp_snowmask_file'), ["snow_mask"], 1000, False, 1.0],
        [lazy_fixture('modis_l2_imapp_snowmask_geo_files'), ["snow_mask"], 1000, True, 1.0],
    ]
)
def test_load_l2_dataset(self, input_files, loadables, exp_resolution, exp_area, exp_value):
    """Load and check an L2 variable.

    Verifies the loaded variable stays dask-backed, has the expected
    scaled value at [0, 0], the expected shape/resolution, and shared
    metadata (area presence per ``exp_area``).
    """
    scene = Scene(reader='modis_l2', filenames=input_files)
    scene.load(loadables)
    for ds_name in loadables:
        assert ds_name in scene
        data_arr = scene[ds_name]
        # Loading must not eagerly compute the data.
        assert isinstance(data_arr.data, da.Array)
        data_arr = data_arr.compute()
        assert data_arr.values[0, 0] == exp_value
        assert data_arr.shape == _shape_for_resolution(exp_resolution)
        assert data_arr.attrs.get('resolution') == exp_resolution
        _check_shared_metadata(data_arr, expect_area=exp_area)

0 comments on commit 15dbf3d

Please sign in to comment.