Add weighted blended stacking to MultiScene (fixes multi-band handling) #2394

Merged: 25 commits, Apr 17, 2023
Changes shown are from 17 of the 25 commits.

Commits:
7f878f5  Rename Adam's stacking function, add set_weights_to_zero_where_invali… (lobsiger, Feb 17, 2023)
b374ff9  Adding selecting with bands and true blending. (lobsiger, Feb 17, 2023)
a7e480b  Fixed indenting stuff an line break. (lobsiger, Feb 17, 2023)
5a3bca5  Fixed line break. (lobsiger, Feb 17, 2023)
6bb4cdd  Cosmetics, maybe should use enumerate() ... (lobsiger, Feb 18, 2023)
c64d556  Adapted stack() for two blending functions. (lobsiger, Feb 21, 2023)
9ff0f94  Made one blend function out of two. (lobsiger, Feb 21, 2023)
b0c0701  Made one select function out of two. (lobsiger, Feb 21, 2023)
3e110b8  Added start because this is now after the .fillna() step. (lobsiger, Feb 21, 2023)
20a7de9  Just a test to test my test theory. (lobsiger, Feb 21, 2023)
1ddea03  Maybe Adams test was not invoked when all passed in my homebrew versi… (lobsiger, Feb 21, 2023)
e19d301  Got my first idea of an assert statement. (lobsiger, Feb 22, 2023)
7ec0057  Reword stack docstring in satpy/multiscene.py (djhoese, Mar 31, 2023)
4d4dcb8  Start refactoring new weighted stacking in MultiScene (djhoese, Mar 31, 2023)
a655318  Refactor multiscene blend tests to avoid unnecessary test setup (djhoese, Apr 6, 2023)
54797c0  Improve consistency between multiscene stack functions (djhoese, Apr 6, 2023)
0210ae4  Consolidate some multiscene blend tests (djhoese, Apr 6, 2023)
15d8d0c  Add initial tests for weighted blended stacking (djhoese, Apr 7, 2023)
f2ac7b2  Refactor multiscene blending fixtures (djhoese, Apr 10, 2023)
f253d61  Add RGB and float tests to multiscene blend tests (djhoese, Apr 11, 2023)
c6d8dea  Remove TODOs from multiscene regarding overlay/weight handling (djhoese, Apr 11, 2023)
c969ce7  Move multiscene to its own subpackage (djhoese, Apr 11, 2023)
53d7c22  Refactor multiscene blend functions to their own module (djhoese, Apr 11, 2023)
918260e  Make more objects in multiscene module private with `_` prefix (djhoese, Apr 11, 2023)
4ad2be8  Update docstring of multiscene stack and fix docstring errors in priv… (djhoese, Apr 13, 2023)
146 changes: 111 additions & 35 deletions satpy/multiscene.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # Copyright (c) 2016-2023 Satpy developers
 #

[CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main), warning on line 1 in satpy/multiscene.py]
❌ New issue: Lines of Code in a Single File. This module has 634 lines of code; improve code health by reducing it to 600.
@@ -16,13 +16,15 @@
 # You should have received a copy of the GNU General Public License along with
 # satpy. If not, see <http://www.gnu.org/licenses/>.
 """MultiScene object to work with multiple timesteps of satellite data."""
+from __future__ import annotations
 
 import copy
 import logging
 import warnings
 from datetime import datetime
 from queue import Queue
 from threading import Thread
+from typing import Callable, Iterable, Mapping, Optional, Sequence
 
 import dask.array as da
 import numpy as np
@@ -46,66 +48,140 @@
 log = logging.getLogger(__name__)
 
 
-def stack(datasets, weights=None, combine_times=True):
-    """Overlay a series of datasets together.
+def stack(
+        datasets: Sequence[xr.DataArray],
+        weights: Optional[Sequence[xr.DataArray]] = None,
+        combine_times: bool = True,
+        blend_type: str = 'select_with_weights'
+) -> xr.DataArray:
+    """Combine a series of datasets in different ways.
 
     By default, datasets are stacked on top of each other, so the last one applied is
-    on top. If a sequence of weights arrays are provided the datasets will
-    be combined according to those weights. The result will be a composite
-    dataset where the data in each pixel is coming from the dataset having the
-    highest weight.
+    on top. If a sequence of weights (with equal shape) is provided, the datasets will
+    be combined according to those weights. Datasets can be integer category products
+    (ex. cloud type), single channels (ex. radiance), or RGB composites. In the
+    latter case, weights are applied to each 'R', 'G', 'B' coordinate in the same
+    way. The result will be a composite dataset where each pixel is constructed in a
+    way depending on ``blend_type``.
 
     """
     if weights:
-        return _stack_weighted(datasets, weights, combine_times)
-
-    base = datasets[0].copy()
-    for dataset in datasets[1:]:
-        try:
-            base = base.where(dataset == dataset.attrs["_FillValue"], dataset)
-        except KeyError:
-            base = base.where(dataset.isnull(), dataset)
-
-    return base
-
-
-def _stack_weighted(datasets, weights, combine_times):
-    """Stack datasets using weights."""
-    weights = set_weights_to_zero_where_invalid(datasets, weights)
-
-    indices = da.argmax(da.dstack(weights), axis=-1)
-    attrs = combine_metadata(*[x.attrs for x in datasets])
-
-    if combine_times:
-        if 'start_time' in attrs and 'end_time' in attrs:
-            attrs['start_time'], attrs['end_time'] = _get_combined_start_end_times(*[x.attrs for x in datasets])
-
-    dims = datasets[0].dims
-    weighted_array = xr.DataArray(da.choose(indices, datasets), dims=dims, attrs=attrs)
-    return weighted_array
-
-
-def set_weights_to_zero_where_invalid(datasets, weights):
-    """Go through the weights and set to pixel values to zero where corresponding datasets are invalid."""
-    for i, dataset in enumerate(datasets):
-        try:
-            weights[i] = xr.where(dataset == dataset.attrs["_FillValue"], 0, weights[i])
-        except KeyError:
-            weights[i] = xr.where(dataset.isnull(), 0, weights[i])
-
-    return weights
+        return _stack_with_weights(datasets, weights, combine_times, blend_type)
+    return _stack_no_weights(datasets, combine_times)
+
+
+def _stack_with_weights(
+        datasets: Sequence[xr.DataArray],
+        weights: Sequence[xr.DataArray],
+        combine_times: bool,
+        blend_type: str
+) -> xr.DataArray:
+    blend_func = _get_weighted_blending_func(blend_type)
+    filled_weights = list(_fill_weights_for_invalid_dataset_pixels(datasets, weights))
+    return blend_func(datasets, filled_weights, combine_times)
+
+
+def _get_weighted_blending_func(blend_type: str) -> Callable:
+    WEIGHTED_BLENDING_FUNCS = {
+        "select_with_weights": _stack_select_by_weights,
+        "blend_with_weights": _stack_blend_by_weights,
+    }
+    blend_func = WEIGHTED_BLENDING_FUNCS.get(blend_type)
+    if blend_func is None:
+        raise ValueError(f"Unknown weighted blending type: {blend_type}. "
+                         f"Expected one of: {WEIGHTED_BLENDING_FUNCS.keys()}")
+    return blend_func
+
+
+def _fill_weights_for_invalid_dataset_pixels(
+        datasets: Sequence[xr.DataArray],
+        weights: Sequence[xr.DataArray]
+) -> Iterable[xr.DataArray]:
+    """Replace weight values with 0 where data values are invalid/null."""
+    has_bands_dims = "bands" in datasets[0].dims
+    for i, dataset in enumerate(datasets):
+        # if multi-band only use the red band
+        compare_ds = dataset[0] if has_bands_dims else dataset
+        try:
+            yield xr.where(compare_ds == compare_ds.attrs["_FillValue"], 0, weights[i])
+        except KeyError:
+            yield xr.where(compare_ds.isnull(), 0, weights[i])
+
+
+def _stack_blend_by_weights(
+        datasets: Sequence[xr.DataArray],
+        weights: Sequence[xr.DataArray],
+        combine_times: bool
+) -> xr.DataArray:
+    """Stack datasets blending overlap using weights."""
+    attrs = _combine_stacked_attrs([data_arr.attrs for data_arr in datasets], combine_times)
+
+    overlays = []
+    for weight, overlay in zip(weights, datasets):
+        overlays.append(overlay.fillna(0) * weight)
+    base = sum(overlays) / sum(weights, start=1.e-9)
djhoese (Member) commented:

@lobsiger I'm not sure I agree with this start parameter as a way to handle dividing by 0. What do we think the value should be if the weights sum to 0?

There are different ways to handle a divide by 0, but I'm kind of leaning towards letting it be NaN in the final result.

lobsiger (Contributor Author) commented:

@djhoese I agree that we should not have start=1e-6 when summing the weights in the denominator. Instead we should divide by 0 outside the data area of passes, probably producing 0/0 = NaN (?). I took the latest version of your multiscene.py and tested all possible combinations of three MetopB passes over area eurol10, similar to what I did here:

pytroll/pycoast#95 (comment)

I made images for composites 'natural_color', 'ir108_3d' and channel '4' (IR 10.8, used for 'ir108_3d'). I allowed for all 3 defined blend types, generate=True/False, fill_value=0/255/None. This resulted in 3 x 3 x 2 x 3 = 54 different image files without setting start in the denominator. All of these images looked as expected. Then I set sum(weights, start=1e-6) and made 18 more images for 'blend_with_weights'. All 18 of these files are problematic, not discovered so far because I mainly looked at RGB composites with fill_value=0. I attach 3 examples: left is the image as expected, right is the wrong image with start=1e-6. The original files produced are *.png, but I reduced them in size and converted them to *.jpg to save space.

Problems with 'blend_with_weights' and sum(weights, start=1e-6); generate=False/True does not matter:

'4', data pale (IR 10.8, almost white), fill_value=0: no-data region is BLACK (O.K., but caused by 1e-6)
'4', data pale (IR 10.8, almost white), fill_value=255: no-data region is BLACK (instead of white)
'4', data pale (IR 10.8, almost white), fill_value=None: no-data region is BLACK (instead of transparent)

'ir108_3d', data dark (almost black), fill_value=0: no-data region is WHITE (instead of black, reversed)
'ir108_3d', data dark (almost black), fill_value=255: no-data region is WHITE (O.K., but caused by 1e-6)
'ir108_3d', data dark (almost black), fill_value=None: no-data region is WHITE (instead of transparent)

'natural_color', data look as expected, fill_value=0: no-data region is BLACK (O.K., but caused by 1e-6)
'natural_color', data look as expected, fill_value=255: no-data region is BLACK (instead of white)
'natural_color', data look as expected, fill_value=None: no-data region is BLACK (instead of transparent)

[Attached image pairs, expected (startNone) vs. wrong (start1e-6):
MetopB-4-eurol10-blend_with_weights-generateTrue-fill_value255
MetopB-ir108_3d-eurol10-blend_with_weights-generateTrue-fill_value0
MetopB-natural_color-eurol10-blend_with_weights-generateTrue-fill_value255]

djhoese (Member) commented:

Divide by zero with numpy will generally produce NaNs. In Satpy/trollimage these are considered fill values, so they either become transparent via the alpha band or they get set with your fill_value keyword argument. Based on your comment I think this is what you're seeing. For the start value sum cases, yeah, I don't see how that would work properly in real-world cases, so I'm not concerned. I think I can continue with the rest of my TODO list then.
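To make the arithmetic under discussion concrete, here is a minimal sketch (an editorial illustration, not code from the PR) of the blend math with no `start` seed in the denominator; the toy arrays, weights, and variable names are hypothetical:

```python
import numpy as np
import xarray as xr

a = xr.DataArray([[1.0, np.nan], [1.0, np.nan]])  # pass 1: right column has no data
b = xr.DataArray([[np.nan, np.nan], [3.0, 3.0]])  # pass 2: top row has no data
w_a = xr.DataArray([[1.0, 0.0], [1.0, 0.0]])      # weights already zeroed where data is invalid
w_b = xr.DataArray([[0.0, 0.0], [1.0, 1.0]])

overlays = [a.fillna(0) * w_a, b.fillna(0) * w_b]
blended = sum(overlays) / sum([w_a, w_b])  # no start= seed; numpy warns on the 0/0 pixel

print(blended.values)
# [[ 1. nan]
#  [ 2.  3.]]
```

Where every weight is 0 the division is 0/0, which numpy evaluates to NaN; Satpy/trollimage then treat that pixel as a fill value (transparent alpha or the user's fill_value). With sum(weights, start=1e-6) the same pixel would instead come out as 0.0, matching the black/white no-data regions reported above.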

+
+    dims = datasets[0].dims
+    blended_array = xr.DataArray(base, dims=dims, attrs=attrs)
+    return blended_array
+
+
+def _stack_select_by_weights(
+        datasets: Sequence[xr.DataArray],
+        weights: Sequence[xr.DataArray],
+        combine_times: bool
+) -> xr.DataArray:
+    """Stack datasets selecting pixels using weights."""
+    indices = da.argmax(da.dstack(weights), axis=-1)
+    if "bands" in datasets[0].dims:
+        indices = [indices] * datasets[0].sizes["bands"]
djhoese (Member) commented:

@lobsiger I refactored this so that regardless of the number of bands in the DataArray (L, LA, RGB, RGBA) it would have indices for each one, which I thought was what you wanted here. I'm realizing now: should this ignore alpha bands?
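As background for this comment, a minimal sketch (an editorial illustration, not code from the PR) of the selection mechanism: the weight arrays are depth-stacked, a per-pixel argmax picks the winning input, and `da.choose` pulls each pixel from that input. The toy values are hypothetical:

```python
import dask.array as da

d0 = da.from_array([[10, 10], [10, 10]], chunks=2)
d1 = da.from_array([[20, 20], [20, 20]], chunks=2)
w0 = da.from_array([[1.0, 0.0], [1.0, 0.0]], chunks=2)
w1 = da.from_array([[0.0, 2.0], [0.0, 2.0]], chunks=2)

indices = da.argmax(da.dstack([w0, w1]), axis=-1)  # 0 or 1 per pixel
selected = da.choose(indices, [d0, d1])

print(selected.compute())
# [[10 20]
#  [10 20]]
```

For a multi-band DataArray, the same 2D `indices` array is repeated once per band (`[indices] * n_bands`), so every band of a given pixel, alpha included, comes from the same input; that is the open question about alpha bands raised above.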

+
+    attrs = _combine_stacked_attrs([data_arr.attrs for data_arr in datasets], combine_times)
+    dims = datasets[0].dims
+    coords = datasets[0].coords
+    selected_array = xr.DataArray(da.choose(indices, datasets), dims=dims, coords=coords, attrs=attrs)
+    return selected_array
+
+
+def _stack_no_weights(
+        datasets: Sequence[xr.DataArray],
+        combine_times: bool
+) -> xr.DataArray:
+    base = datasets[0].copy()
+    collected_attrs = [base.attrs]
+    for data_arr in datasets[1:]:
+        collected_attrs.append(data_arr.attrs)
+        try:
+            base = base.where(data_arr == data_arr.attrs["_FillValue"], data_arr)
+        except KeyError:
+            base = base.where(data_arr.isnull(), data_arr)
+
+    attrs = _combine_stacked_attrs(collected_attrs, combine_times)
+    base.attrs = attrs
+    return base


+def _combine_stacked_attrs(collected_attrs: Sequence[Mapping], combine_times: bool) -> dict:
+    attrs = combine_metadata(*collected_attrs)
+    if combine_times and ('start_time' in attrs or 'end_time' in attrs):
+        new_start, new_end = _get_combined_start_end_times(collected_attrs)
+        if new_start:
+            attrs["start_time"] = new_start
+        if new_end:
+            attrs["end_time"] = new_end
+    return attrs
+
+
-def _get_combined_start_end_times(*metadata_objects):
+def _get_combined_start_end_times(metadata_objects: Iterable[Mapping]) -> tuple[datetime | None, datetime | None]:
     """Get the start and end times attributes valid for the entire dataset series."""
-    start_time = datetime.now()
-    end_time = datetime.fromtimestamp(0)
+    start_time = None
+    end_time = None
     for md_obj in metadata_objects:
-        if md_obj['start_time'] < start_time:
+        if "start_time" in md_obj and (start_time is None or md_obj['start_time'] < start_time):
             start_time = md_obj['start_time']
-        if md_obj['end_time'] > end_time:
+        if "end_time" in md_obj and (end_time is None or md_obj['end_time'] > end_time):
             end_time = md_obj['end_time']
 
     return start_time, end_time
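Taken together, the rewritten module offers one unweighted and two weighted behaviors. The following standalone sketch (an editorial illustration based on the new API; toy data and values are hypothetical, and it has not been run against this exact commit) shows what each weighted ``blend_type`` produces:

```python
import dask.array as da
import numpy as np
import xarray as xr

from satpy.multiscene import stack

data1 = xr.DataArray(da.full((3, 3), 1.0, chunks=-1), dims=("y", "x"))
data2 = xr.DataArray(da.full((3, 3), 3.0, chunks=-1), dims=("y", "x"))

w1 = np.zeros((3, 3))
w1[0, :] = 2  # first row strongly favors data1
wgt1 = xr.DataArray(w1, dims=("y", "x"))
wgt2 = xr.DataArray(np.ones((3, 3)), dims=("y", "x"))

# Default: per-pixel winner-takes-all selection.
sel = stack([data1, data2], weights=[wgt1, wgt2], blend_type="select_with_weights")
# First row comes from data1 (1.0), everything else from data2 (3.0).

# Alternative: weighted average over the overlap.
bln = stack([data1, data2], weights=[wgt1, wgt2], blend_type="blend_with_weights")
# First row: (2*1.0 + 1*3.0) / (2 + 1) = 5/3; other rows: 3.0.
```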
105 changes: 39 additions & 66 deletions satpy/tests/multiscene_tests/test_blend.py
@@ -31,15 +31,13 @@
 from satpy.tests.multiscene_tests.test_utils import _create_test_area, _create_test_dataset, _create_test_int8_dataset
 from satpy.tests.utils import make_dataid
 
+NUM_TEST_ROWS = 2
+NUM_TEST_COLS = 3
+
 
 class TestBlendFuncs:
     """Test individual functions used for blending."""
 
-    def setup_method(self):
-        """Set up test functions."""
-        self._line = 2
-        self._column = 3
-
     @pytest.fixture
     def scene1_with_weights(self):
         """Create first test scene with a dataset of weights."""

@@ -54,7 +52,7 @@
         )
         scene[dsid1] = _create_test_int8_dataset(name='geo-ct', area=area, values=1)
         scene[dsid1].attrs['platform_name'] = 'Meteosat-11'
-        scene[dsid1].attrs['sensor'] = set({'seviri'})
+        scene[dsid1].attrs['sensor'] = {'seviri'}
         scene[dsid1].attrs['units'] = '1'
         scene[dsid1].attrs['long_name'] = 'NWC GEO CT Cloud Type'
         scene[dsid1].attrs['orbital_parameters'] = {'satellite_nominal_altitude': 35785863.0,

@@ -65,8 +63,8 @@
 
         wgt1 = _create_test_dataset(name='geo-ct-wgt', area=area, values=0)
 
-        wgt1[self._line, :] = 2
-        wgt1[:, self._column] = 2
+        wgt1[NUM_TEST_ROWS, :] = 2
+        wgt1[:, NUM_TEST_COLS] = 2
 
         dsid2 = make_dataid(
             name="geo-cma",

@@ -95,7 +93,7 @@
         )
         scene[dsid1] = _create_test_int8_dataset(name='polar-ct', area=area, values=3)
         scene[dsid1].attrs['platform_name'] = 'NOAA-18'
-        scene[dsid1].attrs['sensor'] = set({'avhrr-3'})
+        scene[dsid1].attrs['sensor'] = {'avhrr-3'}
         scene[dsid1].attrs['units'] = '1'
         scene[dsid1].attrs['long_name'] = 'SAFNWC PPS CT Cloud Type'
         scene[dsid1][-1, :] = scene[dsid1].attrs['_FillValue']
@@ -150,89 +148,48 @@
         expected[-1, :] = scene1['geo-ct'][-1, :]
 
         xr.testing.assert_equal(result, expected.compute())
-        assert result.attrs['platform_name'] == 'Meteosat-11'
-        assert result.attrs['sensor'] == set({'seviri'})
-        assert result.attrs['long_name'] == 'NWC GEO CT Cloud Type'
-        assert result.attrs['units'] == '1'
-        assert result.attrs['name'] == 'CloudType'
-        assert result.attrs['_FillValue'] == 255
-        assert result.attrs['valid_range'] == [1, 15]
-
+        _check_stacked_metadata(result, "CloudType")
         assert result.attrs['start_time'] == datetime(2023, 1, 16, 11, 9, 17)
-        assert result.attrs['end_time'] == datetime(2023, 1, 16, 11, 12, 22)
+        assert result.attrs['end_time'] == datetime(2023, 1, 16, 11, 28, 1, 900000)
 
+    @pytest.mark.parametrize("combine_times", [False, True])
     def test_blend_two_scenes_using_stack_weighted(self, multi_scene_and_weights, groups,
-                                                   scene1_with_weights, scene2_with_weights):
+                                                   scene1_with_weights, scene2_with_weights,
+                                                   combine_times):
         """Test stacking two scenes using weights - testing that metadata are combined correctly.
 
         Here we test that the start and end times can be combined so that they
         describe the start and end times of the entire data series.
 
         """
         from functools import partial
 
         multi_scene, weights = multi_scene_and_weights
         scene1, weights1 = scene1_with_weights
         scene2, weights2 = scene2_with_weights
 
         simple_groups = {DataQuery(name='CloudType'): groups[DataQuery(name='CloudType')]}
         multi_scene.group(simple_groups)
 
         weights = [weights[0][0], weights[1][0]]
-        stack_with_weights = partial(stack, weights=weights)
+        stack_with_weights = partial(stack, weights=weights, combine_times=combine_times)
         weighted_blend = multi_scene.blend(blend_function=stack_with_weights)
 
         expected = scene2['polar-ct']
-        expected[self._line, :] = scene1['geo-ct'][self._line, :]
-        expected[:, self._column] = scene1['geo-ct'][:, self._column]
+        expected[NUM_TEST_ROWS, :] = scene1['geo-ct'][NUM_TEST_ROWS, :]
+        expected[:, NUM_TEST_COLS] = scene1['geo-ct'][:, NUM_TEST_COLS]
         expected[-1, :] = scene1['geo-ct'][-1, :]
 
         result = weighted_blend['CloudType'].compute()
         xr.testing.assert_equal(result, expected.compute())
-
-        expected_area = _create_test_area()
-        assert result.attrs['area'] == expected_area
-        assert 'sensor' not in result.attrs
-        assert 'platform_name' not in result.attrs
-        assert 'long_name' not in result.attrs
-        assert result.attrs['units'] == '1'
-        assert result.attrs['name'] == 'CloudType'
-        assert result.attrs['_FillValue'] == 255
-        assert result.attrs['valid_range'] == [1, 15]
-
-        assert result.attrs['start_time'] == datetime(2023, 1, 16, 11, 9, 17)
-        assert result.attrs['end_time'] == datetime(2023, 1, 16, 11, 28, 1, 900000)
-
-    def test_blend_two_scenes_using_stack_weighted_no_time_combination(self, multi_scene_and_weights, groups,
-                                                                       scene1_with_weights, scene2_with_weights):
-        """Test stacking two scenes using weights - test that the start and end times are averaged and not combined."""
-        from functools import partial
-
-        multi_scene, weights = multi_scene_and_weights
-        scene1, weights1 = scene1_with_weights
-        scene2, weights2 = scene2_with_weights
-
-        simple_groups = {DataQuery(name='CloudType'): groups[DataQuery(name='CloudType')]}
-        multi_scene.group(simple_groups)
-
-        weights = [weights[0][0], weights[1][0]]
-        stack_with_weights = partial(stack, weights=weights, combine_times=False)
-        weighted_blend = multi_scene.blend(blend_function=stack_with_weights)
-
-        result = weighted_blend['CloudType'].compute()
-
-        expected_area = _create_test_area()
-        assert result.attrs['area'] == expected_area
-        assert 'sensor' not in result.attrs
-        assert 'platform_name' not in result.attrs
-        assert 'long_name' not in result.attrs
-        assert result.attrs['units'] == '1'
-        assert result.attrs['name'] == 'CloudType'
-        assert result.attrs['_FillValue'] == 255
-        assert result.attrs['valid_range'] == [1, 15]
-
-        assert result.attrs['start_time'] == datetime(2023, 1, 16, 11, 11, 7, 250000)
-        assert result.attrs['end_time'] == datetime(2023, 1, 16, 11, 20, 11, 950000)
+        _check_stacked_metadata(result, "CloudType")
+        if combine_times:
+            assert result.attrs['start_time'] == datetime(2023, 1, 16, 11, 9, 17)
+            assert result.attrs['end_time'] == datetime(2023, 1, 16, 11, 28, 1, 900000)
+        else:
+            assert result.attrs['start_time'] == datetime(2023, 1, 16, 11, 11, 7, 250000)
+            assert result.attrs['end_time'] == datetime(2023, 1, 16, 11, 20, 11, 950000)

[CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main), warning on line 192 in satpy/tests/multiscene_tests/test_blend.py]
❌ New issue: Excess Number of Function Arguments. TestBlendFuncs.test_blend_two_scenes_using_stack_weighted has 5 arguments, threshold = 4.

     @pytest.fixture
     def datasets_and_weights(self):

@@ -294,7 +251,6 @@
         expected.attrs = combine_metadata(*[x.attrs for x in input_data['datasets'][0:3]])
 
         xr.testing.assert_equal(blend_result.compute(), expected.compute())
-
         assert expected.attrs == blend_result.attrs
 
     def test_blend_function_stack(self, datasets_and_weights):

@@ -308,8 +264,10 @@
 
         res = stack([ds1, ds2])
         expected = ds2.copy()
+        expected.attrs["start_time"] = ds1.attrs["start_time"]
 
         xr.testing.assert_equal(res.compute(), expected.compute())
+        assert expected.attrs == res.attrs
 
     def test_timeseries(self, datasets_and_weights):
         """Test the 'timeseries' function."""

@@ -329,3 +287,18 @@
         assert isinstance(res2, xr.DataArray)
         assert (2, ds1.shape[0], ds1.shape[1]) == res.shape
         assert (ds4.shape[0], ds4.shape[1]+ds5.shape[1]) == res2.shape


+def _check_stacked_metadata(data_arr: xr.DataArray, exp_name: str) -> None:
+    assert data_arr.attrs['units'] == '1'
+    assert data_arr.attrs['name'] == exp_name
+    assert data_arr.attrs['_FillValue'] == 255
+    assert data_arr.attrs['valid_range'] == [1, 15]
+
+    expected_area = _create_test_area()
+    assert data_arr.attrs['area'] == expected_area
+
+    # these metadata items don't match between all inputs
+    assert 'sensor' not in data_arr.attrs
+    assert 'platform_name' not in data_arr.attrs
+    assert 'long_name' not in data_arr.attrs
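
For completeness, the MultiScene-level workflow these tests exercise looks roughly like the sketch below (an editorial illustration mirroring the fixtures above). The Scene objects and per-scene weight DataArrays (``geo_scene``, ``polar_scene``, ``geo_weights``, ``polar_weights``) are hypothetical placeholders assumed to exist, for example with weights derived from satellite zenith angles:

```python
from functools import partial

from satpy import DataQuery, MultiScene
from satpy.multiscene import stack

# Group the cloud-type products from both scenes under one name.
mscn = MultiScene([geo_scene, polar_scene])
mscn.group({DataQuery(name='CloudType'): ['geo-ct', 'polar-ct']})

# Bind the weights and blend type into the blend function, then blend.
stack_with_weights = partial(stack, weights=[geo_weights, polar_weights],
                             blend_type='select_with_weights', combine_times=True)
blended = mscn.blend(blend_function=stack_with_weights)
blended['CloudType'].compute()
```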