spacetelescope · hbushouse · May 7, 2024 · Apr 24, 2024 · Apr 24, 2024 · Apr 29, 2024
@@ -92,6 +92,9 @@ resample
 - Remove sleep in median combination added in 8305 as it did not address
   the issue in operation [#8419]
 
+- Update variance handling to propagate resampled variance components with
+  weights that match the science `weight_type`. [#8437]
+
 residual_fringe
 ---------------
 

@@ -34,6 +34,60 @@ WCS of each input image and the WCS of the defined output product.
 This mapping function gets passed to cdriz to drive the actual
 drizzling to create the output product.
 
+
+Error Propagation
+-----------------
+
+The error associated with each resampled pixel can in principle be derived
+from the variance components associated with each input pixel, weighted by
+the square of the input user weights and the square of the overlap between
+the input and output pixels. In practice, the cdriz routine does not currently
+support propagating variance data alongside science images, so the output
+error cannot be precisely calculated.
+
+To approximate the error on a resampled pixel, the variance arrays associated
+with each input image are resampled individually, then combined with a weighted
+sum.  The process is:
+
+#. For each input image, take the square root of each of the read noise variance
+   array to make an error image.
+
+#. Drizzle the read noise error image onto the output WCS, with drizzle
+   parameters matching those used for the science data.
+
+#. Square the resampled read noise to make a variance array.
+
+   a. If the resampling `weight_type` is an inverse variance map (`ivm`), weight
+      the resampled variance by the square of its own inverse.
+
+   #. If the `weight_type` is the exposure time (`exptime`), weight the
+      resampled variance by the square of the exposure time for the image.
+
+#. Add the weighted, resampled read noise variance to a running sum across all
+   images.  Add the weights (unsquared) to a separate running sum across
+   all images.
+
+#. Perform the same steps for the Poisson noise variance and the flat variance.
+   For these components, the weight for the sum is either the resampled read
+   noise variance or else the exposure time.
+
+#. For each variance component (read noise, Poisson, and flat), divide the
+   weighted variance sum by the total weight, squared.
+
+After each variance component is resampled and summed, the final error
+array is computed as the square root of the sum of the three independent
+variance components.  This error image is stored in the ``err`` attribute
+in the output data model or the ``'ERR'`` extension of the FITS file.
+
+It is expected that the output errors computed in this way will
+generally overestimate the true error on the resampled data.  The magnitude
+of the overestimation depends on the details of the pixel weights
+and error images.  Note, however, that drizzling error images produces
+a much better estimate of the output error than directly drizzling
+the variance images, since the kernel overlap weights do not need to be
+squared for combining error values.
+
+
 Context Image
 -------------
 
@@ -93,6 +147,7 @@ context array ``con``, one can do something like this:
 For convenience, this functionality was implemented in the
 :py:func:`~jwst.resample.resample_utils.decode_context` function.
 
+
 Spectroscopic Data
 ------------------
 

@@ -1,5 +1,6 @@
 import logging
 import os
+import warnings
 
 import numpy as np
 from drizzle import util
@@ -373,20 +374,19 @@ def resample_many_to_one(self):
             )
             del data, inwht
 
-        # Resample variances array in self.input_models to output_model
-        self.resample_variance_array("var_rnoise", output_model)
-        self.resample_variance_array("var_poisson", output_model)
-        self.resample_variance_array("var_flat", output_model)
-        output_model.err = np.sqrt(
-            np.nansum(
-                [
-                    output_model.var_rnoise,
-                    output_model.var_poisson,
-                    output_model.var_flat
-                ],
-                axis=0
-            )
-        )
+        # Resample variance arrays in self.input_models to output_model
+        self.resample_variance_arrays(output_model)
+        var_components = [
+            output_model.var_rnoise,
+            output_model.var_poisson,
+            output_model.var_flat
+        ]
+        output_model.err = np.sqrt(np.nansum(var_components,axis=0))
+
+        # nansum returns zero for input that is all NaN -
+        # set those values to NaN instead
+        all_nan = np.all(np.isnan(var_components), axis=0)
+        output_model.err[all_nan] = np.nan
 
         self.update_exposure_times(output_model)
         self.output_models.append(output_model)
@@ -396,85 +396,160 @@ def resample_many_to_one(self):
 
         return self.output_models
 
-    def resample_variance_array(self, name, output_model):
-        """Resample variance arrays from self.input_models to the output_model
+    def resample_variance_arrays(self, output_model):
+        """Resample variance arrays from self.input_models to the output_model.
 
-        Resample the ``name`` variance array to the same name in output_model,
-        using a cumulative sum.
+        Variance images from each input model are resampled individually and
+        added to a weighted sum. If weight_type is 'ivm', the inverse of the
+        resampled read noise variance is used as the weight for all the variance
+        components. If weight_type is 'exptime', the exposure time is used.
 
-        This modifies output_model in-place.
+        The output_model is modified in place.
         """
-        output_wcs = output_model.meta.wcs
-        inverse_variance_sum = np.full_like(output_model.data, np.nan)
-
-        log.info(f"Resampling {name}")
+        log.info("Resampling variance components")
+        weighted_rn_var = np.full_like(output_model.data, np.nan)
+        weighted_pn_var = np.full_like(output_model.data, np.nan)
+        weighted_flat_var = np.full_like(output_model.data, np.nan)
+        total_weight_rn_var = np.zeros_like(output_model.data)
+        total_weight_pn_var = np.zeros_like(output_model.data)
+        total_weight_flat_var = np.zeros_like(output_model.data)
         for model in self.input_models:
-            variance = getattr(model, name)
-            if variance is None or variance.size == 0:
-                log.debug(
-                    f"No data for '{name}' for model "
-                    f"{repr(model.meta.filename)}. Skipping ..."
+            # Do the read noise variance first, so it can be
+            # used for weights if needed
+            rn_var = self._resample_one_variance_array(
+                "var_rnoise", model, output_model)
+
+            # Find valid weighting values in the variance
+            if rn_var is not None:
+                mask = (rn_var > 0) & np.isfinite(rn_var)
+            else:
+                mask = np.full_like(rn_var, False)
+
+            # Set the weight for the image from the weight type
+            weight = np.ones(output_model.data.shape)
+            if self.weight_type == "ivm" and rn_var is not None:
+                weight[mask] = rn_var[mask] ** -1
+            elif self.weight_type == "exptime":
+                if resample_utils.check_for_tmeasure(model):
+                    weight[:] = model.meta.exposure.measurement_time
+                else:
+                    weight[:] = model.meta.exposure.exposure_time
+
+            # Weight and add the readnoise variance
+            # Note: floating point overflow is an issue if variance weights
+            # are used - it can't be squared before multiplication
+            if rn_var is not None:
+                mask = (rn_var >= 0) & np.isfinite(rn_var) & (weight > 0)
+                weighted_rn_var[mask] = np.nansum(
+                    [weighted_rn_var[mask],
+                     rn_var[mask] * weight[mask] * weight[mask]],
+                    axis=0
                 )
-                continue
-
-            elif variance.shape != model.data.shape:
-                log.warning(
-                    f"Data shape mismatch for '{name}' for model "
-                    f"{repr(model.meta.filename)}. Skipping ..."
+                total_weight_rn_var[mask] += weight[mask]
+
+            # Now do poisson and flat variance, updating only valid new values
+            # (zero is a valid value; negative, inf, or NaN are not)
+            pn_var = self._resample_one_variance_array(
+                "var_poisson", model, output_model)
+            if pn_var is not None:
+                mask = (pn_var >= 0) & np.isfinite(pn_var) & (weight > 0)
+                weighted_pn_var[mask] = np.nansum(
+                    [weighted_pn_var[mask],
+                     pn_var[mask] * weight[mask] * weight[mask]],
+                    axis=0
                 )
-                continue
-
-            # Make input weight map of unity where there is science data
-            inwht = resample_utils.build_driz_weight(
-                model,
-                weight_type=None,
-                good_bits=self.good_bits
-            )
-
-            resampled_variance = np.zeros_like(output_model.data)
-            outwht = np.zeros_like(output_model.data)
-            outcon = np.zeros_like(output_model.con)
-
-            xmin, xmax, ymin, ymax = resample_utils._resample_range(
-                variance.shape,
-                model.meta.wcs.bounding_box
+                total_weight_pn_var[mask] += weight[mask]
+
+            flat_var = self._resample_one_variance_array(
+                "var_flat", model, output_model)
+            if flat_var is not None:
+                mask = (flat_var >= 0) & np.isfinite(flat_var) & (weight > 0)
+                weighted_flat_var[mask] = np.nansum(
+                    [weighted_flat_var[mask],
+                     flat_var[mask] * weight[mask] * weight[mask]],
+                    axis=0
+                )
+                total_weight_flat_var[mask] += weight[mask]
+
+        # We now have a sum of the weighted resampled variances.
+        # Divide by the total weights, squared, and set in the output model.
+        # Zero weight and missing values are NaN in the output.
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", "invalid value*", RuntimeWarning)
+            warnings.filterwarnings("ignore", "divide by zero*", RuntimeWarning)
+
+            output_variance = (weighted_rn_var
+                               / total_weight_rn_var / total_weight_rn_var)
+            setattr(output_model, "var_rnoise", output_variance)
+
+            output_variance = (weighted_pn_var
+                               / total_weight_pn_var / total_weight_pn_var)
+            setattr(output_model, "var_poisson", output_variance)
+
+            output_variance = (weighted_flat_var
+                               / total_weight_flat_var / total_weight_flat_var)
+            setattr(output_model, "var_flat", output_variance)
+
+    def _resample_one_variance_array(self, name, input_model, output_model):
+        """Resample one variance image from an input model.
+
+        The error image is passed to drizzle instead of the variance, to
+        better match kernel overlap and user weights to the data, in the
+        pixel averaging process. The drizzled error image is squared before
+        returning.
+        """
+        variance = getattr(input_model, name)
+        if variance is None or variance.size == 0:
+            log.debug(
+                f"No data for '{name}' for model "
+                f"{repr(input_model.meta.filename)}. Skipping ..."
             )
+            return
 
-            iscale = model.meta.iscale
-
-            # Resample the variance array. Fill "unpopulated" pixels with NaNs.
-            self.drizzle_arrays(
-                variance,
-                inwht,
-                model.meta.wcs,
-                output_wcs,
-                resampled_variance,
-                outwht,
-                outcon,
-                iscale=iscale**2,
-                pixfrac=self.pixfrac,
-                kernel=self.kernel,
-                fillval=np.nan,
-                xmin=xmin,
-                xmax=xmax,
-                ymin=ymin,
-                ymax=ymax
+        elif variance.shape != input_model.data.shape:
+            log.warning(
+                f"Data shape mismatch for '{name}' for model "
+                f"{repr(input_model.meta.filename)}. Skipping ..."
             )
+            return
 
-            # Add the inverse of the resampled variance to a running sum.
-            # Update only pixels (in the running sum) with valid new values:
-            mask = resampled_variance > 0
+        # Make input weight map
+        inwht = resample_utils.build_driz_weight(
+            input_model,
+            weight_type=self.weight_type,  # weights match science
+            good_bits=self.good_bits
+        )
 
-            inverse_variance_sum[mask] = np.nansum(
-                [inverse_variance_sum[mask], np.reciprocal(resampled_variance[mask])],
-                axis=0
-            )
+        resampled_error = np.zeros_like(output_model.data)
+        outwht = np.zeros_like(output_model.data)
+        outcon = np.zeros_like(output_model.con)
 
-        # We now have a sum of the inverse resampled variances.  We need the
-        # inverse of that to get back to units of variance.
-        output_variance = np.reciprocal(inverse_variance_sum)
+        xmin, xmax, ymin, ymax = resample_utils._resample_range(
+            variance.shape,
+            input_model.meta.wcs.bounding_box
+        )
 
-        setattr(output_model, name, output_variance)
+        iscale = input_model.meta.iscale
+
+        # Resample the error array. Fill "unpopulated" pixels with NaNs.
+        self.drizzle_arrays(
+            np.sqrt(variance),
+            inwht,
+            input_model.meta.wcs,
+            output_model.meta.wcs,
+            resampled_error,
+            outwht,
+            outcon,
+            iscale=iscale,
+            pixfrac=self.pixfrac,
+            kernel=self.kernel,
+            fillval=np.nan,
+            xmin=xmin,
+            xmax=xmax,
+            ymin=ymin,
+            ymax=ymax
+        )
+        return resampled_error ** 2
 
     def update_exposure_times(self, output_model):
         """Modify exposure time metadata in-place"""
@@ -485,7 +560,7 @@ def update_exposure_times(self, output_model):
         measurement_time_failures = []
         for exposure in self.input_models.models_grouped:
             total_exposure_time += exposure[0].meta.exposure.exposure_time
-            if not resample_utils._check_for_tmeasure(exposure[0]):
+            if not resample_utils.check_for_tmeasure(exposure[0]):
                 measurement_time_failures.append(1)
             else:
                 total_measurement_time += exposure[0].meta.exposure.measurement_time

@@ -182,7 +182,7 @@ def build_driz_weight(model, weight_type=None, good_bits=None):
             inv_variance = 1.0
         result = inv_variance * dqmask
     elif weight_type == 'exptime':
-        if _check_for_tmeasure(model):
+        if check_for_tmeasure(model):
             exptime = model.meta.exposure.measurement_time
         else:
             exptime = model.meta.exposure.exposure_time
@@ -311,7 +311,7 @@ def _resample_range(data_shape, bbox=None):
     return xmin, xmax, ymin, ymax
 
 
-def _check_for_tmeasure(model):
+def check_for_tmeasure(model):
     '''
     Check if the measurement_time keyword is present in the datamodel
     for use in exptime weighting. If not, revert to using exposure_time.