Modify variance plane and optionally vary injected light

lsst · Jan 11, 2024 · 47d922d · 47d922d
1 parent b5039a9
commit 47d922d
Show file tree

Hide file tree

Showing 4 changed files with 305 additions and 4 deletions.
diff --git a/python/lsst/source/injection/inject_base.py b/python/lsst/source/injection/inject_base.py
@@ -117,6 +117,20 @@ class BaseInjectConfig(PipelineTaskConfig, pipelineConnections=BaseInjectConnect
         doc="String to prefix to the entries in the *col_stamp* column, for example, a directory path.",
         default="",
     )
+    variance_scale = Field[float](
+        doc="Number by which to multiply injected flux to obtain injected variance.",
+        default=0.0
+    )
+    add_noise = Field[bool](
+        doc="Whether to randomly vary the injected flux in each pixel by an amount consistent with "
+        "the injected variance.",
+        default=True
+    )
+    noise_seed = Field[int](
+        doc="Initial seed for random noise generation. This value increments by 1 for each injected "
+        "object, so each object has an independent noise realization.",
+        default=0
+    )
 
     # Custom column names.
     col_ra = Field[str](
@@ -267,6 +281,9 @@ def run(self, injection_catalogs, input_exposure, psf, photo_calib, wcs):
                 mask_plane_name=self.config.mask_plane_name,
                 calib_flux_radius=self.config.calib_flux_radius,
                 draw_size_max=10000,  # TODO: replace draw_size logic with GS logic.
+                variance_scale=self.config.variance_scale,
+                add_noise=self.config.add_noise,
+                noise_seed=self.config.noise_seed,
                 logger=self.log,
             )
             # Add inject_galsim_objects_into_exposure outputs into output cat.

diff --git a/python/lsst/source/injection/inject_coadd.py b/python/lsst/source/injection/inject_coadd.py
@@ -23,10 +23,16 @@
 
 __all__ = ["CoaddInjectConnections", "CoaddInjectConfig", "CoaddInjectTask"]
 
+from lsst.pex.config import Field
 from lsst.pipe.base.connectionTypes import Input, Output
 
 from .inject_base import BaseInjectConfig, BaseInjectConnections, BaseInjectTask
 
+import numpy as np
+from sklearn.linear_model import LinearRegression, RANSACRegressor
+from sklearn.cluster import KMeans
+from sklearn.metrics import mean_squared_error
+
 
 class CoaddInjectConnections(
     BaseInjectConnections,
@@ -63,15 +69,69 @@ class CoaddInjectConfig(  # type: ignore [call-arg]
 ):
     """Coadd-level configuration for source injection tasks."""
 
-    pass
-
+    n_fits_1 = Field[int](
+        doc="Perform this many RANSAC fits in the first round, to get a sample "
+        "of different slopes based on the different random samples of points used "
+        "in the fit.",
+        default=20
+    )
+    n_fits_2 = Field[int](
+        doc="Perform this many RANSAC fits in the second round, to get a sample "
+        "of different slopes based on the different random samples of points used "
+        "in the fit.",
+        default=20
+    )
+    threshold_scale_1 = Field[float](
+        doc="An outlier in the first RANSAC fit is farther from the fit line, "
+        "in terms of squared error, than this multiple of the initial linear MSE.",
+        default=0.1
+    )
+    threshold_scale_2 = Field[float](
+        doc="An outlier in the second RANSAC fit is farther from the fit line, "
+        "in terms of squared error, than this multiple of the initial linear MSE.",
+        default=0.1
+    )
+    variance_fit_seed_1 = Field[int](
+        doc="Seed for first RANSAC fit of flux vs. variance.",
+        default=0
+    )
+    variance_fit_seed_2 = Field[int](
+        doc="Seed for second RANSAC fit of flux vs. variance.",
+        default=0
+    )
+    n_clusters_1 = Field[int](
+        doc="K-means cluster the first set of RANSAC fits using this many clusters, "
+        "in order to find the most stable slope (biggest cluster).",
+        default=4
+    )
+    n_clusters_2 = Field[int](
+        doc="K-means cluster the second set of RANSAC fits using this many clusters, "
+        "in order to find the most stable slope (biggest cluster).",
+        default=3
+    )
+    kmeans_seed_1 = Field[int](
+        doc="Seed for first round of k-means clustering.",
+        default=0
+    )
+    kmeans_seed_2 = Field[int](
+        doc="Seed for second round of k-means clustering.",
+        default=0
+    )
 
 class CoaddInjectTask(BaseInjectTask):
     """Coadd-level class for injecting sources into images."""
 
     _DefaultName = "coaddInjectTask"
     ConfigClass = CoaddInjectConfig
 
+    def run(self, injection_catalogs, input_exposure, psf, photo_calib, wcs):
+        self.log.info("Fitting flux vs. variance in each pixel.")
+        self.config.variance_scale = self.get_variance_scale(input_exposure)
+        self.log.info("Variance scale factor: %.6f",
+                     self.config.variance_scale)
+
+        return super().run(injection_catalogs, input_exposure, psf, photo_calib, wcs)
+
     def runQuantum(self, butler_quantum_context, input_refs, output_refs):
         inputs = butler_quantum_context.get(input_refs)
 
@@ -82,3 +142,116 @@ def runQuantum(self, butler_quantum_context, input_refs, output_refs):
         input_keys = ["injection_catalogs", "input_exposure", "sky_map", "psf", "photo_calib", "wcs"]
         outputs = self.run(**{key: value for (key, value) in inputs.items() if key in input_keys})
         butler_quantum_context.put(outputs, output_refs)
+
+    def get_variance_scale(self, exposure):
+        x = exposure.image.array.flatten()
+        y = exposure.variance.array.flatten()
+
+        # Identify bad pixels
+        bad_pixels = ~np.isfinite(x) | ~np.isfinite(y)
+        # Replace bad pixel values with the image median
+        if np.sum(bad_pixels) > 0:
+            median_image_value = np.median(x)
+            median_variance_value = np.median(y)
+            x[bad_pixels] = median_image_value
+            y[bad_pixels] = median_variance_value
+
+        # Simple linear regression to establish MSE.
+        linear = LinearRegression()
+        linear.fit(x.reshape(-1, 1), y)
+        linear_mse = mean_squared_error(y, linear.predict(x.reshape(-1,1)))
+
+        # First RANSAC fit
+        fit_results = []
+        for seed in range(self.config.variance_fit_seed_1,
+                          self.config.variance_fit_seed_1 + self.config.n_fits_1):
+            ransac = RANSACRegressor(loss='squared_error',
+                                     residual_threshold=self.config.threshold_scale_1 * linear_mse,
+                                     max_trials=1000,
+                                     random_state=seed)
+            ransac.fit(x.reshape(-1, 1), y)
+            # Remember fit results
+            slope = ransac.estimator_.coef_[0]
+            fit_results.append((slope, seed))
+
+        # K-means cluster the first round of fits,
+        # to find the most stable results.
+        kmeans = KMeans(n_clusters=self.config.n_clusters_1,
+                        random_state=self.config.kmeans_seed_1,
+                        n_init=10)
+        kmeans.fit(np.log(np.array([f[0] for f in fit_results if f[0] > 0])).reshape(-1,1))
+        label_counts = [np.sum(kmeans.labels_ == idx) for idx in range(self.config.n_clusters_1)]
+
+        # Recall one of the fits, chosen arbitrarily from those which are both
+        # stable, according to the first k-means fit, and positive-slope.
+        stable_fit_seeds = np.array([f[1] for f in fit_results if f[0] > 0])[
+                                 kmeans.labels_ == np.argmax(label_counts)]
+        if len(stable_fit_seeds == 0):
+            # Throw a warning
+            pass
+        else:
+            seed = stable_fit_seeds[0]
+        ransac = RANSACRegressor(loss='squared_error',
+                             residual_threshold=self.config.threshold_scale_1 * linear_mse,
+                             max_trials=1000,
+                             random_state=seed)
+        ransac.fit(x.reshape(-1, 1), y)
+
+        # Label the pixels with a "good" variance vs. flux relationship
+        # (the inliers), together with the ones that are further from a simple
+        # straight line (the outliers).
+        inlier_mask_1 = ransac.inlier_mask_
+        outlier_mask_1 = ~inlier_mask_1
+
+        # Second RANSAC fit,
+        # on just the outliers of the 1st fit.
+        fit_results = []
+        for seed in range(self.config.variance_fit_seed_2,
+                          self.config.variance_fit_seed_2 + self.config.n_fits_2):
+            ransac = RANSACRegressor(loss='squared_error',
+                                     residual_threshold=self.config.threshold_scale_2 * linear_mse,
+                                     max_trials=1000,
+                                     random_state=seed)
+            ransac.fit(x[outlier_mask_1].reshape(-1, 1), y[outlier_mask_1])
+            # Remember fit results
+            slope = ransac.estimator_.coef_[0]
+            fit_results.append((slope, seed))
+
+        # K-Means cluster the second round of fits,
+        # to find the most stable result
+        kmeans = KMeans(n_clusters=self.config.n_clusters_2,
+                        random_state=self.config.kmeans_seed_2,
+                        n_init=10)
+        kmeans.fit(np.log(np.array([f[0] for f in fit_results if f[0] > 0])).reshape(-1,1))
+        label_counts = [np.sum(kmeans.labels_ == idx) for idx in range(self.config.n_clusters_2)]
+
+        # Recall one of the stable fits
+        stable_fit_seeds = np.array([f[1] for f in fit_results if f[0] > 0])[
+                                 kmeans.labels_ == np.argmax(label_counts)]
+        if len(stable_fit_seeds == 0):
+            # Throw a warning
+            pass
+        else:
+            seed = stable_fit_seeds[0]
+        ransac = RANSACRegressor(loss='squared_error',
+                             residual_threshold=self.config.threshold_scale_2 * linear_mse,
+                             max_trials=1000,
+                             random_state=seed)
+        ransac.fit(x[outlier_mask_1].reshape(-1, 1), y[outlier_mask_1])
+
+        # Pixels with a "good" variance vs. flux relationship:
+        # Union of the inliers from the first fit
+        # together with the inliers from the second fit.
+        x_good = np.concatenate(
+            (x[inlier_mask_1], x[outlier_mask_1][ransac.inlier_mask_]),
+            axis=None)
+        y_good = np.concatenate(
+            (y[inlier_mask_1], y[outlier_mask_1][ransac.inlier_mask_]),
+            axis=None)
+
+        # Fit all the good pixels with a simple least squares regression.
+        linear = LinearRegression()
+        linear.fit(x_good.reshape(-1, 1), y_good)
+
+        # Return the slope of the final fit.
+        return float(linear.coef_[0])
diff --git a/python/lsst/source/injection/inject_engine.py b/python/lsst/source/injection/inject_engine.py
@@ -389,6 +389,9 @@ def inject_galsim_objects_into_exposure(
     mask_plane_name: str = "INJECTED",
     calib_flux_radius: float = 12.0,
     draw_size_max: int = 1000,
+    variance_scale: float = 0.0,
+    add_noise: bool = True,
+    noise_seed: int = 0,
     logger: Any | None = None,
 ) -> tuple[list[int], list[galsim.BoundsI], list[bool], list[bool]]:
     """Inject sources into given exposure using GalSim.
@@ -411,6 +414,14 @@ def inject_galsim_objects_into_exposure(
     draw_size_max : `int`
         Maximum allowed size of the drawn object. If the object is larger than
         this, the draw size will be clipped to this size.
+    variance_scale : `float`
+        Factor by which to multiply injected image flux to obtain the injected
+        variance level.
+    add_noise : `bool`
+        Whether to randomly vary the amount of injected image flux by an amount
+        consistent with the amount of injected variance.
+    noise_seed : `int`
+        Initial seed for random noise generation.
     logger : `lsst.utils.logging.LsstLogAdapter`, optional
         Logger to use for logging messages.
 
@@ -439,6 +450,7 @@ def inject_galsim_objects_into_exposure(
     bbox = exposure.getBBox()
     full_bounds = galsim.BoundsI(bbox.minX, bbox.maxX, bbox.minY, bbox.maxY)
     galsim_image = galsim.Image(exposure.image.array, bounds=full_bounds)
+    galsim_variance = galsim.Image(exposure.variance.array, bounds=full_bounds)
     pixel_scale = wcs.getPixelScale(bbox.getCenter()).asArcseconds()
 
     draw_sizes: list[int] = []
@@ -525,14 +537,20 @@ def inject_galsim_objects_into_exposure(
         # Inject the source if there is any overlap.
         if object_common_bounds.area() > 0:
             common_image = galsim_image[object_common_bounds]
+            common_variance = galsim_variance[object_common_bounds]
             offset = posd - object_common_bounds.true_center
+            # Attempt to draw a smooth version of the image,
+            # representing an expected light profile.
             # Note, for calexp injection, pixel is already part of the PSF and
             # for coadd injection, it's incorrect to include the output pixel.
             # So for both cases, we draw using method='no_pixel'.
+            image_template = common_image.copy()
+            draw_succeeded = False
             try:
-                conv.drawImage(
-                    common_image, add_to_image=True, offset=offset, wcs=galsim_wcs, method="no_pixel"
+                image_template = conv.drawImage(
+                    image_template, add_to_image=False, offset=offset, wcs=galsim_wcs, method="no_pixel"
                 )
+                draw_succeeded = True
             except GalSimFFTSizeError as err:
                 fft_size_errors[i] = True
                 if logger:
@@ -542,6 +560,49 @@ def inject_galsim_objects_into_exposure(
                         err,
                     )
                 continue
+
+            # If the smooth image can be drawn successfully,
+            # we can do everything else.
+            if draw_succeeded:
+                # Set a variance level in each pixel
+                # corresponding to the drawn light profile.
+                variance_template = image_template.copy()
+                variance_template *= variance_scale
+
+                if add_noise:
+                    # For generating noise,
+                    # variance must be meaningful.
+                    if np.sum(variance_template.array < 0) > 0:
+                        if logger:
+                            logger.debug(
+                                "Setting negative-variance pixels to 0 for noise generation."
+                            )
+                        variance_template.array[variance_template.array < 0] = 0
+                    if np.sum(~np.isfinite(variance_template.array)) > 0:
+                        if logger:
+                            logger.debug(
+                                "Setting non-finite-variance pixels to 0 for noise generation."
+                            )
+                        variance_template.array[~np.isfinite(variance_template.array)] = 0
+
+                    # Randomly vary the injected flux in each pixel,
+                    # consistent with the true variance level.
+                    rng = galsim.BaseDeviate(noise_seed)
+                    variable_noise = galsim.VariableGaussianNoise(rng, variance_template)
+                    image_template.addNoise(variable_noise)
+
+                    # Set an "estimated" variance level in each pixel,
+                    # corresponding to the randomly varied image.
+                    variance_template = image_template.copy()
+                    variance_template *= variance_scale
+
+                # Add the randomly varied synthetic image to the original
+                # image.
+                common_image += image_template
+                # Add the estimated variance of the injection to the original
+                # variance.
+                common_variance += variance_template
+
             common_box = Box2I(
                 Point2I(object_common_bounds.xmin, object_common_bounds.ymin),
                 Point2I(object_common_bounds.xmax, object_common_bounds.ymax),
@@ -564,4 +625,8 @@ def inject_galsim_objects_into_exposure(
             if logger:
                 logger.debug("No area overlap for object at %s; flagging and skipping.", sky_coords)
 
+        # Increment the seed so different noise is generated for different
+        # objects.
+        noise_seed += 1
+
     return draw_sizes, common_bounds, fft_size_errors, psf_compute_errors