lsst · erykoff · May 18, 2023 · May 12, 2023 · May 12, 2023 · May 15, 2023
diff --git a/python/lsst/cp/pipe/ptc/cpExtractPtcTask.py b/python/lsst/cp/pipe/ptc/cpExtractPtcTask.py
@@ -22,6 +22,7 @@
 import numpy as np
 from lmfit.models import GaussianModel
 import scipy.stats
+import warnings
 
 import lsst.afw.math as afwMath
 import lsst.pex.config as pexConfig
@@ -450,6 +451,23 @@ def run(self, inputExp, inputDims, taskMetadata):
                 im1Area, im2Area, imStatsCtrl, mu1, mu2 = self.getImageAreasMasksStats(exp1, exp2,
                                                                                        region=region)
 
+                # We demand that both mu1 and mu2 be finite and greater than 0.
+                if not np.isfinite(mu1) or not np.isfinite(mu2) \
+                   or ((np.nan_to_num(mu1) + np.nan_to_num(mu2)/2.) <= 0.0):
+                    self.log.warning(
+                        "Illegal mean value(s) detected for amp %s on exposure pair %d/%d",
+                        ampName,
+                        expId1,
+                        expId2,
+                    )
+                    partialPtcDataset.setAmpValuesPartialDataset(
+                        ampName,
+                        inputExpIdPair=(expId1, expId2),
+                        rawExpTime=expTime,
+                        expIdMask=False,
+                    )
+                    continue
+
                 # `measureMeanVarCov` is the function that measures
                 # the variance and covariances from a region of
                 # the difference image of two flats at the same
@@ -1000,7 +1018,19 @@ def computeGaussianHistogramParameters(self, im1Area, im2Area, imStatsCtrl, mu1,
             errVals = np.sqrt(yVals)
             errVals[(errVals == 0.0)] = 1.0
             pars = model.guess(yVals, x=xVals)
-            out = model.fit(yVals, pars, x=xVals, weights=1./errVals, calc_covar=True, method="least_squares")
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                # The least-squares fitter sometimes spouts (spurious) warnings
+                # when the model is very bad. Swallow these warnings now and
+                # let the KS test check the model below.
+                out = model.fit(
+                    yVals,
+                    pars,
+                    x=xVals,
+                    weights=1./errVals,
+                    calc_covar=True,
+                    method="least_squares",
+                )
 
             # Calculate chi2.
             chiArr = out.residual
@@ -1009,16 +1039,12 @@ def computeGaussianHistogramParameters(self, im1Area, im2Area, imStatsCtrl, mu1,
             sigmaFit = out.params["sigma"].value
 
             # Calculate KS test p-value for the fit.
-            # Seed this with the mean value, so that the same data will get the
-            # same result.
-            randomSeed = int((mu1 + mu2)/2.)
-            gSample = scipy.stats.norm.rvs(
-                size=numOk,
-                scale=sigmaFit,
-                loc=out.params["center"].value,
-                random_state=randomSeed,
+            ksResult = scipy.stats.ks_1samp(
+                diffArr,
+                scipy.stats.norm.cdf,
+                (out.params["center"].value, sigmaFit),
             )
-            ksResult = scipy.stats.ks_2samp(diffArr, gSample)
+
             kspValue = ksResult.pvalue
             if kspValue < 1e-15:
                 kspValue = 0.0

diff --git a/python/lsst/cp/pipe/ptc/cpSolvePtcTask.py b/python/lsst/cp/pipe/ptc/cpSolvePtcTask.py
@@ -122,45 +122,49 @@ class PhotonTransferCurveSolveConfig(pipeBase.PipelineTaskConfig,
         doc="Degree of polynomial to fit the PTC, when 'ptcFitType'=POLYNOMIAL.",
         default=3,
     )
-    doLegacyTurnoffAndOutlierSelection = pexConfig.Field(
+    doLegacyTurnoffSelection = pexConfig.Field(
         dtype=bool,
-        doc="Use 'legacy' computation for PTC turnoff and outlier selection. If set "
+        doc="Use 'legacy' computation for PTC turnoff selection. If set "
             "to False, then the KS test p-value selection will be used instead.",
         default=False,
     )
     sigmaCutPtcOutliers = pexConfig.Field(
         dtype=float,
-        doc="Sigma cut for outlier rejection in PTC. Only used if "
-            "doLegacyTurnoffAndOutlierSelection is True.",
+        doc="Sigma cut for outlier rejection in PTC.",
         default=5.0,
     )
     maxIterationsPtcOutliers = pexConfig.RangeField(
         dtype=int,
-        doc="Maximum number of iterations for outlier rejection in PTC. Only used if "
-            "doLegacyTurnoffAndOutlierSelection is True.",
+        doc="Maximum number of iterations for outlier rejection in PTC.",
         default=2,
         min=0
     )
+    maxSignalInitialPtcOutlierFit = pexConfig.Field(
+        dtype=float,
+        doc="Maximum signal considered for intial outlier fit. This should be below "
+            "the PTC turnoff to ensure accurate outlier rejection.",
+        default=30_000.,
+    )
     minVarPivotSearch = pexConfig.Field(
         dtype=float,
         doc="The code looks for a pivot signal point after which the variance starts decreasing at high-flux"
             " to exclude then from the PTC model fit. However, sometimes at low fluxes, the variance"
             " decreases slightly. Set this variable for the variance value, in ADU^2, after which the pivot "
-            " should be sought. Only used if doLegacyTurnoffAndOutlierSelection is True.",
+            " should be sought. Only used if doLegacyTurnoffSelection is True.",
         default=10000,
     )
     consecutivePointsVarDecreases = pexConfig.RangeField(
         dtype=int,
         doc="Required number of consecutive points/fluxes in the PTC where the variance "
             "decreases in order to find a first estimate of the PTC turn-off. "
-            "Only used if doLegacyTurnoffAndOutlierSelection is True.",
+            "Only used if doLegacyTurnoffSelection is True.",
         default=2,
         min=2
     )
     ksTestMinPvalue = pexConfig.Field(
         dtype=float,
         doc="Minimum value of the Gaussian histogram KS test p-value to be used in PTC fit. "
-            "Only used if doLegacyTurnoffAndOutlierSelection is False.",
+            "Only used if doLegacyTurnoffSelection is False.",
         default=0.01,
     )
     doFitBootstrap = pexConfig.Field(
@@ -328,7 +332,7 @@ def run(self, inputCovariances, camera=None, detId=0):
                     expIdMask = False
 
                 kspValue = partialPtcDataset.kspValues[ampName][0]
-                if not self.config.doLegacyTurnoffAndOutlierSelection and \
+                if not self.config.doLegacyTurnoffSelection and \
                    kspValue < self.config.ksTestMinPvalue:
                     expIdMask = False
 
@@ -357,6 +361,9 @@ def run(self, inputCovariances, camera=None, detId=0):
             datasetPtc.rawExpTimes[ampName] = datasetPtc.rawExpTimes[ampName][index]
             datasetPtc.rawMeans[ampName] = datasetPtc.rawMeans[ampName][index]
             datasetPtc.rawVars[ampName] = datasetPtc.rawVars[ampName][index]
+            datasetPtc.histVars[ampName] = datasetPtc.histVars[ampName][index]
+            datasetPtc.histChi2Dofs[ampName] = datasetPtc.histChi2Dofs[ampName][index]
+            datasetPtc.kspValues[ampName] = datasetPtc.kspValues[ampName][index]
             datasetPtc.expIdMask[ampName] = datasetPtc.expIdMask[ampName][index]
             datasetPtc.covariances[ampName] = datasetPtc.covariances[ampName][index]
             datasetPtc.covariancesSqrtWeights[ampName] = datasetPtc.covariancesSqrtWeights[ampName][index]
@@ -948,12 +955,11 @@ def errFunc(p, x, y):
         maxIterationsPtcOutliers = self.config.maxIterationsPtcOutliers
 
         for i, ampName in enumerate(dataset.ampNames):
-            timeVecOriginal = dataset.rawExpTimes[ampName]
-            meanVecOriginal = dataset.rawMeans[ampName]
-            varVecOriginal = dataset.rawVars[ampName]
+            meanVecOriginal = dataset.rawMeans[ampName].copy()
+            varVecOriginal = dataset.rawVars[ampName].copy()
             varVecOriginal = self._makeZeroSafe(varVecOriginal)
 
-            if self.config.doLegacyTurnoffAndOutlierSelection:
+            if self.config.doLegacyTurnoffSelection:
                 # Discard points when the variance starts to decrease after two
                 # consecutive signal levels
                 goodPoints = self._getInitialGoodPoints(meanVecOriginal, varVecOriginal,
@@ -974,11 +980,6 @@ def errFunc(p, x, y):
                 self.fillBadAmp(dataset, ptcFitType, ampName)
                 continue
 
-            # Save the point where the variance starts decreasing as the
-            # PTC turnoff point
-            ptcTurnoff = meanVecOriginal[goodPoints][-1]
-            dataset.ptcTurnoff[ampName] = ptcTurnoff
-
             mask = goodPoints
 
             if ptcFitType == 'EXPAPPROXIMATION':
@@ -996,73 +997,91 @@ def errFunc(p, x, y):
                 parsIniPtc = self._initialParsForPolynomial(self.config.polynomialFitDegree + 1)
                 bounds = self._boundsForPolynomial(parsIniPtc)
 
-            if self.config.doLegacyTurnoffAndOutlierSelection:
-                # Before bootstrap fit, do an iterative fit to get rid of
-                # outliers. This further process of outlier rejection be
-                # skipped if self.config.maxIterationsPtcOutliers = 0.
-                # We already did some initial outlier rejection above in
-                # self._getInitialGoodPoints.
-                count = 1
-                newMask = np.ones_like(meanVecOriginal, dtype=bool)
-                pars = parsIniPtc
-                while count <= maxIterationsPtcOutliers:
-                    # Note that application of the mask actually shrinks the
-                    # array to size rather than setting elements to zero (as we
-                    # want) so always update mask itself and re-apply to the
-                    # original data.
-                    meanTempVec = meanVecOriginal[mask]
-                    varTempVec = varVecOriginal[mask]
-                    res = least_squares(errFunc, parsIniPtc, bounds=bounds, args=(meanTempVec, varTempVec))
+            # We perform an initial (unweighted) fit of variance vs signal
+            # (after initial KS test or post-drop selection) to look for
+            # outliers, particularly at the high-flux end. The initial fit
+            # is performed only for points that are guaranteed to be below
+            # the PTC turnoff and then extrapolated to ensure that high
+            # flux points that have abnormal variance values can be properly
+            # rejected in this phase without biasing the initial fit.
+            # This algorithm was initially developed by Seth Digel for
+            # the EO Testing pipeline.
+
+            if maxIterationsPtcOutliers == 0:
+                # We are not doing any outlier rejection here, but we do want
+                # an initial fit.
+                res = least_squares(
+                    errFunc,
+                    parsIniPtc,
+                    bounds=bounds,
+                    args=(meanVecOriginal[mask], varVecOriginal[mask]),
+                )
+                pars = res.x
+                newMask = mask.copy()
+            else:
+                newMask = (mask & (meanVecOriginal <= self.config.maxSignalInitialPtcOutlierFit))
+
+                count = 0
+                lastMask = mask.copy()
+                while count < maxIterationsPtcOutliers:
+                    res = least_squares(
+                        errFunc,
+                        parsIniPtc,
+                        bounds=bounds,
+                        args=(meanVecOriginal[newMask], varVecOriginal[newMask]),
+                    )
                     pars = res.x
 
-                    # change this to the original from the temp because
-                    # the masks are ANDed meaning once a point is masked
-                    # it's always masked, and the masks must always be the
-                    # same length for broadcasting
                     sigResids = (varVecOriginal - ptcFunc(pars, meanVecOriginal))/np.sqrt(varVecOriginal)
-                    newMask = np.array([True if np.abs(r) < sigmaCutPtcOutliers else False
-                                        for r in sigResids])
-                    mask = mask & newMask
-                    if not (mask.any() and newMask.any()):
-                        msg = (f"SERIOUS: All points in either mask: {mask} or newMask: {newMask} are bad. "
+                    # The new mask includes points where the residuals are
+                    # finite, are less than the cut, and include the original
+                    # mask of known points that should not be used.
+                    newMask = (
+                        np.isfinite(sigResids)
+                        & (np.abs(np.nan_to_num(sigResids)) < sigmaCutPtcOutliers)
+                        & mask
+                    )
+                    if np.count_nonzero(newMask) == 0:
+                        msg = (f"SERIOUS: All points after outlier rejection are bad. "
                                f"Setting {ampName} to BAD.")
                         self.log.warning(msg)
                         # Fill entries with NaNs
                         self.fillBadAmp(dataset, ptcFitType, ampName)
                         break
-                    nDroppedTotal = Counter(mask)[False]
-                    self.log.debug("Iteration %d: discarded %d points in total for %s",
-                                   count, nDroppedTotal, ampName)
+
+                    self.log.debug(
+                        "Iteration %d: Removed %d points in total for %s.",
+                        count,
+                        np.count_nonzero(mask) - np.count_nonzero(newMask),
+                        ampName,
+                    )
+
+                    # If the mask hasn't changed then break out.
+                    if np.all(newMask == lastMask):
+                        self.log.debug("Convergence at iteration %d; breaking loop for %s.", count, ampName)
+                        break
+
+                    lastMask = newMask.copy()
+
                     count += 1
-                    # objects should never shrink
-                    assert (len(mask) == len(timeVecOriginal) == len(meanVecOriginal) == len(varVecOriginal))
-            else:
-                pars = parsIniPtc
-                meanTempVec = meanVecOriginal[mask]
-                varTempVec = varVecOriginal[mask]
-                res = least_squares(
-                    errFunc,
-                    parsIniPtc,
-                    bounds=bounds,
-                    args=(meanVecOriginal[mask], varVecOriginal[mask]),
-                )
-                pars = res.x
-                newMask = mask
 
-            if not (mask.any() and newMask.any()):
+            # Set the mask to the new mask
+            mask = newMask.copy()
+
+            if not mask.any():
+                # We hae already filled the bad amp above, so continue.
                 continue
-            dataset.expIdMask[ampName] = np.array(dataset.expIdMask[ampName])
-            # store the final mask
-            if len(dataset.expIdMask[ampName]):
-                dataset.expIdMask[ampName] &= mask  # bitwise_and if there is already a mask
-            else:
-                dataset.expIdMask[ampName] = mask
-            # In case there was a previous mask stored
-            mask = dataset.expIdMask[ampName]
+
+            dataset.expIdMask[ampName] = mask
+
             parsIniPtc = pars
             meanVecFinal = meanVecOriginal[mask]
             varVecFinal = varVecOriginal[mask]
 
+            # Save the maximum point after outlier detection as the
+            # PTC turnoff point.
+            dataset.ptcTurnoff[ampName] = meanVecFinal[-1]
+
             if Counter(mask)[False] > 0:
                 self.log.info("Number of points discarded in PTC of amplifier %s:"
                               " %d out of %d", ampName, Counter(mask)[False], len(meanVecOriginal))