Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-38500: Define metrics to summarize spuriousness scores for a visit #151

Merged
merged 1 commit into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 19 additions & 1 deletion pipelines/apDetectorVisitQualityCore.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,27 @@ tasks:
class: lsst.analysis.tools.tasks.AssocDiaSrcDetectorVisitAnalysisTask
config:
# atools.numDiaSources: NumDiaSourcesMetric
atools.numDiaSourcesAll: NumDiaSourcesAllMetric
atools.numDiaSourcesAll: NumDiaSourcesSelectionMetric
atools.numDiaSourcesAll.metricName: numDiaSourcesAll
atools.numDiaSourcesAll.process.calculateActions.countingAction.vectorKey: diaSourceId
atools.numDipoles: NumDipolesMetric
atools.numSsObjects: NumSsObjectsMetric
atools.numDiaSourcesHighReliability: NumDiaSourcesSelectionMetric
atools.numDiaSourcesHighReliability.metricName: numDiaSourcesHighReliability
atools.numDiaSourcesHighReliability.process.calculateActions.countingAction.op: gt
atools.numDiaSourcesHighReliability.process.calculateActions.countingAction.threshold: 0.9
atools.numDiaSourcesHighReliability.process.calculateActions.countingAction.vectorKey: reliability
atools.numDiaSourcesLowReliability: NumDiaSourcesSelectionMetric
atools.numDiaSourcesLowReliability.metricName: numDiaSourcesLowReliability
atools.numDiaSourcesLowReliability.process.calculateActions.countingAction.op: lt
atools.numDiaSourcesLowReliability.process.calculateActions.countingAction.threshold: 0.1
atools.numDiaSourcesLowReliability.process.calculateActions.countingAction.vectorKey: reliability
atools.numDiaSourcesNanReliability: NumDiaSourcesSelectionMetric
atools.numDiaSourcesNanReliability.metricName: numDiaSourcesNanReliability
atools.numDiaSourcesNanReliability.process.calculateActions.countingAction.op: eq
atools.numDiaSourcesNanReliability.process.calculateActions.countingAction.threshold: !!float nan
atools.numDiaSourcesNanReliability.process.calculateActions.countingAction.vectorKey: reliability
atools.diaSourcesGoodVsBadRatio: DiaSourcesGoodVsBadRatioMetric
connections.outputName: assocDiaSrcCore
atools.simpleSky: SimpleDiaPlot
python: |
Expand Down
85 changes: 81 additions & 4 deletions python/lsst/analysis/tools/actions/scalar/scalarActions.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,16 @@
"SumAction",
"MedianHistAction",
"IqrHistAction",
"DivideScalar",
)

import operator
from math import nan
from typing import cast

import numpy as np
from lsst.pex.config import ChoiceField, Field
from lsst.pex.config.configurableActions import ConfigurableActionField

from ...interfaces import KeyedData, KeyedDataSchema, Scalar, ScalarAction, Vector
from ...math import nanMax, nanMean, nanMedian, nanMin, nanSigmaMad, nanStd
Expand Down Expand Up @@ -104,14 +107,59 @@ def __call__(self, data: KeyedData, **kwargs) -> Scalar:
return nanSigmaMad(data[self.vectorKey.format(**kwargs)][mask])


class CountAction(ScalarAction):
    """Count the entries of a vector that satisfy a threshold comparison.

    The comparison operator is given by name: "lt", "le", "ge", "gt", "ne"
    and "eq" stand for <, <=, >=, >, != and == respectively.

    Notes
    -----
    - To count non-NaN values, set only ``vectorKey``; the defaults
      (``op="ne"``, ``threshold=nan``) select every entry that is not NaN.
    - To count NaN values, set ``op="eq"`` and leave ``threshold`` as NaN
      (``math.nan`` in Python, ``threshold: !!float nan`` in YAML).
    - With a NaN threshold only "eq" and "ne" are meaningful; any other
      operator raises `ValueError`.
    - With a finite threshold, NaN entries are dropped before comparing.
      For example, use ``op="lt"`` to count values strictly less than the
      threshold and ``op="le"`` to also include values equal to it.
    """

    vectorKey = Field[str]("Key of Vector to count")
    op = ChoiceField[str](
        doc="Operator name string.",
        allowed={
            "lt": "less than threshold",
            "le": "less than or equal to threshold",
            "ge": "greater than or equal to threshold",
            "ne": "not equal to a given value",
            "eq": "equal to a given value",
            "gt": "greater than threshold",
        },
        default="ne",
    )
    threshold = Field[float](doc="Threshold to apply.", default=nan)

    def getInputSchema(self) -> KeyedDataSchema:
        return ((self.vectorKey, Vector),)

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Return the number of selected entries in the configured vector.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data containing the vector named by ``vectorKey``.
        **kwargs
            Used to format ``vectorKey`` and passed to ``getMask``.

        Returns
        -------
        result : `Scalar`
            Number of entries satisfying the configured comparison.

        Raises
        ------
        ValueError
            Raised if ``threshold`` is NaN and ``op`` is neither "eq" nor
            "ne".
        """
        mask = self.getMask(**kwargs)
        arr = cast(Vector, data[self.vectorKey.format(**kwargs)])[mask]

        # NaN never compares equal to anything (itself included), so the
        # threshold has to be tested with isnan rather than ``== nan``.
        if np.isnan(self.threshold):
            numNan = np.isnan(arr).sum()
            if self.op == "eq":
                # Count number of NaNs
                return cast(Scalar, numNan)
            if self.op == "ne":
                # Count number of non-NaNs
                return cast(Scalar, len(arr) - numNan)
            raise ValueError("Invalid operator for counting NaNs.")

        # Count for the given threshold, ignoring all NaNs
        finite = arr[~np.isnan(arr)]
        compare = getattr(operator, self.op)
        return cast(Scalar, float(np.sum(compare(finite, self.threshold))))


class CountUniqueAction(ScalarFromVectorAction):
Expand Down Expand Up @@ -335,3 +383,32 @@ def __call__(self, data: KeyedData, **kwargs):
else:
iqr = np.NaN
return iqr


class DivideScalar(ScalarAction):
    """Divide the scalar produced by ``actionA`` by the one from ``actionB``."""

    actionA = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar A")
    actionB = ConfigurableActionField[ScalarAction](doc="Action which supplies scalar B")

    def getInputSchema(self) -> KeyedDataSchema:
        # Inputs of the numerator action come first, then the denominator's.
        for action in (self.actionA, self.actionB):
            yield from action.getInputSchema()

    def __call__(self, data: KeyedData, **kwargs) -> Scalar:
        """Evaluate both actions on ``data`` and return their quotient.

        Parameters
        ----------
        data : `KeyedData`
            Keyed data handed unchanged to both ``actionA`` and ``actionB``.
        **kwargs
            Forwarded unchanged to both actions.

        Returns
        -------
        result : `Scalar`
            The value of A divided by B.

        Raises
        ------
        ValueError
            Raised if the scalar returned by ``actionB`` equals zero.
        """
        numerator = self.actionA(data, **kwargs)
        denominator = self.actionB(data, **kwargs)
        if denominator == 0:
            raise ValueError("Denominator is zero!")
        quotient = numerator / denominator
        return quotient
66 changes: 50 additions & 16 deletions python/lsst/analysis/tools/atools/diaSourceMetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,19 @@
from __future__ import annotations

__all__ = (
"NumDiaSourcesAllMetric",
"NumDiaSourcesMetric",
"NumDipolesMetric",
"NumDiaSourcesSelectionMetric",
"DiaSourcesGoodVsBadRatioMetric",
)

from ..actions.scalar import CountAction
from lsst.pex.config import Field

from ..actions.scalar import CountAction, DivideScalar
from ..actions.vector import FlagSelector, GoodDiaSourceSelector
from ..interfaces import AnalysisTool


class NumDiaSourcesAllMetric(AnalysisTool):
    """Metric counting DIA Sources via the ``diaSourceId`` column."""

    def setDefaults(self):
        super().setDefaults()

        # A CountAction over the diaSourceId column gives the source count.
        counter = CountAction(vectorKey="diaSourceId")
        self.process.calculateActions.numDiaSourcesAll = counter

        # Unit of the metric: "ct" (count, an astropy quantity).
        metricUnits = {"numDiaSourcesAll": "ct"}
        self.produce.metric.units = metricUnits


class NumDiaSourcesMetric(AnalysisTool):
"""Calculate the number of DIA Sources that do not have known
bad/quality flags set to true.
Expand All @@ -63,7 +53,7 @@ def setDefaults(self):


class NumDipolesMetric(AnalysisTool):
"""Calculate the number of dipoles."""
"""Calculate the number of dipoles with NaN values excluded."""

def setDefaults(self):
super().setDefaults()
Expand All @@ -76,3 +66,47 @@ def setDefaults(self):

# the units for the quantity (count, an astropy quantity)
self.produce.metric.units = {"numDipoles": "ct"}


class NumDiaSourcesSelectionMetric(AnalysisTool):
    """Count DIA Sources selected by a configurable `CountAction`.

    The column, operator and threshold are set on
    ``process.calculateActions.countingAction``; the resulting metric is
    renamed to ``metricName`` in `finalize`.
    """

    metricName = Field[str](doc="Name to use for output metric")

    def setDefaults(self):
        super().setDefaults()

        # Generic counter; vectorKey / op / threshold are left to the
        # configuration that uses this tool.
        calculate = self.process.calculateActions
        calculate.countingAction = CountAction

        # Unit of the metric: "ct" (count, an astropy quantity).
        metric = self.produce.metric
        metric.units = {"countingAction": "ct"}

    def finalize(self):
        # Publish the count under the configured metric name.
        renamed = {"countingAction": self.metricName}
        self.produce.metric.newNames = renamed


class DiaSourcesGoodVsBadRatioMetric(AnalysisTool):
    """Ratio of high-reliability to low-reliability DIA Source counts.

    The numerator counts sources with ``reliability`` > 0.9 and the
    denominator counts sources with ``reliability`` < 0.1.
    """

    def setDefaults(self):
        super().setDefaults()

        build = self.process.buildActions

        # Numerator: sources whose reliability exceeds 0.9.
        build.numDiaSourcesHighReliability = CountAction(
            vectorKey="reliability", op="gt", threshold=0.9
        )

        # Denominator: sources whose reliability is below 0.1.
        build.numDiaSourcesLowReliability = CountAction(
            vectorKey="reliability", op="lt", threshold=0.1
        )

        # Divide the two counts to obtain the good-vs-bad ratio.
        ratio = DivideScalar(
            actionA=build.numDiaSourcesHighReliability,
            actionB=build.numDiaSourcesLowReliability,
        )
        self.process.calculateActions.DiaSourcesGoodVsBadRatio = ratio

        # Unit of the metric: dimensionless (an astropy quantity).
        self.produce.metric.units = {"DiaSourcesGoodVsBadRatio": ""}