Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-40633: Ignore many numpy warnings #155

Merged
merged 6 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 25 additions & 0 deletions doc/lsst.analysis.tools/getting-started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -527,3 +527,28 @@ The current plot types that are available are detailed :doc:`here<plot-types>`.
already coded up and please try to reuse them before making your own. Before adding a new plot type please
think about if some of the already coded ones can be adapted to your needs rather than making multiple plots
that are basically identical.

------------------------

numpy and other warnings
========================
Functions from some external packages such as numpy can issue warnings for conditions such as division by zero.
These can occur frequently, such as when computing a magnitude from a negative (usually sky-subtracted) flux.
numpy warnings do not include a traceback or context and are therefore generally not informative enough to log.
Therefore, it is recommended that actions and tasks check for potential issues like NaN values and either log
a debug- or info-level message if unexpected values are found, or use the provided mechanisms to filter any uninformative warnings.

There are two built-in methods to filter warnings in ``analysis_tools``: wrapped math functions (controlled by a single global setting) and task-level warning filters:

`python/lsst/analysis/tools/math.py <https://github.com/lsst/analysis_tools/blob/main/python/lsst/analysis/tools/math.py>`__
contains wrapped versions of math functions (from numpy and scipy) that filter warnings so they always result in a particular action.
These should be used in place of the standard numpy nan-prefixed functions, unless the action already filters out any values that
could produce a warning.

`python/lsst/analysis/tools/warning_control.py <https://github.com/lsst/analysis_tools/blob/main/python/lsst/analysis/tools/warning_control.py>`__
has a global setting ``filterwarnings_action`` that controls all of the wrapped functions.
This can be set to ``"error"`` when debugging new or modified actions and tasks.

`python/lsst/analysis/tools/interfaces/_task.py <https://github.com/lsst/analysis_tools/blob/main/python/lsst/analysis/tools/interfaces/_task.py>`__
allows for filtering warnings issued at any other point in task execution, including for direct calls to numpy functions.
Developers can replace the empty list of warnings to catch in ``_runTools`` with ``self.warnings_all`` or any other list of warning texts.
3 changes: 2 additions & 1 deletion python/lsst/analysis/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@
from the object itself.
"""

from .statistics import *
from .math import *
from .version import * # Generated by sconsUtils
from .warning_control import *
12 changes: 7 additions & 5 deletions python/lsst/analysis/tools/actions/keyedData/stellarLocusFit.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from lsst.pex.config import DictField

from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, Scalar, Vector
from ...statistics import sigmaMad
from ...math import sigmaMad


def stellarLocusFit(xs, ys, paramDict):
Expand Down Expand Up @@ -290,9 +290,11 @@ def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
xs = np.array([fitParams["xMax"] - 0.2, fitParams["xMax"], fitParams["xMax"] + 0.2])
ys = xs * fitParams["mPerp"] + fitParams["bPerpMax"]

fitParams[f"{self.identity or ''}_sigmaMAD"] = sigmaMad(dists)
fitParams[f"{self.identity or ''}_median"] = np.median(dists)
fitParams[f"{self.identity or ''}_hardwired_sigmaMAD"] = sigmaMad(distsHW)
fitParams[f"{self.identity or ''}_hardwired_median"] = np.median(distsHW)
fit_sigma, fit_med = (sigmaMad(dists), np.median(dists)) if len(dists) else (np.nan, np.nan)
fitParams[f"{self.identity or ''}_sigmaMAD"] = fit_sigma
fitParams[f"{self.identity or ''}_median"] = fit_med
fit_sigma, fit_med = (sigmaMad(distsHW), np.median(distsHW)) if len(distsHW) else (np.nan, np.nan)
fitParams[f"{self.identity or ''}_hardwired_sigmaMAD"] = fit_sigma
fitParams[f"{self.identity or ''}_hardwired_median"] = fit_med

return fitParams # type: ignore
10 changes: 4 additions & 6 deletions python/lsst/analysis/tools/actions/plot/calculateRange.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@

from typing import cast

import numpy as np

from ...interfaces import Vector, VectorAction
from ...statistics import nansigmaMad
from ...math import nanMax, nanMedian, nanMin, nanSigmaMad


class MinMax(VectorAction):
Expand All @@ -47,7 +45,7 @@ class MinMax(VectorAction):
"""

def __call__(self, data: Vector, **kwargs) -> Vector:
return cast(Vector, [np.min(data), np.max(data)])
return cast(Vector, [nanMin(data), nanMax(data)])


class Med2Mad(VectorAction):
Expand All @@ -67,8 +65,8 @@ class Med2Mad(VectorAction):
"""

def __call__(self, data: Vector, **kwargs) -> Vector:
med = np.nanmedian(data)
mad = nansigmaMad(data)
med = nanMedian(data)
mad = nanSigmaMad(data)
cmin = med - 2 * mad
cmax = med + 2 * mad
return cast(Vector, [cmin, cmax])
10 changes: 5 additions & 5 deletions python/lsst/analysis/tools/actions/plot/colorColorFitPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from sklearn.neighbors import KernelDensity

from ...interfaces import KeyedData, KeyedDataSchema, PlotAction, Scalar, Vector
from ...statistics import sigmaMad
from ...math import nanMean, nanMedian, nanSigmaMad
from ..keyedData.stellarLocusFit import perpDistance
from .plotUtils import addPlotInfo, mkColormap

Expand Down Expand Up @@ -259,7 +259,7 @@ def makePlot(

# Add some useful information to the plot
bbox = dict(alpha=0.9, facecolor="white", edgecolor="none")
medMag = np.nanmedian(cast(Vector, mags))
medMag = nanMedian(cast(Vector, mags))

# TODO: GET THE SN FROM THE EARLIER PREP STEP
SN = "-"
Expand Down Expand Up @@ -378,9 +378,9 @@ def makePlot(
# Add a histogram
axHist.set_ylabel("Number")
axHist.set_xlabel("Distance to Line Fit")
medDists = np.nanmedian(dists)
madDists = sigmaMad(dists, nan_policy="omit")
meanDists = np.nanmean(dists)
medDists = nanMedian(dists)
madDists = nanSigmaMad(dists)
meanDists = nanMean(dists)

axHist.set_xlim(meanDists - 2.0 * madDists, meanDists + 2.0 * madDists)
lineMedian = axHist.axvline(medDists, color="k", label="Median: {:0.3f}".format(medDists))
Expand Down
10 changes: 5 additions & 5 deletions python/lsst/analysis/tools/actions/plot/focalPlanePlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from scipy.stats import binned_statistic_2d, binned_statistic_dd

from ...interfaces import KeyedData, KeyedDataSchema, PlotAction, Scalar, Vector
from ...statistics import nansigmaMad
from ...math import nanMedian, nanSigmaMad
from .plotUtils import addPlotInfo, sortAllArrays


Expand Down Expand Up @@ -97,8 +97,8 @@ def statsAndText(self, arr, mask=None):
numPoints = len(arr)
if mask is not None:
arr = arr[mask]
med = np.nanmedian(arr)
sigMad = nansigmaMad(arr)
med = nanMedian(arr)
sigMad = nanSigmaMad(arr)

statsText = (
"Median: {:0.2f}\n".format(med)
Expand Down Expand Up @@ -195,8 +195,8 @@ def makePlot(
bbox = dict(facecolor="paleturquoise", alpha=0.5, edgecolor="none")
ax.text(0.8, 0.91, statsText, transform=fig.transFigure, fontsize=8, bbox=bbox)

median = np.nanmedian(statistic.ravel())
mad = nansigmaMad(statistic.ravel())
median = nanMedian(statistic.ravel())
mad = nanSigmaMad(statistic.ravel())

vmin = median - 2 * mad
vmax = median + 2 * mad
Expand Down
14 changes: 7 additions & 7 deletions python/lsst/analysis/tools/actions/plot/histPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from matplotlib.patches import Rectangle

from ...interfaces import KeyedData, KeyedDataSchema, PlotAction, Vector
from ...statistics import sigmaMad
from ...math import nanMax, nanMedian, nanMin, sigmaMad
from .plotUtils import addPlotInfo

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -503,9 +503,9 @@ def _getPanelRange(self, data, panel, mads=None, meds=None):
# maximum sigmaMad for the datasets in the panel to the left[right]
# from the minimum[maximum] median value of all datasets in the
# panel.
maxMad = np.nanmax(mads)
maxMed = np.nanmax(meds)
minMed = np.nanmin(meds)
maxMad = nanMax(mads)
maxMed = nanMax(meds)
minMed = nanMin(meds)
panel_range = [minMed - lowerRange * maxMad, maxMed + upperRange * maxMad]
if panel_range[1] - panel_range[0] == 0:
log.info(
Expand All @@ -529,15 +529,15 @@ def _getPercentilePanelRange(self, data, panel):
hist_range = np.nanpercentile(
data[hist], [self.panels[panel].lowerRange, self.panels[panel].upperRange]
)
panel_range[0] = np.nanmin([panel_range[0], hist_range[0]])
panel_range[1] = np.nanmax([panel_range[1], hist_range[1]])
panel_range[0] = nanMin([panel_range[0], hist_range[0]])
panel_range[1] = nanMax([panel_range[1], hist_range[1]])
return panel_range

def _calcStats(self, data):
"""Calculate the number of data points, median, and median absolute
deviation of input data."""
num = len(data)
med = np.nanmedian(data)
med = nanMedian(data)
mad = sigmaMad(data)
return num, med, mad

Expand Down
19 changes: 10 additions & 9 deletions python/lsst/analysis/tools/actions/plot/multiVisitCoveragePlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from matplotlib.ticker import FormatStrFormatter

from ...interfaces import KeyedData, KeyedDataSchema, PlotAction, Scalar, Vector
from ...math import nanMax, nanMean, nanMedian, nanMin
from ..keyedData import KeyedDataSelectorAction
from ..vector.selectors import RangeSelector
from .plotUtils import mkColormap, plotProjectionWithBinning
Expand Down Expand Up @@ -336,8 +337,8 @@ def makePlot(

corners = camera[0].getCorners(FOCAL_PLANE) # type: ignore
xCorners, yCorners = zip(*corners)
xScatLen = 0.4 * (np.nanmax(xCorners) - np.nanmin(xCorners))
yScatLen = 0.4 * (np.nanmax(yCorners) - np.nanmin(yCorners))
xScatLen = 0.4 * (nanMax(xCorners) - nanMin(xCorners))
yScatLen = 0.4 * (nanMax(yCorners) - nanMin(yCorners))
tractList: List[int] = []
elif self.projection == "raDec":
xKey = "ra"
Expand Down Expand Up @@ -418,15 +419,15 @@ def makePlot(
for zKey in self.parametersToPlotList:
zKeySorted = dataDf[zKey].sort_values()
zKeySorted = zKeySorted[np.isfinite(zKeySorted)]
vMinDict[zKey] = np.nanmean(zKeySorted.head(nPercent))
vMinDict[zKey] = nanMean(zKeySorted.head(nPercent))
if zKey == "medianE":
vMaxDict[zKey] = maxEllipResidual
elif zKey == "psfStarScaledDeltaSizeScatter":
vMaxDict[zKey] = maxScaledSizeScatter
elif zKey == "astromOffsetMean" and self.projection != "raDec":
vMaxDict[zKey] = min(maxMeanDistanceArcsec, 1.1 * np.nanmean(zKeySorted.tail(nPercent)))
vMaxDict[zKey] = min(maxMeanDistanceArcsec, 1.1 * nanMean(zKeySorted.tail(nPercent)))
else:
vMaxDict[zKey] = np.nanmean(zKeySorted.tail(nPercent))
vMaxDict[zKey] = nanMean(zKeySorted.tail(nPercent))

for iRow, band in enumerate(bandList):
dataBand = dataDf[dataDf["band"] == band].copy()
Expand All @@ -443,8 +444,8 @@ def makePlot(
nPercent = max(2, int(0.02 * nDataIdBand))
zKeySorted = dataBand[zKey].sort_values()
zKeySorted = zKeySorted[np.isfinite(zKeySorted)]
vMinDict[zKey] = np.nanmean(zKeySorted.head(nPercent))
vMaxDict[zKey] = np.nanmean(zKeySorted.tail(nPercent))
vMinDict[zKey] = nanMean(zKeySorted.head(nPercent))
vMaxDict[zKey] = nanMean(zKeySorted.tail(nPercent))

# Scatter the plots within the detector for focal plane plots.
if self.doScatterInRaDec:
Expand Down Expand Up @@ -671,8 +672,8 @@ def makePlot(
/ 2.0
)
detScaleDeg = np.sqrt(areaDeg / (dataBand["xSize"] * dataBand["ySize"]))
detWidthDeg = np.nanmedian(detScaleDeg * dataBand["xSize"])
detHeightDeg = np.nanmedian(detScaleDeg * dataBand["ySize"])
detWidthDeg = nanMedian(detScaleDeg * dataBand["xSize"])
detHeightDeg = nanMedian(detScaleDeg * dataBand["ySize"])

patch = mpl.patches.Rectangle(
(xLimMax - 0.02 * limRange - detWidthDeg, yLimMin + 0.03 * limRange),
Expand Down
12 changes: 6 additions & 6 deletions python/lsst/analysis/tools/actions/plot/plotUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from matplotlib.patches import Rectangle
from scipy.stats import binned_statistic_2d

from ...statistics import nansigmaMad
from ...math import nanMedian, nanSigmaMad

if TYPE_CHECKING:
from matplotlib.figure import Figure
Expand Down Expand Up @@ -126,7 +126,7 @@ def generateSummaryStats(data, skymap, plotInfo):
if sum(onPatch) == 0:
stat = np.nan
else:
stat = np.nanmedian(data[yCol][onPatch])
stat = nanMedian(data[yCol][onPatch])
try:
patchTuple = (int(patch.split(",")[0]), int(patch.split(",")[-1]))
patchInfo = tractInfo.getPatchInfo(patchTuple)
Expand Down Expand Up @@ -172,7 +172,7 @@ def generateSummaryStatsVisit(cat, colName, visitSummaryTable):
if ccd is None:
continue
onCcd = cat["detector"] == ccd
stat = np.nanmedian(cat[colName].values[onCcd])
stat = nanMedian(cat[colName].values[onCcd])

sumRow = visitSummaryTable["id"] == ccd
corners = zip(visitSummaryTable["raCorners"][sumRow][0], visitSummaryTable["decCorners"][sumRow][0])
Expand Down Expand Up @@ -349,7 +349,7 @@ def extremaSort(xs):
-------
ids : `np.array`
"""
med = np.nanmedian(xs)
med = nanMedian(xs)
dists = np.abs(xs - med)
ids = np.argsort(dists)
return ids
Expand Down Expand Up @@ -642,8 +642,8 @@ def plotProjectionWithBinning(
plotOut : `matplotlib.collections.PathCollection`
The plot object with ``ax`` updated with data plotted here.
"""
med = np.nanmedian(zs)
mad = nansigmaMad(zs)
med = nanMedian(zs)
mad = nanSigmaMad(zs)
if vmin is None:
vmin = med - 2 * mad
if vmax is None:
Expand Down
28 changes: 14 additions & 14 deletions python/lsst/analysis/tools/actions/plot/scatterplotWithTwoHists.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from mpl_toolkits.axes_grid1 import make_axes_locatable

from ...interfaces import KeyedData, KeyedDataAction, KeyedDataSchema, PlotAction, Scalar, ScalarType, Vector
from ...statistics import nansigmaMad, sigmaMad
from ...math import nanMedian, nanSigmaMad
from ..keyedData.summaryStatistics import SummaryStatisticAction
from ..scalar import MedianAction
from ..vector import ConvertFluxToMag, SnSelector
Expand Down Expand Up @@ -405,15 +405,15 @@ def _scatterPlot(
# ensure the columns are actually array
xs = np.array(xs)
ys = np.array(ys)
sigMadYs = nansigmaMad(ys)
sigMadYs = nanSigmaMad(ys)
# plot lone median point if there's not enough data to measure more
n_xs = len(xs)
if n_xs == 0:
continue
elif n_xs < 10:
xs = [np.nanmedian(xs)]
sigMads = np.array([nansigmaMad(ys)])
ys = np.array([np.nanmedian(ys)])
xs = [nanMedian(xs)]
sigMads = np.array([nanSigmaMad(ys)])
ys = np.array([nanMedian(ys)])
(medLine,) = ax.plot(xs, ys, color, label=f"Median: {ys[0]:.2g}", lw=0.8)
linesForLegend.append(medLine)
(sigMadLine,) = ax.plot(
Expand Down Expand Up @@ -444,7 +444,7 @@ def _scatterPlot(
xLims[1] = max(xLims[1], xMax)

xEdges = np.arange(xMin, xMax, (xMax - xMin) / self.nBins)
medYs = np.nanmedian(ys)
medYs = nanMedian(ys)
fiveSigmaHigh = medYs + 5.0 * sigMadYs
fiveSigmaLow = medYs - 5.0 * sigMadYs
binSize = (fiveSigmaHigh - fiveSigmaLow) / 101.0
Expand Down Expand Up @@ -472,8 +472,8 @@ def _scatterPlot(

for i, xEdge in enumerate(xEdgesPlot):
ids = np.where((xs < xEdge) & (xs > xEdges[i]) & (np.isfinite(ys)))[0]
med = np.nanmedian(ys[ids])
sigMad = sigmaMad(ys[ids], nan_policy="omit")
med = nanMedian(ys[ids])
sigMad = nanSigmaMad(ys[ids])
meds[i] = med
sigMads[i] = sigMad
threeSigMadVerts[i, :] = [xEdge, med + 3 * sigMad]
Expand Down Expand Up @@ -599,10 +599,10 @@ def _scatterPlot(

else:
ax.plot(xs, ys, ".", ms=5, alpha=0.3, mfc=color, mec=color, zorder=-1)
meds = np.array([np.nanmedian(ys)] * len(xs))
(medLine,) = ax.plot(xs, meds, color, label=f"Median: {np.nanmedian(ys):0.3g}", lw=0.8)
meds = np.array([nanMedian(ys)] * len(xs))
(medLine,) = ax.plot(xs, meds, color, label=f"Median: {nanMedian(ys):0.3g}", lw=0.8)
linesForLegend.append(medLine)
sigMads = np.array([nansigmaMad(ys)] * len(xs))
sigMads = np.array([nanSigmaMad(ys)] * len(xs))
(sigMadLine,) = ax.plot(
xs,
meds + 1.0 * sigMads,
Expand All @@ -621,15 +621,15 @@ def _scatterPlot(
# Set the scatter plot limits
# TODO: Make this not work by accident
if "yStars" in data and (len(cast(Vector, data["yStars"])) > 0):
plotMed = np.nanmedian(cast(Vector, data["yStars"]))
plotMed = nanMedian(cast(Vector, data["yStars"]))
elif "yGalaxies" in data and (len(cast(Vector, data["yGalaxies"])) > 0):
plotMed = np.nanmedian(cast(Vector, data["yGalaxies"]))
plotMed = nanMedian(cast(Vector, data["yGalaxies"]))
else:
plotMed = np.nan

# Ignore types below pending making this not work by accident
if len(xs) < 2: # type: ignore
meds = [np.nanmedian(ys)] # type: ignore
meds = [nanMedian(ys)] # type: ignore
if self.yLims:
ax.set_ylim(self.yLims[0], self.yLims[1]) # type: ignore
elif np.isfinite(plotMed):
Expand Down