Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-38043: Make more plots succeed with CI test data #89

Merged
merged 4 commits into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
150 changes: 78 additions & 72 deletions python/lsst/analysis/tools/actions/plot/histPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,74 +412,77 @@ def _makePanel(self, data, panel, ax, colors, label_font_size=9, legend_font_siz
meds.append(med)
mads.append(mad)
panel_range = self._getPanelRange(data, panel, mads=mads, meds=meds)

for i, hist in enumerate(self.panels[panel].hists):
hist_data = data[hist][np.isfinite(data[hist])]
ax.hist(
hist_data,
range=panel_range,
bins=self.panels[panel].bins,
histtype="step",
density=self.panels[panel].histDensity,
lw=2,
color=colors[i],
label=self.panels[panel].hists[hist],
)
ax.axvline(meds[i], ls=(0, (5, 3)), lw=1, c=colors[i])

ax.legend(fontsize=legend_font_size, loc="upper left", frameon=False)
ax.set_xlim(panel_range)
# The following accommodates spacing for ranges with large numbers
# but small-ish dynamic range (example use case: RA 300-301).
if ncols > 1 and max(np.abs(panel_range)) >= 100 and (panel_range[1] - panel_range[0]) < 5:
ax.xaxis.set_major_formatter("{x:.2f}")
ax.tick_params(axis="x", labelrotation=25, pad=-1)
ax.set_xlabel(self.panels[panel].label, fontsize=label_font_size)
y_label = "Normalized (PDF)" if self.panels[panel].histDensity else "Frequency"
ax.set_ylabel(y_label, fontsize=label_font_size)
ax.set_yscale(self.panels[panel].yscale)
ax.tick_params(labelsize=max(5, label_font_size - 2))
# add a buffer to the top of the plot to allow headspace for labels
ylims = list(ax.get_ylim())
if ax.get_yscale() == "log":
ylims[1] = 10 ** (np.log10(ylims[1]) * 1.1)
if all(np.isfinite(panel_range)):
for i, hist in enumerate(self.panels[panel].hists):
hist_data = data[hist][np.isfinite(data[hist])]
if len(hist_data) > 0:
ax.hist(
hist_data,
range=panel_range,
bins=self.panels[panel].bins,
histtype="step",
density=self.panels[panel].histDensity,
lw=2,
color=colors[i],
label=self.panels[panel].hists[hist],
)
ax.axvline(meds[i], ls=(0, (5, 3)), lw=1, c=colors[i])

ax.legend(fontsize=legend_font_size, loc="upper left", frameon=False)
ax.set_xlim(panel_range)
# The following accommodates spacing for ranges with large numbers
# but small-ish dynamic range (example use case: RA 300-301).
if ncols > 1 and max(np.abs(panel_range)) >= 100 and (panel_range[1] - panel_range[0]) < 5:
ax.xaxis.set_major_formatter("{x:.2f}")
ax.tick_params(axis="x", labelrotation=25, pad=-1)
ax.set_xlabel(self.panels[panel].label, fontsize=label_font_size)
y_label = "Normalized (PDF)" if self.panels[panel].histDensity else "Frequency"
ax.set_ylabel(y_label, fontsize=label_font_size)
ax.set_yscale(self.panels[panel].yscale)
ax.tick_params(labelsize=max(5, label_font_size - 2))
# add a buffer to the top of the plot to allow headspace for labels
ylims = list(ax.get_ylim())
if ax.get_yscale() == "log":
ylims[1] = 10 ** (np.log10(ylims[1]) * 1.1)
else:
ylims[1] *= 1.1
ax.set_ylim(ylims[0], ylims[1])

# Draw a vertical line at a reference value, if given.
# If histDensity is True, also plot a reference PDF with
# mean = referenceValue and sigma = 1 for reference.
if self.panels[panel].referenceValue is not None:
ax = self._addReferenceLines(ax, panel, panel_range, legend_font_size=legend_font_size)

# Check if we should use the default stats panel or if a custom one
# has been created.
statList = [
self.panels[panel].statsPanel.stat1,
self.panels[panel].statsPanel.stat2,
self.panels[panel].statsPanel.stat3,
]
if not any(statList):
stats_dict = {
"statLabels": ["N$_{{data}}$", "Med", "${{\\sigma}}_{{MAD}}$"],
"stat1": nums,
"stat2": meds,
"stat3": mads,
}
elif all(statList):
stat1 = [data[stat] for stat in self.panels[panel].statsPanel.stat1]
stat2 = [data[stat] for stat in self.panels[panel].statsPanel.stat2]
stat3 = [data[stat] for stat in self.panels[panel].statsPanel.stat3]
stats_dict = {
"statLabels": self.panels[panel].statsPanel.statsLabels,
"stat1": stat1,
"stat2": stat2,
"stat3": stat3,
}
else:
raise RuntimeError("Invalid configuration of HistStatPanel")
else:
ylims[1] *= 1.1
ax.set_ylim(ylims[0], ylims[1])

# Draw a vertical line at a reference value, if given. If histDensity
# is True, also plot a reference PDF with mean = referenceValue and
# sigma = 1 for reference.
if self.panels[panel].referenceValue is not None:
ax = self._addReferenceLines(ax, panel, panel_range, legend_font_size=legend_font_size)

# Check if we should use the default stats panel or if a custom one
# has been created.
statList = [
self.panels[panel].statsPanel.stat1,
self.panels[panel].statsPanel.stat2,
self.panels[panel].statsPanel.stat3,
]
if not any(statList):
stats_dict = {
"statLabels": ["N$_{{data}}$", "Med", "${{\\sigma}}_{{MAD}}$"],
"stat1": nums,
"stat2": meds,
"stat3": mads,
}
elif all(statList):
stat1 = [data[stat] for stat in self.panels[panel].statsPanel.stat1]
stat2 = [data[stat] for stat in self.panels[panel].statsPanel.stat2]
stat3 = [data[stat] for stat in self.panels[panel].statsPanel.stat3]
stats_dict = {
"statLabels": self.panels[panel].statsPanel.statsLabels,
"stat1": stat1,
"stat2": stat2,
"stat3": stat3,
}
else:
raise RuntimeError("Invalid configuration of HistStatPanel")

stats_dict = {key: [] for key in ("stat1", "stat2", "stat3")}
stats_dict["statLabels"] = [""] * 3
return nums, meds, mads, stats_dict

def _getPanelRange(self, data, panel, mads=None, meds=None):
Expand Down Expand Up @@ -515,11 +518,14 @@ def _getPercentilePanelRange(self, data, panel):
"""Determine panel x-axis range based on data percentile limits."""
panel_range = [np.nan, np.nan]
for hist in self.panels[panel].hists:
hist_range = np.nanpercentile(
data[hist], [self.panels[panel].lowerRange, self.panels[panel].upperRange]
)
panel_range[0] = np.nanmin([panel_range[0], hist_range[0]])
panel_range[1] = np.nanmax([panel_range[1], hist_range[1]])
data_hist = data[hist]
# TODO: Consider raising instead of silently skipping empty data
if len(data_hist) > 0:
hist_range = np.nanpercentile(
data[hist], [self.panels[panel].lowerRange, self.panels[panel].upperRange]
)
panel_range[0] = np.nanmin([panel_range[0], hist_range[0]])
panel_range[1] = np.nanmax([panel_range[1], hist_range[1]])
return panel_range

def _calcStats(self, data):
Expand Down
51 changes: 29 additions & 22 deletions python/lsst/analysis/tools/actions/plot/scatterplotWithTwoHists.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,21 +462,25 @@ def _scatterPlot(
xs = np.array(xs)
ys = np.array(ys)
sigMadYs = nansigmaMad(ys)
if len(xs) < 2:
(medLine,) = ax.plot(
xs, np.nanmedian(ys), color, label=f"Median: {np.nanmedian(ys):.2g}", lw=0.8
)
# plot lone median point if there's not enough data to measure more
n_xs = len(xs)
if n_xs == 0:
continue
elif n_xs < 10:
xs = [np.nanmedian(xs)]
sigMads = np.array([nansigmaMad(ys)])
ys = [np.nanmedian(ys)]
(medLine,) = ax.plot(xs, ys, color, label=f"Median: {ys[0]:.2g}", lw=0.8)
linesForLegend.append(medLine)
sigMads = np.array([nansigmaMad(ys)] * len(xs))
(sigMadLine,) = ax.plot(
xs,
np.nanmedian(ys) + 1.0 * sigMads,
ys + 1.0 * sigMads,
color,
alpha=0.8,
lw=0.8,
label=r"$\sigma_{MAD}$: " + f"{sigMads[0]:.2g}",
)
ax.plot(xs, np.nanmedian(ys) - 1.0 * sigMads, color, alpha=0.8)
ax.plot(xs, ys - 1.0 * sigMads, color, alpha=0.8)
linesForLegend.append(sigMadLine)
histIm = None
continue
Expand Down Expand Up @@ -669,16 +673,19 @@ def _scatterPlot(

# Set the scatter plot limits
# TODO: Make this not work by accident
if len(cast(Vector, data["yStars"])) > 0:
if "yStars" in data and (len(cast(Vector, data["yStars"])) > 0):
plotMed = np.nanmedian(cast(Vector, data["yStars"]))
else:
elif "yGalaxies" in data and (len(cast(Vector, data["yGalaxies"])) > 0):
plotMed = np.nanmedian(cast(Vector, data["yGalaxies"]))
else:
plotMed = np.nan

# Ignore types below pending making this not work by accident
if len(xs) < 2: # type: ignore
meds = [np.nanmedian(ys)] # type: ignore
if self.yLims:
ax.set_ylim(self.yLims[0], self.yLims[1]) # type: ignore
else:
elif np.isfinite(plotMed):
numSig = 4
yLimMin = plotMed - numSig * sigMadYs # type: ignore
yLimMax = plotMed + numSig * sigMadYs # type: ignore
Expand Down Expand Up @@ -770,16 +777,16 @@ def _makeSideHistogram(
) -> None:
sideHist = figure.add_subplot(gs[1:, -1], sharey=ax)

totalY: list[Vector] = []
if "stars" in self.plotTypes: # type: ignore
totalY.append(cast(Vector, data["yStars"]))
if "galaxies" in self.plotTypes: # type: ignore
totalY.append(cast(Vector, data["yGalaxies"]))
if "unknown" in self.plotTypes: # type: ignore
totalY.append(cast(Vector, data["yUknown"]))
if "any" in self.plotTypes: # type: ignore
totalY.append(cast(Vector, data["y"]))
totalYChained = [y for y in chain.from_iterable(totalY) if y == y]
totalY: dict[str, Vector] = {}
if "stars" in self.plotTypes and "yStars" in data: # type: ignore
totalY["stars"] = cast(Vector, data["yStars"])
if "galaxies" in self.plotTypes and "yGalaxies" in data: # type: ignore
totalY["galaxies"] = cast(Vector, data["yGalaxies"])
if "unknown" in self.plotTypes and "yUnknown" in data: # type: ignore
totalY["unknown"] = cast(Vector, data["yUnknown"])
if "any" in self.plotTypes and "y" in data: # type: ignore
totalY["y"] = cast(Vector, data["y"])
totalYChained = [y for y in chain.from_iterable(totalY.values()) if y == y]

# cheat to get the total count while iterating once
yLimMin, yLimMax = ax.get_ylim()
Expand All @@ -792,7 +799,7 @@ def _makeSideHistogram(
orientation="horizontal",
log=True,
)
if "galaxies" in self.plotTypes: # type: ignore
if "galaxies" in totalY: # type: ignore
sideHist.hist(
[g for g in cast(Vector, data["yGalaxies"]) if g == g],
bins=bins,
Expand Down Expand Up @@ -820,7 +827,7 @@ def _makeSideHistogram(
ls=":",
)

if "stars" in self.plotTypes: # type: ignore
if "stars" in totalY: # type: ignore
sideHist.hist(
[s for s in cast(Vector, data["yStars"]) if s == s],
bins=bins,
Expand Down
11 changes: 11 additions & 0 deletions python/lsst/analysis/tools/actions/plot/skyPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,17 @@ def makePlot(
)

for i, (xs, ys, colorVals, cmap, label) in enumerate(toPlotList):
finite = np.isfinite(xs) & np.isfinite(ys)
xs = xs[finite]
ys = ys[finite]
n_xs = len(xs)
# If colorVals has no finite values the column is unusable, so zero it out.
# This should be obvious on the plot.
if not any(np.isfinite(colorVals)):
colorVals[:] = 0

if n_xs < 5:
continue
if not self.plotOutlines or "tract" not in sumStats.keys():
minRa = np.min(xs)
maxRa = np.max(xs)
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/analysis/tools/atools/numericalValidity.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def coaddContext(self) -> None:
# Need to pass a mapping of new names so the default names get the
# band prepended. Otherwise, each subsequent band's metric will
# overwrite the current one.
self.produce.newNames = { # type: ignore
self.produce.metric.newNames = { # type: ignore
"validFracColumn": "{band}_validFracColumn",
"nanFracColumn": "{band}_nanFracColumn",
}
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/analysis/tools/tasks/catalogMatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class CatalogMatchConnections(

refCat = pipeBase.connectionTypes.PrerequisiteInput(
doc="The reference catalog to match to loaded input catalog sources.",
name="gaia_dr2_20200414",
name="{refCatalog}",
storageClass="SimpleCatalog",
dimensions=("skypix",),
deferLoad=True,
Expand Down Expand Up @@ -326,7 +326,7 @@ class CatalogMatchVisitConnections(

refCat = pipeBase.connectionTypes.PrerequisiteInput(
doc="The astrometry reference catalog to match to loaded input catalog sources.",
name="gaia_dr2_20200414",
name="{refCatalog}",
storageClass="SimpleCatalog",
dimensions=("skypix",),
deferLoad=True,
Expand Down
8 changes: 6 additions & 2 deletions python/lsst/analysis/tools/tasks/refCatObjectAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,15 @@
class RefCatObjectAnalysisConnections(
AnalysisBaseConnections,
dimensions=("skymap", "tract"),
defaultTemplates={"outputName": "objectTable_tract_gaia_dr2_20200414_match"},
defaultTemplates={
"targetCatalog": "objectTable_tract",
"refCatalog": "gaia_dr2_20200414",
"outputName": "{targetCatalog}_{refCatalog}_match",
},
):
data = ct.Input(
doc="Tract based object table to load from the butler",
name="objectTable_tract_gaia_dr2_20200414_match",
name="{targetCatalog}_{refCatalog}_match",
storageClass="DataFrame",
deferLoad=True,
dimensions=("skymap", "tract"),
Expand Down