# Fake Injection Purity and Efficiency analysis

DM-46624

Michael Wood-Vasey, <wmwv@pitt.edu>

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from lsst.daf.butler import Butler
import lsst.afw.display as afwDisplay

import lsst.geom

import lsst.afw.image
from lsst.afw.math import Warper, WarperConfig
import lsst.afw.table

from lsst.ip.diffim import AlardLuptonSubtractConfig, AlardLuptonSubtractTask
from lsst.ip.diffim import GetTemplateConfig, GetTemplateTask
from lsst.ip.diffim import DetectAndMeasureConfig, DetectAndMeasureTask

In [None]:
# Use Firefly as the default backend for every afwDisplay.Display created below.
afwDisplay.setDefaultBackend("firefly")

In [None]:
# Load collection
# Butler repository path and the collection holding the fake-injection run.
repo = "/repo/dc2"
collection = "u/ebellm/DM-46624/w_2024_40/DC2-with-injection"
# NOTE(review): `instrument` is not referenced by any other cell shown in this notebook.
instrument = "LSSTComCam"

# Butler bound to the repository and collection above; used for every query/get below.
butler = Butler(repo, collections=collection)

In [None]:
# Restrict the analysis to a single band.
band = "i"

# References to every "goodSeeingDiff_differenceExp" dataset in that band.
datasetRefs_dia = butler.query_datasets("goodSeeingDiff_differenceExp", where=f"band='{band}'")

In [None]:
# Flip to True to print every dataset reference returned by the query.
verbose = False
if verbose:
    for dr in datasetRefs_dia:
        print(dr)

# Always report how many difference images were found.
print(f"\nFound {len(datasetRefs_dia)} differenceExps")

In [None]:
# Pick one difference image, by position in the query result, to examine in detail.
# The data ID of that dataset is reused for every butler.get below.
i = 50
data_id = datasetRefs_dia[i].dataId
print(data_id)

The standard data products are:

In [None]:
# Data products from the run without injected fakes, all for the same data ID:
# difference image, template, science image, and the source / DIA source catalogs.
dia = butler.get("goodSeeingDiff_differenceExp", dataId=data_id)
template = butler.get("goodSeeingDiff_templateExp", dataId=data_id)
calexp = butler.get("calexp", dataId=data_id)
src = butler.get("src", dataId=data_id)
dia_src = butler.get("goodSeeingDiff_diaSrc", dataId=data_id)
dia_src_table = butler.get("goodSeeingDiff_diaSrcTable", dataId=data_id)  # SDM-ified table.  Does not contain sky sources

In [None]:
# The "fakes_"-prefixed counterparts of the products above, i.e. the same data ID
# processed with synthetic sources injected.
injected_dia = butler.get("fakes_goodSeeingDiff_differenceExp", dataId=data_id)
injected_template = butler.get("fakes_goodSeeingDiff_templateExp", dataId=data_id)
injected_calexp = butler.get("fakes_initial_pvi", dataId=data_id)
injected_dia_src = butler.get("fakes_goodSeeingDiff_diaSrc", dataId=data_id)
injected_dia_src_table = butler.get("fakes_goodSeeingDiff_diaSrcTable", dataId=data_id)  # SDM-ified table.  Does not contain sky sources
# Pre-matched tables
# (injected fakes already matched to DIA sources; rows with diaSourceId > 0 are treated as matched below)
injected_match_dia_src_table = butler.get("fakes_goodSeeingDiff_matchDiaSourceTable", dataId=data_id)
injected_match_dia_src = butler.get("fakes_goodSeeingDiff_matchDiaSrc", dataId=data_id)

And the input catalog of injected fakes is:

In [None]:
# This is the catalog of the injected fakes in RA, Dec
# ("ra" and "dec" are read from it below and pixel "x"/"y" columns are added to it).
injected_fakes = butler.get("fakes_initial_pvi_catalog", dataId=data_id)

In [None]:
# Frame 1: the (non-injected) template image.
afw_display = afwDisplay.Display(frame=1)

# Mostly transparent mask planes, asinh stretch between -2 and 5.
afw_display.setMaskTransparency(80)
afw_display.scale("asinh", -2, 5)
afw_display.mtv(template)

In [None]:
# Frame 2: the science image with injected fakes, same stretch as the template frame.
afw_display = afwDisplay.Display(frame=2)

afw_display.setMaskTransparency(80)
afw_display.scale("asinh", -2, 5)
afw_display.mtv(injected_calexp)

In [None]:
# Frame 3: the difference image from the injected run.
afw_display = afwDisplay.Display(frame=3)

# Mask planes fully transparent; linear stretch with zscale limits.
afw_display.setMaskTransparency(100)
afw_display.scale("linear", "zscale")
afw_display.mtv(injected_dia)

In [None]:
# Minimum PSF-flux signal-to-noise on the difference image for a "good" DIA source.
snr_threshold = 5
# Maximum signal-to-noise on the science image; brighter sources are rejected.
max_science_snr = 200

# We're mixing columns from dia_src and dia_src_table.
# This is dangerous.  I don't think there is a guarantee that the ordering is the same.
# But I want the flags from dia_src and the science flux from dia_src_table.

# 2024-12-04
#  There's some incompatibility that sometimes means that sky sources are in dia_src and sometimes they're not?


def _as_bool(column):
    """Return ``column`` as a plain boolean numpy array (positional, no index)."""
    return np.asarray(column, dtype=bool)


def good_sources(dia_src, dia_src_table, verbose=True, just_dia_src_table=False):
    """Select the "good" rows of a DIA source table.

    Parameters
    ----------
    dia_src : catalog
        Detection catalog indexable by column name and by boolean mask.
        Must provide ``sky_source``, ``slot_Shape_flag``,
        ``base_PsfFlux_instFlux``, ``base_PsfFlux_instFluxErr`` and
        ``base_PixelFlags_flag_edge``.  Sky sources are dropped first.
    dia_src_table : `pandas.DataFrame`
        Table whose rows are returned.  Must provide ``scienceFlux`` and
        ``scienceFluxErr``, plus ``pixelFlags_streak`` (default mode) or
        ``snr``, ``shape_flag``, ``pixelFlags_bad`` and ``pixelFlags_cr``
        (``just_dia_src_table=True``).
    verbose : `bool`, optional
        If True, print how many good sources were kept.
    just_dia_src_table : `bool`, optional
        If True, build the selection from ``dia_src_table`` columns only
        instead of mixing in flags from ``dia_src``.

    Returns
    -------
    good_dia_src : `pandas.DataFrame`
        Deep copy of the selected rows of ``dia_src_table``.

    Raises
    ------
    ValueError
        If the two inputs are mixed (default mode) but, after removing sky
        sources from ``dia_src``, they do not have the same number of rows.

    Notes
    -----
    In the default mode the two inputs are combined row by row, so they are
    assumed to be in the same order; only the row counts can be checked here.
    Thresholds come from the module-level ``snr_threshold`` and
    ``max_science_snr``.
    """
    dia_src = dia_src[~dia_src["sky_source"]]

    # Reject sources that sit on top of something bright in the science image.
    science_snr = np.asarray(dia_src_table["scienceFlux"] / dia_src_table["scienceFluxErr"])
    not_too_bright = science_snr < max_science_snr

    if just_dia_src_table:
        good = (
            np.asarray(dia_src_table["snr"] > snr_threshold)
            & ~_as_bool(dia_src_table["shape_flag"])
            & ~_as_bool(dia_src_table["pixelFlags_bad"])
            & ~_as_bool(dia_src_table["pixelFlags_cr"])
            & not_too_bright
        )
    else:
        # The masks below are combined positionally, so a row-count mismatch
        # means the selection would be applied to the wrong sources.
        if len(dia_src) != len(dia_src_table):
            raise ValueError(
                f"dia_src has {len(dia_src)} non-sky rows but dia_src_table has "
                f"{len(dia_src_table)} rows; cannot combine them row by row."
            )
        dia_snr = np.asarray(dia_src["base_PsfFlux_instFlux"] / dia_src["base_PsfFlux_instFluxErr"])
        good = (
            ~_as_bool(dia_src["slot_Shape_flag"])
            & (dia_snr > snr_threshold)
            & ~_as_bool(dia_src["base_PixelFlags_flag_edge"])
            & not_too_bright
            & ~_as_bool(dia_src_table["pixelFlags_streak"])
        )

    good_dia_src = dia_src_table[good].copy(deep=True)

    if verbose:
        print(f"{len(good_dia_src)} good DIA sources found out of {len(dia_src)} detections.")

    return good_dia_src

In [None]:
# Good DIA sources from the run without injected fakes.
good_dia_src = good_sources(dia_src, dia_src_table)

In [None]:
# Good DIA sources from the run with injected fakes.
good_injected_dia_src = good_sources(injected_dia_src, injected_dia_src_table)

In [None]:
# Frame 3 holds the injected difference image displayed earlier.
afw_display = afwDisplay.Display(frame=3)

# Mark every good DIA source from the injected run with a green circle.
for (x, y) in zip(good_injected_dia_src["x"], good_injected_dia_src["y"]):
    afw_display.dot("o", x, y, size=20, ctype="green")

In [None]:
import re

# Names of the matched-table columns that contain "Flux".
list(filter(lambda name: re.search("Flux", name), injected_match_dia_src))

## Compare input fakes to dia_src

Translate the RA, Dec of the input injection catalog to pixel x, y.

In [None]:
# WCS of the science image, used to place the injected fakes on the pixel grid.
wcs = calexp.getWcs()
# Not needed by the vectorized conversion below; kept so `geom` stays bound.
from lsst import geom
# The injected catalogs are in degrees.
# skyToPixelArray converts the whole column at once, so no per-source
# SpherePoint objects are needed.
x, y = wcs.skyToPixelArray(injected_fakes["ra"], injected_fakes["dec"], degrees=True)
# Store the pixel positions on the catalog for the plots below.
injected_fakes["x"] = x
injected_fakes["y"] = y

In [None]:
# Pixel positions of: every fake in the input catalog (gray open circles),
# fakes in the matched table, all DIA sources from the injected run (green squares),
# and the fakes that were matched to a DIA source (red dots).
plt.scatter(injected_fakes["x"], injected_fakes["y"], edgecolor="gray", facecolor="none", label="injected with margin")
# Rows with a positive diaSourceId are treated as matched to a DIA source.
matched = injected_match_dia_src["diaSourceId"] > 0
plt.scatter(injected_match_dia_src["x_ssi"], injected_match_dia_src["y_ssi"], label="injected")
plt.scatter(injected_dia_src_table["x"], injected_dia_src_table["y"], color="green", facecolor="none", marker="s", label="dia_src")
plt.scatter(injected_match_dia_src[matched]["x_ssi"], injected_match_dia_src[matched]["y_ssi"], marker='.', color='red', label="matched")

# Dashed guide lines at 0 and 4000 pixels on both axes.
# NOTE(review): presumably the detector edges; confirm against the image bounding box.
plt.axvline(0, linestyle="--")
plt.axvline(4000, linestyle="--")
plt.axhline(0, linestyle="--")
plt.axhline(4000, linestyle="--")

plt.gca().set_aspect("equal")
plt.legend()

In [None]:
# Left: magnitude histograms of all injected fakes and of the matched ones.
# Right: fraction of injected fakes that were matched, per magnitude bin.
_, ax = plt.subplots(1, 2, figsize=(8, 4))

# Rows with a positive diaSourceId are treated as matched to a DIA source.
matched = injected_match_dia_src["diaSourceId"] > 0
bins = np.linspace(18, 24, 25)
bin_centers = (bins[:-1] + bins[1:]) / 2

h, _, _ = ax[0].hist(injected_match_dia_src["mag"], label="injected", histtype="step", color="blue", bins=bins)
hm, _, _ = ax[0].hist(injected_match_dia_src[matched]["mag"], label="matched", histtype="step", color="orange", bins=bins)
ax[0].set_xlabel("injected mag")
ax[0].set_ylabel("number of sources / bin")
# Attach the legend to the histogram panel explicitly; plt.legend() would
# act on the current axes, which is the right-hand panel.
ax[0].legend()

# Bins with no injected sources have an undefined fraction: leave them NaN
# without triggering a divide-by-zero warning.
matched_fraction = np.divide(hm, h, out=np.full(len(h), np.nan), where=h > 0)
ax[1].scatter(bin_centers, matched_fraction, color="orange")
ax[1].stairs(matched_fraction, bins, color="orange", label="matched")
ax[1].set_xlabel("injected mag")
ax[1].set_ylabel("fraction of recovered sources")
ax[1].legend()


In [None]:
import numpy as np
# Photometric calibration attached to the injected difference image.
calib = injected_dia.getPhotoCalib()
# NOTE(review): recovered_mag is not used by any other cell shown in this notebook.
recovered_mag = np.array([calib.instFluxToMagnitude(f) for f in injected_match_dia_src["psfFlux"]])
# Injected magnitudes converted to flux with the same calibration.
injected_flux = np.array([calib.magnitudeToInstFlux(m) for m in injected_match_dia_src["mag"]])
# NOTE(review): this subtracts an instFlux from the table's psfFlux; confirm both are in the same units.
flux_residual = injected_match_dia_src["psfFlux"] - injected_flux
# Residual vs. injected magnitude, with the measured flux error as the error bar.
plt.errorbar(injected_match_dia_src["mag"], flux_residual, injected_match_dia_src["psfFluxErr"],
             linestyle="none", marker=".", ecolor="gray")
plt.axhline(0, linestyle="--", color="gray")
plt.xlabel("Injected mag")
plt.ylabel("Measured - Injected")
# Standard deviation of residual / error (the "pull").
sigma = np.std(flux_residual / injected_match_dia_src["psfFluxErr"])
print(f"Pull distribution sigma: {sigma}")


In [None]:
# Keep only the matched-table rows whose diaSourceId is among the good DIA sources.
# The right-hand frame carries no columns of its own (only the diaSourceId index),
# so the inner join acts as a filter.
good_injected_match_dia_src = injected_match_dia_src.join(good_injected_dia_src[["diaSourceId"]].set_index("diaSourceId"), how="inner", on="diaSourceId", rsuffix=("dia_src")) #, validate="one_to_one")
# Report the size of the join result next to the number of good DIA sources it was drawn from.
print(f"{len(good_injected_match_dia_src)} matched-table rows for {len(good_injected_dia_src)} good DIA sources")

In [None]:
# Left: magnitude histograms of all injected fakes, the matched ones, and the
# matched ones that also pass the good-source selection.
# Right: the corresponding recovered fractions per magnitude bin.
_, ax = plt.subplots(1, 2, figsize=(8, 4))

# Rows with a positive diaSourceId are treated as matched to a DIA source.
matched = injected_match_dia_src["diaSourceId"] > 0
bins = np.linspace(18, 24, 25)
bin_centers = (bins[:-1] + bins[1:]) / 2

h, _, _ = ax[0].hist(injected_match_dia_src["mag"], label="injected", histtype="step", color="blue", bins=bins)
hm, _, _ = ax[0].hist(injected_match_dia_src[matched]["mag"], label="matched", histtype="step", color="orange", bins=bins)
hg, _, _ = ax[0].hist(good_injected_match_dia_src["mag"], linestyle="--", label="good matched", histtype="step", color="red", bins=bins)
ax[0].set_xlabel("injected mag")
ax[0].set_ylabel("number of sources / bin")
# Attach the legend to the histogram panel explicitly; plt.legend() would
# act on the current axes, which is the right-hand panel.
ax[0].legend()

# Bins with no injected sources have an undefined fraction: leave them NaN
# without triggering a divide-by-zero warning.
has_injected = h > 0
matched_fraction = np.divide(hm, h, out=np.full(len(h), np.nan), where=has_injected)
good_fraction = np.divide(hg, h, out=np.full(len(h), np.nan), where=has_injected)

# Only the stairs carry labels so each series appears once in the legend.
ax[1].scatter(bin_centers, matched_fraction, color="orange")
ax[1].stairs(matched_fraction, bins, color="orange", label="matched")

ax[1].scatter(bin_centers, good_fraction, linestyle="--", color="red")
ax[1].stairs(good_fraction, bins, linestyle="--", color="red", label="good matched")

ax[1].set_xlabel("injected mag")
ax[1].set_ylabel("fraction of recovered sources")
ax[1].legend()


In [None]:
# Show every column name of the matched table as a plain list.
list(injected_match_dia_src.columns)

In [None]:
# s: IDs of the good DIA sources from the injected run.
# t: IDs present in the fake-to-DIA-source matched table.
s = set(good_injected_dia_src["diaSourceId"])
t = set(injected_match_dia_src["diaSourceId"])

In [None]:
# Good DIA sources whose ID does not appear in the matched table.
d = s - t
print(d)

In [None]:
# Look up the full rows for those IDs (row order follows the set's iteration order).
unmatched_dia = good_injected_dia_src.set_index("diaSourceId").loc[list(d)]


In [None]:
# Frame 3 holds the injected difference image displayed earlier.
afw_display = afwDisplay.Display(frame=3)

# Mark the good DIA sources that are absent from the matched table with orange circles.
for (x, y) in zip(unmatched_dia["x"], unmatched_dia["y"]):
    afw_display.dot("o", x, y, size=20, ctype="orange")

In [None]:
# Switch frame 2 (the injected science image) to a linear zscale stretch.
afw_display = afwDisplay.Display(frame=2)
afw_display.scale("linear", "zscale")
