# ComCamSim 2024 DIA Sprint
Authors: Michael Wood-Vasey <wmwv@pitt.edu>  
Last Verified to Run: 2024-05-14  

DIA Sprint.  This notebook explores how to calculate the metrics and build the visualizations we want for examining dipole orientation across an image, and for checking that orientation against other quantities of interest.  Some key new quantities calculated are

"ip_diffim_DipoleFit_separation"  
"ip_diffim_DipoleFit_orientation"

Let's see what we can learn about the orientation on the sky.

1. [x] Count dipoles and plot on image
3. [ ] Look at pixel stamps of dipoles
4. [ ] Spatially plot dipole orientation
5. [ ] Plot w.r.t. Parallactic Angle.

Run at RSP on USDF.  Run using `d_2024_05_15` on a Large container (because we need the memory).

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
from lsst.daf.butler import Butler
import lsst.daf.butler as dafButler
from lsst.daf.butler import DatasetNotFoundError
from lsst.pipe.base import Instrument
from lsst.ap.association.transformDiaSourceCatalog import UnpackApdbFlags
import lsst.afw.display as afwDisplay
import lsst.display.astrowidgets
from astropy.visualization import ZScaleInterval, SqrtStretch, ImageNormalize, MinMaxInterval, LogStretch
from astropy.table import Table, join

# Use the matplotlib backend for every afwDisplay display created below.
afwDisplay.setDefaultBackend('matplotlib')

Key orientation information: repo, collection, dataset types.

In [None]:
# Butler repository to read from.
repo = "/repo/embargo"
# User run collection explored in this notebook.
collection = "u/elhoward/DM-44138/LSSTComCamSim"
# Collections handed to the Butler below: the run above plus the instrument's
# template and default collections.
collections = [collection, "LSSTComCamSim/templates", "LSSTComCamSim/defaults"]
# Instrument name, also handed to the Butler below.
instrument = "LSSTComCamSim"

Dataset Types we'll be interested in  
`postISRCCD`  (I don't think there's a `calexp` in prompt processing)  
`goodSeeingDiff_differenceExp`  
`goodSeeingDiff_diaSrc`  (or `goodSeeingDiff_diaSrcTable` depending on the schema you want.)

In [None]:
# Butler client for `repo`; collections and instrument are set here so the
# `butler.get` calls below only need a data ID.
butler = Butler(repo, collections=collections, instrument=instrument)

In [None]:
# Print the data ID of every difference exposure the registry query returns.
registry = butler.registry
difference_refs = registry.queryDatasets('goodSeeingDiff_differenceExp')
for dataset_ref in difference_refs:
    print(dataset_ref.dataId)

In [None]:
# Data ID of the single detector/exposure examined in the cells below.
data_id = {"detector": 0, "exposure": 7024040400021}
# An alternate data_id to look at
# data_id = {"detector": 5, "exposure": 7024040300440}
# Store the same number under "visit" as well (presumably because some dataset
# types are keyed by visit rather than exposure -- confirm).
data_id["visit"] = data_id["exposure"]

In [None]:
# Fetch the science, difference, and template exposures for the same data ID,
# in that order.
science, diff, template = (
    butler.get(dataset_type, dataId=data_id)
    for dataset_type in (
        "initial_pvi",
        "goodSeeingDiff_differenceExp",
        "goodSeeingDiff_templateExp",
    )
)

In [None]:
# Catalog from raw pipeline output is a `lsst.afw.table.SourceCatalog` and includes 100 blank "sky" sources
# The `sky_source` column is what the selections below use to tell them apart.
full_dia_cat = butler.get("goodSeeingDiff_diaSrc", dataId=data_id )

In [None]:
# Rows with the DipoleFit flag set that are neither classified as dipoles nor
# sky sources.  Deep copy to get a new contiguous catalog.
dipole_fit_flag = full_dia_cat["ip_diffim_DipoleFit_flag"]
dipole_classification_flag = full_dia_cat["ip_diffim_DipoleFit_flag_classification"]
sky_source_flag = full_dia_cat["sky_source"]
fit_but_not_dipole = full_dia_cat[
    dipole_fit_flag & ~dipole_classification_flag & ~sky_source_flag
].copy(deep=True)

In [None]:
import re
# Collect the schema column names that start with "ip_diffim".
ip_diffim_pattern = re.compile("ip_diffim")
ip_diffim_cols = list(filter(ip_diffim_pattern.match, full_dia_cat.schema.getNames()))

In [None]:
# Dump each ip_diffim column for the flagged-but-not-dipole rows.
for column_name in ip_diffim_cols:
    print(column_name, fit_but_not_dipole[column_name])

In [None]:
# Load the `src` dataset for the same data ID.
# NOTE(review): `src` is not referenced again in this notebook -- confirm it is still needed.
src = butler.get("src", dataId=data_id)

In [None]:
# Positive-lobe instFlux vs. fitted lobe separation for the classified dipoles.
# NOTE(review): `dipoles` is only assigned in a later cell, so on a fresh
# top-to-bottom run this raises NameError; run the cell that builds `dipoles` first.
plt.scatter(dipoles["ip_diffim_DipoleFit_pos_instFlux"], dipoles["ip_diffim_DipoleFit_separation"])

In [None]:
# Split the raw catalog on the `sky_source` flag.
is_sky_source = full_dia_cat["sky_source"]
dia_sources = full_dia_cat[~is_sky_source]
sky_sources = full_dia_cat[is_sky_source]

# Deep copy to get a new contiguous catalog
is_classified_dipole = dia_sources["ip_diffim_DipoleFit_flag_classification"]
dipoles = dia_sources[is_classified_dipole].copy(deep=True)

In [None]:
# List the attributes available on the difference exposure's `info` object.
dir(diff.info)

In [None]:
# List every column name in the raw DIA source catalog schema.
full_dia_cat.schema.getNames()

In [None]:
# Report how many non-sky detections there are and how many of them were
# classified as dipoles.  `dia_sources` is the catalog `dipoles` was selected
# from, so the two counts are directly comparable.
print(f"{len(dia_sources)} total detections.")
print(f"{len(dipoles)} detections classified as dipoles.")

In [None]:
# Import `lsst.geom` explicitly so this cell does not depend on another LSST
# package having loaded it as a side effect.
import lsst.geom

psf = science.getPsf()
# Evaluate the PSF shape at a fixed position taken as the middle of the image.
middle_x, middle_y = 2000, 2000
position = lsst.geom.Point2D(middle_x, middle_y)
sigma = psf.computeShape(position).getDeterminantRadius()
# Twice the PSF determinant radius, for comparison with dipole separations.
print(2 * sigma)

The `ip_diffim_DipoleFit_separation` is fit to the science and template images, not to the DIA (difference) image.  So we expect separations to be on the order of hundredths of a pixel, not the 2*sigma that the poles are separated by in the difference image.

In [None]:
# Split the classified dipoles into three separation bins:
#   reasonable:         separation <= large threshold
#   large:              large threshold < separation <= unreasonable threshold
#   unbelievably wide:  separation > unreasonable threshold
# The bins are exhaustive for finite separations; a value exactly equal to the
# unreasonable threshold is counted as "large" rather than being dropped.
large_separation_threshold = 0.8  # pixels
unreasonable_separation_threshold = 10  # pixels

dipole_separation = dipoles["ip_diffim_DipoleFit_separation"]
is_reasonable = dipole_separation <= large_separation_threshold
is_large = (dipole_separation > large_separation_threshold) \
    & (dipole_separation <= unreasonable_separation_threshold)
is_unbelievably_wide = dipole_separation > unreasonable_separation_threshold

# Deep copies give new contiguous catalogs.
reasonable_dipoles = dipoles[is_reasonable].copy(deep=True)
large_dipoles = dipoles[is_large].copy(deep=True)
unbelievably_wide_dipoles = dipoles[is_unbelievably_wide].copy(deep=True)

In [None]:
# Inspect the fitted separation column for the whole raw catalog.
full_dia_cat["ip_diffim_DipoleFit_separation"]

In [None]:
# Histogram of dipole separations on log-spaced bins from 1e-2 to 1e2, with
# the two separation thresholds marked as dashed vertical lines.
bins = np.logspace(-2, 2)
dipole_separations = dipoles["ip_diffim_DipoleFit_separation"]
plt.hist(dipole_separations, bins=bins, histtype="step", linewidth=2)
plt.xscale("log")
plt.xlabel("Dipole +/- separation [pixels]")
plt.axvline(large_separation_threshold, linestyle="--", color="gray")
plt.axvline(unreasonable_separation_threshold, linestyle="--", color="orange")

In [None]:
# Plot direction of arrows on sky
plt.quiver(dipoles["base_SdssCentroid_x"],
           dipoles["base_SdssCentroid_y"],
           dipoles["ip_diffim_DipoleFit_separation"],
           dipoles["ip_diffim_DipoleFit_separation"],
           angles=dipoles["ip_diffim_DipoleFit_orientation"])

Anything more than 10 pixels is spurious.  Let's take a look at them.

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(18, 8))
template_axis, mask_axis = axes

# Arrow inputs for the two dipole samples drawn below.
reasonable_x = reasonable_dipoles["base_SdssCentroid_x"]
reasonable_y = reasonable_dipoles["base_SdssCentroid_y"]
reasonable_length = reasonable_dipoles["ip_diffim_DipoleFit_separation"]
reasonable_angle = reasonable_dipoles["ip_diffim_DipoleFit_orientation"]
large_x = large_dipoles["base_SdssCentroid_x"]
large_y = large_dipoles["base_SdssCentroid_y"]
large_length = large_dipoles["ip_diffim_DipoleFit_separation"]
large_angle = large_dipoles["ip_diffim_DipoleFit_orientation"]

# Left panel: zscaled template image with the reasonable dipoles in orange.
plt.sca(template_axis)
display_template = afwDisplay.Display(frame=fig)
display_template.scale("linear", "zscale")
display_template.image(template)
plt.quiver(reasonable_x, reasonable_y, reasonable_length, reasonable_length,
           angles=reasonable_angle, color="orange")

# Right panel: template mask with reasonable (orange) and large (red) dipoles.
plt.sca(mask_axis)
display_mask = afwDisplay.Display(frame=fig)
display_mask.image(template.mask)
plt.quiver(reasonable_x, reasonable_y, reasonable_length, reasonable_length,
           angles=reasonable_angle, color="orange")
plt.quiver(large_x, large_y, large_length, large_length,
           angles=large_angle, color="red")

In [None]:
# Plot direction of arrows on sky
plt.quiver(dipoles["base_SdssCentroid_x"],
           dipoles["base_SdssCentroid_y"],
           dipoles["ip_diffim_DipoleFit_separation"],
           dipoles["ip_diffim_DipoleFit_separation"],
           angles=dipoles["ip_diffim_DipoleFit_orientation"])

In [None]:
# Inspect the general pixel-flag column for the dipoles whose separation
# exceeds the unreasonable threshold.
unbelievably_wide_dipoles["base_PixelFlags_flag"]

In [None]:
# Print the slot centroid coordinates of the unbelievably wide dipoles.
x = unbelievably_wide_dipoles["slot_Centroid_x"]
y = unbelievably_wide_dipoles["slot_Centroid_y"]
print(x, y)

In [None]:
# Index of the unbelievably wide dipole to inspect.
i = 0
# x, y flipped for imshow: numpy image arrays are indexed [row, column], so
# `x` holds the row (centroid y) and `y` holds the column (centroid x).
y = int(unbelievably_wide_dipoles["slot_Centroid_x"][i])
x = int(unbelievably_wide_dipoles["slot_Centroid_y"][i])
size = 50  # pixels
# Clamp the lower bounds at 0: a negative slice start would wrap around to the
# far side of the array instead of stopping at the image edge.
row_slice = slice(max(x - size, 0), x + size)
col_slice = slice(max(y - size, 0), y + size)
small_diff = diff.image.array[row_slice, col_slice]
small_science = science.image.array[row_slice, col_slice]
small_template = template.image.array[row_slice, col_slice]

# Show the full difference, science, and template exposures side by side
# (the cutouts above are not what is displayed).
fig, axes = plt.subplots(1, 3, figsize=(16, 6))
plt.sca(axes[0])
display_diff = afwDisplay.Display(frame=fig)
display_diff.scale("linear", "zscale")
display_diff.mtv(diff)

# The science panel uses its own display object rather than reusing
# `display_diff`.
plt.sca(axes[1])
display_science = afwDisplay.Display(frame=fig)
display_science.scale("linear", "zscale")
display_science.mtv(science)

plt.sca(axes[2])
display_template = afwDisplay.Display(frame=fig)
display_template.scale("linear", "zscale")
display_template.mtv(template)


plt.tight_layout()
plt.show()


In [None]:
# Set to True to print the type and full schema of the DIA source catalog.
SHOW_SCHEMA = False
if SHOW_SCHEMA:
    print(type(dia_sources))
    print(dia_sources.schema)

In [None]:
STAMP_SIZE = 100
def postage_stamp_from_image(image, x, y, stamp_size=STAMP_SIZE):
    """
    Cut a square postage stamp out of ``image`` around the position (x, y).

    The cutout is ``stamp_size`` x ``stamp_size`` and is obtained with
    ``image.getCutout``.  x, y can be fractional, but the image will be in
    original pixels.
    """
    import lsst.geom as geom

    # Naive version that is wrong because it doesn't account for x, y orientation
    return image.getCutout(geom.Point2D(x, y), geom.Extent2I(stamp_size, stamp_size))

In [None]:
def show_postage_stamps(*args, figsize=(12, 6)):
    """Display the given images side by side in a single row.

    Parameters
    ----------
    *args
        Images to show, one per panel, left to right.  Each is passed to
        ``afwDisplay.Display.mtv``.
    figsize : tuple, optional
        Figure size forwarded to ``plt.subplots``.

    Returns
    -------
    fig
        The matplotlib figure holding the panels.
    """
    n = len(args)
    # squeeze=False keeps `axes` a 2-D array even when n == 1; by default
    # plt.subplots(1, 1) returns a bare Axes that cannot be indexed.
    fig, axes = plt.subplots(1, n, figsize=figsize, squeeze=False)
    # Hold a reference to every display until the layout is finished.
    displays = []
    for ax, image in zip(axes[0], args):
        # Make this panel the current axes before creating its display.
        plt.sca(ax)
        display = afwDisplay.Display(frame=fig)
        display.setMaskTransparency(80)
        display.scale("linear", "zscale")
        display.mtv(image)

        displays.append(display)

    plt.tight_layout()
    return fig

In [None]:
# One row of diff / science / template stamps per unbelievably wide dipole.
x = unbelievably_wide_dipoles["slot_Centroid_x"]
y = unbelievably_wide_dipoles["slot_Centroid_y"]
for xi, yi in zip(x, y):
    # Show image name?
    d, s, t = (
        postage_stamp_from_image(exposure, xi, yi)
        for exposure in (diff, science, template)
    )
    show_postage_stamps(d, s, t)


In [None]:
# Reminder of which detector/exposure the stamps above come from.
print(data_id)

In [None]:
# Query the mask-plane colors configured on `display_diff`.
display_diff.getMaskPlaneColor()

In [None]:
# Diff / science / template stamps for the first 10 reasonable dipoles.
x = reasonable_dipoles["slot_Centroid_x"]
y = reasonable_dipoles["slot_Centroid_y"]
first_ten = slice(10)
for xi, yi in zip(x[first_ten], y[first_ten]):
    # Show image name?
    d, s, t = (
        postage_stamp_from_image(exposure, xi, yi)
        for exposure in (diff, science, template)
    )
    show_postage_stamps(d, s, t)


In [None]:
# Diff / science / template stamps for the first 10 large-separation dipoles.
x = large_dipoles["slot_Centroid_x"]
y = large_dipoles["slot_Centroid_y"]
first_ten = slice(10)
for xi, yi in zip(x[first_ten], y[first_ten]):
    # Show image name?
    d, s, t = (
        postage_stamp_from_image(exposure, xi, yi)
        for exposure in (diff, science, template)
    )
    show_postage_stamps(d, s, t)


In [None]:
# Flag columns that disqualify a source when set.
PIXEL_FLAG_LIST = [
    'base_PixelFlags_flag_offimage', 'base_PixelFlags_flag_edge', 'base_PixelFlags_flag_interpolated',
    'base_PixelFlags_flag_saturated', 'base_PixelFlags_flag_cr', 'base_PixelFlags_flag_bad',
    'base_PixelFlags_flag_suspect', 'base_PixelFlags_flag_interpolatedCenter', 'base_PixelFlags_flag_saturatedCenter',
    'base_PixelFlags_flag_crCenter', 'base_PixelFlags_flag_suspectCenter'
]
SHAPE_FLAG_LIST = ["slot_Shape_flag"]
EDGE_FLAG_LIST = ["base_SdssCentroid_flag_edge"]
SKY_SOURCE = ["sky_source"]

FLAG_LIST = PIXEL_FLAG_LIST + SHAPE_FLAG_LIST + EDGE_FLAG_LIST + SKY_SOURCE

# Minimum absolute aperture-flux signal-to-noise for a source to count as good.
SNR_THRESHOLD = 5.5

def get_good_sources_idx(df, flag_list=FLAG_LIST, snr_threshold=SNR_THRESHOLD):
    """Return a boolean mask selecting unflagged, high-|SNR| rows of ``df``.

    Parameters
    ----------
    df
        Table-like object supporting ``len(df)`` (number of rows) and
        ``df[column_name]`` (one value per row).
    flag_list : list of str, optional
        Names of boolean columns; a row with any of them set is rejected.
    snr_threshold : float, optional
        A row is kept only if ``slot_ApFlux_instFlux / slot_ApFlux_instFluxErr``
        is below ``-snr_threshold`` or above ``snr_threshold``.

    Returns
    -------
    good : numpy.ndarray of bool
        One entry per row; True where no listed flag is set and the SNR cut
        is passed.
    """
    # One boolean per row.  Size it from the row count rather than with
    # zeros_like(df), which would take the shape of the whole table.
    bad = np.zeros(len(df), dtype=bool)
    for flag in flag_list:
        bad |= np.asarray(df[flag], dtype=bool)

    flux = np.asarray(df["slot_ApFlux_instFlux"], dtype=float)
    flux_err = np.asarray(df["slot_ApFlux_instFluxErr"], dtype=float)
    snr = flux / flux_err
    # This is a diff so take things both above the positive SNR cut and below the negative SNR cut
    good_snr = (snr < -snr_threshold) | (snr > snr_threshold)

    return ~bad & good_snr

In [None]:
# Apply the default flag and SNR cuts to the non-sky DIA sources.
good_sources_idx = get_good_sources_idx(dia_sources)
good_sources = dia_sources[good_sources_idx]

In [None]:
# Number of sources that survive the cuts.
len(good_sources)

In [None]:
# Diff / science / template stamps for the first 10 sources passing the cuts.
x = good_sources["slot_Centroid_x"]
y = good_sources["slot_Centroid_y"]
first_ten = slice(10)
for xi, yi in zip(x[first_ten], y[first_ten]):
    # Show image name?
    d, s, t = (
        postage_stamp_from_image(exposure, xi, yi)
        for exposure in (diff, science, template)
    )
    show_postage_stamps(d, s, t)
