# Find outbursts in ComCam data

In [1]:
# %pip install -U lsdb astropy light-curve

In [2]:
from pathlib import Path

from lsdb import read_hats
from nested_pandas import NestedDtype

In [3]:
# Passband used throughout the notebook: it picks the `{BAND}_psfMag` column
# for the object-magnitude cut and the observations passed to feature extraction.
BAND = "y"

In [4]:
# Gaia photometric zero points as (AB, Vega) pairs, from table 3 of
# https://www.aanda.org/articles/aa/pdf/2021/05/aa39587-20.pdf
# A Vega magnitude converts to AB as: m_AB = m_Vega + ZP_AB - ZP_Vega.
GAIA_ZERO_POINTS_AB_VEGA = {
    "g": (25.8010, 25.6874),
    "bp": (25.3540, 25.3385),
    "rp": (25.1040, 24.7479),
}


def _gaia_vega_to_ab(df):
    """Replace the Gaia Vega-system mean magnitudes with AB magnitudes.

    For every band in ``GAIA_ZERO_POINTS_AB_VEGA`` the column
    ``phot_<band>_mean_mag`` is converted and stored as ``<band>_mag``;
    the original Vega columns are dropped from the returned frame.
    """
    ab_mags = {
        f"{band}_mag": df[f"phot_{band}_mean_mag"] + zp_ab - zp_vega
        for band, (zp_ab, zp_vega) in GAIA_ZERO_POINTS_AB_VEGA.items()
    }
    vega_columns = [f"phot_{band}_mean_mag" for band in GAIA_ZERO_POINTS_AB_VEGA]
    return df.assign(**ab_mags).drop(columns=vega_columns)


# Lazily open Gaia DR3 (with its 10-arcsec margin cache) and convert the
# photometry to AB magnitudes partition by partition.
gaia = read_hats(
    'https://data.lsdb.io/hats/gaia_dr3/gaia',
    margin_cache='https://data.lsdb.io/hats/gaia_dr3/gaia_10arcs',
    columns=["source_id", "ra", "dec", "phot_g_mean_mag", "phot_bp_mean_mag", "phot_rp_mean_mag"],
).map_partitions(_gaia_vega_to_ab)

In [5]:
release = 'v29_0_0_rc5'
hats_path = Path("/sdf/data/rubin/shared/lsdb_commissioning/hats") / release
# Show which catalogs are available for this release.
print([str(entry) for entry in hats_path.iterdir()])

dia_object_lc_path = hats_path / "dia_object_lc"
object_lc_path = hats_path / "object_lc"

# Only objects with a BAND PSF magnitude numerically above this value
# (i.e. fainter than it) are kept.
BRIGHTEST_BAND_MAG = 19.5

id_column = "diaObjectId"
lc_column = "diaObjectForcedSource"
coord_columns = ["ra", "dec"]

# Faint objects from the object catalog; the magnitude cut is pushed into
# the read via `filters`.
# NOTE(review): no margin cache is passed here although the directory listing
# contains `object_lc_5arcs` — confirm whether the crossmatch below should use it.
band_mag_column = f"{BAND}_psfMag"
obj = read_hats(
    object_lc_path,
    columns=["objectId", "coord_ra", "coord_dec", band_mag_column],
    filters=[(band_mag_column, ">", BRIGHTEST_BAND_MAG)],
)


def _pack_light_curve(df):
    """Expose the forced-source column as nested column `lc` and rename the id column to `id`."""
    nested_dtype = NestedDtype.from_pandas_arrow_dtype(df.dtypes[lc_column])
    with_lc = df.assign(lc=df[lc_column].astype(nested_dtype))
    return with_lc.drop(columns=[lc_column]).rename(columns={id_column: "id"})


dia_obj = read_hats(
    dia_object_lc_path,
    columns=[id_column, lc_column, *coord_columns],
)
# Select faint objects only: keep DIA objects whose nearest faint object lies
# within 3 arcsec; columns coming from `obj` get the "_obj" suffix.
obj_lc = dia_obj.map_partitions(_pack_light_curve).crossmatch(
    obj,
    radius_arcsec=3.0,
    n_neighbors=1,
    suffixes=("", "_obj"),
)
obj_lc

['/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/dia_object', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/object', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/dia_object_lc_index', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/object_lc_index', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/dia_object_lc_x_ps1', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/dia_source', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/object_lc_x_ps1', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/source', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/dia_object_lc_5arcs', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/object_lc_5arcs', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/dia_object_lc', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/object_forced_source', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0_rc5/dia_object_force



Unnamed: 0_level_0,id,ra,dec,lc,objectId_obj,coord_ra_obj,coord_dec_obj,y_psfMag_obj,_dist_arcsec
npartitions=45,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"Order: 5, Pixel: 32",int64[pyarrow],double[pyarrow],double[pyarrow],"nested<parentObjectId: [int64], coord_ra: [dou...",int64[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow]
"Order: 6, Pixel: 136",...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...
"Order: 4, Pixel: 2247",...,...,...,...,...,...,...,...,...
"Order: 3, Pixel: 562",...,...,...,...,...,...,...,...,...


In [None]:
# Crossmatch every light-curve object with its nearest Gaia source within
# 10 arcsec; presumably the match is attached as a nested column — TODO confirm.
# NOTE(review): the recorded output of this cell is
# "TypeError: KdTreeCrossmatch.validate() got an unexpected keyword argument 'name'",
# so the cell does not run as written with the lsdb version that was installed.
# NOTE(review): `obj_lc` already has a top-level `_dist_arcsec` column from the
# crossmatch with `obj`; verify that the rename below targets the Gaia distance
# and not that earlier column.
obj_x_gaia = obj_lc.crossmatch_nested(
    gaia,
    radius_arcsec=10,
    n_neighbors=1,
).map_partitions(
    lambda df: df.rename(columns={"_dist_arcsec": "dist_gaia"}),
)
obj_x_gaia

TypeError: KdTreeCrossmatch.validate() got an unexpected keyword argument 'name'

In [None]:
# Presumably meant to keep the objects that have no Gaia counterpart.
# NOTE(review): pandas spells the missing-value test `isna()`, not `is_na()`,
# and no column called `name` appears in the catalog previews in this notebook —
# confirm the intended column and method before running this cell.
obj_no_gaia = obj_x_gaia.query("name.is_na()")

## Start Dask client

In [26]:
from dask.distributed import Client

# Local Dask cluster: 24 single-threaded workers, each limited to 16GB of memory.
client = Client(
    threads_per_worker=1,
    n_workers=24,
    memory_limit="16GB",
)
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 27111 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:27111/status,

0,1
Dashboard: http://127.0.0.1:27111/status,Workers: 24
Total threads: 24,Total memory: 357.63 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:15847,Workers: 24
Dashboard: http://127.0.0.1:27111/status,Total threads: 24
Started: Just now,Total memory: 357.63 GiB

0,1
Comm: tcp://127.0.0.1:23799,Total threads: 1
Dashboard: http://127.0.0.1:21269/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:9973,
Local directory: /lscratch/kostya/dask-scratch-space/worker-ymgqiwqc,Local directory: /lscratch/kostya/dask-scratch-space/worker-ymgqiwqc

0,1
Comm: tcp://127.0.0.1:11539,Total threads: 1
Dashboard: http://127.0.0.1:23787/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:1385,
Local directory: /lscratch/kostya/dask-scratch-space/worker-8y_iali4,Local directory: /lscratch/kostya/dask-scratch-space/worker-8y_iali4

0,1
Comm: tcp://127.0.0.1:3775,Total threads: 1
Dashboard: http://127.0.0.1:5473/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:8529,
Local directory: /lscratch/kostya/dask-scratch-space/worker-w4r7if9_,Local directory: /lscratch/kostya/dask-scratch-space/worker-w4r7if9_

0,1
Comm: tcp://127.0.0.1:22039,Total threads: 1
Dashboard: http://127.0.0.1:11083/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:3219,
Local directory: /lscratch/kostya/dask-scratch-space/worker-onfu33qg,Local directory: /lscratch/kostya/dask-scratch-space/worker-onfu33qg

0,1
Comm: tcp://127.0.0.1:23083,Total threads: 1
Dashboard: http://127.0.0.1:27045/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:27101,
Local directory: /lscratch/kostya/dask-scratch-space/worker-rddgjn6g,Local directory: /lscratch/kostya/dask-scratch-space/worker-rddgjn6g

0,1
Comm: tcp://127.0.0.1:30345,Total threads: 1
Dashboard: http://127.0.0.1:29595/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:32899,
Local directory: /lscratch/kostya/dask-scratch-space/worker-oap83h3d,Local directory: /lscratch/kostya/dask-scratch-space/worker-oap83h3d

0,1
Comm: tcp://127.0.0.1:17625,Total threads: 1
Dashboard: http://127.0.0.1:26885/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:31997,
Local directory: /lscratch/kostya/dask-scratch-space/worker-mqz623uy,Local directory: /lscratch/kostya/dask-scratch-space/worker-mqz623uy

0,1
Comm: tcp://127.0.0.1:7863,Total threads: 1
Dashboard: http://127.0.0.1:28261/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:25289,
Local directory: /lscratch/kostya/dask-scratch-space/worker-izh03tyc,Local directory: /lscratch/kostya/dask-scratch-space/worker-izh03tyc

0,1
Comm: tcp://127.0.0.1:21651,Total threads: 1
Dashboard: http://127.0.0.1:28561/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:4351,
Local directory: /lscratch/kostya/dask-scratch-space/worker-qk6cd7r8,Local directory: /lscratch/kostya/dask-scratch-space/worker-qk6cd7r8

0,1
Comm: tcp://127.0.0.1:7751,Total threads: 1
Dashboard: http://127.0.0.1:29075/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:32709,
Local directory: /lscratch/kostya/dask-scratch-space/worker-gp5v34o9,Local directory: /lscratch/kostya/dask-scratch-space/worker-gp5v34o9

0,1
Comm: tcp://127.0.0.1:13045,Total threads: 1
Dashboard: http://127.0.0.1:24263/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:27525,
Local directory: /lscratch/kostya/dask-scratch-space/worker-rrrkubky,Local directory: /lscratch/kostya/dask-scratch-space/worker-rrrkubky

0,1
Comm: tcp://127.0.0.1:2901,Total threads: 1
Dashboard: http://127.0.0.1:31571/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:10753,
Local directory: /lscratch/kostya/dask-scratch-space/worker-eooprgwh,Local directory: /lscratch/kostya/dask-scratch-space/worker-eooprgwh

0,1
Comm: tcp://127.0.0.1:21333,Total threads: 1
Dashboard: http://127.0.0.1:8315/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:27589,
Local directory: /lscratch/kostya/dask-scratch-space/worker-k4u_55sf,Local directory: /lscratch/kostya/dask-scratch-space/worker-k4u_55sf

0,1
Comm: tcp://127.0.0.1:3725,Total threads: 1
Dashboard: http://127.0.0.1:28819/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:13149,
Local directory: /lscratch/kostya/dask-scratch-space/worker-cgcd75j1,Local directory: /lscratch/kostya/dask-scratch-space/worker-cgcd75j1

0,1
Comm: tcp://127.0.0.1:5747,Total threads: 1
Dashboard: http://127.0.0.1:10497/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:29227,
Local directory: /lscratch/kostya/dask-scratch-space/worker-1wwz4ncq,Local directory: /lscratch/kostya/dask-scratch-space/worker-1wwz4ncq

0,1
Comm: tcp://127.0.0.1:15257,Total threads: 1
Dashboard: http://127.0.0.1:27217/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:26149,
Local directory: /lscratch/kostya/dask-scratch-space/worker-4hs73s7m,Local directory: /lscratch/kostya/dask-scratch-space/worker-4hs73s7m

0,1
Comm: tcp://127.0.0.1:25131,Total threads: 1
Dashboard: http://127.0.0.1:27695/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:4843,
Local directory: /lscratch/kostya/dask-scratch-space/worker-9ua_j6en,Local directory: /lscratch/kostya/dask-scratch-space/worker-9ua_j6en

0,1
Comm: tcp://127.0.0.1:29385,Total threads: 1
Dashboard: http://127.0.0.1:21603/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:6585,
Local directory: /lscratch/kostya/dask-scratch-space/worker-yb6gay6d,Local directory: /lscratch/kostya/dask-scratch-space/worker-yb6gay6d

0,1
Comm: tcp://127.0.0.1:8541,Total threads: 1
Dashboard: http://127.0.0.1:19959/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:2445,
Local directory: /lscratch/kostya/dask-scratch-space/worker-75ljwuiz,Local directory: /lscratch/kostya/dask-scratch-space/worker-75ljwuiz

0,1
Comm: tcp://127.0.0.1:1651,Total threads: 1
Dashboard: http://127.0.0.1:7859/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:1553,
Local directory: /lscratch/kostya/dask-scratch-space/worker-tsbl6sko,Local directory: /lscratch/kostya/dask-scratch-space/worker-tsbl6sko

0,1
Comm: tcp://127.0.0.1:19809,Total threads: 1
Dashboard: http://127.0.0.1:10681/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:30875,
Local directory: /lscratch/kostya/dask-scratch-space/worker-dyf_cc0r,Local directory: /lscratch/kostya/dask-scratch-space/worker-dyf_cc0r

0,1
Comm: tcp://127.0.0.1:5829,Total threads: 1
Dashboard: http://127.0.0.1:4363/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:21959,
Local directory: /lscratch/kostya/dask-scratch-space/worker-nratf5xp,Local directory: /lscratch/kostya/dask-scratch-space/worker-nratf5xp

0,1
Comm: tcp://127.0.0.1:12579,Total threads: 1
Dashboard: http://127.0.0.1:6447/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:1585,
Local directory: /lscratch/kostya/dask-scratch-space/worker-z5x3551w,Local directory: /lscratch/kostya/dask-scratch-space/worker-z5x3551w

0,1
Comm: tcp://127.0.0.1:23409,Total threads: 1
Dashboard: http://127.0.0.1:18713/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:25311,
Local directory: /lscratch/kostya/dask-scratch-space/worker-w5ufljce,Local directory: /lscratch/kostya/dask-scratch-space/worker-w5ufljce


## Filter out "bad" detections and select light curves with enough observations

In [27]:
import numpy as np
import light_curve as licu


def _count_high_s2n_nights(t, flux, sigma):
    """Count distinct integer-MJD days holding a point with |flux| >= 5 * sigma.

    NOTE(review): the output key says "le" although the cut is ">="; the name
    is kept because it is the column name carried into the results.
    """
    significant = np.abs(flux) >= 5.0 * sigma
    nights = np.unique(np.floor(t[significant]))
    return {"nights_s2n_le_5": len(nights)}


# Reject flagged detections, drop objects left without a light curve, and
# keep objects with high-S/N difference-flux points on at least 10 days.
obj_lc_filtered = (
    obj_lc
    .dropna(subset="lc.psfFlux")
    .query(
        "~lc.psfDiffFlux_flag"
        " and ~lc.pixelFlags_suspect"
        " and ~lc.pixelFlags_saturated"
        " and ~lc.pixelFlags_cr"
        " and ~lc.pixelFlags_bad"
    )
    .dropna(subset="lc")
    .reduce(
        _count_high_s2n_nights,
        "lc.midpointMjdTai",
        "lc.psfDiffFlux",
        "lc.psfDiffFluxErr",
        meta={"nights_s2n_le_5": int},
        append_columns=True,
    )
    .query("nights_s2n_le_5 >= 10")
)

# Currently unused thresholds, kept for reference:
# MIN_NOBS = 50
# MIN_NOBS_BAND = 30
# MIN_RCHI2 = 10
# MIN_AMPLITUDE = 0.05

# Features evaluated on each light curve: a Bazin-function fit, the reduced
# chi-square, and the number of observations, all wrapped in `licu.Bins`
# with window=1.0 and offset=0.0.
# (An unbinned alternative would pass the same three features straight to
# `licu.Extractor`.)
bazin_fit = licu.BazinFit(algorithm="ceres", ceres_niter=20, ceres_loss_reg=3)
binned_features = [
    bazin_fit,
    licu.ReducedChi2(),
    licu.ObservationCount(),
]
bins = licu.Bins(binned_features, window=1.0, offset=0.0)
feature_extractor = licu.Extractor(bins)

# Strip the prefix added by `licu.Bins` so the output columns carry the plain
# feature names. The prefix is hard-coded and must match window/offset above.
bins_name_prefix = 'bins_window1.0_offset0.0_'
feature_names = [
    full_name.removeprefix(bins_name_prefix)
    for full_name in feature_extractor.names
]


def extract_features(band, t, y, yerr):
    """Extract the configured light-curve features from the ``BAND`` observations.

    Parameters
    ----------
    band : array
        Passband label of every observation.
    t, y, yerr : array
        Observation times, values and uncertainties, aligned with ``band``.

    Returns
    -------
    dict
        Maps every entry of ``feature_names`` to its value. All values are NaN
        when fewer than five ``BAND`` observations have S/N above 3.
    """
    in_band = band == BAND
    t, y, yerr = t[in_band], y[in_band], yerr[in_band]

    # At least five points with S/N > 3. Written without a division so that
    # zero uncertainties do not raise divide-by-zero warnings.
    if np.count_nonzero(np.abs(y) > 3.0 * np.abs(yerr)) < 5:
        return dict.fromkeys(feature_names, np.nan)

    # np.unique returns the times in ascending order together with the index
    # of the first occurrence of each, so this both sorts the light curve and
    # drops repeated timestamps.
    _, first_index = np.unique(t, return_index=True)
    t, y, yerr = t[first_index], y[first_index], yerr[first_index]

    features = feature_extractor(t, y, yerr, fill_value=np.nan)

    return dict(zip(feature_names, features))


def add_mjd_60000(df):
    """Add ``lc.mjd_60000`` (``lc.midpointMjdTai`` minus 60000, as float32) to ``df``.

    The column is written into ``df`` itself, and that same object is returned.
    """
    shifted_mjd = df["lc.midpointMjdTai"] - 60_000.0
    df["lc.mjd_60000"] = np.asarray(shifted_mjd, dtype=np.float32)
    return df


# Selection cuts applied to the extracted features; joined with " and " below.
candidate_cuts = [
    "observation_count >= 8",
    "chi2 > 1.0",
    # Disabled: "bazin_fit_reduced_chi2 > 0.8 and bazin_fit_reduced_chi2 < 5.0"
    # The Bazin fit's reduced chi-square must be more than 3 times smaller than `chi2`.
    "chi2 / bazin_fit_reduced_chi2 > 3.0",
    "bazin_fit_reduced_chi2 < 10.0",
    # Disabled: "bazin_fit_rise_time > 3 and bazin_fit_rise_time < 10"
    # Disabled: "bazin_fit_fall_time < 50 and bazin_fit_fall_time > 1 and bazin_fit_fall_time / bazin_fit_fall_time < 10"
    # NOTE(review): the last disabled cut divides fall_time by itself —
    # presumably a fall/rise ratio was intended; confirm before re-enabling.
]

# Add the shifted float32 time column, extract features per object, then cut.
with_short_mjd = obj_lc_filtered.map_partitions(add_mjd_60000)
with_features = with_short_mjd.reduce(
    extract_features,
    "lc.band",
    "lc.mjd_60000",
    "lc.psfDiffFlux",
    "lc.psfDiffFluxErr",
    meta=dict.fromkeys(feature_names, float),
    append_columns=True,
)
candidates = with_features.query(" and ".join(candidate_cuts))
candidates

Unnamed: 0_level_0,id,ra,dec,lc,objectId_obj,coord_ra_obj,coord_dec_obj,z_psfMag_obj,_dist_arcsec,nights_s2n_le_5,bazin_fit_amplitude,bazin_fit_baseline,bazin_fit_reference_time,bazin_fit_rise_time,bazin_fit_fall_time,bazin_fit_reduced_chi2,chi2,observation_count
npartitions=45,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
"Order: 5, Pixel: 32",int64[pyarrow],double[pyarrow],double[pyarrow],"nested<parentObjectId: [int64], coord_ra: [dou...",int64[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],int64,float64,float64,float64,float64,float64,float64,float64,float64
"Order: 6, Pixel: 136",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 4, Pixel: 2247",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 3, Pixel: 562",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


## Compute the candidates and plot a few of them

In [28]:
# Run the lazy pipeline, keep the resulting candidates in memory, and save
# them to a parquet file named after the release.
cand_subset = candidates.compute()
parquet_name = f"transient-candidates-{release}.parquet"
cand_subset.to_parquet(parquet_name)
cand_subset

E20250425 10:29:27.187794 61086 program_evaluator.h:286] Accumulated cost = inf is not a finite number. Evaluation failed.
E20250425 10:29:27.188005 61086 trust_region_minimizer.cc:121] Terminating: Residual and Jacobian evaluation failed.
E20250425 10:29:27.188356 61086 program_evaluator.h:286] Accumulated cost = inf is not a finite number. Evaluation failed.
E20250425 10:29:27.450536 61101 program_evaluator.h:286] Accumulated cost = inf is not a finite number. Evaluation failed.
E20250425 10:29:29.942804 61089 program_evaluator.h:286] Accumulated cost = inf is not a finite number. Evaluation failed.
E20250425 10:29:32.678540 61102 program_evaluator.h:286] Accumulated cost = inf is not a finite number. Evaluation failed.


Unnamed: 0_level_0,id,ra,dec,lc,objectId_obj,coord_ra_obj,coord_dec_obj,z_psfMag_obj,_dist_arcsec,nights_s2n_le_5,bazin_fit_amplitude,bazin_fit_baseline,bazin_fit_reference_time,bazin_fit_rise_time,bazin_fit_fall_time,bazin_fit_reduced_chi2,chi2,observation_count
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1


In [29]:
import matplotlib.pyplot as plt

# Plot colour for each passband.
COLORS = {'u': '#0c71ff', 'g': '#49be61', 'r': '#c61c00',
          'i': '#ffc200', 'z': '#f341a2', 'y': '#5d0000'}
BANDS = list(COLORS)

FOLDED = True  # not read in this cell

# Directory receiving one PDF per plotted candidate.
cand_path = Path("transient-candidates")
cand_path.mkdir(exist_ok=True, parents=True)

# Plot at most the first 20 candidates.
for _, cand in cand_subset.iloc[:20].iterrows():
    lc = cand.lc
    mjd, flux, flux_err = lc["mjd_60000"], lc["psfDiffFlux"], lc["psfDiffFluxErr"]
    # Only points with |S/N| > 3 are drawn.
    significant = np.abs(flux / flux_err) > 3.0

    fig, ax_mjd = plt.subplots(1, 1, figsize=(7, 5), sharey=True)
    # NOTE(review): 'u' has a colour defined above but is not plotted here.
    for b in 'grizy':
        idx = (lc["band"] == b) & significant
        ax_mjd.errorbar(
            mjd[idx],
            y=flux[idx],
            yerr=flux_err[idx],
            fmt="o",
            color=COLORS[b],
            label=b,
            alpha=0.3,
        )

    # Overlay the fitted Bazin model on a dense grid spanning the light curve.
    # Drawn on the explicit axes rather than through the pyplot state machine.
    t_ = np.linspace(mjd.min(), mjd.max(), 1000)
    bazin_params = np.asarray(cand[bazin_fit.names], dtype=t_.dtype)
    ax_mjd.plot(
        t_,
        bazin_fit.model(t_, bazin_params),
        ls='-',
        color=COLORS[BAND],
        label=f'{BAND}-band Bazin fit Χ²/ddof={cand["bazin_fit_reduced_chi2"]:.2f}',
    )

    fig.suptitle(
        f"OID: {cand.id}, RA: {cand[coord_columns[0]]:.5f}, Dec: {cand[coord_columns[1]]:.5f}"
    )
    ax_mjd.set_ylabel("diff Flux, nJy")
    ax_mjd.set_xlabel("MJD - 60000")
    ax_mjd.set_xlim(mjd.min() - 1, mjd.max() + 1)

    # Zero-flux reference line across the whole axes.
    ax_mjd.axhline(0.0, color='k', linestyle='--', alpha=0.5)
    ax_mjd.legend()
    ax_mjd.grid()

    fig.savefig(cand_path / f"{release}-{cand.id}.pdf")
    # Display the figure, then release it so figures do not pile up in memory.
    plt.show()
    plt.close(fig)

    print(cand.id)