# Find outbursts in Rubin DP1

This notebook uses [LSDB](https://lsdb.io) to detect transients by fitting single-passband light curves with the Bazin function.
It also detects objects which have near-exponential or near-linear light curves.

In [None]:
# %pip install -U lsdb astropy light-curve

In [None]:
from pathlib import Path

from lsdb import read_hats
from nested_pandas import NestedDtype

In [None]:
# Passband used for the object-magnitude cut and for the light-curve feature extraction.
BAND = "g"

In [None]:
# Release tag: selects the HATS sub-directory and is reused in output file names.
release = 'v29_0_0'
hats_path = Path("/sdf/data/rubin/shared/lsdb_commissioning/hats") / release
# Print the entries found in the release directory
print(list(map(str, hats_path.iterdir())))

# Catalog locations inside the release directory
dia_object_lc_path = hats_path / "dia_object_collection"
object_lc_path = hats_path / "object_collection"

# Magnitude threshold in BAND: only objects with psfMag above this value
# (i.e. fainter than it) pass the filter below.
BRIGHTEST_BAND_MAG = 19.5

# Object catalog: identifiers, coordinates and the BAND PSF magnitude,
# restricted by the magnitude filter.
obj = read_hats(
    object_lc_path,
    columns=["objectId", "coord_ra", "coord_dec", f"{BAND}_psfMag"],
    filters=[(f"{BAND}_psfMag", ">", BRIGHTEST_BAND_MAG)],
)

id_column = "diaObjectId"
lc_column = "diaObjectForcedSource"
coord_columns = ["ra", "dec"]


def _with_nested_lc(df):
    """Expose the forced-source light curve of one partition as nested column ``lc``.

    The ``lc_column`` column is cast to the nested dtype built from its own
    Arrow dtype and stored as ``lc``; the original column is dropped and the
    ``id_column`` column is renamed to ``id``.
    """
    nested_dtype = NestedDtype.from_pandas_arrow_dtype(df.dtypes[lc_column])
    with_lc = df.assign(lc=df[lc_column].astype(nested_dtype))
    return with_lc.drop(columns=[lc_column]).rename(columns={id_column: "id"})


# DIA objects with their light curves, cross-matched to the magnitude-limited
# object catalog so that only faint objects are kept.
obj_lc = read_hats(
    dia_object_lc_path,
    columns=[id_column, lc_column, *coord_columns],
).map_partitions(
    _with_nested_lc,
).crossmatch(
    obj,
    radius_arcsec=3.0,
    n_neighbors=1,
    suffixes=("", "_obj"),
)
obj_lc

## Start Dask client

In [None]:
from dask.distributed import Client

# Dask client with 24 single-threaded workers, each with a 16GB memory limit.
client = Client(n_workers=24, memory_limit="16GB", threads_per_worker=1)
# Display the client summary as the cell output
client

## Filter out "bad" detections and select light curves with enough observations

In [None]:
import numpy as np
import light_curve as licu


# Every flag listed here must be unset for an observation to be kept.
good_detection_query = " and ".join(
    f"~lc.{flag}"
    for flag in (
        "psfDiffFlux_flag",
        "pixelFlags_suspect",
        "pixelFlags_saturated",
        "pixelFlags_cr",
        "pixelFlags_bad",
    )
)

# Three steps: drop entries with missing psfFlux, apply the flag query, then
# drop rows whose "lc" is missing afterwards.
# NOTE(review): presumably the first two steps act on individual observations
# inside the nested light curve and the last one on whole objects - confirm
# against the nested-pandas semantics.
obj_lc_filtered = (
    obj_lc
    .dropna(subset="lc.psfFlux")
    .query(good_detection_query)
    .dropna(subset="lc")
)
# Additional filtering to keep long events only (disabled).
# NOTE(review): the column is named "..._le_5" although the expression counts
# nights with |flux| >= 5 sigma.
# obj_lc_filtered = obj_lc_filtered.reduce(
#     lambda t, flux, sigma: {"nights_s2n_le_5": len(np.unique(np.floor(t[np.abs(flux) >= 5.0 * sigma])))},
#     "lc.midpointMjdTai",
#     "lc.psfDiffFlux",
#     "lc.psfDiffFluxErr",
#     meta={"nights_s2n_le_5": int},
#     append_columns=True,
# ).query(
#     "nights_s2n_le_5 >= 10"
# )

# Bazin-function fit using the "ceres" algorithm.
bazin_fit = licu.BazinFit(algorithm="ceres", ceres_niter=20, ceres_loss_reg=3)

# The three features are wrapped in Bins with window=1.0 and offset=0.0.
bins = licu.Bins(
    [bazin_fit, licu.ReducedChi2(), licu.ObservationCount()],
    window=1.0,
    offset=0.0,
)
feature_extractor = licu.Extractor(bins)

# Strip the prefix that encodes the Bins window/offset from each feature name.
# The literal must be kept in sync with the values passed to Bins above.
feature_names = [
    full_name.removeprefix('bins_window1.0_offset0.0_')
    for full_name in feature_extractor.names
]

def extract_features(band, t, y, yerr):
    """Extract light-curve features from the ``BAND`` observations of one object.

    Parameters
    ----------
    band : array
        Passband label of every observation.
    t, y, yerr : array
        Time, flux and flux error of every observation, aligned with ``band``.

    Returns
    -------
    dict
        Maps every name in ``feature_names`` to its value. All values are NaN
        when fewer than five ``BAND`` observations have S/N > 3.
    """
    in_band = band == BAND
    t, y, yerr = t[in_band], y[in_band], yerr[in_band]

    # At least five points with S/N > 3
    if np.count_nonzero(np.abs(y / yerr) > 3.0) < 5:
        return dict.fromkeys(feature_names, np.nan)

    # np.unique returns the first-occurrence index of every distinct time, in
    # increasing time order: this sorts the light curve and drops repeated
    # timestamps in one step.
    _, order = np.unique(t, return_index=True)
    t, y, yerr = t[order], y[order], yerr[order]

    # fill_value=np.nan is passed so that features which cannot be evaluated
    # come back as NaN (see the light-curve docs for the exact semantics).
    features = feature_extractor(t, y, yerr, fill_value=np.nan)

    return dict(zip(feature_names, features))


def add_mjd_60000(df):
    """Return a copy of ``df`` with ``lc.mjd_60000`` = ``lc.midpointMjdTai`` - 60000.

    The new values are stored as float32; the offset is subtracted before the
    cast. The input frame is left untouched: this function is applied
    partition-wise, and functions applied that way should not mutate the
    partitions they receive.

    Parameters
    ----------
    df : DataFrame
        Frame providing ``lc.midpointMjdTai``.

    Returns
    -------
    DataFrame
        Copy of ``df`` with the additional ``lc.mjd_60000`` entry.
    """
    df = df.copy()
    # NOTE(review): float32 presumably matches the dtype of the flux columns
    # passed to the feature extractor together with this time - confirm.
    df["lc.mjd_60000"] = np.asarray(df["lc.midpointMjdTai"] - 60_000.0, dtype=np.float32)
    return df


# Selection cuts on the extracted features; a candidate must satisfy all of them.
candidate_query = " and ".join(
    [
        "observation_count >= 8",
        "chi2 > 1.0",
        # "bazin_fit_reduced_chi2 > 0.8 and bazin_fit_reduced_chi2 < 5.0",
        "chi2 / bazin_fit_reduced_chi2 > 3.0",
        "bazin_fit_reduced_chi2 < 10.0",
        # "bazin_fit_rise_time > 3 and bazin_fit_rise_time < 10",
        # NOTE(review): the last term below divides fall_time by itself;
        # presumably fall_time / rise_time was intended.
        # "bazin_fit_fall_time < 50 and bazin_fit_fall_time > 1 and bazin_fit_fall_time / bazin_fit_fall_time < 10",
    ]
)

# Add the shifted time column, extract the features of every light curve
# (appended as new columns), then apply the cuts.
candidates = (
    obj_lc_filtered
    .map_partitions(add_mjd_60000)
    .reduce(
        extract_features,
        "lc.band",
        "lc.mjd_60000",
        "lc.psfDiffFlux",
        "lc.psfDiffFluxErr",
        meta=dict.fromkeys(feature_names, float),
        append_columns=True,
    )
    .query(candidate_query)
)
candidates

## Compute the candidates and plot a few of them

In [None]:
# Execute the lazy pipeline and collect the result.
# NOTE: despite its name, this holds every selected candidate, not a subset.
cand_subset = candidates.compute()
# Save the candidates to a Parquet file tagged with the release
cand_subset.to_parquet(f"transient-candidates-{release}.parquet")
cand_subset

In [None]:
import matplotlib.pyplot as plt

# Plot colour of every passband
COLORS = {'u': '#0c71ff', 'g': '#49be61', 'r': '#c61c00',
          'i': '#ffc200', 'z': '#f341a2', 'y': '#5d0000'}
# NOTE: BANDS and FOLDED are not used in this cell
BANDS = list(COLORS)

FOLDED = True

# How many candidates (taken from the top of the table) get a figure
N_CANDIDATES_TO_PLOT = 20

# Output directory for the per-candidate PDF figures
cand_path = Path("transient-candidates")
cand_path.mkdir(exist_ok=True, parents=True)

for _, cand in cand_subset.iloc[:N_CANDIDATES_TO_PLOT].iterrows():
    lc = cand["lc"]
    mjd = lc["mjd_60000"]
    flux, flux_err = lc["psfDiffFlux"], lc["psfDiffFluxErr"]
    # Only observations with S/N > 3 are drawn
    significant = np.abs(flux / flux_err) > 3.0

    fig, ax_mjd = plt.subplots(1, 1, figsize=(7, 5))
    # NOTE(review): the u band has a colour in COLORS but is not plotted here;
    # confirm whether that is intended.
    for b in 'grizy':
        idx = (lc["band"] == b) & significant
        ax_mjd.errorbar(
            mjd[idx],
            y=flux[idx],
            yerr=flux_err[idx],
            fmt="o",
            color=COLORS[b],
            label=b,
            alpha=0.3,
        )

    # Bazin model evaluated on a dense grid spanning the whole light curve
    t_ = np.linspace(mjd.min(), mjd.max(), 1000)
    bazin_params = np.asarray(cand[bazin_fit.names], dtype=t_.dtype)
    ax_mjd.plot(
        t_,
        bazin_fit.model(t_, bazin_params),
        ls='-',
        color=COLORS[BAND],
        label=f'{BAND}-band Bazin fit Χ²/ddof={cand["bazin_fit_reduced_chi2"]:.2f}',
    )

    fig.suptitle(
        f"OID: {cand['id']}, RA: {cand[coord_columns[0]]:.5f}, Dec: {cand[coord_columns[1]]:.5f}"
    )
    ax_mjd.set_ylabel("diff Flux, nJy")
    ax_mjd.set_xlabel("MJD - 60000")
    ax_mjd.set_xlim(mjd.min() - 1, mjd.max() + 1)

    # Zero-flux reference line
    ax_mjd.axhline(0.0, color='k', linestyle='--', alpha=0.5)
    ax_mjd.legend()
    ax_mjd.grid()

    fig.savefig(cand_path / f"{release}-{cand['id']}.pdf")

    print(cand["id"])