# Find periodic variables in ComCam data

In [1]:
# %pip install lsdb dask nested-dask astropy light-curve

In [2]:
!hostname

sdfiana008


In [3]:
import lsdb
lsdb.__version__

'0.6.0'

## Start Dask client

In [4]:
from dask.distributed import Client

## Loading the catalog

In [5]:
CATALOG_TYPE = "dia_object" # must be "object" or "dia_object"; any other value raises ValueError in the loading cell


In [6]:
from pathlib import Path

# Data release tag; it is also embedded in the names of the files written later.
release = 'v29_0_0'
hats_path = Path("/sdf/data/rubin/shared/lsdb_commissioning/hats/") / release

# Show which catalogs are available for this release.
print([str(entry) for entry in hats_path.iterdir()])

# The light-curve catalog sits inside the "<type>_collection" directory.
catalog_path = hats_path / f"{CATALOG_TYPE}_collection" / f"{CATALOG_TYPE}_lc"
assert catalog_path.exists(), f"Catalog path {catalog_path} does not exist"
catalog_path

['/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/object', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/dia_object_collection', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/dia_object_forced_source', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/object_collection_lite', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/source', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/object_collection', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/object_forced_source', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/dia_source', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/object_lite', '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/dia_object']


PosixPath('/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/dia_object_collection/dia_object_lc')

In [7]:
# Load the Forced Source + MJD Table
from lsdb import read_hats
from nested_pandas import NestedDtype


# r-band PSF magnitude threshold, used only by the "object" branch below.
# NOTE(review): the filter keeps rows with r_psfMag > BRIGHTEST_R_MAG, i.e.
# numerically larger magnitudes than the threshold -- confirm the direction.
BRIGHTEST_R_MAG = 21.5

# Pick the catalog-specific column names that the rest of the notebook uses.
if CATALOG_TYPE == "object":
    # Load the object table
    kwargs = dict(filters = [("r_psfMag", ">", BRIGHTEST_R_MAG)])
    lc_column = "objectForcedSource"
    coord_columns = ["coord_ra", "coord_dec"]
    id_column = "objectId"
    phot_column = "psfMag"
    err_column = "psfMagErr"
    flux_column = "psfFlux"
    fluxerr_column = "psfFluxErr"
elif CATALOG_TYPE == "dia_object":
    # Cannot pass empty filters because of
    # https://github.com/astronomy-commons/lsdb/issues/739
    kwargs = dict()
    lc_column = "diaObjectForcedSource"
    coord_columns = ["ra", "dec"]
    id_column = "diaObjectId"
    # Difference fluxes serve as both the "phot" and the "flux" columns here.
    flux_column = phot_column = "psfDiffFlux"
    fluxerr_column = err_column = "psfDiffFluxErr"
else:
    raise ValueError(f"Unknown catalog type: {CATALOG_TYPE}")

# Read only the id, nested light-curve and coordinate columns, then, per
# partition, cast the light-curve column to a NestedDtype stored as `lc` and
# drop the original column.
obj_lc = read_hats(
    catalog_path,
    columns=[id_column, lc_column] + coord_columns,
    **kwargs,
).map_partitions(
    lambda df: df.assign(
        lc=df[lc_column].astype(NestedDtype.from_pandas_arrow_dtype(df.dtypes[lc_column])),
    ).drop(columns=[lc_column]),
)
obj_lc

Unnamed: 0_level_0,diaObjectId,ra,dec,lc
npartitions=28,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Order: 3, Pixel: 2",int64[pyarrow],double[pyarrow],double[pyarrow],"nested<parentObjectId: [int64], coord_ra: [dou..."
"Order: 5, Pixel: 4471",...,...,...,...
...,...,...,...,...
"Order: 5, Pixel: 8989",...,...,...,...
"Order: 5, Pixel: 8992",...,...,...,...


In [8]:
# Full (untruncated) description of the nested light-curve dtype.
str(obj_lc.dtypes["lc"])

'nested<parentObjectId: [int64], coord_ra: [double], coord_dec: [double], visit: [int64], detector: [int16], band: [string], psfFlux: [float], psfFluxErr: [float], psfFlux_flag: [bool], psfDiffFlux: [float], psfDiffFluxErr: [float], psfDiffFlux_flag: [bool], diff_PixelFlags_nodataCenter: [bool], pixelFlags_bad: [bool], pixelFlags_cr: [bool], pixelFlags_crCenter: [bool], pixelFlags_edge: [bool], pixelFlags_interpolated: [bool], pixelFlags_interpolatedCenter: [bool], pixelFlags_nodata: [bool], pixelFlags_saturated: [bool], pixelFlags_saturatedCenter: [bool], pixelFlags_suspect: [bool], pixelFlags_suspectCenter: [bool], invalidPsfFlag: [bool], tract: [int64], patch: [int64], forcedSourceOnDiaObjectId: [int64], psfMag: [float], psfMagErr: [float], midpointMjdTai: [double]>'

## Filter out "bad" detections and select light curves with enough observations

In [9]:
import numpy as np
import light_curve as licu

# Build the per-observation quality cut as a query string on the nested `lc` columns.
if CATALOG_TYPE == "object":
    query = (
        "lc.psfMagErr < 0.3"
        " and ~lc.psfFlux_flag"
    )
elif CATALOG_TYPE == "dia_object":
    # NOTE(review): this keeps observations with |flux| > 0.3 * error, which is a
    # much looser cut than the magnitude-error cut of the "object" branch --
    # confirm that the factor is on the intended side of the inequality.
    query = (
        "abs(lc.psfDiffFlux) > 0.3 * lc.psfDiffFluxErr"
        " and ~lc.psfDiffFlux_flag"
    )
else:
    raise ValueError(f"Unknown catalog type: {CATALOG_TYPE}")
# Pixel-level flags are rejected for both catalog types.
query += (
    " and ~lc.pixelFlags_suspect"
    " and ~lc.pixelFlags_saturated"
    " and ~lc.pixelFlags_cr"
    " and ~lc.pixelFlags_bad"
)

# Drop observations without psfFlux, apply the quality cut, then drop rows on `lc`
# (presumably objects whose light curve ended up empty -- verify against nested-pandas).
filtered_lc = obj_lc.dropna(subset="lc.psfFlux").query(query).dropna(subset="lc")

# Object-level selection thresholds, applied to the features computed by extract_features.
MIN_NOBS = 50  # minimum number of observations over all bands
MIN_NOBS_BAND = 30  # minimum number of observations in a single band
MIN_RCHI2 = 2  # minimum r-band reduced chi-squared

# Minimum r-band amplitude; it is compared with a feature computed from
# `phot_column`, so its units follow that column.
if CATALOG_TYPE == "object":
    MIN_AMPLITUDE = 0.05
else:
    MIN_AMPLITUDE = 10.0

BANDS = 'ugrizy'  # all bands, iterated when plotting and in the licu-based extractors
SCAN_BANDS = "griz"  # bands in which single-band periods are searched

# One extractor call returns, in this order: observation count,
# inter-percentile range (argument 0.05) and reduced chi-squared.
feature_extractor = licu.Extractor(
    licu.ObservationCount(),
    licu.InterPercentileRange(0.05),
    licu.ReducedChi2(),
)

def extract_features(band, t, y, yerr):
    """Compute observation counts and r-band variability features of one light curve.

    Parameters
    ----------
    band : array
        Band label of every observation.
    t, y, yerr : array
        Observation times, measurements and their uncertainties.

    Returns
    -------
    dict
        ``nobs`` (observations in all bands) plus ``nobs_r``, ``amplitude_r`` and
        ``rchi2_r`` as returned by ``feature_extractor`` for the r-band subset.
    """
    y = np.asarray(y, dtype=float)
    yerr = np.asarray(yerr, dtype=float)

    # Count over every band, before the r-band selection.
    n_total = len(band)

    is_r = band == 'r'
    t, y, yerr = t[is_r], y[is_r], yerr[is_r]

    # np.unique yields the first index of every distinct time, in increasing
    # time order, so this both sorts and removes repeated timestamps.
    _, first_of_each_time = np.unique(t, return_index=True)
    t = t[first_of_each_time]
    y = y[first_of_each_time]
    yerr = yerr[first_of_each_time]

    nobs_r, amplitude_r, rchi2_r = feature_extractor(t, y, yerr, fill_value=np.nan)

    return {'nobs': n_total, 'nobs_r': nobs_r, 'amplitude_r': amplitude_r, 'rchi2_r': rchi2_r}


# Run extract_features on every light curve (band, time, measurement, uncertainty),
# append the four resulting columns, and keep only objects passing all thresholds.
# All four outputs are declared as float in `meta`.
lc_w_features = filtered_lc.reduce(
    extract_features,
    "lc.band",
    "lc.midpointMjdTai",
    f"lc.{phot_column}",
    f"lc.{err_column}",
    meta=dict.fromkeys(['nobs', 'nobs_r', 'amplitude_r', 'rchi2_r'], float),
    append_columns=True,
).query(f"nobs >= {MIN_NOBS} and nobs_r >= {MIN_NOBS_BAND} and amplitude_r > {MIN_AMPLITUDE} and rchi2_r >= {MIN_RCHI2}")

In [10]:
# # Non-lazy computation
# with Client(n_workers=24, memory_limit="16GB", threads_per_worker=1) as client:
#     display(client)
#     len_lc_w_features = lc_w_features._ddf.shape[0].compute()
# print(f"Number of light curves: {len_lc_w_features}")

### Add heliocentric times

In [11]:
import numpy as np
import astropy.units as u
from astropy.time import Time, TimeDelta
from astropy.coordinates import CartesianRepresentation, SkyCoord, HeliocentricTrueEcliptic, ICRS
from astropy.constants import c

def approx_earth_heliocentric_position(time):
    """Approximate Earth's heliocentric position from a fixed Keplerian ellipse.

    The orbit uses the constant elements hard-coded below and lies exactly in
    the ecliptic plane (z = 0); no perturbations are modelled.

    Parameters
    ----------
    time : object exposing ``.tdb`` (an astropy ``Time`` in the callers shown here)
        Epoch(s) at which to evaluate the position.

    Returns
    -------
    The Cartesian representation of the position after transforming the
    ecliptic coordinates to the ICRS frame.
    """
    # Orbital elements for Earth at J2000 epoch
    a = 1.000001018 * u.AU       # semi-major axis
    e = 0.0167086                # eccentricity
    M0 = np.deg2rad(357.51716)   # mean anomaly at J2000
    omega = np.deg2rad(102.9373) # perihelion angle; added directly to the true anomaly below
    T0 = Time('J2000.0').tdb

    # Mean motion in rad/day
    n = 2 * np.pi / 365.256363004

    # Time in days since J2000
    t = (time.tdb - T0).to(u.day).value

    # Mean anomaly at the requested epoch(s)
    M = M0 + n * t

    # Solve Kepler's equation
    def kepler_eq(E, M, e):
        # Residual of Kepler's equation; zero at the true eccentric anomaly.
        return E - e * np.sin(E) - M

    def solve_kepler(M, e):
        # Fixed number (5) of Newton-Raphson steps starting from E = M;
        # there is no convergence check.
        E = M
        for _ in range(5):
            E -= kepler_eq(E, M, e) / (1 - e * np.cos(E))
        return E

    E = solve_kepler(M, e)

    # True anomaly
    nu = 2 * np.arctan2(np.sqrt(1+e) * np.sin(E/2),
                        np.sqrt(1-e) * np.cos(E/2))

    # Radius vector
    r = a * (1 - e * np.cos(E))

    # Position in orbital plane (ecliptic coordinates)
    x = r * np.cos(omega + nu)
    y = r * np.sin(omega + nu)
    z = 0 * u.AU

    # Create CartesianRepresentation in HeliocentricTrueEcliptic frame
    pos_ecl = CartesianRepresentation(x, y, z)
    coord_ecl = SkyCoord(pos_ecl, frame=HeliocentricTrueEcliptic(equinox='J2000'))

    # Transform to ICRS (equatorial frame)
    coord_icrs = coord_ecl.transform_to(ICRS())

    return coord_icrs.cartesian


# Differs from Time.light_travel_time by ~a few seconds, but is much faster
def fast_light_travel_time_heliocentric_elliptical(time, target):
    """Light travel time between the observer on Earth and the heliocentre.

    Earth's position comes from ``approx_earth_heliocentric_position``; it is
    projected onto the unit vector pointing at ``target`` and divided by the
    speed of light.

    Parameters
    ----------
    time : astropy Time
        Observation epoch(s).
    target : coordinate object exposing ``.icrs``
        Sky position(s) of the source.

    Returns
    -------
    TimeDelta
        Correction that the caller adds to ``time``.
    """
    earth_xyz = approx_earth_heliocentric_position(time)

    # Line-of-sight direction, normalised to unit length along the component axis.
    direction = target.icrs.represent_as('cartesian').get_xyz()
    direction /= np.linalg.norm(direction, axis=0)

    path_difference = earth_xyz.dot(CartesianRepresentation(direction))
    return TimeDelta(path_difference / c)

In [12]:
def add_helio_mjd(df):
    """Add the ``lc.helioMjd`` nested column: observation MJD plus light-travel correction.

    The frame passed in is modified and returned.
    """
    obs_position = SkyCoord(ra=df["lc.coord_ra"], dec=df["lc.coord_dec"], unit="deg")
    obs_time = Time(df["lc.midpointMjdTai"], format="mjd", scale="tai")
    travel_time = fast_light_travel_time_heliocentric_elliptical(obs_time, obs_position)
    df["lc.helioMjd"] = (obs_time + travel_time).mjd
    return df


# Apply the correction lazily, partition by partition.
lc_helio = lc_w_features.map_partitions(add_helio_mjd)

## Running Lomb-Scargle
Use light-curve package or astropy

In [23]:
from astropy.timeseries import BoxLeastSquares, LombScargle, LombScargleMultiband

# NOTE(review): the light-curve `Periodogram` extractor below is disabled, yet
# extract_period_multiband_licu, extract_period_singleband_licu and
# extract_period_rband_licu in this cell still reference `periodogram_extractor`.
# Unless that name is defined elsewhere, calling any of them raises NameError.
# PERIODOGRAM_RESOLUTION = 30_000
# PERIODOGRAM_NYQUIST_FACTOR = 10

# periodogram_extractor = licu.Periodogram(
#     peaks=10,
#     nyquist='average',
#     # resolution=1000,
#     # max_freq_factor=10,
#     resolution=PERIODOGRAM_RESOLUTION,
#     max_freq_factor=PERIODOGRAM_NYQUIST_FACTOR,
#     fast=False,
# )
# Reduced chi-squared feature; extract_period_multiband_licu uses it to weight bands.
reduced_chi2_extractor = licu.ReducedChi2()

MAX_PERIOD = 1  # days
MIN_PERIOD = 5 / 60 / 24  # days (5 minutes)
# Periods (days) around which candidates are rejected.
BAD_PERIODS = np.array([1/3, 0.5, 2/3, 1, 2, 29.5])
# BAD_PERIODS = np.array([])
# Half-width of the rejection window, relative to each bad period.
BAD_PERIOD_REL_RANGE = 10 / 365.2422


def filter_periods(periods):
    """Return a boolean mask selecting the usable entries of ``periods``.

    A period is usable when it lies within ``[MIN_PERIOD, MAX_PERIOD]`` and its
    relative offset from every entry of ``BAD_PERIODS`` exceeds
    ``BAD_PERIOD_REL_RANGE``.

    Parameters
    ----------
    periods : array-like, 1-D
        Candidate periods in days.

    Returns
    -------
    numpy.ndarray of bool, same length as ``periods``.
    """
    periods = np.asarray(periods)

    # Relative offset of each candidate (rows) from each bad period (columns).
    relative_offset = np.abs(periods[:, None] / BAD_PERIODS - 1.0)
    clear_of_bad_periods = (relative_offset > BAD_PERIOD_REL_RANGE).all(axis=1)

    long_enough = periods >= MIN_PERIOD
    short_enough = periods <= MAX_PERIOD
    return clear_of_bad_periods & long_enough & short_enough


def extract_period_multiband_licu(band, t, flux, fluxerr, **kwargs):
    """Pick the best period from per-band periodograms and their weighted average.

    NOTE(review): this function calls ``periodogram_extractor``, which is only
    present as commented-out code in this cell; it raises NameError unless that
    name is defined elsewhere.

    Parameters
    ----------
    band, t, flux, fluxerr : array
        Per-observation band labels, times, fluxes and flux uncertainties.
    **kwargs
        Forwarded to ``reduced_chi2_extractor``.

    Returns
    -------
    dict
        ``period_0`` and ``period_s_to_n_0`` of the candidate with the highest
        signal-to-noise among the per-band peaks and the combined peak; both
        are 0.0 when no band yields a usable periodogram.
    """
    # NOTE(review): the original remark here said the date is offset to keep
    # <1 second precision, but no offset is applied anywhere in this function.

    # Sort by time and drop repeated timestamps (np.unique returns first indices).
    _, sort_index = np.unique(t, return_index=True)
    band, t, flux, fluxerr = band[sort_index], t[sort_index], flux[sort_index], fluxerr[sort_index]
    # Convert to magnitudes without a zero point; non-positive fluxes give NaN/inf.
    mag = -2.5 * np.log10(flux)
    magerr = 2.5 / np.log(10) * fluxerr / flux

    band_freqs = []
    band_periodograms = []
    band_weights = []
    periods = []
    s2n = []
    for b in BANDS:
        band_idx = band == b

        # At least few points in the band
        if np.count_nonzero(band_idx) < MIN_NOBS_BAND:
            continue
        
        band_t = t[band_idx]
        band_mag = mag[band_idx]
        band_magerr = magerr[band_idx]

        # Frequencies are converted to periods with 2*pi/freq, i.e. they are
        # treated as angular frequencies.
        freq, periodogram = periodogram_extractor.freq_power(band_t, band_mag)
        freq_idx = filter_periods(2*np.pi / freq)
        if not np.any(freq_idx):
            continue
        band_freqs.append(freq[freq_idx])
        band_periodograms.append(periodogram[freq_idx])

        # Weight of this band in the combined periodogram: its chi-squared,
        # recovered from the reduced value with (n - 1) degrees of freedom.
        reduced_chi2 = reduced_chi2_extractor(band_t, band_mag, band_magerr, **kwargs)[0]
        chi2 = reduced_chi2 * (len(band_t) - 1)
        band_weights.append(chi2)

        # Best peak of this band, with S/N measured against the band's own periodogram.
        idx_band_period = np.argmax(band_periodograms[-1])
        periods.append(2*np.pi / band_freqs[-1][idx_band_period])
        s2n.append(
            (band_periodograms[-1][idx_band_period] - np.mean(band_periodograms[-1])) / np.std(band_periodograms[-1], ddof=1)
        )

    if len(band_periodograms) == 0:
        return {"period_0": 0.0, "period_s_to_n_0": 0.0}

    # Resample every band onto the union of the frequency grids; outside a band's
    # own range its mean power is used.
    freq = np.unique(np.concatenate(band_freqs))
    periodograms = [np.interp(freq, f, p, left=np.mean(p), right=np.mean(p)) for f, p in zip(band_freqs, band_periodograms)]
    multiband_periodogram = np.average(periodograms, weights=band_weights, axis=0)

    idx_multiband_period = np.argmax(multiband_periodogram)
    multiband_s2n = (multiband_periodogram[idx_multiband_period] - np.mean(multiband_periodogram)) / np.std(multiband_periodogram, ddof=1)
    multiband_period = 2*np.pi / freq[idx_multiband_period]

    # The combined peak competes with the single-band peaks.
    periods.append(multiband_period)
    s2n.append(multiband_s2n)

    best_period = periods[np.argmax(s2n)]
    best_s2n = s2n[np.argmax(s2n)]

    # Return the features as a dictionary
    return {"period_0": best_period, "period_s_to_n_0": best_s2n}


def extract_period_singleband_licu(band, t, flux, fluxerr, **kwargs):
    """Best periodogram peak over all bands, each band analysed on its own.

    ``periodogram_extractor`` is run on the magnitudes of every band with at
    least ``MIN_NOBS_BAND`` points; its output is read as alternating
    (period, signal-to-noise) values. Among the peaks accepted by
    ``filter_periods`` the one with the highest signal-to-noise is returned.

    Parameters
    ----------
    band, t, flux : array
        Per-observation band labels, times and fluxes.
    fluxerr : array
        Ignored.
    **kwargs
        Forwarded to ``periodogram_extractor``.

    Returns
    -------
    dict
        ``period_0`` and ``period_s_to_n_0``; both 0.0 when nothing qualifies.
    """
    del fluxerr  # unused

    # Time-order the observations, keeping one entry per distinct timestamp.
    _, order = np.unique(t, return_index=True)
    band, t, flux = band[order], t[order], flux[order]

    peak_periods, peak_s2n = [], []
    for b in BANDS:
        in_band = band == b
        band_t = t[in_band]
        if len(band_t) < MIN_NOBS_BAND:
            continue

        band_mag = -2.5 * np.log10(flux[in_band])
        features = periodogram_extractor(band_t, band_mag, **kwargs)
        peak_periods.extend(features[::2])
        peak_s2n.extend(features[1::2])
    peak_periods = np.asarray(peak_periods)
    peak_s2n = np.asarray(peak_s2n)

    if len(peak_periods) == 0:
        return {"period_0": 0.0, "period_s_to_n_0": 0.0}

    usable = filter_periods(peak_periods)
    if not np.any(usable):
        return {"period_0": 0.0, "period_s_to_n_0": 0.0}

    usable_periods = peak_periods[usable]
    usable_s2n = peak_s2n[usable]
    winner = np.argmax(usable_s2n)
    return {"period_0": usable_periods[winner], "period_s_to_n_0": usable_s2n[winner]}


def extract_period_rband_licu(band, t, flux, fluxerr, **kwargs):
    """First usable r-band periodogram peak.

    ``periodogram_extractor`` is run on the r-band magnitudes; its output is
    read as alternating (period, signal-to-noise) values and the first peak
    accepted by ``filter_periods`` is returned.

    Parameters
    ----------
    band, t, flux : array
        Per-observation band labels, times and fluxes.
    fluxerr : array
        Not used.
    **kwargs
        Forwarded to ``periodogram_extractor``.

    Returns
    -------
    dict
        ``period_0`` and ``period_s_to_n_0``; both 0.0 when the r band has fewer
        than ``MIN_NOBS_BAND`` points or no peak passes the filter.
    """
    # Time-order the observations, keeping one entry per distinct timestamp.
    _, order = np.unique(t, return_index=True)
    band, t, flux = band[order], t[order], flux[order]

    is_r = band == 'r'
    t, flux = t[is_r], flux[is_r]

    if len(t) >= MIN_NOBS_BAND:
        mag = -2.5 * np.log10(flux)
        features = periodogram_extractor(t, mag, **kwargs)
        periods, s2n = features[::2], features[1::2]

        usable = filter_periods(periods)
        if np.any(usable):
            return {"period_0": periods[usable][0], "period_s_to_n_0": s2n[usable][0]}

    return {"period_0": 0.0, "period_s_to_n_0": 0.0}


def extract_period_multiband_astropy(band, t, flux, fluxerr, **kwargs):
    """Best period from astropy's multiband Lomb-Scargle periodogram.

    Fluxes are converted to magnitudes (no zero point), the periodogram is
    evaluated on astropy's automatic frequency grid, and only the frequencies
    whose periods pass ``filter_periods`` are considered.

    Parameters
    ----------
    band, t, flux, fluxerr : array
        Per-observation band labels, times, fluxes and flux uncertainties.
    **kwargs
        Ignored.

    Returns
    -------
    dict
        ``period_0`` (1 / best frequency) and ``period_s_to_n_0`` (peak height
        above the mean of the retained periodogram, in units of its standard
        deviation); both 0.0 when no frequency survives the filter.
    """
    del kwargs  # unused

    mag = -2.5 * np.log10(flux)
    magerr = 2.5 / np.log(10) * fluxerr / flux
    freq, power = LombScargleMultiband(t, mag, band, magerr).autopower()

    freq_idx = filter_periods(1 / freq)
    freq, power = freq[freq_idx], power[freq_idx]

    if len(freq) == 0:
        return {"period_0": 0.0, "period_s_to_n_0": 0.0}

    idx_period = np.argmax(power)
    period = 1 / freq[idx_period]
    # The statistics must be taken over the whole retained periodogram: the mean
    # and std of the single peak value would give 0 / NaN for every light curve.
    s2n = (power[idx_period] - np.mean(power)) / np.std(power, ddof=1)
    return {"period_0": period, "period_s_to_n_0": s2n}


def extract_period_rband_box_astropy(band, t, flux, fluxerr, **kwargs):
    """Best r-band period from astropy's Box Least Squares periodogram.

    Parameters
    ----------
    band, t, flux, fluxerr : array
        Per-observation band labels, times, fluxes and flux uncertainties.
    **kwargs
        Ignored.

    Returns
    -------
    dict
        ``period_0`` (period of the highest BLS power) and ``period_s_to_n_0``
        (peak height above the mean power, in units of its standard deviation);
        both 0.0 when the r band has fewer than ``MIN_NOBS_BAND`` points or
        spans 2 days or less.
    """
    del kwargs  # unused

    r_band = band == 'r'
    t, flux, fluxerr = t[r_band], flux[r_band], fluxerr[r_band]

    # Not enough points in the light curve
    if len(t) < MIN_NOBS_BAND:
        return {"period_0": 0.0, "period_s_to_n_0": 0.0}

    # Light curve is too short
    if np.ptp(t) <= 2.0:
        return {"period_0": 0.0, "period_s_to_n_0": 0.0}

    # NOTE(review): the period grid is chosen by autopower and the result is not
    # passed through filter_periods, so MIN_PERIOD / MAX_PERIOD are not enforced
    # here -- confirm that this is intended.
    result = BoxLeastSquares(t, flux, fluxerr).autopower(
        duration=np.geomspace(1.0 / (24 * 60), 1.0, 10),
    )

    idx_period = np.argmax(result.power)
    # The baseline is the mean over the whole periodogram; subtracting the mean
    # of the single peak value would make the signal-to-noise identically zero.
    s2n = (result.power[idx_period] - np.mean(result.power)) / np.std(result.power, ddof=1)
    return {"period_0": result.period[idx_period], "period_s_to_n_0": s2n}


# Frequency grid in cycles per day, uniform in frequency with 30 000 points.
# FREQS = 1.0 / np.linspace(0.5, 5 / (60 * 24), 1_000_000)  # 5 minutes to 12 hours
FREQS = np.linspace(1 / 0.5, 1 / (5 / (60 * 24)), 30_000)  # 5 minutes to 12 hours

# light-curve periodogram on the same grid, given as 2*pi*FREQS (presumably
# because licu.Periodogram expects angular frequencies -- confirm).
# In this cell it is referenced only by the assertion message in
# extract_period_single_band.
freqs_periodogram_extractor = licu.Periodogram(
    freqs=2.0 * np.pi * FREQS,
    fast=False,
)

def extract_period_single_band(band, t, flux, fluxerr, single_band, **kwargs):
    """Lomb-Scargle period search in one band on the fixed ``FREQS`` grid.

    Parameters
    ----------
    band, t, flux, fluxerr : array
        Per-observation band labels, times (MJD), fluxes and flux uncertainties.
    single_band : str
        Band to analyse; it also prefixes the keys of the returned dict.
    **kwargs
        Ignored.

    Returns
    -------
    dict
        ``{single_band}_period_0``, ``{single_band}_period_s_to_n_0`` and
        ``{single_band}_period_0_false_alarm_prob``. When the band has fewer
        than ``MIN_NOBS_BAND`` points, or no grid frequency passes
        ``filter_periods``, the placeholder values 1e9 / 0.0 / 1.0 are returned.
    """
    del kwargs  # unused

    # Every return path must produce the same three prefixed keys, because the
    # caller declares exactly those columns in `meta`.
    no_period = {
        f"{single_band}_period_0": 1e9,
        f"{single_band}_period_s_to_n_0": 0.0,
        f"{single_band}_period_0_false_alarm_prob": 1.0,
    }

    band_idx = band == single_band
    del band
    t, flux, fluxerr = t[band_idx], flux[band_idx], fluxerr[band_idx]
    # NOTE(review): after the offset the times are stored as float32, whose
    # resolution for values of a few hundred days is several seconds --
    # confirm that this precision is acceptable for the shortest periods.
    t = np.asarray(t - 60_000.0, dtype=np.float32)

    n = len(t)

    if n < MIN_NOBS_BAND:
        return no_period

    # The periodogram is computed on fluxes directly, not on magnitudes.
    ls = LombScargle(t, flux, fluxerr)
    power = ls.power(FREQS)
    assert power.shape == FREQS.shape, f"Power shape {power.shape} does not match FREQS shape {FREQS.shape}"

    freq_idx = filter_periods(1 / FREQS)
    freq, power = FREQS[freq_idx], power[freq_idx]

    if len(freq) == 0:
        return no_period

    idx_period = np.argmax(power)
    period = 1 / freq[idx_period]
    s2n = (power[idx_period] - np.mean(power)) / np.std(power, ddof=1)
    # NOTE(review): called with default arguments, so the frequency range assumed
    # for the false-alarm probability is not taken from FREQS -- confirm.
    period_0_false_alarm_prob = ls.false_alarm_probability(power[idx_period])

    return {
        f"{single_band}_period_0": period,
        f"{single_band}_period_s_to_n_0": s2n,
        f"{single_band}_period_0_false_alarm_prob": period_0_false_alarm_prob,
    }


# Chain one reduce per scanned band; each pass appends the three
# "<band>_period_0*" columns produced by extract_period_single_band,
# evaluated on the heliocentric times.
tmp_cat = lc_helio
for single_band in SCAN_BANDS:
    tmp_cat = tmp_cat.reduce(
        extract_period_single_band,
        "lc.band",
        "lc.helioMjd",
        f"lc.{phot_column}",
        f"lc.{err_column}",
        single_band=single_band,
        meta={f"{single_band}_period_0": float, f"{single_band}_period_s_to_n_0": float, f"{single_band}_period_0_false_alarm_prob": float},
        append_columns=True,
    )
lc_w_periods = tmp_cat
lc_w_periods

Unnamed: 0_level_0,diaObjectId,ra,dec,lc,nobs,nobs_r,amplitude_r,rchi2_r,g_period_0,g_period_s_to_n_0,g_period_0_false_alarm_prob,r_period_0,r_period_s_to_n_0,r_period_0_false_alarm_prob,i_period_0,i_period_s_to_n_0,i_period_0_false_alarm_prob,z_period_0,z_period_s_to_n_0,z_period_0_false_alarm_prob
npartitions=28,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
"Order: 3, Pixel: 2",int64[pyarrow],double[pyarrow],double[pyarrow],"nested<parentObjectId: [int64], coord_ra: [dou...",float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64
"Order: 5, Pixel: 4471",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 5, Pixel: 8989",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 5, Pixel: 8992",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


## Periodic Candidate Selection

In [24]:
import pandas as pd

def select_best_period(row, scan_bands=None):
    """Choose the period on which two bands agree best; for ``.apply(axis=1)``.

    For every pair of bands the relative difference between the first band's
    period and 0.25x, 0.5x, 1x, 2x or 4x the second band's period is computed;
    the pair with the smallest difference wins, and within that pair the band
    with the lower false-alarm probability provides the reported period.

    Bands without a measured period (non-finite, non-positive, or carrying the
    1e9 placeholder that ``extract_period_single_band`` returns) are excluded.
    Otherwise two such bands would agree perfectly with each other and always
    win the selection, hiding genuine agreement between the remaining bands.

    Parameters
    ----------
    row : pandas.Series
        Must contain ``{band}_period_0`` and ``{band}_period_0_false_alarm_prob``
        for every band in ``scan_bands``.
    scan_bands : sequence of str, optional
        Bands to compare; defaults to the module-level ``SCAN_BANDS``.

    Returns
    -------
    pandas.Series
        ``row`` extended with ``best_period_band``, ``period_0``,
        ``period_0_false_alarm_prob`` and ``min_rel_period_diff``. When fewer
        than two bands have a measured period these are ``""``, NaN, 1.0 and inf.
    """
    if scan_bands is None:
        scan_bands = SCAN_BANDS

    no_period_placeholder = 1e9

    def measured_period(b):
        # Return the band's period, or None when it holds no real measurement.
        period = row[f"{b}_period_0"]
        if np.isfinite(period) and 0.0 < period < no_period_placeholder:
            return period
        return None

    period_diff = {}
    for i_first_band, first_band in enumerate(scan_bands):
        first_period = measured_period(first_band)
        if first_period is None:
            continue
        for second_band in scan_bands[i_first_band + 1:]:
            second_period = measured_period(second_band)
            if second_period is None:
                continue
            # Allow the two bands to have locked onto different harmonics.
            period_diff[(first_band, second_band)] = min(
                np.abs(first_period - multiplier * second_period) / first_period
                for multiplier in (0.25, 0.5, 1.0, 2.0, 4.0)
            )

    if not period_diff:
        new_data = pd.Series({
            "best_period_band": "",
            "period_0": np.nan,
            "period_0_false_alarm_prob": 1.0,
            "min_rel_period_diff": np.inf,
        })
        return pd.concat([row, new_data])

    best_pair = min(period_diff, key=period_diff.get)
    min_rel_period_diff = period_diff[best_pair]
    first_band, second_band = best_pair
    first_band_prob = row[f"{first_band}_period_0_false_alarm_prob"]
    second_band_prob = row[f"{second_band}_period_0_false_alarm_prob"]
    if first_band_prob < second_band_prob:
        best_band, best_prob = first_band, first_band_prob
    else:
        best_band, best_prob = second_band, second_band_prob
    new_data = pd.Series({
        "best_period_band": best_band,
        "period_0": row[f"{best_band}_period_0"],
        "period_0_false_alarm_prob": best_prob,
        "min_rel_period_diff": min_rel_period_diff,
    })
    return pd.concat([row, new_data])


# Apply select_best_period row by row within every partition. `meta` is the
# existing schema extended with empty columns for the four new fields, in the
# order in which select_best_period appends them.
lc_period_cand = lc_w_periods.map_partitions(
    lambda df: df.apply(select_best_period, axis=1),
    meta=pd.concat(
        [
            lc_w_periods._ddf.meta,
            pd.DataFrame({
                "best_period_band": np.array([], dtype=str),
                "period_0": np.array([], dtype=float),
                "period_0_false_alarm_prob": np.array([], dtype=float),
                "min_rel_period_diff": np.array([], dtype=float),
            })
        ],
        axis=1
    ),
)
lc_period_cand

Unnamed: 0_level_0,diaObjectId,ra,dec,lc,nobs,nobs_r,amplitude_r,rchi2_r,g_period_0,g_period_s_to_n_0,g_period_0_false_alarm_prob,r_period_0,r_period_s_to_n_0,r_period_0_false_alarm_prob,i_period_0,i_period_s_to_n_0,i_period_0_false_alarm_prob,z_period_0,z_period_s_to_n_0,z_period_0_false_alarm_prob,best_period_band,period_0,period_0_false_alarm_prob,min_rel_period_diff
npartitions=28,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
"Order: 3, Pixel: 2",int64[pyarrow],double[pyarrow],double[pyarrow],"nested<parentObjectId: [int64], coord_ra: [dou...",float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,object,float64,float64,float64
"Order: 5, Pixel: 4471",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 5, Pixel: 8989",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 5, Pixel: 8992",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


## Plotting a few Phase-Folded Candidates

In [None]:
# Everything above is lazy; this is where the whole pipeline actually runs.
# The Dask client exists only for the duration of the computation.
with Client(n_workers=24, memory_limit="16GB", threads_per_worker=1) as client:
    display(client)
    cand_df = lc_period_cand.compute()
# Save the computed candidates next to the notebook, tagged with the release.
cand_df.to_parquet(f"periodic_cand-{release}.parquet")

Perhaps you already have a cluster running?
Hosting the HTTP server on port 42571 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:42571/status,

0,1
Dashboard: http://127.0.0.1:42571/status,Workers: 24
Total threads: 24,Total memory: 357.63 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:33519,Workers: 24
Dashboard: http://127.0.0.1:42571/status,Total threads: 24
Started: Just now,Total memory: 357.63 GiB

0,1
Comm: tcp://127.0.0.1:34275,Total threads: 1
Dashboard: http://127.0.0.1:42917/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:42721,
Local directory: /lscratch/kostya/dask-scratch-space/worker-hr_c9se3,Local directory: /lscratch/kostya/dask-scratch-space/worker-hr_c9se3

0,1
Comm: tcp://127.0.0.1:35569,Total threads: 1
Dashboard: http://127.0.0.1:42245/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:42033,
Local directory: /lscratch/kostya/dask-scratch-space/worker-zaqb1kce,Local directory: /lscratch/kostya/dask-scratch-space/worker-zaqb1kce

0,1
Comm: tcp://127.0.0.1:37609,Total threads: 1
Dashboard: http://127.0.0.1:46781/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:37837,
Local directory: /lscratch/kostya/dask-scratch-space/worker-csoyitm4,Local directory: /lscratch/kostya/dask-scratch-space/worker-csoyitm4

0,1
Comm: tcp://127.0.0.1:46447,Total threads: 1
Dashboard: http://127.0.0.1:38219/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:42901,
Local directory: /lscratch/kostya/dask-scratch-space/worker-_ky51rlt,Local directory: /lscratch/kostya/dask-scratch-space/worker-_ky51rlt

0,1
Comm: tcp://127.0.0.1:36819,Total threads: 1
Dashboard: http://127.0.0.1:43019/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:43533,
Local directory: /lscratch/kostya/dask-scratch-space/worker-7ft95bkq,Local directory: /lscratch/kostya/dask-scratch-space/worker-7ft95bkq

0,1
Comm: tcp://127.0.0.1:32989,Total threads: 1
Dashboard: http://127.0.0.1:45029/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:43459,
Local directory: /lscratch/kostya/dask-scratch-space/worker-zx2y3l_l,Local directory: /lscratch/kostya/dask-scratch-space/worker-zx2y3l_l

0,1
Comm: tcp://127.0.0.1:37161,Total threads: 1
Dashboard: http://127.0.0.1:35309/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:40961,
Local directory: /lscratch/kostya/dask-scratch-space/worker-ch5wn3mf,Local directory: /lscratch/kostya/dask-scratch-space/worker-ch5wn3mf

0,1
Comm: tcp://127.0.0.1:37167,Total threads: 1
Dashboard: http://127.0.0.1:33629/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:35675,
Local directory: /lscratch/kostya/dask-scratch-space/worker-yqm24_6o,Local directory: /lscratch/kostya/dask-scratch-space/worker-yqm24_6o

0,1
Comm: tcp://127.0.0.1:42437,Total threads: 1
Dashboard: http://127.0.0.1:43139/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:38493,
Local directory: /lscratch/kostya/dask-scratch-space/worker-1tz0s90p,Local directory: /lscratch/kostya/dask-scratch-space/worker-1tz0s90p

0,1
Comm: tcp://127.0.0.1:34179,Total threads: 1
Dashboard: http://127.0.0.1:46027/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:40151,
Local directory: /lscratch/kostya/dask-scratch-space/worker-4ouxvpu5,Local directory: /lscratch/kostya/dask-scratch-space/worker-4ouxvpu5

0,1
Comm: tcp://127.0.0.1:42691,Total threads: 1
Dashboard: http://127.0.0.1:33897/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:35093,
Local directory: /lscratch/kostya/dask-scratch-space/worker-sekg0r34,Local directory: /lscratch/kostya/dask-scratch-space/worker-sekg0r34

0,1
Comm: tcp://127.0.0.1:41269,Total threads: 1
Dashboard: http://127.0.0.1:36639/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:38869,
Local directory: /lscratch/kostya/dask-scratch-space/worker-rpbyhiye,Local directory: /lscratch/kostya/dask-scratch-space/worker-rpbyhiye

0,1
Comm: tcp://127.0.0.1:46817,Total threads: 1
Dashboard: http://127.0.0.1:36423/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:45363,
Local directory: /lscratch/kostya/dask-scratch-space/worker-rtkba4ew,Local directory: /lscratch/kostya/dask-scratch-space/worker-rtkba4ew

0,1
Comm: tcp://127.0.0.1:37473,Total threads: 1
Dashboard: http://127.0.0.1:34815/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:33043,
Local directory: /lscratch/kostya/dask-scratch-space/worker-xofg55un,Local directory: /lscratch/kostya/dask-scratch-space/worker-xofg55un

0,1
Comm: tcp://127.0.0.1:42547,Total threads: 1
Dashboard: http://127.0.0.1:39345/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:33573,
Local directory: /lscratch/kostya/dask-scratch-space/worker-3k6tss_y,Local directory: /lscratch/kostya/dask-scratch-space/worker-3k6tss_y

0,1
Comm: tcp://127.0.0.1:35837,Total threads: 1
Dashboard: http://127.0.0.1:45327/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:33709,
Local directory: /lscratch/kostya/dask-scratch-space/worker-29yxy8k_,Local directory: /lscratch/kostya/dask-scratch-space/worker-29yxy8k_

0,1
Comm: tcp://127.0.0.1:40487,Total threads: 1
Dashboard: http://127.0.0.1:33151/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:34763,
Local directory: /lscratch/kostya/dask-scratch-space/worker-40qlekd4,Local directory: /lscratch/kostya/dask-scratch-space/worker-40qlekd4

0,1
Comm: tcp://127.0.0.1:38591,Total threads: 1
Dashboard: http://127.0.0.1:35979/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:42527,
Local directory: /lscratch/kostya/dask-scratch-space/worker-ozwj1sp0,Local directory: /lscratch/kostya/dask-scratch-space/worker-ozwj1sp0

0,1
Comm: tcp://127.0.0.1:37933,Total threads: 1
Dashboard: http://127.0.0.1:39799/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:45113,
Local directory: /lscratch/kostya/dask-scratch-space/worker-rmmdqlcp,Local directory: /lscratch/kostya/dask-scratch-space/worker-rmmdqlcp

0,1
Comm: tcp://127.0.0.1:43515,Total threads: 1
Dashboard: http://127.0.0.1:40141/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:41045,
Local directory: /lscratch/kostya/dask-scratch-space/worker-p5jj17a9,Local directory: /lscratch/kostya/dask-scratch-space/worker-p5jj17a9

0,1
Comm: tcp://127.0.0.1:36047,Total threads: 1
Dashboard: http://127.0.0.1:41527/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:34291,
Local directory: /lscratch/kostya/dask-scratch-space/worker-3qwr3kgk,Local directory: /lscratch/kostya/dask-scratch-space/worker-3qwr3kgk

0,1
Comm: tcp://127.0.0.1:33813,Total threads: 1
Dashboard: http://127.0.0.1:41657/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:40003,
Local directory: /lscratch/kostya/dask-scratch-space/worker-j6y16vfw,Local directory: /lscratch/kostya/dask-scratch-space/worker-j6y16vfw

0,1
Comm: tcp://127.0.0.1:39343,Total threads: 1
Dashboard: http://127.0.0.1:43435/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:33795,
Local directory: /lscratch/kostya/dask-scratch-space/worker-spn1x41q,Local directory: /lscratch/kostya/dask-scratch-space/worker-spn1x41q

0,1
Comm: tcp://127.0.0.1:39239,Total threads: 1
Dashboard: http://127.0.0.1:44545/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:36671,
Local directory: /lscratch/kostya/dask-scratch-space/worker-8aj14fot,Local directory: /lscratch/kostya/dask-scratch-space/worker-8aj14fot


  return _gamma(NH) * W * (1 - Z) ** (0.5 * (NK - 1)) * np.sqrt(0.5 * NH * Z)
  tan_2omega_tau = (S2 - 2 * S * C) / (C2 - (C * C - S * S))
  S2w = tan_2omega_tau / np.sqrt(1 + tan_2omega_tau * tan_2omega_tau)


In [None]:
# Most significant candidates first; keep only very low false-alarm
# probabilities whose period is confirmed by a second band.
# The leading space before "and" matters: adjacent string literals are joined
# verbatim, and without it the expression would read "... < -10and ...".
cand_subset = cand_df.sort_values(by="period_0_false_alarm_prob", ascending=True).query(
    "log10(period_0_false_alarm_prob) < -10"
    " and min_rel_period_diff < 0.001"
)
cand_subset

Unnamed: 0_level_0,diaObjectId,ra,dec,lc,nobs,nobs_r,amplitude_r,rchi2_r,g_period_0,g_period_s_to_n_0,g_period_0_false_alarm_prob,r_period_0,r_period_s_to_n_0,r_period_0_false_alarm_prob,i_period_0,i_period_s_to_n_0,i_period_0_false_alarm_prob,z_period_0,z_period_s_to_n_0,z_period_0_false_alarm_prob,best_period_band,period_0,period_0_false_alarm_prob,min_rel_period_diff
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1


In [None]:
import matplotlib.pyplot as plt

COLORS = {'u': '#0c71ff', 'g': '#49be61', 'r': '#c61c00',
          'i': '#ffc200', 'z': '#f341a2', 'y': '#5d0000'}

FOLDED = True

fig_path = Path("periodic_cand")
fig_path.mkdir(exist_ok=True, parents=True)

# One two-panel figure per candidate: magnitude offsets versus time (left) and
# versus phase of the selected period (right). Each figure is saved as a PDF.
for healpix29, cand in cand_subset.iloc[:200].iterrows():
    # Id and coordinate column names depend on the catalog type, so take them
    # from the configuration instead of hard-coding the "object" names.
    obj_id = cand[id_column]
    obj_ra, obj_dec = cand[coord_columns[0]], cand[coord_columns[1]]

    flux = cand.lc["psfFlux"]
    phase = cand.lc["helioMjd"] % cand["period_0"] / cand["period_0"]
    mag = -2.5 * np.log10(flux) + 31.4
    magerr = 2.5 / np.log(10) * cand.lc["psfFluxErr"] / flux
    fig, (ax_mjd, ax_phase) = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    all_delta_mag = []
    for b in BANDS:
        # Non-positive fluxes have no magnitude, and their negative "magerr"
        # would otherwise slip through the error cut and poison the limits.
        idx = (cand.lc["band"] == b) & (flux > 0) & (magerr < 0.1)
        if not np.any(idx):
            continue
        mean_band_mag = np.mean(mag[idx])
        delta_mag = mag[idx] - mean_band_mag
        all_delta_mag.append(delta_mag)
        errorbar_kwargs = dict(
            y=delta_mag,
            yerr=magerr[idx],
            fmt="o",
            color=COLORS[b],
            label=f'{b} $- {mean_band_mag:.2f}$',
            alpha=0.3,
        )
        ax_mjd.errorbar(cand.lc["helioMjd"][idx], **errorbar_kwargs)
        ax_phase.errorbar(phase[idx], **errorbar_kwargs)

    if not all_delta_mag:
        # Nothing passed the cuts, so there is nothing to plot for this candidate.
        plt.close(fig)
        continue

    # The newline lives in the non-raw part: inside the raw f-string "\n" would
    # be rendered literally as a backslash followed by "n".
    fig.suptitle(
        f"OID: {obj_id}, RA: {obj_ra:.5f}, Dec: {obj_dec:.5f}\n"
        rf"Period: {cand['period_0']:.5f}$\,$d, L—S lg(F-P): {np.log10(cand['period_0_false_alarm_prob']):.1f}"
    )
    ax_mjd.set_ylabel("mag - mean(mag)")

    ax_mjd.set_xlabel("MJD")
    ax_mjd.set_xlim(np.min(cand.lc["helioMjd"])-1, np.max(cand.lc["helioMjd"])+1)
    ax_phase.set_xlabel("Phase")
    ax_phase.set_xlim(0, 1)

    # Clip the shared y-range to the central 98% of points; brighter is up.
    y_lim_min, y_lim_max = np.quantile(np.concatenate(all_delta_mag), [0.01, 0.99])
    ax_phase.set_ylim(y_lim_min, y_lim_max)
    ax_phase.invert_yaxis()
    ax_mjd.plot(ax_mjd.get_xlim(), [0, 0], color='k', linestyle='--', alpha=0.5)
    ax_phase.plot(ax_phase.get_xlim(), [0, 0], color='k', linestyle='--', alpha=0.5)
    ax_mjd.legend(loc='upper left')
    ax_mjd.grid()
    ax_phase.grid()

    fig.savefig(fig_path / f"{release}-{obj_id}.pdf")
    # Show the figure explicitly, then release it so that up to 200 figures do
    # not accumulate in memory.
    display(fig)
    plt.close(fig)

    print(obj_id)