## Find variable objects in Rubin

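This notebook finds the closest matches for our 10 variable objects in the Rubin `object_lc` catalog, then estimates each object's period from its r-band forced photometry and saves the results.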

In [None]:
import lsdb
import numpy as np
import pandas as pd
import pyarrow as pa

from astropy.timeseries import LombScargle
from nested_pandas import NestedDtype
from pathlib import Path

In [None]:
# Load the list of periodic objects.
variables_df = pd.read_csv("periodic_objects.csv")
# Expose the row index as a regular "index" column: it identifies each object
# so that all of its Rubin data (difference and science imaging) can be
# retrieved later, and it is the key used below to join the periods back.
variables_df = variables_df.reset_index()
# Wrap the DataFrame in an LSDB Catalog so it can be crossmatched.
variables_catalog = lsdb.from_dataframe(variables_df)

In [None]:
# Root directory holding the HATS catalogs, one sub-directory per release.
base_dir = Path("/sdf/data/rubin/shared/lsdb_commissioning/hats")
# DRP release tag; it selects which sub-directory of `base_dir` is read.
drp_release = "w_2025_10"
hats_dir = base_dir.joinpath(drp_release)

In [None]:
# Open the `object_lc_x_ps1` HATS catalog of the selected release.
# (Presumably Rubin object light curves crossmatched with PS1 -- inferred from
# the catalog name only; confirm against the data documentation.)
# The bare name on the last line makes the notebook display the catalog.
object_lc_x_ps1 = lsdb.read_hats(hats_dir / "object_lc_x_ps1")
object_lc_x_ps1

In [None]:
def cast_nested(df, columns):
    """Return a copy of ``df`` with the given columns cast to a nested dtype.

    Each listed column is converted with ``astype`` to the ``NestedDtype``
    built from that column's current dtype via
    ``NestedDtype.from_pandas_arrow_dtype``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns to convert. It is not modified in place.
    columns : iterable of str
        Names of the columns to cast. An empty iterable yields a plain copy.

    Returns
    -------
    pandas.DataFrame
        New frame in which the listed columns are replaced by their
        nested-dtype versions; all other columns are unchanged.
    """
    converted = {}
    for name in columns:
        nested_dtype = NestedDtype.from_pandas_arrow_dtype(df.dtypes[name])
        converted[name] = df[name].astype(nested_dtype)
    return df.assign(**converted)

In [None]:
# Open the Rubin `object_lc` catalog together with its `object_lc_5arcs`
# margin cache, loading only the columns needed in this notebook, then use the
# `cast_nested` helper on every partition to cast `forcedSource` to a nested
# dtype.
object_lc = lsdb.read_hats(
    hats_dir / "object_lc",
    margin_cache=hats_dir / "object_lc_5arcs",
    columns=["objectId", "coord_ra", "coord_dec", "forcedSource"],
).map_partitions(cast_nested, columns=["forcedSource"])

In [None]:
# Get the Rubin data for our objects by crossmatching them against `object_lc`.
# No matching radius or neighbor count is passed, so lsdb's defaults apply.
# The empty suffixes leave column names unchanged on both sides (assumes the
# two catalogs share no column names -- TODO confirm).
# NOTE: this rebinds `variables_catalog`; re-running the cell crossmatches the
# already-matched catalog again.
variables_catalog = variables_catalog.crossmatch(object_lc, suffixes=("", ""))

In [None]:
# Use the "r" band only to get the periods: the query filters on the `band`
# sub-column of the nested `forcedSource` column.
r_band = variables_catalog.query("forcedSource.band == 'r'")

In [None]:
def compute_periods(index_rubin, true_period, mjd, flux):
    """Locate the strongest Lomb-Scargle peak near an object's known period.

    The periodogram is evaluated on 100 uniformly spaced frequencies between
    0.9 and 1.1 times ``1 / true_period``; the frequency with the highest
    power gives the returned period. Flux uncertainties are not used.

    Parameters
    ----------
    index_rubin
        Identifier of the object; passed through unchanged as ``"index"``.
    true_period : float
        Reference period around which to search. Must be non-zero and in the
        same time unit as ``mjd`` (presumably days -- confirm with the input
        table).
    mjd : array-like
        Observation times of the light curve.
    flux : array-like
        Flux measurements, one per entry of ``mjd``.

    Returns
    -------
    dict
        ``{"index": index_rubin, "maxpower": <highest power on the grid>,
        "period": <1 / frequency of that highest power>}``.
    """
    # Search only a narrow frequency window centered on the known frequency.
    center_freq = 1 / true_period
    freq_grid = np.linspace(0.9 * center_freq, 1.1 * center_freq, 100)

    spectrum = LombScargle(mjd, flux).power(freq_grid)
    peak = np.argmax(spectrum)

    return {
        "index": index_rubin,
        "maxpower": np.max(spectrum),
        "period": 1 / freq_grid[peak],
    }

In [None]:
# Output schema handed to `reduce`: one entry per key of the dict returned by
# `compute_periods`, giving the Arrow-backed dtype of the resulting column.
meta = {
    "index": pd.ArrowDtype(pa.int64()),
    "maxpower": pd.ArrowDtype(pa.float64()),
    "period": pd.ArrowDtype(pa.float64()),
}

# Run `compute_periods` over the r-band catalog. The column names are given
# positionally, in the order of the function's parameters
# (index_rubin, true_period, mjd, flux).
periods = r_band.reduce(
    compute_periods,
    "index",
    "true_period",
    "forcedSource.midpointMjdTai",
    "forcedSource.psfFlux",
    meta=meta,
)

In [None]:
# Attach the computed periods to the crossmatched catalog, matching rows on
# the `index` column of each side.
# NOTE(review): both sides carry an `index` column and the suffixes are empty,
# so no renaming happens -- confirm how lsdb handles the duplicated name.
results = variables_catalog.join(
    periods, left_on="index", right_on="index", suffixes=("", "")
)

In [None]:
# Save all calculated Rubin periods to disk: write `results` as a HATS catalog
# named "rubin_variables" at the relative path "rubin_variables".
results.to_hats("rubin_variables", catalog_name="rubin_variables")