## Calculate Rubin periods

This notebook runs a Lomb–Scargle periodogram on all the variable objects we found in the Rubin data.

In [1]:
import lsdb
import light_curve as licu
import numpy as np
import pandas as pd

from nested_pandas import NestedDtype
from pathlib import Path

### Load our objects

In [2]:
# `reset_index()` exposes the row index as a regular column of the DataFrame,
# so we can later retrieve all the Rubin data from difference and science imaging
variables_df = (
    pd.read_csv("periodic_objects.csv")
    .reset_index()
)
# Wrap the DataFrame in a LSDB Catalog
variables_catalog = lsdb.from_dataframe(variables_df)

### Load the Rubin data

In [3]:
def cast_nested(df, columns):
    """Return a new DataFrame with the given columns cast to ``NestedDtype``.

    For every name in ``columns`` the nested dtype is derived from the
    column's current dtype via ``NestedDtype.from_pandas_arrow_dtype``.
    Columns that are not listed are carried over unchanged.
    """
    casted = {}
    for name in columns:
        nested_dtype = NestedDtype.from_pandas_arrow_dtype(df.dtypes[name])
        casted[name] = df[name].astype(nested_dtype)
    return df.assign(**casted)

In [6]:
# Release tag and the directory holding the HATS catalogs for that release
drp_release = "w_2025_10"
base_dir = Path("/sdf/data/rubin/shared/lsdb_commissioning/hats")
hats_dir = base_dir.joinpath(drp_release)

In [7]:
# Read the Rubin object light curves, cast the `forcedSource` column into the
# NestedFrame type with the `cast_nested` utility method, and crossmatch our
# objects against them (0.2 arcsec radius) to get the Rubin data for our objects.
object_lc = variables_catalog.crossmatch(
    lsdb.read_hats(
        hats_dir / "object_lc",
        margin_cache=hats_dir / "object_lc_5arcs",
        columns=["objectId", "coord_ra", "coord_dec", "forcedSource"],
    ).map_partitions(cast_nested, columns=["forcedSource"]),
    radius_arcsec=0.2,
    suffixes=["", ""],
)

### Filter out invalid light curves

Let's drop all observations with a NaN PSF magnitude or magnitude error, and all objects whose light curves have 10 or fewer observations.

In [8]:
# First discard the observations that lack a PSF magnitude or its error...
object_lc = object_lc.dropna(subset=["forcedSource.psfMag", "forcedSource.psfMagErr"])
# ...then discard the objects whose `forcedSource` column is missing.
object_lc = object_lc.dropna(subset=["forcedSource"])

Filter to r-band only:

In [9]:
# Keep only the entries of the nested `forcedSource` column whose band is 'r'
object_lc = object_lc.query("forcedSource.band == 'r'")

And make sure we have enough observations per light curve:

In [10]:
# Count the observations in each light curve from the size of its time array
nobs = object_lc.reduce(
    lambda mjd: dict(nobs=mjd.size),
    "forcedSource.midpointMjdTai",
    meta=dict(nobs=int),
)
# Keep only the objects with strictly more than 10 observations
object_lc = object_lc[nobs["nobs"] > 10]

### Run the LombScargle

Here we compute the period, along with a few summary statistics, for each object in the Rubin data.

In [None]:
# Bundle every feature we want into a single extractor, so that one call on a
# light curve evaluates all of them. The output names are in `extractor.names`.
extractor = licu.Extractor(
    # Periodogram settings below are passed straight to light-curve.
    # NOTE(review): `peaks=1` presumably keeps only the strongest peak, while
    # `max_freq_factor` and `resolution` control the frequency grid — confirm
    # the exact semantics in the light-curve documentation.
    licu.Periodogram(
        peaks=1,
        max_freq_factor=10.0,
        resolution=50.0,
    ),
    licu.WeightedMean(),  # Mean magnitude
    licu.Eta(),  # Von Neumann's eta statistics
    licu.ExcessVariance(),  # Excess variance statistics
    licu.Amplitude(),  # 0.5 * [max(mag) - min(mag)]
)


# light-curve package requires all arrays to be the same dtype.
# It also requires the time array to be ordered and to have no duplicates.
def extract_features(mjd, mag, magerr, **kwargs):
    """Evaluate the module-level ``extractor`` on a single light curve.

    Parameters
    ----------
    mjd : array-like
        Observation times (the notebook passes ``forcedSource.midpointMjdTai``).
    mag : array-like
        Magnitudes, one per observation.
    magerr : array-like
        Magnitude uncertainties, one per observation.
    **kwargs
        Forwarded unchanged to the ``extractor`` call.

    Returns
    -------
    dict
        Maps each name in ``extractor.names`` to the matching feature value.
    """
    # We offset date, so we still would have <1 second precision
    t = np.asarray(mjd - 60000, dtype=np.float64)
    # Cast magnitudes and errors to the same dtype as the time array: the
    # extractor rejects inputs with mismatched dtypes (e.g. float32 magnitudes
    # next to float64 times).
    m = np.asarray(mag, dtype=np.float64)
    sigma = np.asarray(magerr, dtype=np.float64)
    # `np.unique` returns the first-occurrence indices of the sorted unique
    # times, which both orders the light curve and removes duplicate timestamps.
    _, sort_index = np.unique(t, return_index=True)
    features = extractor(
        t[sort_index],
        m[sort_index],
        sigma[sort_index],
        **kwargs,
    )
    # Return the features as a dictionary
    return dict(zip(extractor.names, features))


# Apply `extract_features` to the time, magnitude and magnitude-error arrays of
# every light curve. Each extracted feature is declared as float64 in `meta`,
# and the results are appended to the existing columns.
features = object_lc.reduce(
    extract_features,
    "forcedSource.midpointMjdTai",
    "forcedSource.psfMag",
    "forcedSource.psfMagErr",
    meta=dict.fromkeys(extractor.names, np.float64),
    append_columns=True,
)
features

In [12]:
# Trigger the computation and display the resulting table
features.compute()

In [13]:
# Save all calculated Rubin periods to disk as a HATS catalog named "rubin_periods".
# NOTE(review): `features` is the catalog object, not the result of the earlier
# `.compute()` call, so this presumably re-runs the whole pipeline — confirm.
features.to_hats("rubin_periods", catalog_name="rubin_periods")