## Nesting

Nest the DIA sources and DIA forced sources inside the DIA object catalog.

In [None]:
import tempfile

from dask.distributed import Client
from hats_import import pipeline_with_client
from hats_import.catalog import ImportArguments
from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments
from pathlib import Path

In [None]:
# Filesystem locations used by this notebook.
# `hats_dir` holds the HATS catalogs that are read and written below.
hats_dir = Path("/sdf/data/rubin/shared/lsdb_commissioning/hats/PPDB_v1")
ppdb_dir = Path("/sdf/scratch/rubin/ppdb/data/lsstcam")

In [None]:
# Scratch directory shared by the Dask workers and the margin-cache outputs.
# The TemporaryDirectory object is kept in `tmp_path` so the directory stays
# alive for as long as the notebook needs it.
tmp_path = tempfile.TemporaryDirectory()
tmp_dir = Path(tmp_path.name)

# Local Dask cluster: 16 single-threaded workers using the scratch directory.
client = Client(
    local_directory=tmp_dir,
    n_workers=16,
    threads_per_worker=1,
)

Prepare margin caches for both source catalogs so that we can nest without losing data:

In [None]:
# Margin threshold (arcseconds) shared by both source margin caches; it is
# also embedded in the margin-cache artifact names.
margin_radius_arcsec: int = 2

In [None]:
# Build the margin cache for the DIA source catalog inside the scratch directory.
args = MarginCacheArguments(
    input_catalog_path=hats_dir / "dia_source",
    output_artifact_name=f"dia_source_{margin_radius_arcsec}arcs",
    output_path=tmp_dir,
    margin_threshold=margin_radius_arcsec,
    resume=False,
    simple_progress_bar=True,
)
pipeline_with_client(args, client)

In [None]:
# Build the margin cache for the DIA forced source catalog inside the scratch directory.
args = MarginCacheArguments(
    input_catalog_path=hats_dir / "dia_forced_source",
    output_artifact_name=f"dia_forced_source_{margin_radius_arcsec}arcs",
    output_path=tmp_dir,
    margin_threshold=margin_radius_arcsec,
    resume=False,
    simple_progress_bar=True,
)
pipeline_with_client(args, client)

Load the base catalogs, nest, and then use `map_partitions` to sort the sources chronologically:

In [None]:
# `lsdb` is used by every `read_hats` call below but is not imported in the
# notebook's import cell, so import it here to avoid a NameError.
import lsdb

# Object catalog: the base table that the sources get nested into.
dia_object_cat = lsdb.read_hats(hats_dir / "dia_object")

# Source catalogs, each paired with the margin cache written to the scratch
# directory by the margin-cache cells. `tmp_dir` is already a Path.
dia_source_cat = lsdb.read_hats(
    hats_dir / "dia_source",
    margin_cache=tmp_dir / f"dia_source_{margin_radius_arcsec}arcs",
)

dia_forced_source_cat = lsdb.read_hats(
    hats_dir / "dia_forced_source",
    margin_cache=tmp_dir / f"dia_forced_source_{margin_radius_arcsec}arcs",
)

In [None]:
def sort_nested_sources(df, source_cols, mjd_col="midpointMjdTai"):
    """Sort the rows of each nested source column by index, then by time.

    Each column named in ``source_cols`` is flattened, sorted by the flat
    frame's index and then by ``mjd_col``, removed from ``df``, and nested
    back under its original name.

    Parameters
    ----------
    df
        Frame holding the nested columns. It must support ``.nest.to_flat()``
        on those columns as well as ``drop`` and ``add_nested``.
    source_cols
        Names of the nested columns to sort. When empty, ``df`` is returned
        unchanged.
    mjd_col
        Name of the time column inside each nested column. Defaults to
        ``"midpointMjdTai"``.

    Returns
    -------
    The frame with every listed nested column replaced by its sorted version.
    """
    for source_col in source_cols:
        flat_sources = df[source_col].nest.to_flat()
        # Sort by the (named) index first so rows of one object stay together,
        # then by time within each object.
        sorted_sources = flat_sources.sort_values([flat_sources.index.name, mjd_col])
        # Swap the nested column for its sorted counterpart.
        df = df.drop(columns=[source_col]).add_nested(sorted_sources, source_col)
    return df

In [None]:
# Nest both source catalogs into the object catalog on the shared object ID,
# then sort each nested column partition by partition.
dia_object_cat_nested = (
    dia_object_cat.join_nested(
        dia_source_cat,
        left_on="diaObjectId",
        right_on="diaObjectId",
        nested_column_name="diaSource",
    )
    .join_nested(
        dia_forced_source_cat,
        left_on="diaObjectId",
        right_on="diaObjectId",
        nested_column_name="diaForcedSource",
    )
    .map_partitions(
        lambda partition: sort_nested_sources(
            partition, source_cols=["diaSource", "diaForcedSource"]
        )
    )
)
# Display the resulting catalog in the notebook.
dia_object_cat_nested

Save the result to disk, setting our desired default columns:

In [None]:
# Comma-separated list of the columns to use as the catalog's default columns.
hats_cols_default = ",".join(
    [
        # Object-level columns.
        "dec",
        "decErr",
        "diaObjectId",
        "ra",
        "raErr",
        "u_psfFluxMean",
        "g_psfFluxMean",
        "r_psfFluxMean",
        "i_psfFluxMean",
        "z_psfFluxMean",
        "y_psfFluxMean",
        "u_psfFluxMeanErr",
        "g_psfFluxMeanErr",
        "r_psfFluxMeanErr",
        "i_psfFluxMeanErr",
        "z_psfFluxMeanErr",
        "y_psfFluxMeanErr",
        "nDiaSources",
        "validityStart",
        # Selected sub-columns of the nested `diaSource` column.
        "diaSource.apFlux",
        "diaSource.apFluxErr",
        "diaSource.band",
        "diaSource.dec",
        "diaSource.decErr",
        "diaSource.detector",
        "diaSource.diaSourceId",
        "diaSource.isDipole",
        "diaSource.ixx",
        "diaSource.ixxPSF",
        "diaSource.iyy",
        "diaSource.iyyPSF",
        "diaSource.ixy",
        "diaSource.ixyPSF",
        "diaSource.midpointMjdTai",
        "diaSource.psfFlux",
        "diaSource.psfFluxErr",
        "diaSource.scienceFlux",
        "diaSource.scienceFluxErr",
        "diaSource.ra",
        "diaSource.raErr",
        "diaSource.visit",
        "diaSource.x",
        "diaSource.xErr",
        "diaSource.y",
        "diaSource.yErr",
        "diaSource.scienceMag",
        "diaSource.scienceMagErr",
        # The whole nested `diaForcedSource` column.
        "diaForcedSource",
    ]
)

In [None]:
# Write the nested catalog and record the desired default columns.
# `hats_cols_default` is a comma-separated string, so split it back into the
# individual column names expected by `default_columns`.
dia_object_cat_nested.to_hats(
    hats_dir / "dia_object_lc_intermediate",
    catalog_name="dia_object_lc",
    default_columns=hats_cols_default.split(","),
)

Finally, reimport the intermediate catalog with a better row-group strategy:

In [None]:
# Re-import the intermediate catalog into `hats_dir` with new partitioning
# and row-group settings.
args = ImportArguments.reimport_from_hats(
    hats_dir / "dia_object_lc_intermediate",
    output_dir=hats_dir,
    # Partitioning settings.
    highest_healpix_order=11,
    pixel_threshold=15_000,
    # Row-group and skymap settings.
    row_group_kwargs={"subtile_order_delta": 1},
    skymap_alt_orders=[2, 4, 6],
    # Run settings.
    resume=False,
    simple_progress_bar=True,
)
pipeline_with_client(args, client)

In [None]:
# DESTRUCTIVE: recursively deletes the three input catalogs read earlier in
# this notebook and the intermediate nested catalog from `hats_dir`.
# NOTE(review): run only after confirming the reimport above finished
# successfully, since the inputs cannot be recovered from this notebook.
%rm -rf $hats_dir/dia_object
%rm -rf $hats_dir/dia_source
%rm -rf $hats_dir/dia_forced_source
%rm -rf $hats_dir/dia_object_lc_intermediate

In [None]:
# Shut down the Dask client first so nothing is still using the scratch
# directory, then explicitly remove the temporary directory created at the
# start of the notebook (it also holds the margin caches).
client.close()
tmp_path.cleanup()