# Nesting

Create catalogs for `dia_object` and `object` with nested sources and forced sources.

In [None]:
import os
import shutil
import tempfile
from pathlib import Path

import lsdb
from dask.distributed import Client
from hats_import import pipeline_with_client
from hats_import.catalog import ImportArguments
from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments

In [None]:
# Both settings come from the environment; a missing variable raises KeyError
# right here instead of failing later in the pipeline.
VERSION = os.environ["VERSION"]
OUTPUT_DIR = Path(os.environ["OUTPUT_DIR"])

print(f"VERSION: {VERSION}", f"OUTPUT_DIR: {OUTPUT_DIR}", sep="\n")

# Per-version subdirectories under OUTPUT_DIR: <OUTPUT_DIR>/raw/<VERSION> and
# <OUTPUT_DIR>/hats/<VERSION>.
raw_dir, hats_dir = (OUTPUT_DIR / stage / VERSION for stage in ("raw", "hats"))

In [None]:
# Scratch directory: handed to the Dask workers as their local directory and
# reused further down as the output location of the temporary margin caches.
tmp_path = tempfile.TemporaryDirectory()
tmp_dir = tmp_path.name

client = Client(
    n_workers=16,
    threads_per_worker=1,
    local_directory=tmp_dir,
)

In [None]:
def sort_nested_sources(df, source_cols, mjd_col="midpointMjdTai"):
    """Re-order the rows of each nested source column by object and time.

    For every name in ``source_cols`` the nested column is flattened, the flat
    rows are sorted by the flat frame's index and then by ``mjd_col``, and the
    sorted rows are nested back into ``df`` under the same column name.

    Parameters
    ----------
    df
        Frame whose nested columns expose ``.nest.to_flat()`` and which
        provides ``drop(columns=...)`` and ``add_nested(frame, name)``
        (presumably a nested-pandas frame for one catalog partition --
        confirm against the caller).
    source_cols : iterable of str
        Names of the nested columns to sort.
    mjd_col : str, default "midpointMjdTai"
        Column of the flattened sources used as the secondary (time) sort key.

    Returns
    -------
    The frame produced by the last ``add_nested`` call, or ``df`` unchanged
    when ``source_cols`` is empty.
    """
    for source_col in source_cols:
        flat_sources = df[source_col].nest.to_flat()
        # The index of the flat frame is referenced by name, so it must be a
        # named index; rows of the same index value are then ordered by time.
        sort_keys = [flat_sources.index.name, mjd_col]
        sorted_sources = flat_sources.sort_values(sort_keys)
        # Drop the unsorted nested column before re-adding it under the same name.
        df = df.drop(columns=[source_col]).add_nested(sorted_sources, source_col)
    return df

### Generate margin caches

To nest the sources accurately we need intermediate margin caches for the source catalogs being nested (`dia_source`, `dia_object_forced_source` and `object_forced_source`). They are stored temporarily in a scratch directory, which is erased at the end of the notebook.

In [None]:
# Passed as `margin_threshold` when generating the margin caches below and
# embedded in their artifact names ("<catalog>_<radius>arcs"); in arcseconds,
# going by the variable name.
margin_radius_arcsec = 2

In [None]:
# Generate one temporary margin cache per source catalog. Each cache is written
# to the scratch directory as "<catalog>_<radius>arcs", which is the name the
# `read_hats` calls further down use to locate it.
for source_catalog_name in (
    "dia_source",
    "dia_object_forced_source",
    "object_forced_source",
):
    args = MarginCacheArguments(
        input_catalog_path=hats_dir / source_catalog_name,
        output_path=tmp_dir,
        margin_threshold=margin_radius_arcsec,
        output_artifact_name=f"{source_catalog_name}_{margin_radius_arcsec}arcs",
        simple_progress_bar=True,
        resume=False,
    )
    pipeline_with_client(args, client)

### dia_object with nested sources

In [None]:
# Margin caches for the two source catalogs are looked up in the scratch
# directory under "<catalog>_<radius>arcs".
margin_cache_dir = Path(tmp_dir)
dia_source_margin = margin_cache_dir / f"dia_source_{margin_radius_arcsec}arcs"
dia_object_forced_source_margin = (
    margin_cache_dir / f"dia_object_forced_source_{margin_radius_arcsec}arcs"
)

dia_object_cat = lsdb.read_hats(hats_dir / "dia_object")
dia_source_cat = lsdb.read_hats(
    hats_dir / "dia_source", margin_cache=dia_source_margin
)
dia_object_forced_source_cat = lsdb.read_hats(
    hats_dir / "dia_object_forced_source",
    margin_cache=dia_object_forced_source_margin,
)

In [None]:
# First nest the detections, then the forced photometry; both joins match
# rows on `diaObjectId`.
dia_object_with_sources = dia_object_cat.join_nested(
    dia_source_cat,
    left_on="diaObjectId",
    right_on="diaObjectId",
    nested_column_name="diaSource",
)
dia_object_cat_nested = dia_object_with_sources.join_nested(
    dia_object_forced_source_cat,
    left_on="diaObjectId",
    right_on="diaObjectId",
    nested_column_name="diaObjectForcedSource",
)
dia_object_cat_nested

Also, for each object, sort its nested sources by timestamp (`midpointMjdTai`):

In [None]:
# Apply the per-partition sort to both nested columns.
dia_object_cat_nested = dia_object_cat_nested.map_partitions(
    lambda partition: sort_nested_sources(
        partition,
        source_cols=["diaSource", "diaObjectForcedSource"],
    )
)

And save the resulting catalog to disk:

In [None]:
# Written to an intermediate location; it is reimported from there afterwards.
dia_object_lc_intermediate_dir = hats_dir / "dia_object_lc_intermediate"
dia_object_cat_nested.to_hats(
    dia_object_lc_intermediate_dir,
    catalog_name="dia_object_lc",
)

Finally, reimport the intermediate catalog with a new pixel threshold, and select the columns to be loaded by default:

In [None]:
# One column name per line; the names are joined into the comma-separated
# string stored under the `hats_cols_default` catalog property.
default_column_names = """dec
diaObjectForcedSource.band
diaObjectForcedSource.coord_dec
diaObjectForcedSource.coord_ra
diaObjectForcedSource.diff_PixelFlags_nodataCenter
diaObjectForcedSource.forcedSourceOnDiaObjectId
diaObjectForcedSource.invalidPsfFlag
diaObjectForcedSource.midpointMjdTai
diaObjectForcedSource.pixelFlags_bad
diaObjectForcedSource.pixelFlags_cr
diaObjectForcedSource.pixelFlags_crCenter
diaObjectForcedSource.pixelFlags_edge
diaObjectForcedSource.pixelFlags_interpolated
diaObjectForcedSource.pixelFlags_interpolatedCenter
diaObjectForcedSource.pixelFlags_nodata
diaObjectForcedSource.pixelFlags_saturated
diaObjectForcedSource.pixelFlags_saturatedCenter
diaObjectForcedSource.pixelFlags_suspect
diaObjectForcedSource.pixelFlags_suspectCenter
diaObjectForcedSource.psfDiffFlux
diaObjectForcedSource.psfDiffFlux_flag
diaObjectForcedSource.psfDiffFluxErr
diaObjectForcedSource.psfFlux
diaObjectForcedSource.psfFlux_flag
diaObjectForcedSource.psfFluxErr
diaObjectForcedSource.psfMag
diaObjectForcedSource.psfMagErr
diaObjectForcedSource.visit
diaObjectId
diaSource.band
diaSource.centroid_flag
diaSource.coord_dec
diaSource.coord_ra
diaSource.dec
diaSource.decErr
diaSource.diaSourceId
diaSource.forced_PsfFlux_flag
diaSource.forced_PsfFlux_flag_edge
diaSource.forced_PsfFlux_flag_noGoodPixels
diaSource.midpointMjdTai
diaSource.pixelFlags
diaSource.pixelFlags_bad
diaSource.pixelFlags_cr
diaSource.pixelFlags_crCenter
diaSource.pixelFlags_edge
diaSource.pixelFlags_interpolated
diaSource.pixelFlags_interpolatedCenter
diaSource.pixelFlags_nodata
diaSource.pixelFlags_nodataCenter
diaSource.pixelFlags_offimage
diaSource.pixelFlags_saturated
diaSource.pixelFlags_saturatedCenter
diaSource.pixelFlags_streak
diaSource.pixelFlags_streakCenter
diaSource.pixelFlags_suspect
diaSource.pixelFlags_suspectCenter
diaSource.psfFlux
diaSource.psfFlux_flag
diaSource.psfFlux_flag_edge
diaSource.psfFlux_flag_noGoodPixels
diaSource.psfFluxErr
diaSource.psfMag
diaSource.psfMagErr
diaSource.ra
diaSource.raErr
diaSource.reliability
diaSource.scienceFlux
diaSource.scienceFluxErr
diaSource.scienceMag
diaSource.scienceMagErr
diaSource.shape_flag
diaSource.shape_flag_no_pixels
diaSource.shape_flag_not_contained
diaSource.shape_flag_parent_source
diaSource.snr
diaSource.trail_flag_edge
diaSource.visit
diaSource.x
diaSource.xErr
diaSource.y
diaSource.yErr
nDiaSources
ra
radecMjdTai
tract
""".splitlines()
hats_cols_default = ",".join(default_column_names)

In [None]:
# Reimport settings for the intermediate catalog; the default-column list is
# attached as an extra catalog property.
reimport_options = {
    "output_dir": hats_dir,
    "highest_healpix_order": 11,
    "pixel_threshold": 15_000,
    "skymap_alt_orders": [2, 4, 6],
    "row_group_kwargs": {"subtile_order_delta": 1},
    "addl_hats_properties": {"hats_cols_default": hats_cols_default},
}
args = ImportArguments.reimport_from_hats(
    hats_dir / "dia_object_lc_intermediate", **reimport_options
)
pipeline_with_client(args, client)

In [None]:
# Remove the intermediate catalog now that it has been reimported. Done in
# Python rather than through a shell alias so that paths containing spaces or
# shell metacharacters are handled correctly and a failed removal raises.
shutil.rmtree(hats_dir / "dia_object_lc_intermediate")

### object with nested sources

In [None]:
# The forced-source margin cache is looked up in the scratch directory under
# "<catalog>_<radius>arcs".
object_forced_source_margin = (
    Path(tmp_dir) / f"object_forced_source_{margin_radius_arcsec}arcs"
)

object_cat = lsdb.read_hats(hats_dir / "object")
object_forced_source_cat = lsdb.read_hats(
    hats_dir / "object_forced_source",
    margin_cache=object_forced_source_margin,
)

In [None]:
# Nest the forced photometry into the object table, matching on `objectId`.
object_join_options = {
    "left_on": "objectId",
    "right_on": "objectId",
    "nested_column_name": "objectForcedSource",
}
object_cat_nested = object_cat.join_nested(
    object_forced_source_cat, **object_join_options
)
object_cat_nested

Also, for each object, sort its nested sources by timestamp (`midpointMjdTai`):

In [None]:
# Apply the per-partition sort to the single nested column.
object_cat_nested = object_cat_nested.map_partitions(
    lambda partition: sort_nested_sources(
        partition,
        source_cols=["objectForcedSource"],
    )
)

And save the resulting catalog to disk:

In [None]:
# Written to an intermediate location; it is reimported from there afterwards.
object_lc_intermediate_dir = hats_dir / "object_lc_intermediate"
object_cat_nested.to_hats(
    object_lc_intermediate_dir,
    catalog_name="object_lc",
)

Finally, reimport the intermediate catalog with a new pixel threshold, and select the columns to be loaded by default:

In [None]:
# One column name per line; the names are joined into the comma-separated
# string stored under the `hats_cols_default` catalog property.
default_column_names = """coord_dec
coord_decErr
coord_ra
coord_raErr
g_psfFlux
g_psfFluxErr
g_psfMag
g_psfMagErr
i_psfFlux
i_psfFluxErr
i_psfMag
i_psfMagErr
objectForcedSource.band
objectForcedSource.coord_dec
objectForcedSource.coord_ra
objectForcedSource.detector
objectForcedSource.forcedSourceId
objectForcedSource.invalidPsfFlag
objectForcedSource.midpointMjdTai
objectForcedSource.pixelFlags_bad
objectForcedSource.pixelFlags_cr
objectForcedSource.pixelFlags_crCenter
objectForcedSource.pixelFlags_edge
objectForcedSource.pixelFlags_interpolated
objectForcedSource.pixelFlags_interpolatedCenter
objectForcedSource.pixelFlags_nodata
objectForcedSource.pixelFlags_saturated
objectForcedSource.pixelFlags_saturatedCenter
objectForcedSource.pixelFlags_suspect
objectForcedSource.pixelFlags_suspectCenter
objectForcedSource.psfDiffFlux
objectForcedSource.psfDiffFlux_flag
objectForcedSource.psfDiffFluxErr
objectForcedSource.psfFlux
objectForcedSource.psfFlux_flag
objectForcedSource.psfFluxErr
objectForcedSource.psfMag
objectForcedSource.psfMagErr
objectForcedSource.visit
objectId
patch
r_psfFlux
r_psfFluxErr
r_psfMag
r_psfMagErr
refBand
refFwhm
shape_flag
shape_xx
shape_xy
shape_yy
tract
u_psfFlux
u_psfFluxErr
u_psfMag
u_psfMagErr
x
xErr
y
y_psfFlux
y_psfFluxErr
y_psfMag
y_psfMagErr
yErr
z_psfFlux
z_psfFluxErr
z_psfMag
z_psfMagErr
""".splitlines()
hats_cols_default = ",".join(default_column_names)

In [None]:
# Reimport settings for the intermediate catalog; the default-column list is
# attached as an extra catalog property.
reimport_options = {
    "output_dir": hats_dir,
    "highest_healpix_order": 11,
    "pixel_threshold": 15_000,
    "skymap_alt_orders": [2, 4, 6],
    "row_group_kwargs": {"subtile_order_delta": 1},
    "addl_hats_properties": {"hats_cols_default": hats_cols_default},
}
args = ImportArguments.reimport_from_hats(
    hats_dir / "object_lc_intermediate", **reimport_options
)
pipeline_with_client(args, client)

In [None]:
# Remove the intermediate catalog now that it has been reimported. Done in
# Python rather than through a shell alias so that paths containing spaces or
# shell metacharacters are handled correctly and a failed removal raises.
shutil.rmtree(hats_dir / "object_lc_intermediate")

In [None]:
# Shut down the Dask client before deleting the scratch directory, since the
# client was created with that directory as its `local_directory`.
client.close()
# Deletes the scratch directory, including the temporary margin caches written there.
tmp_path.cleanup()