## Nest light curves

Nest the DIA source and DIA forced source catalogs into the DIA object catalog, so that each object carries its own light curves.

In [1]:
import os
import shutil
from pathlib import Path

import lsdb
from dask.distributed import Client
from hats_import import pipeline_with_client
from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments

In [2]:
# Location of the RAW parquet data
PPDB_DIR = Path("/sdf/scratch/rubin/ppdb/data/lsstcam")

# Working directory (under $OUTPUT_DIR) that holds the catalogs read and written below
TMP_DIR = Path(os.environ["OUTPUT_DIR"], "tmp")

In [3]:
# Dask client with 16 single-threaded workers, using TMP_DIR as their local directory
client = Client(
    local_directory=TMP_DIR,
    threads_per_worker=1,
    n_workers=16,
)

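Prepare a margin cache for each source catalog so that we can nest without losing data. If the margin cache cannot be built (for example, because it would contain no rows), the catalog is loaded without one: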

In [None]:
def load_sources_with_margin(dataset_type, margin_threshold=5):
    """Load a source catalog from TMP_DIR, with a margin cache when one can be built.

    A margin cache named ``{dataset_type}_{margin_threshold}arcs`` is generated
    in TMP_DIR from the catalog at ``TMP_DIR / dataset_type``. If the margin
    pipeline raises ``ValueError`` (the captured runs hit "Margin cache contains
    no rows"), the error is printed and the catalog is loaded without a margin.

    Parameters
    ----------
    dataset_type : str
        Name of the catalog directory inside TMP_DIR (e.g. ``"dia_source"``).
    margin_threshold : int or float, default 5
        Margin size forwarded to ``MarginCacheArguments`` and embedded in the
        margin artifact name (presumably arcseconds, given the ``arcs`` suffix
        -- confirm against the hats-import documentation).

    Returns
    -------
    The catalog returned by ``lsdb.read_hats``, with ``margin_cache`` set only
    when the margin pipeline completed.
    """
    main_catalog_path = TMP_DIR / dataset_type
    margin_name = f"{dataset_type}_{margin_threshold}arcs"
    try:
        args = MarginCacheArguments(
            input_catalog_path=main_catalog_path,
            output_path=TMP_DIR,
            margin_threshold=margin_threshold,
            output_artifact_name=margin_name,
            simple_progress_bar=True,
            resume=False,
        )
        # Relies on the notebook-level Dask `client` created in an earlier cell.
        pipeline_with_client(args, client)
    except ValueError as e:
        # Deliberate best-effort: report why no margin is available and carry
        # on with the main catalog alone instead of aborting the notebook.
        print(dataset_type, e)
        return lsdb.read_hats(main_catalog_path)
    return lsdb.read_hats(main_catalog_path, margin_cache=TMP_DIR / margin_name)

In [5]:
# Load both source catalogs, detections first and then forced photometry
dia_source, dia_forced_source = [
    load_sources_with_margin(name) for name in ("dia_source", "dia_forced_source")
]

Planning  : 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:05<00:00,  2.00s/it]
Mapping   : 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.94s/it]
Binning   :   0%|                                                                                                                                                                                                                                                                                       

dia_source Margin cache contains no rows. Increase margin size and re-run.


Planning  : 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.19it/s]
Mapping   : 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.95s/it]
Binning   :   0%|                                                                                                                                                                                                                                                                                       

dia_forced_source Margin cache contains no rows. Increase margin size and re-run.


Nest and use `map_partitions` to sort the sources chronologically:

In [6]:
# Object catalog that the source catalogs will be nested into
dia_object_path = TMP_DIR / "dia_object"
dia_object = lsdb.read_hats(dia_object_path)

In [7]:
def sort_nested_sources(df, source_cols, mjd_col="midpointMjdTai"):
    """Re-nest each nested source column with its rows sorted by time.

    For every column in ``source_cols`` the nested data is flattened, sorted by
    the frame index (looked up via the flat frame's index name) and then by
    ``mjd_col``, and nested back under the same column name.

    Parameters
    ----------
    df : frame exposing ``.nest.to_flat()`` on nested columns, ``drop`` and
        ``add_nested`` (a nested-pandas frame when used via ``map_partitions``).
    source_cols : iterable of str
        Names of the nested columns to sort.
    mjd_col : str, default "midpointMjdTai"
        Name of the time field inside each nested column used as the
        secondary sort key.

    Returns
    -------
    A frame with the same columns; each re-nested column is dropped and
    appended again, so it ends up after the untouched columns.
    """
    for source_col in source_cols:
        flat_sources = df[source_col].nest.to_flat()
        # Sorting on the index name first keeps each object's rows together;
        # this requires the flat frame's index to be named.
        sorted_sources = flat_sources.sort_values([flat_sources.index.name, mjd_col])
        df = df.drop(columns=[source_col]).add_nested(sorted_sources, source_col)
    return df

In [8]:
# Step 1: nest the detections into the object catalog, matched on diaObjectId
objects_with_sources = dia_object.join_nested(
    dia_source,
    left_on="diaObjectId",
    right_on="diaObjectId",
    nested_column_name="diaSource",
)

# Step 2: nest the forced photometry the same way
objects_with_all_sources = objects_with_sources.join_nested(
    dia_forced_source,
    left_on="diaObjectId",
    right_on="diaObjectId",
    nested_column_name="diaForcedSource",
)

# Step 3: sort both nested columns chronologically, one partition at a time
dia_object_nested = objects_with_all_sources.map_partitions(
    lambda partition: sort_nested_sources(
        partition, source_cols=["diaSource", "diaForcedSource"]
    )
)
dia_object_nested



Unnamed: 0_level_0,diaObjectId,validityStart,dec,decErr,g_fpFluxMean,g_fpFluxMeanErr,g_fpFluxSigma,g_psfFluxChi2,g_psfFluxErrMean,g_psfFluxLinearIntercept,g_psfFluxLinearSlope,g_psfFluxMAD,g_psfFluxMax,g_psfFluxMaxSlope,g_psfFluxMean,g_psfFluxMeanErr,g_psfFluxMin,g_psfFluxNdata,g_psfFluxPercentile05,g_psfFluxPercentile25,g_psfFluxPercentile50,g_psfFluxPercentile75,g_psfFluxPercentile95,g_psfFluxSigma,g_psfFluxSkew,g_psfFluxStetsonJ,g_scienceFluxMean,g_scienceFluxMeanErr,g_scienceFluxSigma,i_fpFluxMean,i_fpFluxMeanErr,i_fpFluxSigma,i_psfFluxChi2,i_psfFluxErrMean,i_psfFluxLinearIntercept,i_psfFluxLinearSlope,i_psfFluxMAD,i_psfFluxMax,i_psfFluxMaxSlope,i_psfFluxMean,i_psfFluxMeanErr,i_psfFluxMin,i_psfFluxNdata,i_psfFluxPercentile05,i_psfFluxPercentile25,i_psfFluxPercentile50,i_psfFluxPercentile75,i_psfFluxPercentile95,i_psfFluxSigma,i_psfFluxSkew,i_psfFluxStetsonJ,i_scienceFluxMean,i_scienceFluxMeanErr,i_scienceFluxSigma,lastNonForcedSource,nDiaSources,nearbyExtObj1,nearbyExtObj1Sep,nearbyExtObj2,nearbyExtObj2Sep,nearbyExtObj3,nearbyExtObj3Sep,nearbyLowzGal,nearbyLowzGalSep,nearbyObj1,nearbyObj1Dist,nearbyObj1LnP,nearbyObj2,nearbyObj2Dist,nearbyObj2LnP,nearbyObj3,nearbyObj3Dist,nearbyObj3LnP,parallax,parallaxErr,pmDec,pmDecErr,pmDec_parallax_Cov,pmParallaxChi2,pmParallaxLnL,pmParallaxNdata,pmRa,pmRaErr,pmRa_parallax_Cov,pmRa_pmDec_Cov,r_fpFluxMean,r_fpFluxMeanErr,r_fpFluxSigma,r_psfFluxChi2,r_psfFluxErrMean,r_psfFluxLinearIntercept,r_psfFluxLinearSlope,r_psfFluxMAD,r_psfFluxMax,r_psfFluxMaxSlope,r_psfFluxMean,r_psfFluxMeanErr,r_psfFluxMin,r_psfFluxNdata,r_psfFluxPercentile05,r_psfFluxPercentile25,r_psfFluxPercentile50,r_psfFluxPercentile75,r_psfFluxPercentile95,r_psfFluxSigma,r_psfFluxSkew,r_psfFluxStetsonJ,r_scienceFluxMean,r_scienceFluxMeanErr,r_scienceFluxSigma,ra,raErr,ra_dec_Cov,radecMjdTai,u_fpFluxMean,u_fpFluxMeanErr,u_fpFluxSigma,u_psfFluxChi2,u_psfFluxErrMean,u_psfFluxLinearIntercept,u_psfFluxLinearSlope,u_psfFluxMAD,u_psfFluxMax,u_psfFluxMaxSlope,u_psfFluxMea
n,u_psfFluxMeanErr,u_psfFluxMin,u_psfFluxNdata,u_psfFluxPercentile05,u_psfFluxPercentile25,u_psfFluxPercentile50,u_psfFluxPercentile75,u_psfFluxPercentile95,u_psfFluxSigma,u_psfFluxSkew,u_psfFluxStetsonJ,u_scienceFluxMean,u_scienceFluxMeanErr,u_scienceFluxSigma,validityEnd,y_fpFluxMean,y_fpFluxMeanErr,y_fpFluxSigma,y_psfFluxChi2,y_psfFluxErrMean,y_psfFluxLinearIntercept,y_psfFluxLinearSlope,y_psfFluxMAD,y_psfFluxMax,y_psfFluxMaxSlope,y_psfFluxMean,y_psfFluxMeanErr,y_psfFluxMin,y_psfFluxNdata,y_psfFluxPercentile05,y_psfFluxPercentile25,y_psfFluxPercentile50,y_psfFluxPercentile75,y_psfFluxPercentile95,y_psfFluxSigma,y_psfFluxSkew,y_psfFluxStetsonJ,y_scienceFluxMean,y_scienceFluxMeanErr,y_scienceFluxSigma,z_fpFluxMean,z_fpFluxMeanErr,z_fpFluxSigma,z_psfFluxChi2,z_psfFluxErrMean,z_psfFluxLinearIntercept,z_psfFluxLinearSlope,z_psfFluxMAD,z_psfFluxMax,z_psfFluxMaxSlope,z_psfFluxMean,z_psfFluxMeanErr,z_psfFluxMin,z_psfFluxNdata,z_psfFluxPercentile05,z_psfFluxPercentile25,z_psfFluxPercentile50,z_psfFluxPercentile75,z_psfFluxPercentile95,z_psfFluxSigma,z_psfFluxSkew,z_psfFluxStetsonJ,z_scienceFluxMean,z_scienceFluxMeanErr,z_scienceFluxSigma,u_scienceMagMean,u_scienceMagMeanErr,g_scienceMagMean,g_scienceMagMeanErr,r_scienceMagMean,r_scienceMagMeanErr,i_scienceMagMean,i_scienceMagMeanErr,z_scienceMagMean,z_scienceMagMeanErr,y_scienceMagMean,y_scienceMagMeanErr,diaSource,diaForcedSource
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1
"Order: 0, Pixel: 6",int64[pyarrow],"timestamp[ms, tz=UTC][pyarrow]",double[pyarrow],double[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],int32[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],int32[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],"timestamp[ms, tz=UTC][pyarrow]",int32[pyarrow],int64[pyarrow],float[pyarrow],int64[pyarrow],float[pyarrow],int64[pyarrow],float[pyarrow],null[pyarrow],float[pyarrow],int64[pyarrow],float[pyarrow],float[pyarrow],int64[pyarrow],float[pyarrow],float[pyarrow],int64[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],int32[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],int32[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],int32[pyarrow],float[pyarrow],floa
t[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],"timestamp[ms, tz=UTC][pyarrow]",float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],int32[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],int32[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],float[pyarrow],"nested<diaSourceId: [int64], apFlux: [float], ...","nested<visit: [int64], detector: [int16], band..."


Save the new data catalog to disk:

In [9]:
# The "new_" prefix keeps this output out of reach of the `dia_object*` cleanup below
lc_output_path = TMP_DIR / "new_dia_object_lc"
dia_object_nested.to_hats(
    lc_output_path,
    catalog_name="dia_object_lc",
    as_collection=False,
)

Free up some space from intermediate catalogs that are no longer needed:

In [10]:
# Remove every entry in TMP_DIR whose name starts with one of the intermediate
# catalog prefixes (this also covers margin caches such as "dia_source_5arcs").
# Done in Python rather than via `%rm -rf $TMP_DIR/...` so that paths containing
# spaces or shell metacharacters are handled safely and the cell is portable.
for prefix in ("dia_object", "dia_source", "dia_forced_source"):
    # Materialize the matches before deleting so the listing is not mutated mid-scan.
    for stale_path in sorted(TMP_DIR.glob(f"{prefix}*")):
        if stale_path.is_dir() and not stale_path.is_symlink():
            shutil.rmtree(stale_path)
        else:
            # Plain files and symlinks (including symlinks to directories)
            stale_path.unlink()

In [11]:
# Close the Dask client created at the top of the notebook
client.close()