# Generate index catalogs

Index catalogs help us support quick object access by ID, similarly to how one would use `loc` in a pandas DataFrame.

In [1]:
import os
import hats
import lsdb
import tempfile
import hats_import.pipeline as runner

from dask.distributed import Client
from hats_import.index.arguments import IndexArguments
from nested_pandas import NestedDtype
from pathlib import Path

In [2]:
# Release tag for the processing run (the recorded run used "w_2025_09").
# It must be exported in the environment before the kernel starts; a missing
# variable raises KeyError here instead of silently using a wrong path.
DRP_VERSION = os.environ["DRP_VERSION"]
print(f"DRP_VERSION: {DRP_VERSION}")
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
base_output_dir = Path("/sdf/data/rubin/shared/lsdb_commissioning")
# Catalogs read and written by this notebook live under <base>/hats/<DRP_VERSION>.
hats_dir = base_output_dir / "hats" / DRP_VERSION

DRP_VERSION: w_2025_09


In [3]:
# Scratch directory handed to the Dask workers; it is removed explicitly in the
# last cell via tmp_path.cleanup().
tmp_path = tempfile.TemporaryDirectory()
tmp_dir = tmp_path.name
# Local Dask cluster: four workers with one thread each.
client = Client(
    n_workers=4,
    threads_per_worker=1,
    local_directory=tmp_dir,
)

In [4]:
# Index the diaObject_lc catalog on its diaObjectId column. The result is
# written under hats_dir with the artifact name diaObject_lc_index.
args = IndexArguments(
    input_catalog_path=hats_dir / "diaObject_lc",
    indexing_column="diaObjectId",
    output_artifact_name="diaObject_lc_index",
    output_path=hats_dir,
)
# Execute the indexing pipeline on the Dask client created earlier.
runner.pipeline_with_client(args, client)

Finishing :   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
# Index the object_lc catalog on its objectId column. The result is written
# under hats_dir with the artifact name object_lc_index.
args = IndexArguments(
    input_catalog_path=hats_dir / "object_lc",
    indexing_column="objectId",
    output_artifact_name="object_lc_index",
    output_path=hats_dir,
)
# Execute the indexing pipeline on the Dask client created earlier.
runner.pipeline_with_client(args, client)

Finishing :   0%|          | 0/3 [00:00<?, ?it/s]

Let's load `object_lc` and look for the object with ID `2132480811839010201`:

In [6]:
# objectId used for the lookup timings below (2132480811839010201, written
# with digit separators for readability).
test_object_id = 2_132_480_811_839_010_201

In [7]:
def _forced_source_as_nested(df):
    """Return ``df`` with its ``forcedSource`` column cast to a ``NestedDtype``.

    The target dtype is derived from the column's current dtype through
    ``NestedDtype.from_pandas_arrow_dtype``; all other columns are untouched.
    """
    nested_dtype = NestedDtype.from_pandas_arrow_dtype(df.dtypes["forcedSource"])
    return df.assign(forcedSource=df["forcedSource"].astype(nested_dtype))


# Open the catalog and apply the cast to each of its partitions.
object_lc = lsdb.read_hats(hats_dir / "object_lc").map_partitions(_forced_source_as_nested)
object_lc

Unnamed: 0_level_0,objectId,refFwhm,shape_flag,sky_object,parentObjectId,detect_isPrimary,x,y,xErr,yErr,shape_yy,shape_xx,shape_xy,coord_ra,coord_dec,coord_raErr,coord_decErr,tract,patch,detect_isIsolated,u_psfFlux,u_psfFluxErr,u_kronFlux,u_kronFluxErr,u_kronRad,g_psfFlux,g_psfFluxErr,g_kronFlux,g_kronFluxErr,g_kronRad,r_psfFlux,r_psfFluxErr,r_kronFlux,r_kronFluxErr,r_kronRad,i_psfFlux,i_psfFluxErr,i_kronFlux,i_kronFluxErr,i_kronRad,z_psfFlux,z_psfFluxErr,z_kronFlux,z_kronFluxErr,z_kronRad,y_psfFlux,y_psfFluxErr,y_kronFlux,y_kronFluxErr,y_kronRad,u_psfMag,u_psfMagErr,u_kronMag,u_kronMagErr,g_psfMag,g_psfMagErr,g_kronMag,g_kronMagErr,r_psfMag,r_psfMagErr,r_kronMag,r_kronMagErr,i_psfMag,i_psfMagErr,i_kronMag,i_kronMagErr,z_psfMag,z_psfMagErr,z_kronMag,z_kronMagErr,y_psfMag,y_psfMagErr,y_kronMag,y_kronMagErr,Norder,Dir,Npix,forcedSource
npartitions=213,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1
"Order: 5, Pixel: 32",int64[pyarrow],double[pyarrow],bool[pyarrow],bool[pyarrow],int64[pyarrow],bool[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],float[pyarrow],int64[pyarrow],int64[pyarrow],bool[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],uint8[pyarrow],uint64[pyarrow],uint64[pyarrow],"nested<parentObjectId: [int64], coord_ra: [dou..."
"Order: 7, Pixel: 544",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 6, Pixel: 35970",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 6, Pixel: 35971",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [8]:
# Open the index catalog generated for object_lc and display its schema.
object_lc_index = hats.read_hats(hats_dir.joinpath("object_lc_index"))
object_lc_index.schema

_healpix_29: int64
Norder: uint8
Dir: uint64
Npix: uint64
objectId: int64
-- schema metadata --
pandas: '{"index_columns": ["objectId"], "column_indexes": [{"name": null' + 757

In [9]:
%%time
# Look the ID up through the index catalog; the IDs are passed as a list.
test_object = object_lc.index_search(
    [test_object_id],
    object_lc_index,
).compute()

CPU times: user 1.1 s, sys: 76.3 ms, total: 1.17 s
Wall time: 2.92 s


For comparison, let's time the same lookup using `Catalog.query`, which does not use the index catalog:

In [10]:
%%time
# Same lookup expressed as a row filter on objectId, without the index catalog.
# Note: this rebinds test_object from the index_search cell.
test_object = (
    object_lc
    .query(f"objectId == {test_object_id}")
    .compute()
)

CPU times: user 8.2 s, sys: 678 ms, total: 8.88 s
Wall time: 1min 16s


In [11]:
# Tear down in this order: close the Dask client first, then delete the
# temporary directory that was passed to it as local_directory.
client.close()
tmp_path.cleanup()