# Generate index catalogs

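Index catalogs support quick object access by ID, similar to how one would use `loc` on a pandas DataFrame. This notebook builds index catalogs for `diaObject_lc` and `object_lc`, then compares an index-based lookup against a plain `Catalog.query`.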

In [None]:
import os
import hats
import lsdb
import tempfile
import hats_import.pipeline as runner

from dask.distributed import Client
from hats_import.index.arguments import IndexArguments
from nested_pandas import NestedDtype
from pathlib import Path

In [None]:
# Data release to index. Read from the environment; raises KeyError if
# DRP_VERSION is not set.
DRP_VERSION = os.environ["DRP_VERSION"]
print(f"DRP_VERSION: {DRP_VERSION}")

# Shared output root; the catalogs for this release live under hats/<DRP_VERSION>.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
base_output_dir = Path("/sdf/data/rubin/shared/lsdb_commissioning")
hats_dir = base_output_dir / "hats" / DRP_VERSION

In [None]:
# Scratch directory handed to the Dask workers; it is removed explicitly in
# the final cell via `tmp_path.cleanup()`.
tmp_path = tempfile.TemporaryDirectory()
tmp_dir = tmp_path.name

# Local cluster: four single-threaded workers using the scratch directory
# as their `local_directory`.
client = Client(
    local_directory=tmp_dir,
    n_workers=4,
    threads_per_worker=1,
)

In [None]:
# Index the `diaObject_lc` catalog on `diaObjectId`; the result is written
# next to the source catalog as `diaObject_lc_index`.
args = IndexArguments(
    output_artifact_name="diaObject_lc_index",
    output_path=hats_dir,
    indexing_column="diaObjectId",
    input_catalog_path=hats_dir / "diaObject_lc",
)
runner.pipeline_with_client(args, client)

In [None]:
# Index the `object_lc` catalog on `objectId`; the result is written next to
# the source catalog as `object_lc_index`.
args = IndexArguments(
    output_artifact_name="object_lc_index",
    output_path=hats_dir,
    indexing_column="objectId",
    input_catalog_path=hats_dir / "object_lc",
)
runner.pipeline_with_client(args, client)

Let's load `object_lc` and look up the object with ID `2132480811839010201`:

In [None]:
# Object ID used for the lookup timing comparison below (digits grouped for readability).
test_object_id = 2_132_480_811_839_010_201

In [None]:
def _cast_forced_source(df):
    """Return `df` with its `forcedSource` column re-typed as a nested column.

    The target dtype is built from the column's current dtype with
    `NestedDtype.from_pandas_arrow_dtype`; all other columns are left as-is.
    """
    nested_dtype = NestedDtype.from_pandas_arrow_dtype(df.dtypes["forcedSource"])
    return df.assign(forcedSource=df["forcedSource"].astype(nested_dtype))


# Read the catalog and apply the cast to every partition.
object_lc = lsdb.read_hats(hats_dir / "object_lc").map_partitions(_cast_forced_source)
object_lc

In [None]:
# Open the `object_lc_index` catalog under `hats_dir` and display its schema.
object_lc_index_path = hats_dir / "object_lc_index"
object_lc_index = hats.read_hats(object_lc_index_path)
object_lc_index.schema

In [None]:
%%time
test_object = object_lc.index_search([test_object_id], object_lc_index).compute()

Now compare the timing of the index search above with that of the same lookup done through `Catalog.query`:

In [None]:
%%time
test_object = object_lc.query(f"objectId == {test_object_id}").compute()

In [None]:
# Shut down the Dask client first, then delete the scratch directory that was
# passed to it as `local_directory`.
client.close()
tmp_path.cleanup()