# Generate index catalogs

Index catalogs enable fast object lookup by ID, much like using `loc` on a pandas DataFrame.

In [1]:
import os
import tempfile
import hats_import.pipeline as runner

from dask.distributed import Client
from hats_import.index.arguments import IndexArguments
from pathlib import Path

In [2]:
# The data release version comes from the environment; a missing variable
# raises KeyError here, before any indexing work starts.
DRP_VERSION = os.environ["DRP_VERSION"]
print(f"DRP_VERSION: {DRP_VERSION}")

# Root of the shared commissioning area. The catalogs indexed below are read
# from (and written to) hats/<DRP_VERSION> underneath it.
base_output_dir = Path("/sdf/data/rubin/shared/lsdb_commissioning")
hats_dir = base_output_dir / "hats" / DRP_VERSION

In [None]:
# Scratch directory handed to the Dask client. It is held as a
# TemporaryDirectory object rather than a `with` block so that it stays alive
# across notebook cells until cleanup() is called explicitly at the end.
tmp_path = tempfile.TemporaryDirectory()
tmp_dir = tmp_path.name

# Dask client with four single-threaded workers, using the scratch
# directory as its local_directory.
client = Client(
    local_directory=tmp_dir,
    n_workers=4,
    threads_per_worker=1,
)

In [4]:
# Index the `dia_object_lc` catalog on its `diaObjectId` column.
dia_object_lc_dir = hats_dir / "dia_object_lc"
args = IndexArguments(
    input_catalog_path=dia_object_lc_dir,
    indexing_column="diaObjectId",
    # Presumably the index lands at hats_dir / "dia_object_lc_index",
    # next to the source catalog -- confirm against the hats-import docs.
    output_artifact_name="dia_object_lc_index",
    output_path=hats_dir,
    resume=False,
    simple_progress_bar=True,
)

# Run the indexing pipeline on the already-created Dask client.
runner.pipeline_with_client(args, client)

In [5]:
# Index the `object_lc` catalog on its `objectId` column.
object_lc_dir = hats_dir / "object_lc"
args = IndexArguments(
    input_catalog_path=object_lc_dir,
    indexing_column="objectId",
    # Presumably the index lands at hats_dir / "object_lc_index",
    # next to the source catalog -- confirm against the hats-import docs.
    output_artifact_name="object_lc_index",
    output_path=hats_dir,
    resume=False,
    simple_progress_bar=True,
)

# Run the indexing pipeline on the already-created Dask client.
runner.pipeline_with_client(args, client)

In [11]:
# Tear down the Dask client, then remove the scratch directory. The
# try/finally ensures the directory is deleted even if closing the client
# raises; any such exception still propagates.
try:
    client.close()
finally:
    tmp_path.cleanup()