# Generate collections

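Generate catalog collections for the `dia_object_lc` and `object_lc` catalogs. Each existing catalog is moved into its own collection directory, and a margin and an index on the object ID column are added alongside it.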

In [None]:
import os
import tempfile
import hats_import.pipeline as runner

from dask.distributed import Client
from hats_import.collection.arguments import CollectionArguments
from pathlib import Path

In [None]:
# Both settings are mandatory: a missing variable raises KeyError right here.
VERSION = os.environ["VERSION"]
OUTPUT_DIR = Path(os.environ["OUTPUT_DIR"])

# Echo the configuration so it is recorded in the cell output.
print(f"VERSION: {VERSION}", f"OUTPUT_DIR: {OUTPUT_DIR}", sep="\n")

# Directory under which this version's catalogs are stored.
hats_dir = OUTPUT_DIR.joinpath("hats", VERSION)

In [None]:
# Scratch directory handed to Dask as its `local_directory`; keep the
# TemporaryDirectory object around so it can be cleaned up explicitly later.
tmp_path = tempfile.TemporaryDirectory()
tmp_dir = tmp_path.name

# Local Dask client: 16 workers with a single thread each.
client = Client(
    local_directory=tmp_dir,
    threads_per_worker=1,
    n_workers=16,
)

### dia_object_collection

In [None]:
# Place the existing `dia_object_lc` catalog inside a `dia_object_collection`
# directory. This uses pathlib rather than the `%mkdir` / `%mv` shell aliases:
# the aliases only print an error when the command fails (the notebook keeps
# running), and they expand `$hats_dir` unquoted, which breaks on paths that
# contain whitespace.
dia_object_collection_dir = hats_dir / "dia_object_collection"
dia_object_lc_dir = dia_object_collection_dir / "dia_object_lc"
dia_object_collection_dir.mkdir(exist_ok=True)

# Skip the move when the catalog is already in place so the cell can be re-run.
# Otherwise a missing source raises FileNotFoundError instead of being ignored.
if not dia_object_lc_dir.exists():
    (hats_dir / "dia_object_lc").rename(dia_object_lc_dir)

In [None]:
# Describe the `dia_object_collection` collection step by step: the base
# arguments, the catalog that was moved into the collection directory, a
# margin flagged as the default one, and an index on `diaObjectId`.
args = CollectionArguments(
    output_artifact_name="dia_object_collection",
    new_catalog_name="dia_object_lc",
    output_path=hats_dir,
    simple_progress_bar=True,
)
args = args.catalog(catalog_path=hats_dir / "dia_object_collection" / "dia_object_lc")
args = args.add_margin(margin_threshold=5.0, is_default=True)
args = args.add_index(indexing_column="diaObjectId")

# Run the pipeline for these arguments using the shared Dask client.
runner.pipeline_with_client(args, client)

### object_collection

In [None]:
# Place the existing `object_lc` catalog inside an `object_collection`
# directory. This uses pathlib rather than the `%mkdir` / `%mv` shell aliases:
# the aliases only print an error when the command fails (the notebook keeps
# running), and they expand `$hats_dir` unquoted, which breaks on paths that
# contain whitespace.
object_collection_dir = hats_dir / "object_collection"
object_lc_dir = object_collection_dir / "object_lc"
object_collection_dir.mkdir(exist_ok=True)

# Skip the move when the catalog is already in place so the cell can be re-run.
# Otherwise a missing source raises FileNotFoundError instead of being ignored.
if not object_lc_dir.exists():
    (hats_dir / "object_lc").rename(object_lc_dir)

In [None]:
# Describe the `object_collection` collection step by step: the base
# arguments, the catalog that was moved into the collection directory, a
# margin flagged as the default one, and an index on `objectId`.
args = CollectionArguments(
    output_artifact_name="object_collection",
    new_catalog_name="object_lc",
    output_path=hats_dir,
    simple_progress_bar=True,
)
args = args.catalog(catalog_path=hats_dir / "object_collection" / "object_lc")
args = args.add_margin(margin_threshold=5.0, is_default=True)
args = args.add_index(indexing_column="objectId")

# Run the pipeline for these arguments using the shared Dask client.
runner.pipeline_with_client(args, client)

In [None]:
# Shut down the Dask client first, then delete its scratch directory.
# The `finally` ensures the temporary directory is still removed when closing
# the client raises, so a failed shutdown does not leave the directory behind.
try:
    client.close()
finally:
    tmp_path.cleanup()