# Crossmatch with ZTF and PanSTARRS

Crossmatch the Rubin `dia_object` and `object` collections with ZTF DR22 and Pan-STARRS1 (PS1), and store the resulting matches as HATS association catalogs on disk.

In [1]:
import os
import lsdb
import tempfile

from dask.distributed import Client
from lsdb.io.to_association import to_association
from pathlib import Path
from upath import UPath

In [2]:
# Run configuration comes from the environment; a missing variable raises
# KeyError right here.
VERSION = os.environ["VERSION"]
OUTPUT_DIR = Path(os.environ["OUTPUT_DIR"])

print(f"VERSION: {VERSION}")
print(f"OUTPUT_DIR: {OUTPUT_DIR}")

# Directory holding the HATS catalogs for this version: <OUTPUT_DIR>/hats/<VERSION>.
hats_dir = OUTPUT_DIR.joinpath("hats", VERSION)

In [3]:
# Temporary directory passed to the Dask client as local_directory. Keep the
# TemporaryDirectory object itself (tmp_path) so it can be cleaned up explicitly
# once the client has been closed.
tmp_path = tempfile.TemporaryDirectory()
tmp_dir = tmp_path.name
# Dask client configured with 16 workers, one thread each.
client = Client(n_workers=16, threads_per_worker=1, local_directory=tmp_dir)

In [4]:
# Open the two Rubin catalogs stored under hats_dir.
dia_object_collection = lsdb.read_hats(hats_dir.joinpath("dia_object_collection"))
object_collection = lsdb.read_hats(hats_dir.joinpath("object_collection"))

### Crossmatch with ZTF

In [None]:
# Open the ZTF DR22 catalog served from data.lsdb.io, together with its margin
# cache (presumably a 10-arcsecond margin, judging by the "_10arcs" suffix —
# TODO confirm).
ztf_dr22 = lsdb.read_hats(
    "https://data.lsdb.io/hats/ztf_dr22/ztf_lc",
    margin_cache="https://data.lsdb.io/hats/ztf_dr22/ztf_lc_10arcs",
)
# Bare expression so the notebook displays the catalog.
ztf_dr22

In [None]:
# For each Rubin collection: crossmatch against ZTF DR22, then write the
# matched ID pairs (plus the `_dist_arcsec` column) as an association catalog
# stored inside that collection's own directory.
for collection in (dia_object_collection, object_collection):
    collection_properties = collection.hc_collection.collection_properties
    collection_name = collection_properties.name
    # The first entry of all_indexes is used as the Rubin-side ID column.
    lsst_id_column = next(iter(collection_properties.all_indexes))
    ztf_id_column = "objectid_ztf"

    # The empty left suffix keeps the Rubin column names unchanged, which is
    # why lsst_id_column can be selected from the result as-is.
    xmatch = collection.crossmatch(
        ztf_dr22, radius_arcsec=0.2, n_neighbors=20, suffixes=("", "_ztf")
    )
    association_columns = [lsst_id_column, ztf_id_column, "_dist_arcsec"]

    xmatch_catalog_name = f"{collection.hc_structure.catalog_name}_x_ztf_dr22"
    primary_dir = hats_dir / collection_name
    to_association(
        xmatch[association_columns],
        catalog_name=xmatch_catalog_name,
        base_catalog_path=primary_dir / xmatch_catalog_name,
        primary_catalog_dir=primary_dir,
        primary_column_association=lsst_id_column,
        primary_id_column=lsst_id_column,
        join_catalog_dir=ztf_dr22.hc_structure.catalog_path,
        join_column_association=ztf_id_column,
        join_id_column="objectid",
    )
    print(f"Saved {xmatch_catalog_name}")

### Crossmatch with PanSTARRS

In [None]:
# Options forwarded to UPath for the S3 reads: explicit endpoint and anonymous
# access (anon=True).
s3_kwargs = {"endpoint_url": "https://s3.amazonaws.com", "anon": True}

# Open the PS1 "otmo" catalog from the stpubdata bucket together with its margin
# cache (presumably a 10-arcsecond margin, judging by the "_10arcs" suffix —
# TODO confirm).
ps1 = lsdb.read_hats(
    UPath("s3://stpubdata/panstarrs/ps1/public/hats/otmo", **s3_kwargs),
    margin_cache=UPath(
        "s3://stpubdata/panstarrs/ps1/public/hats/otmo_10arcs", **s3_kwargs
    ),
)
# Bare expression so the notebook displays the catalog.
ps1

In [8]:
# For each Rubin collection: crossmatch against PS1, then write the matched ID
# pairs (plus the `_dist_arcsec` column) as an association catalog stored
# inside that collection's own directory.
for collection in (dia_object_collection, object_collection):
    collection_properties = collection.hc_collection.collection_properties
    collection_name = collection_properties.name
    # The first entry of all_indexes is used as the Rubin-side ID column.
    lsst_id_column = next(iter(collection_properties.all_indexes))
    ps1_id_column = "objID_ps1"

    # The empty left suffix keeps the Rubin column names unchanged, which is
    # why lsst_id_column can be selected from the result as-is.
    xmatch = collection.crossmatch(
        ps1, radius_arcsec=0.2, n_neighbors=20, suffixes=("", "_ps1")
    )
    association_columns = [lsst_id_column, ps1_id_column, "_dist_arcsec"]

    xmatch_catalog_name = f"{collection.hc_structure.catalog_name}_x_ps1"
    primary_dir = hats_dir / collection_name
    to_association(
        xmatch[association_columns],
        catalog_name=xmatch_catalog_name,
        base_catalog_path=primary_dir / xmatch_catalog_name,
        primary_catalog_dir=primary_dir,
        primary_column_association=lsst_id_column,
        primary_id_column=lsst_id_column,
        join_catalog_dir=ps1.hc_structure.catalog_path,
        join_column_association=ps1_id_column,
        join_id_column="objID",
    )
    print(f"Saved {xmatch_catalog_name}")

In [9]:
# Close the Dask client first, then delete the temporary directory that was
# passed to it as local_directory.
client.close()
tmp_path.cleanup()