# Crossmatch with ZTF and PanSTARRS

Crossmatch the ComCam data with ZTF and PanSTARRS and store the results as HATS catalogs on disk.

In [1]:
import os
import lsdb
import tempfile

from hats.io import paths
from dask.distributed import Client
from nested_pandas import NestedDtype
from upath import UPath

In [2]:
# DRP version tag, read from the environment; `os.environ[...]` raises
# KeyError if DRP_VERSION is not set.
DRP_VERSION = os.environ["DRP_VERSION"]
print(f"DRP_VERSION: {DRP_VERSION}")
# The base path has no placeholders, so it is a plain string (not an f-string).
base_output_dir = UPath("/sdf/data/rubin/shared/lsdb_commissioning")
# All HATS catalogs read and written by this notebook live under this directory.
hats_dir = base_output_dir / "hats" / DRP_VERSION

In [3]:
# Scratch directory handed to the Dask client as its local directory; it is
# kept in `tmp_path` so it can be cleaned up explicitly later.
tmp_path = tempfile.TemporaryDirectory()
tmp_dir = tmp_path.name
client = Client(
    local_directory=tmp_dir,
    n_workers=4,
    threads_per_worker=1,
)

Let's load the nested Rubin catalogs:

In [4]:
def _as_nested_dia_columns(df):
    """Cast the DIA light-curve columns of one partition to `NestedDtype`.

    Each of "diaSource" and "diaObjectForcedSource" is re-typed with the
    `NestedDtype` derived from the column's current dtype.
    """
    nested_casts = {}
    for lc_column in ["diaSource", "diaObjectForcedSource"]:
        nested_dtype = NestedDtype.from_pandas_arrow_dtype(df.dtypes[lc_column])
        nested_casts[lc_column] = df[lc_column].astype(nested_dtype)
    return df.assign(**nested_casts)


dia_object_lc = lsdb.read_hats(hats_dir / "dia_object_lc").map_partitions(
    _as_nested_dia_columns
)
dia_object_lc

In [5]:
def _as_nested_object_column(df):
    """Cast the "objectForcedSource" column of one partition to `NestedDtype`.

    The target dtype is derived from the column's current dtype.
    """
    lc_column = "objectForcedSource"
    nested_dtype = NestedDtype.from_pandas_arrow_dtype(df.dtypes[lc_column])
    return df.assign(**{lc_column: df[lc_column].astype(nested_dtype)})


object_lc = lsdb.read_hats(hats_dir / "object_lc").map_partitions(
    _as_nested_object_column
)
object_lc

### Crossmatch with ZTF

In [None]:
# Crossmatch radius, in arcseconds, passed as `radius_arcsec` when matching
# the Rubin catalogs against ZTF.
ztf_xmatch_radius_arcsec = 0.2

Let's load ZTF DR22 with nested list lightcurves:

In [None]:
# Remote locations of the ZTF DR22 light-curve catalog and its margin cache.
ztf_lc_url = "https://data.lsdb.io/hats/ztf_dr22/ztf_lc"
ztf_lc_margin_url = "https://data.lsdb.io/hats/ztf_dr22/ztf_lc_10arcs"

ztf_dr22_lc = lsdb.read_hats(ztf_lc_url, margin_cache=ztf_lc_margin_url)

In [None]:
# Columns packed into the nested "ztf_lc" column.
ztf_list_columns = ["hmjd", "mag", "magerr", "catflags", "clrcoeff"]
# Every other column stays a regular (base) column.
ztf_base_columns = [
    col for col in ztf_dr22_lc.columns if col not in ztf_list_columns
]

nested_ztf = ztf_dr22_lc.nest_lists(
    name="ztf_lc",
    list_columns=ztf_list_columns,
    base_columns=ztf_base_columns,
)
nested_ztf

ZTF DR22 filters are specified in a base column (`filterid`). For the crossmatch to be accurate we need to flatten the nested sources and repack them to get light curves for all the bands.

In [None]:
def convert_to_flat(df, nested):
    """Replace a nested column by its flattened rows, one output row per nested entry.

    Args:
        df: Partition dataframe whose index has a name.
        nested: Name of the nested column to flatten.

    Returns:
        A new dataframe holding the remaining columns of `df` joined with the
        flattened sub-columns of `nested`, indexed by the original index
        (repeated once per flattened row). `df` itself is not modified.
    """
    original_index = df.index.name
    # Move the index into a column so the join below runs on row positions.
    positional = df.reset_index(drop=False)
    flat_rows = positional[nested].nest.to_flat()
    base = positional.drop(columns=[nested])
    # Inner join: each base row is repeated for every flattened entry it owns.
    return base.join(flat_rows, how="inner").set_index(original_index)


# Flatten the "ztf_lc" nested column partition by partition.
ztf = nested_ztf.map_partitions(
    convert_to_flat,
    nested="ztf_lc",
)

In [None]:
# Partitioning columns that are excluded from the ZTF column list below.
HIVE_COLUMNS = {
    paths.PARTITION_ORDER,
    paths.PARTITION_DIR,
    paths.PARTITION_PIXEL,
}
# ZTF column names with the "_ztf" suffix used in the crossmatch.
original_ztf_cols = [
    f"{col}_ztf"
    for col in ztf_dr22_lc.columns
    if col not in HIVE_COLUMNS
]

In [None]:
def crossmatch_with_ztf(catalog, object_column):
    """Crossmatch a Rubin catalog with the flattened ZTF catalog and nest the matches.

    Uses the module-level `ztf` catalog, `ztf_xmatch_radius_arcsec` and
    `original_ztf_cols`.

    Args:
        catalog: Rubin catalog to crossmatch.
        object_column: Name of the Rubin object ID column used to regroup
            the matched ZTF rows.

    Returns:
        The result of `catalog.join_nested`, with the matched ZTF columns, the
        object ID and the separation column packed under "ztf_lc".
    """
    separation_column = "lsst_ztf_sep"
    xmatch = catalog.crossmatch(
        ztf,
        suffixes=("", "_ztf"),
        n_neighbors=20,
        radius_arcsec=ztf_xmatch_radius_arcsec,
    )
    # Give the crossmatch distance column a survey-specific name.
    xmatch._ddf = xmatch._ddf.rename(columns={"_dist_arcsec": separation_column})
    # Repack light curves with `join_nested` based on the Rubin object ID
    nested_columns = [*original_ztf_cols, object_column, separation_column]
    return catalog.join_nested(
        xmatch[nested_columns],
        left_on=object_column,
        right_on=object_column,
        nested_column_name="ztf_lc",
    )

In [None]:
# Pair each Rubin catalog with its object ID column, crossmatch it with ZTF
# and write the result under `hats_dir`.
rubin_targets = [(object_lc, "objectId"), (dia_object_lc, "diaObjectId")]
for catalog, object_column in rubin_targets:
    catalog_name = f"{catalog.name}_x_ztf_dr22"
    output_path = hats_dir / catalog_name
    lsst_lc_x_ztf = crossmatch_with_ztf(catalog, object_column)
    lsst_lc_x_ztf.to_hats(output_path, catalog_name=catalog_name)
    print(f"Saved {catalog_name}")

### Crossmatch with PanSTARRS

In [6]:
# Anonymous S3 locations of the PanSTARRS catalog and its margin cache.
ps1_catalog_path = UPath("s3://stpubdata/panstarrs/ps1/public/hats/otmo", anon=True)
ps1_margin_path = UPath(
    "s3://stpubdata/panstarrs/ps1/public/hats/otmo_10arcs", anon=True
)

panstarrs = lsdb.read_hats(ps1_catalog_path, margin_cache=ps1_margin_path)
panstarrs

In [7]:
# Crossmatch radius, in arcseconds, passed as `radius_arcsec` when matching
# the Rubin catalogs against PanSTARRS.
ps1_xmatch_radius_arcsec = 0.1

In [8]:
# Crossmatch each Rubin catalog with PanSTARRS and write the result under
# `hats_dir`.
for catalog in [object_lc, dia_object_lc]:
    # Separation column for the PS1 match. It was previously named
    # "lsst_ztf_sep", copied over from the ZTF crossmatch.
    dist_column = "lsst_ps1_sep"
    catalog_name = f"{catalog.name}_x_ps1"
    lsst_lc_x_ps1 = catalog.crossmatch(
        panstarrs, radius_arcsec=ps1_xmatch_radius_arcsec, suffixes=("", "_ps1")
    )
    # Give the crossmatch distance column a survey-specific name.
    lsst_lc_x_ps1._ddf = lsst_lc_x_ps1._ddf.rename(
        columns={"_dist_arcsec": dist_column}
    )
    lsst_lc_x_ps1.to_hats(hats_dir / catalog_name, catalog_name=catalog_name)
    print(f"Saved {catalog_name}")

In [9]:
# Shut down the Dask client first, then remove the temporary directory that
# was passed to it as `local_directory`.
client.close()
tmp_path.cleanup()