In [None]:
# for inspiration: https://colab.research.google.com/drive/1jGja4e-x97wxfdLyAA_XIGLwAd35JUNb

Install lsdb (and transitively hipscat)

In [None]:
!pip install -r requirements.txt --quiet

In [None]:
# hipscat: low-level catalog structure, plotting and almanac helpers.
import hipscat as hc
from hipscat.inspection import plot_pixels
from hipscat.inspection.almanac import Almanac
from hipscat.inspection.almanac_info import AlmanacInfo

# lsdb: lazy catalog loading, spatial filters and cross-matching.
import lsdb
from lsdb.core.search import BoxSearch, ConeSearch, PolygonSearch

# Show the installed lsdb version as the cell output.
f"Version of lsdb is {lsdb.__version__}"

In [None]:
import dask.distributed

# Start a Dask client with 2 workers, 1 thread per worker and a memory limit
# of "8Gb" (dask applies `memory_limit` per worker).
# NOTE(review): the instance is bound to the capitalised name `Client`, which
# reads like the class; the name is kept because other cells may refer to it.
Client = dask.distributed.Client(
    n_workers=2,
    threads_per_worker=1,
    memory_limit="8Gb",
)

# Display the client summary as the cell output.
Client

In [None]:
# Alternative HTTPS locations of the catalogs, kept for reference in case you
# want to read them over the internet instead of from S3:
#
# Gaia
# gaia_path = "https://epyc.astro.washington.edu/~lincc-frameworks/hipscat_surveys/gaia_dr3/gaia"
#
# ZTF
# ztf_object_path = "https://epyc.astro.washington.edu/~lincc-frameworks/hipscat_surveys/ztf/ztf_dr14/"
# ztf_source_path = "https://epyc.astro.washington.edu/~lincc-frameworks/hipscat_surveys/ztf/ztf_source/"

# The notebook itself reads the catalogs from this AWS S3 bucket.
BUCKET = "irsa-mast-tike-spitzer-data"
S3_DATA_ROOT = f"s3://{BUCKET}/data"

# Gaia DR3: the object catalog and its 10-arcsec margin catalog live side by side.
GAIA_BASE = f"{S3_DATA_ROOT}/GAIA/dr3/gaiasource/hipscat"
GAIA_OBJECT_PATH = f"{GAIA_BASE}/gaia-dr3-gaiasource-hipscat"
GAIA_OBJECT_MARGIN_PATH = f"{GAIA_OBJECT_PATH}_margin_10arcsec"

# ZTF DR20: object catalog and light-curve ("lc") source catalog.
ZTF_BASE = f"{S3_DATA_ROOT}/ZTF/dr20"
ZTF_OBJECT_PATH = f"{ZTF_BASE}/objects/hipscat/ztf-dr20-objects-hipscat"
ZTF_SOURCE_PATH = f"{ZTF_BASE}/lc/hipscat/ztf-dr20-lc-hipscat"

In [None]:
%%time

# Load Gaia DR3 margin cache
gaia_margin = lsdb.read_hipscat(GAIA_OBJECT_MARGIN_PATH)
# Load lite version of Gaia DR3 for quick cone searches
# we load only ra, dec columns, and small sections of the sky
gaia_lite = lsdb.read_hipscat(
    GAIA_OBJECT_PATH,
    columns=["ra", "dec"],
    margin_cache=gaia_margin,
    search_filter=ConeSearch(ra=-60, dec=20, radius_arcsec=1 * 3600),
)
gaia_cone = lsdb.read_hipscat(
    GAIA_OBJECT_PATH,
    margin_cache=gaia_margin,
    search_filter=ConeSearch(ra=-60, dec=20, radius_arcsec=1 * 3600),
)
# This is loading the full GAIA catalog
gaia = lsdb.read_hipscat(GAIA_OBJECT_PATH, margin_cache=gaia_margin)

# This is loading the full ZTF catalog
ztf = lsdb.read_hipscat(ZTF_OBJECT_PATH)
ztf_source = lsdb.read_hipscat(ZTF_SOURCE_PATH, columns=["objra", "objdec"])

In [None]:
# Plot the pixel density maps for the full Gaia catalog and for both ZTF
# catalogs (objects and sources). Each call takes the catalog's `hc_structure`.

plot_pixels(gaia.hc_structure)
plot_pixels(ztf.hc_structure)
plot_pixels(ztf_source.hc_structure)

In [None]:
%%time
# Cross-match the ZTF object catalog (left) with the ra/dec-only, cone-restricted
# Gaia catalog (right). The result is only lazily loaded at this point, so this
# cell mostly measures the cost of setting up the cross-match.
xmatch_object = ztf.crossmatch(gaia_lite)  # gaia_lite on the right because it has a margin cache
# Alternative with Gaia on the left:
# xmatch_object = gaia_lite.crossmatch(ztf)
# Display the lazy cross-match object as the cell output.
xmatch_object

In [None]:
%%time
# Bring the first 5 cross-matched rows into memory. Unlike the previous cell,
# this actually reads data (the original note says: from the first partition).
xmatch_object.head(5)

We can maintain an "almanac" of known catalogs. In this way, we can find new catalogs, or refer to them with simpler names.

In [None]:
# Directory handed to the almanac as the place to look for known catalogs.
# NOTE(review): this is an absolute path on the original author's environment;
# adjust it when running elsewhere.
ALMANAC_DIR = "/home/jovyan/efs/lincc/almanac"

almanac = Almanac(dirs=ALMANAC_DIR)
# List the catalogs known to the almanac as the cell output.
almanac.catalogs()

In [None]:
# gaia_hc = almanac.get_catalog("gaia")
# gaia = lsdb.read_hipscat(gaia_hc.catalog_base_dir)

In [None]:
%%time
# Peek at a single row of the ZTF object catalog (and time how long it takes).
ztf.head(1)

In [None]:
# List the ZTF column names straight from the lazy catalog; calling
# `head(1)` again just to read `.columns` would re-read data for no benefit.
ztf.columns

Set the suffixes of the resulting joined table explicitly, so that you can query it using the suffixed column names. The suffixes are given in (left catalog, right catalog) order:

```
... , suffixes=("_ztf", "_gaia") ...
```

In [None]:
%%time
# crossmatch ZTF + Gaia
# _all_sky_object = gaia_cone.crossmatch(ztf, suffixes=("_gaia", "_ztf")).query(
_all_sky_object = ztf.crossmatch(gaia_cone, suffixes=("_ztf", "_gaia")).query(
    "parallax_gaia > 0 and parallax_over_error_gaia > 5 and \
    teff_gspphot_gaia > 5380 and teff_gspphot_gaia < 7220 and logg_gspphot_gaia > 4.5 \
    and logg_gspphot_gaia < 4.72 and classprob_dsc_combmod_star_gaia > 0.5"
)

In [None]:
%time
# fails at 4 moment
all_sky_object = _all_sky_object.compute()

In [None]:
# Display the computed (in-memory) result of the filtered cross-match.
all_sky_object

In [None]:
# plot the distribution
# p