In [1]:
# Environment setup for this notebook.
# First remove any lsdb / hipscat already installed in the kernel's environment.
%pip uninstall -y lsdb hipscat

# Disabled alternative: install both packages from their git URLs instead of a release.
# %pip install git+http://hipscat.rubin.science
# %pip install git+http://lsdb.rubin.science

# Install an lsdb release older than 0.4; hipscat comes in as one of its dependencies.
# NOTE(review): the upper bound is presumably there because the cells below rely on
# the read_hipscat / to_hipscat API - confirm before relaxing it.
%pip install 'lsdb<0.4'

[0mNote: you may need to restart the kernel to use updated packages.
Collecting lsdb<0.4
  Using cached lsdb-0.3.0-py3-none-any.whl.metadata (5.8 kB)
Collecting dask[complete] (from lsdb<0.4)
  Downloading dask-2024.10.0-py3-none-any.whl.metadata (3.7 kB)
Collecting deprecated (from lsdb<0.4)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl.metadata (5.4 kB)
Collecting hipscat>=0.3.8 (from lsdb<0.4)
  Using cached hipscat-0.3.9-py3-none-any.whl.metadata (6.0 kB)
Collecting lsst-sphgeom (from lsdb<0.4)
  Downloading lsst_sphgeom-27.2024.4400-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting nested-dask (from lsdb<0.4)
  Using cached nested_dask-0.2.1-py3-none-any.whl.metadata (5.0 kB)
Collecting nested-pandas (from lsdb<0.4)
  Using cached nested_pandas-0.2.2-py3-none-any.whl.metadata (4.1 kB)
Collecting pyarrow (from lsdb<0.4)
  Downloading pyarrow-18.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.3 kB)
Collecting s

  Downloading aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->hipscat>=0.3.8->lsdb<0.4)
  Downloading frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp->hipscat>=0.3.8->lsdb<0.4)
  Downloading multidict-6.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)
Collecting yarl<2.0,>=1.12.0 (from aiohttp->hipscat>=0.3.8->lsdb<0.4)
  Downloading yarl-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (64 kB)
Collecting async-timeout<5.0,>=4.0 (from aiohttp->hipscat>=0.3.8->lsdb<0.4)
  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)
Collecting pyerfa>=2.0.1.1 (from astropy->hipscat>=0.3.8->lsdb<0.4)
  Downloading pyerfa-2.0.1.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.7 kB)
Collecting astropy-iers-data>=0.2024.8.27.10.28.29 

In [2]:
# Switch for the Gaia part of the pipeline: when True, later cells append '_gaia'
# to the output catalog name, cross-match ZTF against Gaia and join the Gaia
# distance catalog; when False, only the ZTF x Pan-STARRS match is built.
WITH_GAIA = True

In [3]:
# Output catalog name; runs that include Gaia get a '_gaia' suffix.
catalog_name = 'ztf_ps1' + ('_gaia' if WITH_GAIA else '')

In [4]:
%%time

from pathlib import Path

import dask.distributed
import lsdb
import pyarrow as pa
import pyarrow.dataset
import pyarrow.parquet

CPU times: user 27.3 s, sys: 2.17 s, total: 29.5 s
Wall time: 22.4 s


In [5]:
%%time

# HIPSCAT_PATH = Path('/ocean/projects/phy210048p/shared/hipscat/catalogs/')
HIPSCAT_PATH = Path('/data3/epyc/data3/hipscat/catalogs/')

PS1_BANDS = 'riz'

ztf_dr17_coord = lsdb.read_hipscat(
    'hipscat/ztf_dr17_coord',
    margin_cache='hipscat/ztf_dr17_coord_2arcsec',
).query('filter == 2')  # r-band only
gaia_distances = lsdb.read_hipscat(
    HIPSCAT_PATH / 'gaia_dr3' / 'gaia_edr3_distances',
    margin_cache = str(HIPSCAT_PATH / 'gaia_dr3' / 'gaia_edr3_distances_10arcs'),
    # Remove RA and Dec - we have them from the main Gaia catalog
    columns=[
        'source_id',
        'r_med_geo', 'r_lo_geo', 'r_hi_geo',
        'r_med_photogeo', 'r_lo_photogeo', 'r_hi_photogeo',
        'flag',
    ]
)
gaia = lsdb.read_hipscat(
    HIPSCAT_PATH / 'gaia_dr3' / 'gaia',
    # We must do str() due to
    # https://github.com/astronomy-commons/lsdb/issues/380
    margin_cache=str(HIPSCAT_PATH / 'gaia_dr3' / 'gaia_10arcs'),
    columns=[
        'ra', 'dec',
        'source_id',
        'ruwe',
        'parallax', 'parallax_over_error',
        'pmra', 'pmdec', 'pmra_error', 'pmdec_error',
        'teff_gspphot', 'teff_gspphot_lower', 'teff_gspphot_upper',
        'logg_gspphot', 'logg_gspphot_lower', 'logg_gspphot_upper',
        # 'ag_gspphot', 'ag_gspphot_lower', 'ag_gspphot_upper',
    ],
).query(
    "teff_gspphot_upper < 3800"
    " and parallax_over_error > 3"

    # " and ruwe < 1.4"
    # "parallax_over_error > 10.0"
    # " and teff_gspphot_upper < 3800"
    # " and (teff_gspphot_upper - teff_gspphot_lower) < 400"
    # " and logg_gspphot_lower > 4.5"
    # " and (logg_gspphot_upper - logg_gspphot_lower) < 0.2"
)
panstarrs = lsdb.read_hipscat(
    HIPSCAT_PATH / 'ps1' / 'ps1_otmo',
    margin_cache=str(HIPSCAT_PATH / 'ps1' / 'ps1_otmo_10arcs'),
    columns=
        ['raMean', 'decMean']
        + [f'{b}MeanPSFMag' for b in PS1_BANDS]
        + [f'{b}MeanPSFMagErr' for b in PS1_BANDS],
).query(
    "((rMeanPSFMag - iMeanPSFMag) + (rMeanPSFMagErr + iMeanPSFMagErr)) > 0.42"
    " and ((iMeanPSFMag - zMeanPSFMag) + (iMeanPSFMagErr + zMeanPSFMagErr)) > 0.23"
    " and rMeanPSFMagErr < 0.1 and iMeanPSFMagErr < 0.1 and zMeanPSFMagErr < 0.1"
)



CPU times: user 10.3 s, sys: 858 ms, total: 11.2 s
Wall time: 11.9 s


In [6]:
%%time

with dask.distributed.Client(
        n_workers=64,
        memory_limit='4GB',
        threads_per_worker=1
) as client:
    display(client)
    print(ztf_dr17_coord['oid'].shape[0].compute())

Perhaps you already have a cluster running?
Hosting the HTTP server on port 38491 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:38491/status,

0,1
Dashboard: http://127.0.0.1:38491/status,Workers: 64
Total threads: 64,Total memory: 238.42 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:38843,Workers: 64
Dashboard: http://127.0.0.1:38491/status,Total threads: 64
Started: Just now,Total memory: 238.42 GiB

0,1
Comm: tcp://127.0.0.1:46388,Total threads: 1
Dashboard: http://127.0.0.1:32930/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:39548,
Local directory: /tmp/dask-scratch-space-1401309/worker-qr1blymx,Local directory: /tmp/dask-scratch-space-1401309/worker-qr1blymx

0,1
Comm: tcp://127.0.0.1:42337,Total threads: 1
Dashboard: http://127.0.0.1:43894/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:45899,
Local directory: /tmp/dask-scratch-space-1401309/worker-3ibscw7y,Local directory: /tmp/dask-scratch-space-1401309/worker-3ibscw7y

0,1
Comm: tcp://127.0.0.1:34934,Total threads: 1
Dashboard: http://127.0.0.1:40730/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:33790,
Local directory: /tmp/dask-scratch-space-1401309/worker-dit0ae5e,Local directory: /tmp/dask-scratch-space-1401309/worker-dit0ae5e

0,1
Comm: tcp://127.0.0.1:43152,Total threads: 1
Dashboard: http://127.0.0.1:46386/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:34593,
Local directory: /tmp/dask-scratch-space-1401309/worker-bhb9ts5v,Local directory: /tmp/dask-scratch-space-1401309/worker-bhb9ts5v

0,1
Comm: tcp://127.0.0.1:33793,Total threads: 1
Dashboard: http://127.0.0.1:43987/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:46292,
Local directory: /tmp/dask-scratch-space-1401309/worker-zf2_64hu,Local directory: /tmp/dask-scratch-space-1401309/worker-zf2_64hu

0,1
Comm: tcp://127.0.0.1:41158,Total threads: 1
Dashboard: http://127.0.0.1:42104/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:36928,
Local directory: /tmp/dask-scratch-space-1401309/worker-2p8ncmac,Local directory: /tmp/dask-scratch-space-1401309/worker-2p8ncmac

0,1
Comm: tcp://127.0.0.1:46538,Total threads: 1
Dashboard: http://127.0.0.1:35415/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:41406,
Local directory: /tmp/dask-scratch-space-1401309/worker-c8qgo0im,Local directory: /tmp/dask-scratch-space-1401309/worker-c8qgo0im

0,1
Comm: tcp://127.0.0.1:41316,Total threads: 1
Dashboard: http://127.0.0.1:43380/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:40832,
Local directory: /tmp/dask-scratch-space-1401309/worker-44jjhcbd,Local directory: /tmp/dask-scratch-space-1401309/worker-44jjhcbd

0,1
Comm: tcp://127.0.0.1:41616,Total threads: 1
Dashboard: http://127.0.0.1:33610/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:42812,
Local directory: /tmp/dask-scratch-space-1401309/worker-fo1t7cnd,Local directory: /tmp/dask-scratch-space-1401309/worker-fo1t7cnd

0,1
Comm: tcp://127.0.0.1:37166,Total threads: 1
Dashboard: http://127.0.0.1:46609/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:37740,
Local directory: /tmp/dask-scratch-space-1401309/worker-rk7jbbcn,Local directory: /tmp/dask-scratch-space-1401309/worker-rk7jbbcn

0,1
Comm: tcp://127.0.0.1:45940,Total threads: 1
Dashboard: http://127.0.0.1:40286/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:37125,
Local directory: /tmp/dask-scratch-space-1401309/worker-dds29tir,Local directory: /tmp/dask-scratch-space-1401309/worker-dds29tir

0,1
Comm: tcp://127.0.0.1:35367,Total threads: 1
Dashboard: http://127.0.0.1:41387/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:42320,
Local directory: /tmp/dask-scratch-space-1401309/worker-bgtka20k,Local directory: /tmp/dask-scratch-space-1401309/worker-bgtka20k

0,1
Comm: tcp://127.0.0.1:37801,Total threads: 1
Dashboard: http://127.0.0.1:40910/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:32934,
Local directory: /tmp/dask-scratch-space-1401309/worker-divsercx,Local directory: /tmp/dask-scratch-space-1401309/worker-divsercx

0,1
Comm: tcp://127.0.0.1:39782,Total threads: 1
Dashboard: http://127.0.0.1:34052/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:46199,
Local directory: /tmp/dask-scratch-space-1401309/worker-9fuu4i1d,Local directory: /tmp/dask-scratch-space-1401309/worker-9fuu4i1d

0,1
Comm: tcp://127.0.0.1:40590,Total threads: 1
Dashboard: http://127.0.0.1:40414/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:34151,
Local directory: /tmp/dask-scratch-space-1401309/worker-tksuq0kl,Local directory: /tmp/dask-scratch-space-1401309/worker-tksuq0kl

0,1
Comm: tcp://127.0.0.1:37693,Total threads: 1
Dashboard: http://127.0.0.1:41058/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35685,
Local directory: /tmp/dask-scratch-space-1401309/worker-a544i5sw,Local directory: /tmp/dask-scratch-space-1401309/worker-a544i5sw

0,1
Comm: tcp://127.0.0.1:37759,Total threads: 1
Dashboard: http://127.0.0.1:42573/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:39281,
Local directory: /tmp/dask-scratch-space-1401309/worker-r2hegro8,Local directory: /tmp/dask-scratch-space-1401309/worker-r2hegro8

0,1
Comm: tcp://127.0.0.1:34260,Total threads: 1
Dashboard: http://127.0.0.1:34285/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:41399,
Local directory: /tmp/dask-scratch-space-1401309/worker-42hn8m1i,Local directory: /tmp/dask-scratch-space-1401309/worker-42hn8m1i

0,1
Comm: tcp://127.0.0.1:36557,Total threads: 1
Dashboard: http://127.0.0.1:37597/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:40322,
Local directory: /tmp/dask-scratch-space-1401309/worker-q_b2pi8g,Local directory: /tmp/dask-scratch-space-1401309/worker-q_b2pi8g

0,1
Comm: tcp://127.0.0.1:38689,Total threads: 1
Dashboard: http://127.0.0.1:44463/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35841,
Local directory: /tmp/dask-scratch-space-1401309/worker-rqbyt3jm,Local directory: /tmp/dask-scratch-space-1401309/worker-rqbyt3jm

0,1
Comm: tcp://127.0.0.1:37346,Total threads: 1
Dashboard: http://127.0.0.1:41613/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:44538,
Local directory: /tmp/dask-scratch-space-1401309/worker-dene6sme,Local directory: /tmp/dask-scratch-space-1401309/worker-dene6sme

0,1
Comm: tcp://127.0.0.1:42253,Total threads: 1
Dashboard: http://127.0.0.1:43760/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:40781,
Local directory: /tmp/dask-scratch-space-1401309/worker-arnkriw4,Local directory: /tmp/dask-scratch-space-1401309/worker-arnkriw4

0,1
Comm: tcp://127.0.0.1:33892,Total threads: 1
Dashboard: http://127.0.0.1:36598/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:44661,
Local directory: /tmp/dask-scratch-space-1401309/worker-ujnvu3nj,Local directory: /tmp/dask-scratch-space-1401309/worker-ujnvu3nj

0,1
Comm: tcp://127.0.0.1:37772,Total threads: 1
Dashboard: http://127.0.0.1:33588/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:43644,
Local directory: /tmp/dask-scratch-space-1401309/worker-cxcc_il1,Local directory: /tmp/dask-scratch-space-1401309/worker-cxcc_il1

0,1
Comm: tcp://127.0.0.1:34035,Total threads: 1
Dashboard: http://127.0.0.1:38692/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:32875,
Local directory: /tmp/dask-scratch-space-1401309/worker-i39hjli3,Local directory: /tmp/dask-scratch-space-1401309/worker-i39hjli3

0,1
Comm: tcp://127.0.0.1:39624,Total threads: 1
Dashboard: http://127.0.0.1:33015/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:40389,
Local directory: /tmp/dask-scratch-space-1401309/worker-m3ll_0ke,Local directory: /tmp/dask-scratch-space-1401309/worker-m3ll_0ke

0,1
Comm: tcp://127.0.0.1:44665,Total threads: 1
Dashboard: http://127.0.0.1:45497/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:37529,
Local directory: /tmp/dask-scratch-space-1401309/worker-bqkiyg7q,Local directory: /tmp/dask-scratch-space-1401309/worker-bqkiyg7q

0,1
Comm: tcp://127.0.0.1:33014,Total threads: 1
Dashboard: http://127.0.0.1:44244/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:41207,
Local directory: /tmp/dask-scratch-space-1401309/worker-_m4ss2j2,Local directory: /tmp/dask-scratch-space-1401309/worker-_m4ss2j2

0,1
Comm: tcp://127.0.0.1:39827,Total threads: 1
Dashboard: http://127.0.0.1:42840/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35266,
Local directory: /tmp/dask-scratch-space-1401309/worker-lovaghsd,Local directory: /tmp/dask-scratch-space-1401309/worker-lovaghsd

0,1
Comm: tcp://127.0.0.1:42914,Total threads: 1
Dashboard: http://127.0.0.1:36764/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:45051,
Local directory: /tmp/dask-scratch-space-1401309/worker-0j1hpbar,Local directory: /tmp/dask-scratch-space-1401309/worker-0j1hpbar

0,1
Comm: tcp://127.0.0.1:41632,Total threads: 1
Dashboard: http://127.0.0.1:37261/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:32851,
Local directory: /tmp/dask-scratch-space-1401309/worker-m3kqop69,Local directory: /tmp/dask-scratch-space-1401309/worker-m3kqop69

0,1
Comm: tcp://127.0.0.1:38222,Total threads: 1
Dashboard: http://127.0.0.1:37494/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:38457,
Local directory: /tmp/dask-scratch-space-1401309/worker-05rc382i,Local directory: /tmp/dask-scratch-space-1401309/worker-05rc382i

0,1
Comm: tcp://127.0.0.1:37989,Total threads: 1
Dashboard: http://127.0.0.1:40455/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:36777,
Local directory: /tmp/dask-scratch-space-1401309/worker-do92wqc3,Local directory: /tmp/dask-scratch-space-1401309/worker-do92wqc3

0,1
Comm: tcp://127.0.0.1:46841,Total threads: 1
Dashboard: http://127.0.0.1:44651/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35387,
Local directory: /tmp/dask-scratch-space-1401309/worker-suxmdr3m,Local directory: /tmp/dask-scratch-space-1401309/worker-suxmdr3m

0,1
Comm: tcp://127.0.0.1:37926,Total threads: 1
Dashboard: http://127.0.0.1:40563/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:43964,
Local directory: /tmp/dask-scratch-space-1401309/worker-qre0xdxj,Local directory: /tmp/dask-scratch-space-1401309/worker-qre0xdxj

0,1
Comm: tcp://127.0.0.1:44842,Total threads: 1
Dashboard: http://127.0.0.1:42699/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:45278,
Local directory: /tmp/dask-scratch-space-1401309/worker-bqmhqoa5,Local directory: /tmp/dask-scratch-space-1401309/worker-bqmhqoa5

0,1
Comm: tcp://127.0.0.1:42185,Total threads: 1
Dashboard: http://127.0.0.1:43814/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:39302,
Local directory: /tmp/dask-scratch-space-1401309/worker-xqnf_bs4,Local directory: /tmp/dask-scratch-space-1401309/worker-xqnf_bs4

0,1
Comm: tcp://127.0.0.1:43851,Total threads: 1
Dashboard: http://127.0.0.1:34627/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:43428,
Local directory: /tmp/dask-scratch-space-1401309/worker-qfje_kg8,Local directory: /tmp/dask-scratch-space-1401309/worker-qfje_kg8

0,1
Comm: tcp://127.0.0.1:36737,Total threads: 1
Dashboard: http://127.0.0.1:34893/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:46483,
Local directory: /tmp/dask-scratch-space-1401309/worker-zkv1g7pf,Local directory: /tmp/dask-scratch-space-1401309/worker-zkv1g7pf

0,1
Comm: tcp://127.0.0.1:44347,Total threads: 1
Dashboard: http://127.0.0.1:40805/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:38844,
Local directory: /tmp/dask-scratch-space-1401309/worker-wacjvmbf,Local directory: /tmp/dask-scratch-space-1401309/worker-wacjvmbf

0,1
Comm: tcp://127.0.0.1:39986,Total threads: 1
Dashboard: http://127.0.0.1:45981/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:39715,
Local directory: /tmp/dask-scratch-space-1401309/worker-rntvvk0i,Local directory: /tmp/dask-scratch-space-1401309/worker-rntvvk0i

0,1
Comm: tcp://127.0.0.1:45566,Total threads: 1
Dashboard: http://127.0.0.1:39559/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35951,
Local directory: /tmp/dask-scratch-space-1401309/worker-z1_hlrt0,Local directory: /tmp/dask-scratch-space-1401309/worker-z1_hlrt0

0,1
Comm: tcp://127.0.0.1:43495,Total threads: 1
Dashboard: http://127.0.0.1:43856/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35639,
Local directory: /tmp/dask-scratch-space-1401309/worker-c9b1vmqj,Local directory: /tmp/dask-scratch-space-1401309/worker-c9b1vmqj

0,1
Comm: tcp://127.0.0.1:46396,Total threads: 1
Dashboard: http://127.0.0.1:46205/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:42665,
Local directory: /tmp/dask-scratch-space-1401309/worker-z2pcvjce,Local directory: /tmp/dask-scratch-space-1401309/worker-z2pcvjce

0,1
Comm: tcp://127.0.0.1:41070,Total threads: 1
Dashboard: http://127.0.0.1:44393/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:43053,
Local directory: /tmp/dask-scratch-space-1401309/worker-mq7ui0og,Local directory: /tmp/dask-scratch-space-1401309/worker-mq7ui0og

0,1
Comm: tcp://127.0.0.1:41324,Total threads: 1
Dashboard: http://127.0.0.1:39239/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:38503,
Local directory: /tmp/dask-scratch-space-1401309/worker-6u92usci,Local directory: /tmp/dask-scratch-space-1401309/worker-6u92usci

0,1
Comm: tcp://127.0.0.1:42423,Total threads: 1
Dashboard: http://127.0.0.1:46363/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:41195,
Local directory: /tmp/dask-scratch-space-1401309/worker-ku3yhbro,Local directory: /tmp/dask-scratch-space-1401309/worker-ku3yhbro

0,1
Comm: tcp://127.0.0.1:35157,Total threads: 1
Dashboard: http://127.0.0.1:42159/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:37382,
Local directory: /tmp/dask-scratch-space-1401309/worker-vihf2h80,Local directory: /tmp/dask-scratch-space-1401309/worker-vihf2h80

0,1
Comm: tcp://127.0.0.1:37243,Total threads: 1
Dashboard: http://127.0.0.1:38945/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:37277,
Local directory: /tmp/dask-scratch-space-1401309/worker-2uc_kwsk,Local directory: /tmp/dask-scratch-space-1401309/worker-2uc_kwsk

0,1
Comm: tcp://127.0.0.1:42225,Total threads: 1
Dashboard: http://127.0.0.1:46074/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:37734,
Local directory: /tmp/dask-scratch-space-1401309/worker-j2wrgo19,Local directory: /tmp/dask-scratch-space-1401309/worker-j2wrgo19

0,1
Comm: tcp://127.0.0.1:36729,Total threads: 1
Dashboard: http://127.0.0.1:41203/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:44925,
Local directory: /tmp/dask-scratch-space-1401309/worker-z4ryec_0,Local directory: /tmp/dask-scratch-space-1401309/worker-z4ryec_0

0,1
Comm: tcp://127.0.0.1:44085,Total threads: 1
Dashboard: http://127.0.0.1:33622/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35637,
Local directory: /tmp/dask-scratch-space-1401309/worker-cg1u7pn4,Local directory: /tmp/dask-scratch-space-1401309/worker-cg1u7pn4

0,1
Comm: tcp://127.0.0.1:34409,Total threads: 1
Dashboard: http://127.0.0.1:33012/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:40162,
Local directory: /tmp/dask-scratch-space-1401309/worker-zm5u__1g,Local directory: /tmp/dask-scratch-space-1401309/worker-zm5u__1g

0,1
Comm: tcp://127.0.0.1:36959,Total threads: 1
Dashboard: http://127.0.0.1:41680/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:40113,
Local directory: /tmp/dask-scratch-space-1401309/worker-maynb3ty,Local directory: /tmp/dask-scratch-space-1401309/worker-maynb3ty

0,1
Comm: tcp://127.0.0.1:34527,Total threads: 1
Dashboard: http://127.0.0.1:41082/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:43593,
Local directory: /tmp/dask-scratch-space-1401309/worker-alp06q4o,Local directory: /tmp/dask-scratch-space-1401309/worker-alp06q4o

0,1
Comm: tcp://127.0.0.1:33181,Total threads: 1
Dashboard: http://127.0.0.1:37707/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:43662,
Local directory: /tmp/dask-scratch-space-1401309/worker-g3h0ls8z,Local directory: /tmp/dask-scratch-space-1401309/worker-g3h0ls8z

0,1
Comm: tcp://127.0.0.1:44425,Total threads: 1
Dashboard: http://127.0.0.1:34769/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:33030,
Local directory: /tmp/dask-scratch-space-1401309/worker-xvnhhd64,Local directory: /tmp/dask-scratch-space-1401309/worker-xvnhhd64

0,1
Comm: tcp://127.0.0.1:46413,Total threads: 1
Dashboard: http://127.0.0.1:36726/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:43657,
Local directory: /tmp/dask-scratch-space-1401309/worker-if5usgkk,Local directory: /tmp/dask-scratch-space-1401309/worker-if5usgkk

0,1
Comm: tcp://127.0.0.1:36857,Total threads: 1
Dashboard: http://127.0.0.1:40368/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:39173,
Local directory: /tmp/dask-scratch-space-1401309/worker-ley71aqx,Local directory: /tmp/dask-scratch-space-1401309/worker-ley71aqx

0,1
Comm: tcp://127.0.0.1:42386,Total threads: 1
Dashboard: http://127.0.0.1:39015/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:33735,
Local directory: /tmp/dask-scratch-space-1401309/worker-k9z23z5a,Local directory: /tmp/dask-scratch-space-1401309/worker-k9z23z5a

0,1
Comm: tcp://127.0.0.1:36195,Total threads: 1
Dashboard: http://127.0.0.1:41249/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:43215,
Local directory: /tmp/dask-scratch-space-1401309/worker-og0okdj6,Local directory: /tmp/dask-scratch-space-1401309/worker-og0okdj6

0,1
Comm: tcp://127.0.0.1:34267,Total threads: 1
Dashboard: http://127.0.0.1:44678/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:33490,
Local directory: /tmp/dask-scratch-space-1401309/worker-l1ff0jq8,Local directory: /tmp/dask-scratch-space-1401309/worker-l1ff0jq8

0,1
Comm: tcp://127.0.0.1:42526,Total threads: 1
Dashboard: http://127.0.0.1:39531/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:34234,
Local directory: /tmp/dask-scratch-space-1401309/worker-h34pgwge,Local directory: /tmp/dask-scratch-space-1401309/worker-h34pgwge

0,1
Comm: tcp://127.0.0.1:39798,Total threads: 1
Dashboard: http://127.0.0.1:40905/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:38207,
Local directory: /tmp/dask-scratch-space-1401309/worker-hx6_rlez,Local directory: /tmp/dask-scratch-space-1401309/worker-hx6_rlez


2274767552




CPU times: user 7min 10s, sys: 4min 54s, total: 12min 4s
Wall time: 14min 49s


In [7]:
%%time

if WITH_GAIA:
    catalog = ztf_dr17_coord.crossmatch(
        gaia,
        radius_arcsec=1,
        n_neighbors=1,
        suffixes=['', '_gaia'],
    )
else:
    catalog = ztf_dr17_coord
    
catalog = catalog.crossmatch(
    panstarrs,
    radius_arcsec=1,
    n_neighbors=1,
    suffixes=['', ''],
)

# We must do join after cross-match to not lose margin cache due to
# https://github.com/astronomy-commons/lsdb/issues/382
if WITH_GAIA:
    catalog = catalog.join(gaia_distances, left_on='source_id_gaia', right_on='source_id', suffixes=['', '_edr3dist'])

CPU times: user 35min 37s, sys: 3min 36s, total: 39min 13s
Wall time: 39min 14s


In [None]:
%%time

with dask.distributed.Client(
        n_workers=16,
        memory_limit='16GB',
        threads_per_worker=1
) as client:
    display(client)
    catalog.to_hipscat(catalog_name)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 45429 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:45429/status,

0,1
Dashboard: http://127.0.0.1:45429/status,Workers: 16
Total threads: 16,Total memory: 238.42 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:37522,Workers: 16
Dashboard: http://127.0.0.1:45429/status,Total threads: 16
Started: Just now,Total memory: 238.42 GiB

0,1
Comm: tcp://127.0.0.1:45648,Total threads: 1
Dashboard: http://127.0.0.1:35622/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:45399,
Local directory: /tmp/dask-scratch-space-1401309/worker-s4si9omf,Local directory: /tmp/dask-scratch-space-1401309/worker-s4si9omf

0,1
Comm: tcp://127.0.0.1:41193,Total threads: 1
Dashboard: http://127.0.0.1:34509/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:33402,
Local directory: /tmp/dask-scratch-space-1401309/worker-5jz_pou0,Local directory: /tmp/dask-scratch-space-1401309/worker-5jz_pou0

0,1
Comm: tcp://127.0.0.1:33357,Total threads: 1
Dashboard: http://127.0.0.1:46699/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:34915,
Local directory: /tmp/dask-scratch-space-1401309/worker-kr_fa_ar,Local directory: /tmp/dask-scratch-space-1401309/worker-kr_fa_ar

0,1
Comm: tcp://127.0.0.1:33131,Total threads: 1
Dashboard: http://127.0.0.1:44805/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:35226,
Local directory: /tmp/dask-scratch-space-1401309/worker-bmykzgx8,Local directory: /tmp/dask-scratch-space-1401309/worker-bmykzgx8

0,1
Comm: tcp://127.0.0.1:44287,Total threads: 1
Dashboard: http://127.0.0.1:35158/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:45069,
Local directory: /tmp/dask-scratch-space-1401309/worker-t70n9yf2,Local directory: /tmp/dask-scratch-space-1401309/worker-t70n9yf2

0,1
Comm: tcp://127.0.0.1:35410,Total threads: 1
Dashboard: http://127.0.0.1:35556/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:42178,
Local directory: /tmp/dask-scratch-space-1401309/worker-ospaw6xx,Local directory: /tmp/dask-scratch-space-1401309/worker-ospaw6xx

0,1
Comm: tcp://127.0.0.1:37535,Total threads: 1
Dashboard: http://127.0.0.1:42653/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:37142,
Local directory: /tmp/dask-scratch-space-1401309/worker-bu6bs0ca,Local directory: /tmp/dask-scratch-space-1401309/worker-bu6bs0ca

0,1
Comm: tcp://127.0.0.1:44174,Total threads: 1
Dashboard: http://127.0.0.1:44307/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:41831,
Local directory: /tmp/dask-scratch-space-1401309/worker-4ywl7q9s,Local directory: /tmp/dask-scratch-space-1401309/worker-4ywl7q9s

0,1
Comm: tcp://127.0.0.1:37344,Total threads: 1
Dashboard: http://127.0.0.1:45046/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:44407,
Local directory: /tmp/dask-scratch-space-1401309/worker-fd5qlnh6,Local directory: /tmp/dask-scratch-space-1401309/worker-fd5qlnh6

0,1
Comm: tcp://127.0.0.1:45738,Total threads: 1
Dashboard: http://127.0.0.1:37989/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:44181,
Local directory: /tmp/dask-scratch-space-1401309/worker-7xuukmg6,Local directory: /tmp/dask-scratch-space-1401309/worker-7xuukmg6

0,1
Comm: tcp://127.0.0.1:39634,Total threads: 1
Dashboard: http://127.0.0.1:38719/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:41362,
Local directory: /tmp/dask-scratch-space-1401309/worker-eoqgu41f,Local directory: /tmp/dask-scratch-space-1401309/worker-eoqgu41f

0,1
Comm: tcp://127.0.0.1:44108,Total threads: 1
Dashboard: http://127.0.0.1:45834/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:38911,
Local directory: /tmp/dask-scratch-space-1401309/worker-36ixu9m_,Local directory: /tmp/dask-scratch-space-1401309/worker-36ixu9m_

0,1
Comm: tcp://127.0.0.1:42396,Total threads: 1
Dashboard: http://127.0.0.1:36769/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:46075,
Local directory: /tmp/dask-scratch-space-1401309/worker-w1in7v5i,Local directory: /tmp/dask-scratch-space-1401309/worker-w1in7v5i

0,1
Comm: tcp://127.0.0.1:40489,Total threads: 1
Dashboard: http://127.0.0.1:41950/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:39969,
Local directory: /tmp/dask-scratch-space-1401309/worker-pzoymzws,Local directory: /tmp/dask-scratch-space-1401309/worker-pzoymzws

0,1
Comm: tcp://127.0.0.1:33984,Total threads: 1
Dashboard: http://127.0.0.1:45530/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:44081,
Local directory: /tmp/dask-scratch-space-1401309/worker-cczgz0of,Local directory: /tmp/dask-scratch-space-1401309/worker-cczgz0of

0,1
Comm: tcp://127.0.0.1:33844,Total threads: 1
Dashboard: http://127.0.0.1:36502/status,Memory: 14.90 GiB
Nanny: tcp://127.0.0.1:37958,
Local directory: /tmp/dask-scratch-space-1401309/worker-c7moqbdo,Local directory: /tmp/dask-scratch-space-1401309/worker-c7moqbdo


This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.


In [11]:
%%time

dataset = pyarrow.dataset.parquet_dataset(
    Path(catalog_name) / '_metadata',
).sort_by('oid')
pa.dataset.write_dataset(
    dataset,
    f"{catalog_name}_sorted",
    format='parquet',
    partitioning=pyarrow.dataset.partitioning(
        flavor="filename",
        schema=pyarrow.schema([dataset.schema.field('fieldid')]),
    ),
)

CPU times: user 8min 12s, sys: 3min 15s, total: 11min 27s
Wall time: 3min 45s
