In [1]:
# Uncomment and run once to install the dependencies:
# %pip install -U lsdb hats-import

Collecting hats-import
  Downloading hats_import-0.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting dask>=2025.3.0 (from dask[complete]>=2025.3.0->lsdb)
  Downloading dask-2025.3.0-py3-none-any.whl.metadata (3.8 kB)
Collecting distributed==2025.3.0 (from dask>=2025.3.0->dask[complete]>=2025.3.0->lsdb)
  Downloading distributed-2025.3.0-py3-none-any.whl.metadata (3.4 kB)
Downloading hats_import-0.5.2-py3-none-any.whl (55 kB)
Downloading dask-2025.3.0-py3-none-any.whl (1.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 8.6 MB/s eta 0:00:00
Downloading distributed-2025.3.0-py3-none-any.whl (1.0 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 5.6 MB/s eta 0:00:00
Installing collected packages: dask, distributed, hats-import
  Attempting uninstall: dask
    Found existing installation: dask 2025.4.1
    Uninstalling dask-2025.4.1:
      Successfully uninstalled dask-2025.4.1
 

In [2]:
from functools import cache
from pathlib import Path

import lsdb
import pandas as pd
from dask.distributed import Client
from hats_import.pipeline import pipeline_with_client
from hats_import.catalog.arguments import ImportArguments

In [3]:
# Memoize the result so that repeated calls do not hit the SNAD server again
@cache
def get_tns_df():
    """Fetch the full TNS table from the SNAD server API as a pandas DataFrame.

    The "coord" column is dropped, "declination" is renamed to "dec", and an
    integer "discoveryyear" column is added, taken from the first four
    characters of "discoverydate", to make filtering by discovery year simpler.

    Because of ``@cache`` every call returns the same DataFrame object.
    """
    tns_table = (
        pd.read_json("https://tns.snad.space/api/v1/all")
        .drop(columns="coord")
        .rename(columns={"declination": "dec"})
    )
    # Quick hack: the year is the leading four characters of the date string
    tns_table["discoveryyear"] = tns_table["discoverydate"].str[:4].astype(int)
    return tns_table

In [4]:
# Wrap the TNS table into an lsdb.Catalog and keep only the objects
# whose discovery year is between 2018 and 2023 (inclusive)
tns = lsdb.from_dataframe(get_tns_df()).query('2018 <= discoveryyear <= 2023')

In [5]:
# Load the metadata of the ZTF alerts (ALeRCE) catalog
raw_catalog = lsdb.read_hats('https://data.lsdb.io/hats/alerce/')
display(raw_catalog)


def _nest_prefixed(catalog, prefix):
    """Pack all columns of `catalog` named "<prefix>_*" into one nested column `prefix`.

    Every column whose name does not start with "<prefix>_" is passed to
    `nest_lists` as a base column.
    """
    marker = f"{prefix}_"
    kept = [column for column in catalog.columns if not column.startswith(marker)]
    return catalog.nest_lists(base_columns=kept, name=prefix)


# "Nest" the list-columns step by step:
# light curves -> "lc", non-detections -> "nondet", ZTF references -> "ref"
catalog_with_lc = _nest_prefixed(raw_catalog, "lc")
catalog_with_nondet = _nest_prefixed(catalog_with_lc, "nondet")
alerce = _nest_prefixed(catalog_with_nondet, "ref")

alerce

Unnamed: 0_level_0,oid,mean_ra,mean_dec,lc_ra,lc_dec,lc_candid,lc_mjd,lc_fid,lc_pid,lc_diffmaglim,lc_isdiffpos,lc_nid,lc_magpsf,lc_sigmapsf,lc_magap,lc_sigmagap,lc_distnr,lc_rb,lc_rbversion,lc_drb,lc_drbversion,lc_magapbig,lc_sigmagapbig,lc_rfid,lc_magpsf_corr,lc_sigmapsf_corr,lc_sigmapsf_corr_ext,lc_corrected,lc_dubious,lc_parent_candid,lc_has_stamp,lc_step_id_corr,nondet_mjd,nondet_fid,nondet_diffmaglim,ref_rfid,ref_candid,ref_fid,ref_rcid,ref_field,ref_magnr,ref_sigmagnr,ref_chinr,ref_sharpnr,ref_ranr,ref_decnr,ref_mjdstartref,ref_mjdendref,ref_nframesref,Norder,Dir,Npix
npartitions=113,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1
"Order: 1, Pixel: 0",string[pyarrow],double[pyarrow],double[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: int64>[pyarrow],list<element: double>[pyarrow],list<element: int64>[pyarrow],list<element: int64>[pyarrow],list<element: double>[pyarrow],list<element: int64>[pyarrow],list<element: int64>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: string>[pyarrow],list<element: double>[pyarrow],list<element: string>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: bool>[pyarrow],list<element: bool>[pyarrow],list<element: double>[pyarrow],list<element: bool>[pyarrow],list<element: string>[pyarrow],list<element: double>[pyarrow],list<element: int64>[pyarrow],list<element: double>[pyarrow],list<element: int64>[pyarrow],list<element: int64>[pyarrow],list<element: int64>[pyarrow],list<element: int64>[pyarrow],list<element: int64>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: double>[pyarrow],list<element: int64>[pyarrow],int8[pyarrow],int64[pyarrow],int64[pyarrow]
"Order: 2, Pixel: 4",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 0, Pixel: 10",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 0, Pixel: 11",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


Unnamed: 0_level_0,oid,mean_ra,mean_dec,Norder,Dir,Npix,lc,nondet,ref
npartitions=113,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"Order: 1, Pixel: 0",string[pyarrow],double[pyarrow],double[pyarrow],int8[pyarrow],int64[pyarrow],int64[pyarrow],"nested<lc_ra: [double], lc_dec: [double], lc_c...","nested<nondet_mjd: [double], nondet_fid: [int6...","nested<ref_rfid: [int64], ref_candid: [int64],..."
"Order: 2, Pixel: 4",...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...
"Order: 0, Pixel: 10",...,...,...,...,...,...,...,...,...
"Order: 0, Pixel: 11",...,...,...,...,...,...,...,...,...


In [6]:
# Define the TNS x ALeRCE cross-match: one neighbour per TNS object within
# a 3 arcsec radius; suffixes are "_tns" for the left (TNS) catalog and
# empty for the right (ALeRCE) catalog
xmatch = tns.crossmatch(alerce, radius_arcsec=3.0, n_neighbors=1, suffixes=['_tns', ''])



In [7]:
%%time

# Actually do the job and write results to disk

name = 'tns_alerce'
with Client(n_workers=16, threads_per_worker=1, memory_limit='8GB') as client:
    display(client)
    xmatch.to_hats(
        Path(".") / name,  # path
        catalog_name=name,  # HATS catalog name
    )

Perhaps you already have a cluster running?
Hosting the HTTP server on port 34921 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:34921/status,

0,1
Dashboard: http://127.0.0.1:34921/status,Workers: 16
Total threads: 16,Total memory: 119.21 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:42979,Workers: 16
Dashboard: http://127.0.0.1:34921/status,Total threads: 16
Started: Just now,Total memory: 119.21 GiB

0,1
Comm: tcp://127.0.0.1:40005,Total threads: 1
Dashboard: http://127.0.0.1:36512/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:40361,
Local directory: /tmp/dask-scratch-space-1401309/worker-bib584j6,Local directory: /tmp/dask-scratch-space-1401309/worker-bib584j6

0,1
Comm: tcp://127.0.0.1:37743,Total threads: 1
Dashboard: http://127.0.0.1:32801/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:38270,
Local directory: /tmp/dask-scratch-space-1401309/worker-u3rtj908,Local directory: /tmp/dask-scratch-space-1401309/worker-u3rtj908

0,1
Comm: tcp://127.0.0.1:43288,Total threads: 1
Dashboard: http://127.0.0.1:43144/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:38153,
Local directory: /tmp/dask-scratch-space-1401309/worker-3kcvglh0,Local directory: /tmp/dask-scratch-space-1401309/worker-3kcvglh0

0,1
Comm: tcp://127.0.0.1:38645,Total threads: 1
Dashboard: http://127.0.0.1:37225/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:35990,
Local directory: /tmp/dask-scratch-space-1401309/worker-tyunl2vj,Local directory: /tmp/dask-scratch-space-1401309/worker-tyunl2vj

0,1
Comm: tcp://127.0.0.1:44673,Total threads: 1
Dashboard: http://127.0.0.1:36246/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:40919,
Local directory: /tmp/dask-scratch-space-1401309/worker-a5oxlz3_,Local directory: /tmp/dask-scratch-space-1401309/worker-a5oxlz3_

0,1
Comm: tcp://127.0.0.1:33469,Total threads: 1
Dashboard: http://127.0.0.1:38567/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:42572,
Local directory: /tmp/dask-scratch-space-1401309/worker-dx71khlq,Local directory: /tmp/dask-scratch-space-1401309/worker-dx71khlq

0,1
Comm: tcp://127.0.0.1:39521,Total threads: 1
Dashboard: http://127.0.0.1:40383/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:43276,
Local directory: /tmp/dask-scratch-space-1401309/worker-o6184xxx,Local directory: /tmp/dask-scratch-space-1401309/worker-o6184xxx

0,1
Comm: tcp://127.0.0.1:44352,Total threads: 1
Dashboard: http://127.0.0.1:41564/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:36174,
Local directory: /tmp/dask-scratch-space-1401309/worker-dcw4q5p1,Local directory: /tmp/dask-scratch-space-1401309/worker-dcw4q5p1

0,1
Comm: tcp://127.0.0.1:45835,Total threads: 1
Dashboard: http://127.0.0.1:46603/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:35919,
Local directory: /tmp/dask-scratch-space-1401309/worker-w6gg9hus,Local directory: /tmp/dask-scratch-space-1401309/worker-w6gg9hus

0,1
Comm: tcp://127.0.0.1:44390,Total threads: 1
Dashboard: http://127.0.0.1:40413/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:43474,
Local directory: /tmp/dask-scratch-space-1401309/worker-7u8lwwur,Local directory: /tmp/dask-scratch-space-1401309/worker-7u8lwwur

0,1
Comm: tcp://127.0.0.1:39400,Total threads: 1
Dashboard: http://127.0.0.1:40750/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:37599,
Local directory: /tmp/dask-scratch-space-1401309/worker-jhqoro0_,Local directory: /tmp/dask-scratch-space-1401309/worker-jhqoro0_

0,1
Comm: tcp://127.0.0.1:39054,Total threads: 1
Dashboard: http://127.0.0.1:32943/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:34715,
Local directory: /tmp/dask-scratch-space-1401309/worker-udbek23g,Local directory: /tmp/dask-scratch-space-1401309/worker-udbek23g

0,1
Comm: tcp://127.0.0.1:36228,Total threads: 1
Dashboard: http://127.0.0.1:43902/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:44525,
Local directory: /tmp/dask-scratch-space-1401309/worker-lt69jqm1,Local directory: /tmp/dask-scratch-space-1401309/worker-lt69jqm1

0,1
Comm: tcp://127.0.0.1:46822,Total threads: 1
Dashboard: http://127.0.0.1:45560/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:41536,
Local directory: /tmp/dask-scratch-space-1401309/worker-lwkz46zq,Local directory: /tmp/dask-scratch-space-1401309/worker-lwkz46zq

0,1
Comm: tcp://127.0.0.1:43052,Total threads: 1
Dashboard: http://127.0.0.1:35713/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:38316,
Local directory: /tmp/dask-scratch-space-1401309/worker-m_p066qv,Local directory: /tmp/dask-scratch-space-1401309/worker-m_p066qv

0,1
Comm: tcp://127.0.0.1:45903,Total threads: 1
Dashboard: http://127.0.0.1:34233/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:41834,
Local directory: /tmp/dask-scratch-space-1401309/worker-n3fy4j2i,Local directory: /tmp/dask-scratch-space-1401309/worker-n3fy4j2i


This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.


CPU times: user 30.8 s, sys: 12.9 s, total: 43.7 s
Wall time: 1min 26s


In [8]:
%%time

# KNOWN FAILURE - this cell is kept on purpose as a reproducer for a bug report.
# In the recorded run it aborts during the "Binning" stage with
# "TypeError: cannot unpack non-iterable numpy.int32 object".

# Resample the catalog written to ./<name> for larger parquet files,
# up to 1000 objects per file (pixel_threshold=1_000).
# The result is presumably written to ./<name>_1k - confirm against hats-import docs.

new_name = f'{name}_1k'
args = ImportArguments.reimport_from_hats(
    Path(".") / name,
    Path("."),
    output_artifact_name=new_name,
    pixel_threshold=1_000,
)

# Run the import pipeline on a fresh local Dask cluster
with Client(n_workers=16, threads_per_worker=1, memory_limit='8GB') as client:
    display(client)
    pipeline_with_client(args, client)

Validating catalog at path tns_alerce ... 
Found 113 partitions.
Approximate coverage is 97.92 % of the sky.


Perhaps you already have a cluster running?
Hosting the HTTP server on port 40948 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:40948/status,

0,1
Dashboard: http://127.0.0.1:40948/status,Workers: 16
Total threads: 16,Total memory: 119.21 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:45411,Workers: 16
Dashboard: http://127.0.0.1:40948/status,Total threads: 16
Started: Just now,Total memory: 119.21 GiB

0,1
Comm: tcp://127.0.0.1:35430,Total threads: 1
Dashboard: http://127.0.0.1:39049/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:37367,
Local directory: /tmp/dask-scratch-space-1401309/worker-1u4kil_b,Local directory: /tmp/dask-scratch-space-1401309/worker-1u4kil_b

0,1
Comm: tcp://127.0.0.1:46012,Total threads: 1
Dashboard: http://127.0.0.1:38478/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:36831,
Local directory: /tmp/dask-scratch-space-1401309/worker-9m5aoe00,Local directory: /tmp/dask-scratch-space-1401309/worker-9m5aoe00

0,1
Comm: tcp://127.0.0.1:35084,Total threads: 1
Dashboard: http://127.0.0.1:43164/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:41530,
Local directory: /tmp/dask-scratch-space-1401309/worker-4lqt5tu3,Local directory: /tmp/dask-scratch-space-1401309/worker-4lqt5tu3

0,1
Comm: tcp://127.0.0.1:34147,Total threads: 1
Dashboard: http://127.0.0.1:36549/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:43984,
Local directory: /tmp/dask-scratch-space-1401309/worker-00ttpb07,Local directory: /tmp/dask-scratch-space-1401309/worker-00ttpb07

0,1
Comm: tcp://127.0.0.1:43736,Total threads: 1
Dashboard: http://127.0.0.1:41079/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:42121,
Local directory: /tmp/dask-scratch-space-1401309/worker-akfidjse,Local directory: /tmp/dask-scratch-space-1401309/worker-akfidjse

0,1
Comm: tcp://127.0.0.1:39911,Total threads: 1
Dashboard: http://127.0.0.1:46284/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:38156,
Local directory: /tmp/dask-scratch-space-1401309/worker-7_brqj5x,Local directory: /tmp/dask-scratch-space-1401309/worker-7_brqj5x

0,1
Comm: tcp://127.0.0.1:43552,Total threads: 1
Dashboard: http://127.0.0.1:37874/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:43947,
Local directory: /tmp/dask-scratch-space-1401309/worker-6ns26nk1,Local directory: /tmp/dask-scratch-space-1401309/worker-6ns26nk1

0,1
Comm: tcp://127.0.0.1:39480,Total threads: 1
Dashboard: http://127.0.0.1:41585/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:34865,
Local directory: /tmp/dask-scratch-space-1401309/worker-sf0_0elq,Local directory: /tmp/dask-scratch-space-1401309/worker-sf0_0elq

0,1
Comm: tcp://127.0.0.1:37736,Total threads: 1
Dashboard: http://127.0.0.1:36401/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:43741,
Local directory: /tmp/dask-scratch-space-1401309/worker-g6mw1_uw,Local directory: /tmp/dask-scratch-space-1401309/worker-g6mw1_uw

0,1
Comm: tcp://127.0.0.1:38147,Total threads: 1
Dashboard: http://127.0.0.1:38016/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:42428,
Local directory: /tmp/dask-scratch-space-1401309/worker-xt8qypbg,Local directory: /tmp/dask-scratch-space-1401309/worker-xt8qypbg

0,1
Comm: tcp://127.0.0.1:40764,Total threads: 1
Dashboard: http://127.0.0.1:34491/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:45137,
Local directory: /tmp/dask-scratch-space-1401309/worker-edpfea5n,Local directory: /tmp/dask-scratch-space-1401309/worker-edpfea5n

0,1
Comm: tcp://127.0.0.1:42125,Total threads: 1
Dashboard: http://127.0.0.1:37586/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:45149,
Local directory: /tmp/dask-scratch-space-1401309/worker-omnfsgem,Local directory: /tmp/dask-scratch-space-1401309/worker-omnfsgem

0,1
Comm: tcp://127.0.0.1:35603,Total threads: 1
Dashboard: http://127.0.0.1:43467/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:46810,
Local directory: /tmp/dask-scratch-space-1401309/worker-fefx1yuu,Local directory: /tmp/dask-scratch-space-1401309/worker-fefx1yuu

0,1
Comm: tcp://127.0.0.1:35999,Total threads: 1
Dashboard: http://127.0.0.1:42405/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:38053,
Local directory: /tmp/dask-scratch-space-1401309/worker-urwbzqbv,Local directory: /tmp/dask-scratch-space-1401309/worker-urwbzqbv

0,1
Comm: tcp://127.0.0.1:37481,Total threads: 1
Dashboard: http://127.0.0.1:39895/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:41830,
Local directory: /tmp/dask-scratch-space-1401309/worker-hphbye9p,Local directory: /tmp/dask-scratch-space-1401309/worker-hphbye9p

0,1
Comm: tcp://127.0.0.1:35897,Total threads: 1
Dashboard: http://127.0.0.1:41017/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:44586,
Local directory: /tmp/dask-scratch-space-1401309/worker-lp6uc439,Local directory: /tmp/dask-scratch-space-1401309/worker-lp6uc439


Planning  :   0%|          | 0/4 [00:00<?, ?it/s]

Mapping   :   0%|          | 0/113 [00:00<?, ?it/s]

Binning   :   0%|          | 0/2 [00:00<?, ?it/s]

TypeError: cannot unpack non-iterable numpy.int32 object