# Cross-match OGLE III and ZTF DR22

### using LINCC Frameworks' [LSDB](https://lsdb.io) and [SNAD](https://snad.space)'s OGLE III mirror

In [1]:
# Install LSDB, upgrading to the latest release if an older one is present (-U).
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
%pip install -U lsdb


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
from functools import cache

import lsdb
import pandas as pd
from astropy.coordinates import Angle
from dask.distributed import Client

In [3]:
# Get OGLE III through SNAD's mirror
# The result is cached so that re-running the code below does not hit the server again

@cache
def get_ogle3_df():
    """Download the OGLE III table from SNAD's mirror as a pandas DataFrame.

    The tab-separated table is read with pyarrow-backed dtypes, and two
    columns are appended: ``ra`` and ``dec``, holding the ``RA`` column
    (parsed in hours) and the ``Decl`` column (parsed in degrees) converted
    to decimal degrees.

    The function is wrapped in ``functools.cache``, so repeated calls return
    the same DataFrame object without downloading the table again.
    """
    catalog = pd.read_csv(
        "https://ogle3.snad.space/api/v1/all",
        dtype_backend="pyarrow",
        sep="\t",
    )
    # Parse the coordinate columns and express both in decimal degrees
    ra_deg = Angle(catalog["RA"], unit="hour").deg
    dec_deg = Angle(catalog["Decl"], unit="deg").deg
    catalog["ra"] = ra_deg
    catalog["dec"] = dec_deg
    return catalog

In [4]:
# Build an LSDB Catalog from the (cached) OGLE III dataframe
ogle3 = lsdb.from_dataframe(
    get_ogle3_df(),
    catalog_name="ogle3",
)
ogle3

Unnamed: 0_level_0,ID,Field,StarID,RA,Decl,Type,Subtype,I,V,P_1,A_1,ID_OGLE_II,ID_MACHO,ID_ASAS,ID_GCVS,ID_OTHER,Remarks,ra,dec
npartitions=4,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
"Order: 1, Pixel: 28",string[pyarrow],string[pyarrow],int64[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],double[pyarrow],double[pyarrow]
"Order: 1, Pixel: 32",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 2, Pixel: 146",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 1, Pixel: 41",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [5]:
# Define the ZTF DR22 Catalog object. At this stage only the metadata
# is fetched, no catalog data is read.
# Catalog page: https://data.lsdb.io/#ZTF/ZTF_DR22

ztf_dr22 = (
    lsdb.read_hats(
        'https://data.lsdb.io/hats/ztf_dr22/ztf_lc',
        margin_cache='https://data.lsdb.io/hats/ztf_dr22/ztf_lc_10arcs',
    )
    # Pack the list-columns into a single "nested" light-curve column,
    # see the nested-pandas package for more details:
    # https://nested-pandas.readthedocs.io/en/latest/
    .nest_lists(
        base_columns=None,  # everything else is object metadata columns
        list_columns=["hmjd", "mag", "magerr", "clrcoeff", "catflags"],  # light-curve columns
        name="lc",
    )
    # Keep "good" observations only
    .query("lc.catflags == 0")
)
ztf_dr22

Unnamed: 0_level_0,objectid,filterid,fieldid,rcid,objra,objdec,nepochs,Norder,Dir,Npix,lc
npartitions=10839,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Order: 4, Pixel: 0",int64[pyarrow],int8[pyarrow],int16[pyarrow],int8[pyarrow],float[pyarrow],float[pyarrow],int64[pyarrow],uint8[pyarrow],uint64[pyarrow],uint64[pyarrow],"nested<hmjd: [double], mag: [float], magerr: [..."
"Order: 4, Pixel: 1",...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...
"Order: 5, Pixel: 12286",...,...,...,...,...,...,...,...,...,...,...
"Order: 5, Pixel: 12287",...,...,...,...,...,...,...,...,...,...,...


In [6]:
# Define the cross-matched catalog.
# No data is fetched at this stage either - this only plans the future compute

xmatched = ogle3.crossmatch(
    ztf_dr22,
    suffixes=["_ogle", "_ztf"],  # column suffixes
    radius_arcsec=1,
    # ZTF DR has multiple objects for the same sky source,
    # so several matches are requested
    n_neighbors=20,
)
xmatched

Unnamed: 0_level_0,ID_ogle,Field_ogle,StarID_ogle,RA_ogle,Decl_ogle,Type_ogle,Subtype_ogle,I_ogle,V_ogle,P_1_ogle,A_1_ogle,ID_OGLE_II_ogle,ID_MACHO_ogle,ID_ASAS_ogle,ID_GCVS_ogle,ID_OTHER_ogle,Remarks_ogle,ra_ogle,dec_ogle,objectid_ztf,filterid_ztf,fieldid_ztf,rcid_ztf,objra_ztf,objdec_ztf,nepochs_ztf,lc_ztf,_dist_arcsec
npartitions=76,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
"Order: 4, Pixel: 1798",string[pyarrow],string[pyarrow],int64[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],string[pyarrow],double[pyarrow],double[pyarrow],int64[pyarrow],int8[pyarrow],int16[pyarrow],int8[pyarrow],float[pyarrow],float[pyarrow],int64[pyarrow],"nested<hmjd: [double], mag: [float], magerr: [...",double[pyarrow]
"Order: 5, Pixel: 7196",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 6, Pixel: 29277",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 6, Pixel: 29279",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [None]:
# Start a Dask client so the work runs in parallel, then run the compute.
# It may take a few hours and will produce a new HATS catalog.
# The result can be read with LSDB or any other parquet tool, such as
# Pandas, Dask, astropy, polars, etc.

with Client(
    n_workers=4,
    threads_per_worker=1,
    memory_limit="32GB",
) as client:
    # The displayed client gives the link to use for tracking the progress
    display(client)
    xmatched.to_hats(
        "ogle3-x-ztf_dr22",
        catalog_name="ogle3-x-ztf_dr22",
    )

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 4,Total memory: 119.21 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:42262,Workers: 0
Dashboard: http://127.0.0.1:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:41577,Total threads: 1
Dashboard: http://127.0.0.1:46776/status,Memory: 29.80 GiB
Nanny: tcp://127.0.0.1:44899,
Local directory: /tmp/dask-scratch-space-1401309/worker-1ck7xi35,Local directory: /tmp/dask-scratch-space-1401309/worker-1ck7xi35

0,1
Comm: tcp://127.0.0.1:43781,Total threads: 1
Dashboard: http://127.0.0.1:37409/status,Memory: 29.80 GiB
Nanny: tcp://127.0.0.1:38555,
Local directory: /tmp/dask-scratch-space-1401309/worker-lbpwfk_p,Local directory: /tmp/dask-scratch-space-1401309/worker-lbpwfk_p

0,1
Comm: tcp://127.0.0.1:46365,Total threads: 1
Dashboard: http://127.0.0.1:36081/status,Memory: 29.80 GiB
Nanny: tcp://127.0.0.1:44415,
Local directory: /tmp/dask-scratch-space-1401309/worker-vm9l6ktl,Local directory: /tmp/dask-scratch-space-1401309/worker-vm9l6ktl

0,1
Comm: tcp://127.0.0.1:37154,Total threads: 1
Dashboard: http://127.0.0.1:33058/status,Memory: 29.80 GiB
Nanny: tcp://127.0.0.1:35641,
Local directory: /tmp/dask-scratch-space-1401309/worker-5p0kh5c_,Local directory: /tmp/dask-scratch-space-1401309/worker-5p0kh5c_


This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
