In [None]:
import dask
import lsdb
import matplotlib.pyplot as plt
import numpy as np
from tape import Ensemble, ColumnMapper
# Point Dask's `temporary_directory` setting at a scratch location.
# NOTE(review): this is a user-specific absolute path; confirm it exists
# (and is writable) on the machine running the notebook.
dask.config.set({'temporary_directory': '/data/epyc/users/brantd/tmp'})

from dask.distributed import Client
# Start a Dask distributed client backed by 10 single-threaded worker
# processes (processes=True rather than threads).
# NOTE(review): memory_limit="60G" is presumably applied per worker, i.e.
# up to ~600G in total -- confirm against the host's available memory.
client = Client(n_workers=10, threads_per_worker=1,
                memory_limit="60G", processes=True)

# Bare expression: shows the client summary as the cell output.
client

# Generating a custom-sized subset of ZTF for TAPE Analysis

In [None]:
# Locations of the ZTF AXS catalogs: the source catalog and the
# object catalog (ztf_dr14). Both are read with lsdb.read_hipscat.
ztf_source_path = "/astro/store/epyc3/data3/hipscat/catalogs/ztf_axs/ztf_source/"
ztf_object_path = "/astro/store/epyc3/data3/hipscat/catalogs/ztf_axs/ztf_dr14"

In [None]:
# Load and subsample the ZTF catalogs using LSDB

# Column selections for each table.
# NOTE(review): `object_cols` is defined here but never passed to a reader,
# so the object catalog is loaded without an explicit column selection --
# confirm whether `columns=object_cols` was intended.
source_cols = ["ps1_objid", "ra", "dec", "mjd", "mag", "magerr", "band"]
object_cols = ["ps1_objid", "ra", "dec"]

# Bounds of the box search used to cut both catalogs down to a small subset
ra = (340, 342)
dec = (10, 12)

# Read each catalog into an LSDB catalog object and apply the box search
ztf_object = lsdb.read_hipscat(ztf_object_path).box(ra=ra, dec=dec)  # ZTF Object
ztf_source = lsdb.read_hipscat(
    ztf_source_path,
    columns=source_cols,
).box(ra=ra, dec=dec)  # ZTF Source

# Join Source onto Object (matching on ps1_objid) so the joined table gets
# the proper object-level _hipscat_index; overlapping object columns receive
# the "_object" suffix while source columns keep their names.
joined_source = ztf_object.join(
    ztf_source,
    left_on="ps1_objid",
    right_on="ps1_objid",
    suffixes=("_object", ""),
)

In [None]:
# Load into TAPE

# The ColumnMapper tells TAPE which table columns hold each timeseries
# quantity (object id, time, flux, flux error, band). Here the magnitude
# columns (`mag` / `magerr`) fill the flux and error slots.
colmap = ColumnMapper(
    id_col="_hipscat_index",
    time_col="mjd",
    flux_col="mag",
    err_col="magerr",
    band_col="band",
)

# Create an Ensemble attached to the running Dask client, then populate it
# from the LSDB catalogs: the joined source table first, the object table second.
ens = Ensemble(client=client)
ens.from_lsdb(joined_source, ztf_object, column_mapper=colmap)

In [None]:
# Report the partition counts of the object table, then the source table
print(*(frame.npartitions for frame in (ens.object, ens.source)))

In [None]:
# Save to parquet for easy loading.
# (numpy is imported as `np` with the other imports in the notebook's first cell.)

# Workaround: cast the source table's index to uint64 before writing.
# This is only needed until lsdb 0.1.3.
ens.source.index = ens.source.index.astype(np.uint64)

# Write the ensemble using "." as the path and "ztf_small_ensemble" as the name.
ens.save_ensemble(".", "ztf_small_ensemble")