# Use ZTF DR with TAPE

In [1]:
# First, install the package that loads the data as Dask dataframes:
# python -m pip install git+https://github.com/hombit/load_ztfdr_for_tape

In [2]:
from itertools import chain
from pathlib import Path

import light_curve as licu
from dask.distributed import Client
from load_ztfdr_for_tape import load_object_source_frames_from_path
from tape import Ensemble, ColumnMapper

### Get Dask dataframes

Nothing is read from disk at this point; we are only planning the computation.

In [3]:
%%time

# This is PSC path to the whole DR.
ztf_dr_path = Path('/ocean/projects/phy210048p/shared/hipscat/raw/ztf-dr19/')

# Here we would use a single field only
# For dense fields, like 807, a Galactic one, we are out of memory
# So let's use extragalactic field 795 here
data_path = ztf_dr_path / '0' / 'field000795'

objects, sources = load_object_source_frames_from_path(data_path)

CPU times: user 1.48 s, sys: 209 ms, total: 1.69 s
Wall time: 1.27 s


### Create Dask client
It allows us to run the computation in parallel.

In [4]:
# Start a Dask client with default settings; with no arguments it creates a
# local cluster (the output below reports distributed.LocalCluster).
client = Client()
# Display the client summary (dashboard link, workers, threads, memory).
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 16
Total threads: 128,Total memory: 247.07 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:39425,Workers: 16
Dashboard: http://127.0.0.1:8787/status,Total threads: 128
Started: Just now,Total memory: 247.07 GiB

0,1
Comm: tcp://127.0.0.1:35581,Total threads: 8
Dashboard: http://127.0.0.1:37833/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:38911,
Local directory: /tmp/dask-scratch-space/worker-wg4ofv59,Local directory: /tmp/dask-scratch-space/worker-wg4ofv59

0,1
Comm: tcp://127.0.0.1:43759,Total threads: 8
Dashboard: http://127.0.0.1:44497/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:34163,
Local directory: /tmp/dask-scratch-space/worker-qax3h8n3,Local directory: /tmp/dask-scratch-space/worker-qax3h8n3

0,1
Comm: tcp://127.0.0.1:42995,Total threads: 8
Dashboard: http://127.0.0.1:34421/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:33869,
Local directory: /tmp/dask-scratch-space/worker-r6u5cbrd,Local directory: /tmp/dask-scratch-space/worker-r6u5cbrd

0,1
Comm: tcp://127.0.0.1:45615,Total threads: 8
Dashboard: http://127.0.0.1:45993/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:33871,
Local directory: /tmp/dask-scratch-space/worker-_0f4gk_f,Local directory: /tmp/dask-scratch-space/worker-_0f4gk_f

0,1
Comm: tcp://127.0.0.1:34797,Total threads: 8
Dashboard: http://127.0.0.1:41675/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:35469,
Local directory: /tmp/dask-scratch-space/worker-63t2fsh8,Local directory: /tmp/dask-scratch-space/worker-63t2fsh8

0,1
Comm: tcp://127.0.0.1:39829,Total threads: 8
Dashboard: http://127.0.0.1:38803/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:46685,
Local directory: /tmp/dask-scratch-space/worker-r138pzhm,Local directory: /tmp/dask-scratch-space/worker-r138pzhm

0,1
Comm: tcp://127.0.0.1:40777,Total threads: 8
Dashboard: http://127.0.0.1:33963/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:36005,
Local directory: /tmp/dask-scratch-space/worker-lpkjkxwb,Local directory: /tmp/dask-scratch-space/worker-lpkjkxwb

0,1
Comm: tcp://127.0.0.1:40485,Total threads: 8
Dashboard: http://127.0.0.1:42609/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:45725,
Local directory: /tmp/dask-scratch-space/worker-n43be07m,Local directory: /tmp/dask-scratch-space/worker-n43be07m

0,1
Comm: tcp://127.0.0.1:45931,Total threads: 8
Dashboard: http://127.0.0.1:35671/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:37719,
Local directory: /tmp/dask-scratch-space/worker-j3dztlsn,Local directory: /tmp/dask-scratch-space/worker-j3dztlsn

0,1
Comm: tcp://127.0.0.1:46613,Total threads: 8
Dashboard: http://127.0.0.1:45181/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:44651,
Local directory: /tmp/dask-scratch-space/worker-sueyadz1,Local directory: /tmp/dask-scratch-space/worker-sueyadz1

0,1
Comm: tcp://127.0.0.1:41145,Total threads: 8
Dashboard: http://127.0.0.1:34859/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:44619,
Local directory: /tmp/dask-scratch-space/worker-uqhzc677,Local directory: /tmp/dask-scratch-space/worker-uqhzc677

0,1
Comm: tcp://127.0.0.1:40963,Total threads: 8
Dashboard: http://127.0.0.1:38811/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:44843,
Local directory: /tmp/dask-scratch-space/worker-utj288fc,Local directory: /tmp/dask-scratch-space/worker-utj288fc

0,1
Comm: tcp://127.0.0.1:39123,Total threads: 8
Dashboard: http://127.0.0.1:32805/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:35387,
Local directory: /tmp/dask-scratch-space/worker-8brl_s7j,Local directory: /tmp/dask-scratch-space/worker-8brl_s7j

0,1
Comm: tcp://127.0.0.1:37125,Total threads: 8
Dashboard: http://127.0.0.1:45835/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:40595,
Local directory: /tmp/dask-scratch-space/worker-2rou3rbd,Local directory: /tmp/dask-scratch-space/worker-2rou3rbd

0,1
Comm: tcp://127.0.0.1:38497,Total threads: 8
Dashboard: http://127.0.0.1:37945/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:39537,
Local directory: /tmp/dask-scratch-space/worker-72gdtpsn,Local directory: /tmp/dask-scratch-space/worker-72gdtpsn

0,1
Comm: tcp://127.0.0.1:39869,Total threads: 8
Dashboard: http://127.0.0.1:36987/status,Memory: 15.44 GiB
Nanny: tcp://127.0.0.1:35563,
Local directory: /tmp/dask-scratch-space/worker-lg0iallv,Local directory: /tmp/dask-scratch-space/worker-lg0iallv


### Create TAPE Ensemble and plan analysis

Again, everything here is done lazily: nothing is computed yet.

In [5]:
# Describe which columns of the loaded frames play which role for TAPE.
# The magnitude and magnitude-error columns are used in the flux/error roles.
column_mapper = ColumnMapper(
    id_col='objectid',
    time_col='hmjd',
    flux_col='mag',
    err_col='magerr',
    band_col='filterid',
)

# The ensemble is bound to the Dask client created earlier in the notebook.
ens = Ensemble(client=client)
ens.from_dask_dataframe(
    source_frame=sources,
    object_frame=objects,
    column_mapper=column_mapper,
    # The tables were already sorted by objectid by load_ztfdr_for_tape,
    # so declare them sorted and skip re-sorting
    sorted=True,
    sort=False,
    # Skip the initial synchronisation of the two tables
    sync_tables=False,
)



<tape.ensemble.Ensemble at 0x1535894aa880>

In [6]:
# Filter "bad" detections: keep only sources with catflags equal to zero and
# a positive magnitude error. `.query()` alone only returns a new frame;
# `.update_ensemble()` is what writes the filtered frame back to the ensemble.
ens.source.query("catflags == 0 & magerr > 0.0").update_ensemble()

# Count detections per object (this adds the `nobs_total` column to the
# object table) and keep only objects with at least 100 detections.
ens.calc_nobs()
# Without `.update_ensemble()` the filtered frame would be discarded and the
# cut on `nobs_total` would never be applied to the ensemble.
ens.object.query("nobs_total >= 100").update_ensemble()

# Plan the Amplitude feature extraction; compute=False keeps it lazy.
# NOTE(review): band_to_calc=None presumably means the light curves are not
# split per band -- confirm against the TAPE documentation.
features = ens.batch(licu.Amplitude(), band_to_calc=None, compute=False,
                     label='features')
# Lazy reduction: the largest amplitude among the computed features.
max_amplitude = features['amplitude'].max()

Temporary columns dropped from Object Table: ['nobs_total']


### Run the pipeline

`.compute()` runs the computational graph and returns the result.

In [None]:
%%time
# Execute the lazily planned pipeline and collect the maximum amplitude.
result = max_amplitude.compute()

# Show the computed value as the cell output.
result

### Turn Dask cluster off

In [None]:
# Close the Dask client once the computation is finished.
# NOTE(review): the cluster was created implicitly by Client(), so closing the
# client presumably shuts the local cluster down as well -- confirm.
client.close()