In [1]:
import lsdb
from tape import Ensemble, ColumnMapper
import matplotlib.pyplot as plt

# Demo: Cross-matching ZTF and Gaia with LSDB & Timeseries Exploration with TAPE

In this demo, we'll show a brief example LSDB+TAPE workflow using ZTF+Gaia data. The dataset we'll use is a small half-degree subset of ZTF and Gaia, hosted here: https://epyc.astro.washington.edu/~lincc-frameworks/half_degree_surveys/ 

The goal of this notebook is to provide example code for working with Gaia+ZTF data. You are highly encouraged to use it as the foundation for your own analysis with TAPE, LSDB, and ZTF+Gaia.

Requires LSDB>=0.1.2 and hipscat>0.2.4.

## Installation and Dependencies

TAPE: https://tape.readthedocs.io/en/latest/gettingstarted/installation.html 
```
pip install lf-tape
```
LSDB: 
```
pip install lsdb
```

This notebook also requires hipscat>0.2.4. At the time of writing, 0.2.4 is the latest release, so hipscat needs to be installed from source:
```
conda activate [YOUR CONDA ENVIRONMENT OF CHOICE]
git clone https://github.com/astronomy-commons/hipscat.git
cd hipscat
pip install .
```

## Reading into LSDB

In [2]:
# Setup paths
#
# Every catalog used in this demo lives under the same hosted directory, so the
# location is defined once here and each catalog path is derived from it.
SURVEYS_BASE_URL = "https://epyc.astro.washington.edu/~lincc-frameworks/half_degree_surveys"

# Gaia
gaia_object_path = f"{SURVEYS_BASE_URL}/gaia_symbolic/"

# ZTF
ztf_object_path = f"{SURVEYS_BASE_URL}/ztf/ztf_object/"
ztf_source_path = f"{SURVEYS_BASE_URL}/ztf/ztf_source/"
# optionally can use Zubercal for ztf sources (observations) instead
# ztf_source_path = f"{SURVEYS_BASE_URL}/ztf/zubercal_source/"

In [3]:
# Read each path into an LSDB Catalog object, in the order:
# Gaia Object, ZTF Object, ZTF Source
gaia_object, ztf_object, ztf_source = (
    lsdb.read_hipscat(catalog_path)
    for catalog_path in (gaia_object_path, ztf_object_path, ztf_source_path)
)

In [4]:
# Row counts of the two object catalogs (Gaia first, then ZTF)
n_gaia_rows = len(gaia_object._ddf)
n_ztf_rows = len(ztf_object._ddf)
print(n_gaia_rows, n_ztf_rows)

2509 9817


In [9]:
# Cross-match the ZTF object catalog against the Gaia object catalog
xmatch_object = ztf_object.crossmatch(gaia_object)

# Preview the first five rows of the cross-matched result
xmatch_preview = xmatch_object._ddf.head(5)
xmatch_preview



Unnamed: 0_level_0,ps1_objid_ztf_object_halfdegree,ra_ztf_object_halfdegree,dec_ztf_object_halfdegree,ps1_gMeanPSFMag_ztf_object_halfdegree,ps1_rMeanPSFMag_ztf_object_halfdegree,ps1_iMeanPSFMag_ztf_object_halfdegree,nobs_g_ztf_object_halfdegree,nobs_r_ztf_object_halfdegree,nobs_i_ztf_object_halfdegree,mean_mag_g_ztf_object_halfdegree,...,ag_gspphot_lower_gaia_halfdegree,ag_gspphot_upper_gaia_halfdegree,ebpminrp_gspphot_gaia_halfdegree,ebpminrp_gspphot_lower_gaia_halfdegree,ebpminrp_gspphot_upper_gaia_halfdegree,libname_gspphot_gaia_halfdegree,Norder_gaia_halfdegree,Npix_gaia_halfdegree,Dir_gaia_halfdegree,_DIST
_hipscat_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7800231224164745216,119401799937858008,179.993698,9.506274,19.1936,18.894199,18.7934,271,346,124,19.213742,...,0.0006,0.007,0.0016,0.0003,0.0039,MARCS,2,108,108,3.1e-05
7800231386677248000,119401800526045557,180.052521,9.5042,17.3981,16.2202,15.1036,289,363,128,17.479837,...,0.4366,0.4575,0.2732,0.2678,0.2804,MARCS,2,108,108,1.8e-05
7800231722905239552,119421800537485698,180.05367,9.521045,19.9639,19.621799,19.4827,235,309,116,19.96676,...,,,,,,,2,108,108,3.6e-05
7800231851490017280,119411800119230905,180.011831,9.508711,21.1581,20.547899,20.3141,96,188,87,21.037499,...,,,,,,,2,108,108,3.7e-05
7800231854853849088,119411800168234041,180.016739,9.511301,19.2582,18.2054,18.0648,266,360,125,19.123036,...,,,,,,,2,108,108,3.3e-05


In [10]:
# Filter the dataset using a SQL-like query: keep rows with more than 150
# observations in both the g and r bands (the column names are a bit verbose...)
min_nobs_query = "nobs_g_ztf_object_halfdegree > 150 and nobs_r_ztf_object_halfdegree > 150"
xmatch_object = xmatch_object.query(min_nobs_query)

# Preview the first five rows that pass the filter
xmatch_object._ddf.head(5)

Unnamed: 0_level_0,ps1_objid_ztf_object_halfdegree,ra_ztf_object_halfdegree,dec_ztf_object_halfdegree,ps1_gMeanPSFMag_ztf_object_halfdegree,ps1_rMeanPSFMag_ztf_object_halfdegree,ps1_iMeanPSFMag_ztf_object_halfdegree,nobs_g_ztf_object_halfdegree,nobs_r_ztf_object_halfdegree,nobs_i_ztf_object_halfdegree,mean_mag_g_ztf_object_halfdegree,...,ag_gspphot_lower_gaia_halfdegree,ag_gspphot_upper_gaia_halfdegree,ebpminrp_gspphot_gaia_halfdegree,ebpminrp_gspphot_lower_gaia_halfdegree,ebpminrp_gspphot_upper_gaia_halfdegree,libname_gspphot_gaia_halfdegree,Norder_gaia_halfdegree,Npix_gaia_halfdegree,Dir_gaia_halfdegree,_DIST
_hipscat_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7800231224164745216,119401799937858008,179.993698,9.506274,19.1936,18.894199,18.7934,271,346,124,19.213742,...,0.0006,0.007,0.0016,0.0003,0.0039,MARCS,2,108,108,3.1e-05
7800231386677248000,119401800526045557,180.052521,9.5042,17.3981,16.2202,15.1036,289,363,128,17.479837,...,0.4366,0.4575,0.2732,0.2678,0.2804,MARCS,2,108,108,1.8e-05
7800231722905239552,119421800537485698,180.05367,9.521045,19.9639,19.621799,19.4827,235,309,116,19.96676,...,,,,,,,2,108,108,3.6e-05
7800231854853849088,119411800168234041,180.016739,9.511301,19.2582,18.2054,18.0648,266,360,125,19.123036,...,,,,,,,2,108,108,3.3e-05
7800232001469939712,119421800126897863,180.012578,9.522857,20.105801,18.949301,17.642099,221,355,127,20.175254,...,0.1346,0.1949,0.111,0.0913,0.1311,MARCS,2,108,108,4e-05


## Moving into TAPE for timeseries analysis

In [7]:
# Join the ZTF sources onto the cross-matched objects via the PS1 object id,
# so that the source catalog is indexed by the objects' hipscat index
joined_source = xmatch_object.join(
    ztf_source,
    left_on="ps1_objid_ztf_object_halfdegree",
    right_on="ps1_objid",
    suffixes=("", ""),
)

# The ColumnMapper tells TAPE which dataset columns hold the timeseries quantities
colmap = ColumnMapper(
    id_col="_hipscat_index",
    time_col="mjd",
    flux_col="mag",
    err_col="magerr",
    band_col="band",
)

# Loading into TAPE
ens = Ensemble(client=False)

# You may also want to try using a distributed client, opening up access to helpful resources
# like the dask dashboard: https://docs.dask.org/en/latest/dashboard.html
#from dask.distributed import Client
#client = Client(n_workers=4)
#ens = Ensemble(client=client)

# Load the joined sources and the cross-matched objects into the Ensemble
ens.from_lsdb(
    joined_source,
    xmatch_object,
    column_mapper=colmap,
    sync_tables=True,
)

# Preview the first five rows of the Ensemble's object table
ens.object.head(5, npartitions=-1)

Unnamed: 0_level_0,ps1_objid_ztf_object_halfdegree,ra_ztf_object_halfdegree,dec_ztf_object_halfdegree,ps1_gMeanPSFMag_ztf_object_halfdegree,ps1_rMeanPSFMag_ztf_object_halfdegree,ps1_iMeanPSFMag_ztf_object_halfdegree,nobs_g_ztf_object_halfdegree,nobs_r_ztf_object_halfdegree,nobs_i_ztf_object_halfdegree,mean_mag_g_ztf_object_halfdegree,...,ag_gspphot_lower_gaia_halfdegree,ag_gspphot_upper_gaia_halfdegree,ebpminrp_gspphot_gaia_halfdegree,ebpminrp_gspphot_lower_gaia_halfdegree,ebpminrp_gspphot_upper_gaia_halfdegree,libname_gspphot_gaia_halfdegree,Norder_gaia_halfdegree,Npix_gaia_halfdegree,Dir_gaia_halfdegree,_DIST
_hipscat_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7800231224164745216,119401799937858008,179.993698,9.506274,19.1936,18.894199,18.7934,271,346,124,19.213742,...,0.0006,0.007,0.0016,0.0003,0.0039,MARCS,2,108,108,3.1e-05
7800231386677248000,119401800526045557,180.052521,9.5042,17.3981,16.2202,15.1036,289,363,128,17.479837,...,0.4366,0.4575,0.2732,0.2678,0.2804,MARCS,2,108,108,1.8e-05
7800231722905239552,119421800537485698,180.05367,9.521045,19.9639,19.621799,19.4827,235,309,116,19.96676,...,,,,,,,2,108,108,3.6e-05
7800231854853849088,119411800168234041,180.016739,9.511301,19.2582,18.2054,18.0648,266,360,125,19.123036,...,,,,,,,2,108,108,3.3e-05
7800232001469939712,119421800126897863,180.012578,9.522857,20.105801,18.949301,17.642099,221,355,127,20.175254,...,0.1346,0.1949,0.111,0.0913,0.1311,MARCS,2,108,108,4e-05
