# DRP Afterburner for Super HATS (DASH) pipeline

Useful material:
- LINCC notebooks: https://github.com/lsst-sitcom/linccf
- JIRA for this week: https://rubinobs.atlassian.net/browse/DM-48478
- Stack Club: https://github.com/LSSTScienceCollaborations/StackClub/tree/master

Butler information:
```python
repo = "/repo/main"
collection = "LSSTComCam/runs/DRP/DP1/w_2025_03/DM-48478"
```

In [1]:
# Generic python packages
import os

# LSST Science Pipelines (Stack) packages
import lsst.daf.butler as dafButler

# HATS/LSDB
import lsdb
import hats_import.pipeline as runner
from hats_import.catalog.arguments import ImportArguments

from tqdm import tqdm

### Configure Butler

In [2]:
# Butler repository and the DRP collection to read from.
config = "/repo/main"
collections = "LSSTComCam/runs/DRP/DP1/w_2025_03/DM-48478"

# Shared Butler client; the helper functions in this notebook read this
# module-level name.
butler = dafButler.Butler(config, collections=collections)

In [2]:
# Root output directory, with one sub-directory for the raw Parquet exports
# and one for the imported HATS catalogs.
out_dir = "/sdf/data/rubin/shared/lsdb_commissioning/DASH"
raw_dir, hats_dir = (os.path.join(out_dir, leaf) for leaf in ("raw", "hats"))

### Helper functions

In [4]:
def download_from_butler(dataset_type, out_dir):
    """Export every dataset of one type from the Butler to Parquet files.

    Each dataset returned by ``butler.query_datasets(dataset_type)`` is
    fetched and written to ``<out_dir>/<dataset_type>/<i>.parquet``, where
    ``i`` is the position of the dataset in the query result. The total
    number of rows written is printed at the end.

    Parameters
    ----------
    dataset_type : str
        Name of the Butler dataset type to export.
    out_dir : str
        Directory under which the ``<dataset_type>`` sub-directory is created.

    Notes
    -----
    Relies on the module-level ``butler``. Files are numbered by query
    position, so re-running overwrites files with the same index but does
    not remove leftovers from a previous, larger export.
    """
    out_path = os.path.join(out_dir, dataset_type)
    # exist_ok lets the cell be re-run (e.g. after an interrupted export)
    # instead of failing with FileExistsError on the second attempt.
    os.makedirs(out_path, exist_ok=True)
    # NOTE(review): query_datasets may cap the number of results by default
    # in recent daf_butler releases -- confirm for very large dataset types.
    refs = butler.query_datasets(dataset_type)
    count = 0
    for i, ref in enumerate(tqdm(refs)):
        table = butler.get(dataset_type, dataId=ref.dataId)
        table.to_parquet(os.path.join(out_path, f"{i}.parquet"))
        count += len(table)
    print(f"Saved {count} rows to {out_path}")
    
def download_visits(out_dir):
    """Save the LSSTComCam ``visitTable`` to ``<out_dir>/visits.parquet``.

    Parameters
    ----------
    out_dir : str
        Directory that receives ``visits.parquet``; created if missing.

    Returns
    -------
    The table object returned by ``butler.get`` for ``visitTable``.

    Notes
    -----
    Relies on the module-level ``butler``.
    """
    visits = butler.get("visitTable", dataId={"instrument": "LSSTComCam"})
    # The visit table may be requested before any other export has created
    # the output directory, so make sure it exists before writing.
    os.makedirs(out_dir, exist_ok=True)
    parquet_path = os.path.join(out_dir, "visits.parquet")
    visits.to_parquet(parquet_path)
    print(f"Saved {len(visits)} visits rows to {parquet_path}")
    return visits

### DIA Object

In [None]:
# Export the raw diaObjectTable_tract datasets to Parquet under raw_dir.
download_from_butler(dataset_type="diaObjectTable_tract", out_dir=raw_dir)

### DIA Source

In [None]:
# Export the raw diaSourceTable_tract datasets to Parquet under raw_dir.
download_from_butler(dataset_type="diaSourceTable_tract", out_dir=raw_dir)

### DIA Forced Source

In [None]:
# Export the raw forcedSourceOnDiaObjectTable datasets to Parquet under raw_dir.
download_from_butler(dataset_type="forcedSourceOnDiaObjectTable", out_dir=raw_dir)

### Object

In [None]:
# Export the raw objectTable datasets to Parquet under raw_dir.
download_from_butler(dataset_type="objectTable", out_dir=raw_dir)

### Source

In [None]:
# Export the raw sourceTable datasets to Parquet under raw_dir.
download_from_butler(dataset_type="sourceTable", out_dir=raw_dir)

### Forced Source

In [6]:
# Export the raw forcedSourceTable datasets to Parquet under raw_dir.
download_from_butler(dataset_type="forcedSourceTable", out_dir=raw_dir)

### Import data to HATS

In [7]:
%pip install -q git+https://github.com/astronomy-commons/hats-import.git@main

In [5]:
from rubin_reader import RubinParquetReader

#### DiaObject

In [None]:
# Import the raw diaObjectTable_tract Parquet files as the "diaObject" HATS
# catalog, passing a short list of columns to the reader.
dataset_type = "diaObjectTable_tract"

diaObj_default_columns = [
    "diaObjectId",
    "ra",
    "dec",
    "nDiaSources",
    "radecMjdTai",
]

args = ImportArguments(
    # Input files and the reader used to parse them.
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(
        dataset_type=dataset_type,
        column_names=diaObj_default_columns,
    ),
    # Sky-position columns and catalog kind.
    ra_column="ra",
    dec_column="dec",
    catalog_type="object",
    # Output location.
    output_path=hats_dir,
    output_artifact_name="diaObject",
    # Run settings.
    pixel_threshold=300_000,
    resume=False,
    dask_n_workers=4,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

#### DiaSource

In [None]:
# Import the raw diaSourceTable_tract Parquet files as the "diaSource" HATS
# catalog.
dataset_type = "diaSourceTable_tract"

args = ImportArguments(
    # Input files and the reader used to parse them.
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(dataset_type=dataset_type),
    # Sky-position columns and catalog kind.
    ra_column="ra",
    dec_column="dec",
    catalog_type="source",
    # Output location.
    output_path=hats_dir,
    output_artifact_name="diaSource",
    # Run settings.
    pixel_threshold=300_000,
    resume=False,
    dask_n_workers=4,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

#### DiaForcedSource

In [None]:
# Import the raw forcedSourceOnDiaObjectTable Parquet files as the
# "diaForcedSource" HATS catalog.
dataset_type = "forcedSourceOnDiaObjectTable"

args = ImportArguments(
    # Input files and the reader used to parse them.
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(dataset_type=dataset_type),
    # Sky-position columns and catalog kind.
    ra_column="coord_ra",
    dec_column="coord_dec",
    catalog_type="source",
    # Output location.
    output_path=hats_dir,
    output_artifact_name="diaForcedSource",
    # Run settings.
    pixel_threshold=20_000_000,
    resume=False,
    dask_n_workers=16,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

#### Object

In [None]:
# Per-band photometry columns: PSF and Kron flux with their errors, followed
# by the Kron radius, for each of the six bands.
cols_per_band = []
for band in "ugrizy":
    for flux_type in ("psf", "kron"):
        cols_per_band += [f"{band}_{flux_type}Flux", f"{band}_{flux_type}FluxErr"]
    cols_per_band.append(f"{band}_kronRad")

# Band-independent object columns first, then the per-band columns above.
obj_default_columns = [
    "objectId", "refFwhm", "shape_flag", "sky_object", "parentObjectId",
    "detect_isPrimary",
    "x", "y", "xErr", "yErr",
    "shape_yy", "shape_xx", "shape_xy",
    "coord_ra", "coord_dec", "coord_raErr", "coord_decErr",
    "tract", "patch",
    "detect_isIsolated",
    *cols_per_band,
]

obj_default_columns

In [None]:
# Import the raw objectTable Parquet files as the "object" HATS catalog,
# passing obj_default_columns to the reader.
dataset_type = "objectTable"

args = ImportArguments(
    # Input files and the reader used to parse them.
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(
        dataset_type=dataset_type,
        column_names=obj_default_columns,
    ),
    # Sky-position columns and catalog kind.
    ra_column="coord_ra",
    dec_column="coord_dec",
    catalog_type="object",
    # Output location.
    output_path=hats_dir,
    output_artifact_name="object",
    # Run settings.
    pixel_threshold=300_000,
    resume=False,
    dask_n_workers=16,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

#### Source

In [None]:
# Import the raw sourceTable Parquet files as the "source" HATS catalog.
dataset_type = "sourceTable"

args = ImportArguments(
    # Input files and the reader used to parse them.
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(dataset_type=dataset_type),
    # Sky-position columns and catalog kind.
    ra_column="ra",
    dec_column="dec",
    catalog_type="source",
    # Output location.
    output_path=hats_dir,
    output_artifact_name="source",
    # Run settings.
    pixel_threshold=10_000_000,
    resume=False,
    dask_n_workers=16,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

#### ForcedSource

In [None]:
# Fetch the visit table (also written to raw_dir) and build a mapping from
# its index to the exposure midpoint MJD; this mapping is passed to the
# forcedSourceTable reader.
# NOTE(review): the index is presumably the visit id -- confirm.
visits = download_visits(raw_dir)
# Series.to_dict() yields the same {index: value} mapping as the former
# transpose-then-records round trip, without the intermediate frame.
visit_map = visits["expMidptMJD"].to_dict()

In [None]:
# Import the raw forcedSourceTable Parquet files as the "forcedSource" HATS
# catalog; the reader is also given visit_map.
dataset_type = "forcedSourceTable"

args = ImportArguments(
    # Input files and the reader used to parse them.
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(
        dataset_type=dataset_type,
        visit_map=visit_map,
    ),
    # Sky-position columns and catalog kind.
    ra_column="coord_ra",
    dec_column="coord_dec",
    catalog_type="source",
    # Output location.
    output_path=hats_dir,
    output_artifact_name="forcedSource",
    # Run settings.
    pixel_threshold=10_000_000,
    resume=False,
    dask_n_workers=16,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

### Extra: nest sources in object catalogs

In [3]:
# Open the three DIA HATS catalogs written under hats_dir.
diaObject_cat, diaSource_cat, diaForcedSource_cat = (
    lsdb.read_hats(os.path.join(hats_dir, name))
    for name in ("diaObject", "diaSource", "diaForcedSource")
)

In [4]:
# Nest the DIA sources and DIA forced sources under each DIA object, matching
# on diaObjectId; each join adds one nested column.
diaObject_cat_nested = (
    diaObject_cat
    .join_nested(
        diaSource_cat,
        nested_column_name="diaSource",
        left_on="diaObjectId",
        right_on="diaObjectId",
    )
    .join_nested(
        diaForcedSource_cat,
        nested_column_name="diaForcedSource",
        left_on="diaObjectId",
        right_on="diaObjectId",
    )
)
diaObject_cat_nested



Unnamed: 0_level_0,diaObjectId,ra,dec,nDiaSources,radecMjdTai,Norder,Dir,Npix,diaSource,diaForcedSource
npartitions=128,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
"Order: 0, Pixel: 0",int64[pyarrow],double[pyarrow],double[pyarrow],int64[pyarrow],double[pyarrow],uint8[pyarrow],uint64[pyarrow],uint64[pyarrow],"nested<diaSourceId: [int64], visit: [int64], d...","nested<forcedSourceOnDiaObjectId: [int64], par..."
"Order: 0, Pixel: 4",...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...
"Order: 4, Pixel: 2247",...,...,...,...,...,...,...,...,...,...
"Order: 3, Pixel: 562",...,...,...,...,...,...,...,...,...,...


In [None]:
# diaObject_cat_nested.to_hats(os.path.join(hats_dir, "diaObject_lc"))

In [5]:
# Open the object, source and forced-source HATS catalogs written under
# hats_dir.
object_cat, source_cat, forcedSource_cat = (
    lsdb.read_hats(os.path.join(hats_dir, name))
    for name in ("object", "source", "forcedSource")
)

In [6]:
# Nest the forced sources under each object, matching on objectId.
# The equivalent join for source_cat is kept for reference but disabled:
#   .join_nested(source_cat, left_on="objectId", right_on="objectId", nested_column_name="source")
object_cat_nested = object_cat.join_nested(
    forcedSource_cat,
    nested_column_name="forcedSource",
    left_on="objectId",
    right_on="objectId",
)
object_cat_nested



Unnamed: 0_level_0,objectId,refFwhm,shape_flag,sky_object,parentObjectId,detect_isPrimary,x,y,xErr,yErr,shape_yy,shape_xx,shape_xy,coord_ra,coord_dec,coord_raErr,coord_decErr,tract,patch,detect_isIsolated,u_psfFlux,u_psfFluxErr,u_kronFlux,u_kronFluxErr,u_kronRad,g_psfFlux,g_psfFluxErr,g_kronFlux,g_kronFluxErr,g_kronRad,r_psfFlux,r_psfFluxErr,r_kronFlux,r_kronFluxErr,r_kronRad,i_psfFlux,i_psfFluxErr,i_kronFlux,i_kronFluxErr,i_kronRad,z_psfFlux,z_psfFluxErr,z_kronFlux,z_kronFluxErr,z_kronRad,y_psfFlux,y_psfFluxErr,y_kronFlux,y_kronFluxErr,y_kronRad,u_psfMag,u_psfMagErr,u_kronMag,u_kronMagErr,g_psfMag,g_psfMagErr,g_kronMag,g_kronMagErr,r_psfMag,r_psfMagErr,r_kronMag,r_kronMagErr,i_psfMag,i_psfMagErr,i_kronMag,i_kronMagErr,z_psfMag,z_psfMagErr,z_kronMag,z_kronMagErr,y_psfMag,y_psfMagErr,y_kronMag,y_kronMagErr,Norder,Dir,Npix,forcedSource
npartitions=166,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1
"Order: 5, Pixel: 32",int64[pyarrow],double[pyarrow],bool[pyarrow],bool[pyarrow],int64[pyarrow],bool[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],float[pyarrow],int64[pyarrow],int64[pyarrow],bool[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],float[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],uint8[pyarrow],uint64[pyarrow],uint64[pyarrow],"nested<forcedSourceId: [int64], parentObjectId..."
"Order: 7, Pixel: 544",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 6, Pixel: 35970",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 6, Pixel: 35971",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [None]:
# object_cat_nested.to_hats(os.path.join(hats_dir, "object_lc"))