# DRP Afterburner for Super HATS (DASH) pipeline

In [3]:
# Generic python packages
import os
import numpy as np
import pandas as pd
import astropy.units as u

# LSST Science Pipelines (Stack) packages
import lsst.daf.butler as dafButler

# HATS import
import hats_import.pipeline as runner
from hats_import.catalog.arguments import ImportArguments

### Configure Butler

In [4]:
# Butler repository to open and the collection that datasets are read from.
config = '/repo/main'
collections = 'LSSTComCam/runs/DRP/DP1/w_2025_03/DM-48478'
# Notebook-global Butler; the download helpers below read it directly.
butler = dafButler.Butler(config, collections=collections)

In [5]:
# Root directory for everything this notebook writes.
out_dir = "/sdf/data/rubin/shared/lsdb_commissioning/DASH"
# Parquet files exported from the Butler (passed to the download helpers).
raw_dir = os.path.join(out_dir, "raw")
# Used as `output_path` for every HATS import below.
hats_dir = os.path.join(out_dir, "hats")

### Helper methods

In [6]:
def download_from_butler(dataset_type, out_dir):
    """Export every dataset of ``dataset_type`` to parquet files.

    Files are written to ``<out_dir>/<dataset_type>/<i>.parquet``, where ``i``
    is the position of the dataset in the Butler query result. The target
    directory is created here and must not exist yet (``os.makedirs`` raises
    ``FileExistsError`` otherwise). Uses the notebook-global ``butler``.

    Parameters
    ----------
    dataset_type : str
        Butler dataset type name to query and fetch.
    out_dir : str
        Parent directory under which the per-dataset-type folder is created.
    """
    dataset_dir = os.path.join(out_dir, dataset_type)
    os.makedirs(dataset_dir)

    # NOTE(review): the original carried "TODO: Remove slicing" here, but the
    # query below is not sliced -- confirm the TODO is obsolete.
    total_rows = 0
    for index, ref in enumerate(butler.query_datasets(dataset_type)):
        frame = butler.get(dataset_type, dataId=ref.dataId)
        frame.to_parquet(os.path.join(dataset_dir, f"{index}.parquet"))
        total_rows += len(frame)

    print(f"Saved {total_rows} rows to {dataset_dir}")
    
def download_visits(out_dir):
    """Fetch the LSSTComCam ``visitTable`` and save it as parquet.

    The table is read through the notebook-global ``butler`` and written to
    ``<out_dir>/visits.parquet``.

    Parameters
    ----------
    out_dir : str
        Existing directory that receives ``visits.parquet``.

    Returns
    -------
    The visit table object returned by ``butler.get``.
    """
    destination = os.path.join(out_dir, "visits.parquet")
    visit_table = butler.get("visitTable", dataId={'instrument': 'LSSTComCam'})
    visit_table.to_parquet(destination)
    print(f"Saved {len(visit_table)} visits rows to {destination}")
    return visit_table

def append_mag_and_magerr(df, flux_col_prefixes):
    """Return ``df`` with magnitude (and magnitude error) columns appended.

    For each prefix, ``<prefix>Flux`` is converted from nJy to AB magnitude
    and stored as ``<prefix>Mag``. If ``<prefix>FluxErr`` is also a column of
    ``df``, ``<prefix>MagErr`` is added as half the magnitude spread between
    ``flux - fluxErr`` and ``flux + fluxErr``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the flux columns; it is not modified.
    flux_col_prefixes : iterable of str
        Column-name prefixes; ``<prefix>Flux`` must exist for each one.

    Returns
    -------
    pandas.DataFrame
        A new frame: the original columns followed by the float64
        magnitude columns, sharing ``df``'s index.
    """

    def _to_abmag(values):
        # Unit conversion nJy -> AB magnitude via astropy.
        return u.nJy.to(u.ABmag, values)

    new_columns = {}
    for prefix in flux_col_prefixes:
        flux = df[f"{prefix}Flux"]
        new_columns[f"{prefix}Mag"] = _to_abmag(flux)

        err_name = f"{prefix}FluxErr"
        if err_name not in df.columns:
            # No flux error available: only the magnitude is produced.
            continue

        flux_err = df[err_name]
        mag_at_plus = _to_abmag(flux + flux_err)
        mag_at_minus = _to_abmag(flux - flux_err)
        # Negated because the magnitude at flux + err is the smaller value.
        new_columns[f"{prefix}MagErr"] = -(mag_at_plus - mag_at_minus) / 2

    magnitudes = pd.DataFrame(new_columns, dtype=np.float64, index=df.index)
    return pd.concat([df, magnitudes], axis=1)

### DIA Object

In [None]:
# Export all diaObjectTable_tract datasets to <raw_dir>/diaObjectTable_tract.
download_from_butler(dataset_type='diaObjectTable_tract', out_dir=raw_dir)

### DIA Source

In [None]:
# Export all diaSourceTable_tract datasets to <raw_dir>/diaSourceTable_tract.
download_from_butler(dataset_type='diaSourceTable_tract', out_dir=raw_dir)

### DIA Forced Source

In [None]:
# Export all forcedSourceOnDiaObjectTable datasets to <raw_dir>/forcedSourceOnDiaObjectTable.
download_from_butler(dataset_type='forcedSourceOnDiaObjectTable', out_dir=raw_dir)

### Object

In [None]:
# Export all objectTable datasets to <raw_dir>/objectTable.
download_from_butler(dataset_type='objectTable', out_dir=raw_dir)

### Source

In [None]:
# Export all sourceTable datasets to <raw_dir>/sourceTable.
download_from_butler(dataset_type='sourceTable', out_dir=raw_dir)

### Forced Source

In [None]:
# Export all forcedSourceTable datasets to <raw_dir>/forcedSourceTable.
download_from_butler(dataset_type='forcedSourceTable', out_dir=raw_dir)

### Import data to HATS

In [None]:
# Install hats-import from the `main` branch of its GitHub repository (-q: quiet pip output).
%pip install -q git+https://github.com/astronomy-commons/hats-import.git@main

In [7]:
from rubin_reader import RubinParquetReader

In [None]:
# Import the exported diaObjectTable_tract parquet files as the "diaObject" HATS catalog.
dataset_type = "diaObjectTable_tract"

args = ImportArguments(
    # Input files and reader
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(dataset_type=dataset_type),
    # Sky-coordinate columns
    ra_column="ra",
    dec_column="dec",
    # Output catalog
    output_path=hats_dir,
    output_artifact_name="diaObject",
    catalog_type="object",
    resume=False,
    # Pixel threshold and Dask worker settings
    pixel_threshold=300_000,
    dask_n_workers=1,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

In [None]:
# Import the exported diaSourceTable_tract parquet files as the "diaSource" HATS catalog.
dataset_type = "diaSourceTable_tract"

args = ImportArguments(
    # Input files and reader
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(dataset_type=dataset_type),
    # Sky-coordinate columns
    ra_column="ra",
    dec_column="dec",
    # Output catalog
    output_path=hats_dir,
    output_artifact_name="diaSource",
    catalog_type="source",
    resume=False,
    # Pixel threshold and Dask worker settings
    pixel_threshold=300_000,
    dask_n_workers=4,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

In [None]:
# Import the exported forcedSourceOnDiaObjectTable parquet files as the "diaForcedSource" HATS catalog.
dataset_type = "forcedSourceOnDiaObjectTable"

args = ImportArguments(
    # Input files and reader
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(dataset_type=dataset_type),
    # Sky-coordinate columns
    ra_column="coord_ra",
    dec_column="coord_dec",
    # Output catalog
    output_path=hats_dir,
    output_artifact_name="diaForcedSource",
    catalog_type="source",
    resume=False,
    # Pixel threshold and Dask worker settings
    pixel_threshold=300_000,
    dask_n_workers=4,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

In [None]:
# Per-band columns, in order for each of u, g, r, i, z, y:
# psfFlux, psfFluxErr, kronFlux, kronFluxErr, kronRad.
cols_per_band = []
for band in "ugrizy":
    for flux_type in ("psf", "kron"):
        prefix = f"{band}_{flux_type}"
        cols_per_band += [prefix + "Flux", prefix + "FluxErr"]
    cols_per_band.append(band + "_kronRad")

# Band-independent columns first, then the per-band columns built above.
obj_default_columns = [
    "refFwhm",
    "shape_flag",
    "sky_object",
    "parentObjectId",
    "detect_isPrimary",
    "x",
    "y",
    "xErr",
    "yErr",
    "shape_yy",
    "shape_xx",
    "shape_xy",
    "coord_ra",
    "coord_dec",
    "coord_raErr",
    "coord_decErr",
    "tract",
    "patch",
    "detect_isIsolated",
]
obj_default_columns = obj_default_columns + cols_per_band

# Import the exported objectTable parquet files as the "object" HATS catalog.
dataset_type = "objectTable"

# TODO: read_columns isn't working

args = ImportArguments(
    # Input files and reader (restricted to the columns selected above)
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(
        dataset_type=dataset_type,
        read_columns=obj_default_columns,
    ),
    # Sky-coordinate columns
    ra_column="coord_ra",
    dec_column="coord_dec",
    # Output catalog
    output_path=hats_dir,
    output_artifact_name="object",
    catalog_type="object",
    resume=False,
    # Pixel threshold and Dask worker settings
    pixel_threshold=300_000,
    dask_n_workers=4,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

In [None]:
# Import the exported sourceTable parquet files as the "source" HATS catalog.
dataset_type = "sourceTable"

args = ImportArguments(
    # Input files and reader
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(dataset_type=dataset_type),
    # Sky-coordinate columns
    ra_column="ra",
    dec_column="dec",
    # Output catalog
    output_path=hats_dir,
    output_artifact_name="source",
    catalog_type="source",
    resume=False,
    # Pixel threshold and Dask worker settings
    pixel_threshold=300_000,
    dask_n_workers=4,
    dask_threads_per_worker=1,
)
runner.pipeline(args)

In [None]:
# Save the visit table, then build a dict from each visit-table index label
# to that row's expMidptMJD value.
visits = download_visits(out_dir=raw_dir)
visit_map = visits["expMidptMJD"].to_dict()

In [None]:
# Import the exported forcedSourceTable parquet files as the "forcedSource" HATS catalog.
dataset_type = "forcedSourceTable"

args = ImportArguments(
    # Input files and reader (the reader also receives the visit -> expMidptMJD map)
    input_path=os.path.join(raw_dir, dataset_type),
    file_reader=RubinParquetReader(
        dataset_type=dataset_type,
        visit_map=visit_map,
    ),
    # Sky-coordinate columns
    ra_column="coord_ra",
    dec_column="coord_dec",
    # Output catalog
    output_path=hats_dir,
    output_artifact_name="forcedSource",
    catalog_type="source",
    resume=False,
    # Pixel threshold and Dask worker settings
    pixel_threshold=300_000,
    dask_n_workers=4,
    dask_threads_per_worker=1,
)
runner.pipeline(args)