# Generate weekly JSON

Generate a JSON file with metadata for the weekly catalogs to be published on `data.lsdb.io`.

In [1]:
import os
import json
import lsdb
import subprocess

from pathlib import Path

In [2]:
# Both settings are required: a missing environment variable raises KeyError
# here instead of producing a half-configured run.
DRP_VERSION = os.environ["DRP_VERSION"]
COLLECTION_TAG = os.environ["COLLECTION_TAG"]
print(
    f"DRP_VERSION: {DRP_VERSION}",
    f"COLLECTION_TAG: {COLLECTION_TAG}",
    sep="\n",
)

# All HATS catalogs for this DRP version live under <base>/hats/<DRP_VERSION>.
base_output_dir = Path("/sdf/data/rubin/shared/lsdb_commissioning")
hats_dir = base_output_dir.joinpath("hats", DRP_VERSION)

In [3]:
def get_directory_size(path):
    """Return the on-disk size of ``path`` as reported by ``du -sh``.

    Parameters
    ----------
    path : str or os.PathLike
        Directory (or file) to measure.

    Returns
    -------
    str
        The size with a binary unit label, e.g. ``"4.0 KiB"`` or ``"12 GiB"``.

    Raises
    ------
    subprocess.CalledProcessError
        If ``du`` exits with a non-zero status.
    ValueError
        If ``du`` produced no size field.
    KeyError
        If the size ends in a suffix that is not a known unit letter.
    """
    # `du -h` uses powers of 1024, hence the binary (IEC) unit labels.
    SIZE_UNITS = {
        "B": "B",
        "K": "KiB",
        "M": "MiB",
        "G": "GiB",
        "T": "TiB",
        "P": "PiB",
        "E": "EiB",
    }
    result = subprocess.run(
        ["du", "-sh", path], capture_output=True, text=True, check=True
    )
    # Output format is "<size>\t<path>\n"; only the size field is needed.
    size_str = result.stdout.split("\t")[0].strip()
    if not size_str:
        raise ValueError(f"Unexpected output from du: {result.stdout!r}")

    size_unit = size_str[-1]
    if size_unit.isdigit():
        # Sizes below 1 KiB (e.g. "0" for an empty directory) may be printed
        # without any suffix; treat them as a plain byte count.
        return f"{size_str} B"
    return f"{size_str[:-1]} {SIZE_UNITS[size_unit]}"


def generate_json(catalog_name, margin_catalog_name=None):
    """Build the metadata entry describing one HATS catalog.

    Parameters
    ----------
    catalog_name : str
        Name of the catalog directory under ``hats_dir``.
    margin_catalog_name : str, optional
        Name of the margin catalog directory under ``hats_dir``. When given,
        its path is added to the entry's ``urls``.

    Returns
    -------
    dict
        Entry holding the label, name, description, catalog paths, related
        links, and catalog metadata (row, column and partition counts plus
        the size on disk).
    """
    catalog_path = hats_dir / catalog_name
    catalog = lsdb.read_hats(catalog_path)

    # The margin entry is only present when a margin catalog was requested.
    urls = {"catalog": str(catalog_path)}
    if margin_catalog_name is not None:
        urls["margin_catalog"] = str(hats_dir / margin_catalog_name)

    related_links = [
        {
            "label": "Column descriptions",
            "url": "https://sdm-schemas.lsst.io/imsim.html",
        },
        {
            "label": "Jira Ticket",
            "url": f"https://rubinobs.atlassian.net/browse/{COLLECTION_TAG}",
        },
    ]

    metadata = {
        "numRows": len(catalog),
        "numColumns": len(catalog.columns),
        "numPartitions": len(catalog.get_healpix_pixels()),
        "sizeOnDisk": get_directory_size(catalog_path),
    }

    return {
        "label": f"{DRP_VERSION}/{catalog_name}",
        "name": f"DP1 {DRP_VERSION} {catalog_name}",
        "description": f"DRP/DP1/{DRP_VERSION}/{COLLECTION_TAG} {catalog_name}",
        "urls": urls,
        "other_urls": related_links,
        "metadata": metadata,
        "badges": ["Available only on USDF"],
    }

In [None]:
# Catalogs published together with a margin catalog: (catalog, margin) pairs.
nested_catalogs_json = [
    generate_json(catalog, margin)
    for catalog, margin in (
        ("dia_object_lc", "dia_object_lc_5arcs"),
        ("object_lc", "object_lc_5arcs"),
    )
]

# Catalogs published without a margin catalog. The ZTF DR22 entries are
# currently disabled.
other_catalogs_json = [
    generate_json(catalog)
    for catalog in (
        # "dia_object_lc_x_ztf_dr22",
        "dia_object_lc_x_ps1",
        # "object_lc_x_ztf_dr22",
        "object_lc_x_ps1",
    )
]

weekly_json = [*nested_catalogs_json, *other_catalogs_json]

# Written to the current working directory as "<DRP_VERSION>.json".
with open(f"{DRP_VERSION}.json", "w") as file:
    file.write(json.dumps(weekly_json))