# Generate JSON

Generate a JSON file (`<VERSION>.json`) with metadata for the collections, and their crossmatch subproducts, to be published on `data.lsdb.io`.

In [None]:
import os
import json
import lsdb
import subprocess

from pathlib import Path

In [None]:
# Run parameters are supplied through environment variables; a missing
# variable raises KeyError immediately.
INSTRUMENT, RUN, VERSION, COLLECTION = (
    os.environ[key] for key in ("INSTRUMENT", "RUN", "VERSION", "COLLECTION")
)
OUTPUT_DIR = Path(os.environ["OUTPUT_DIR"])

# Echo the resolved settings, one per line.
print(
    f"INSTRUMENT: {INSTRUMENT}",
    f"RUN: {RUN}",
    f"VERSION: {VERSION}",
    f"COLLECTION: {COLLECTION}",
    f"OUTPUT_DIR: {OUTPUT_DIR}",
    sep="\n",
)

# Catalogs for this version are read from <OUTPUT_DIR>/hats/<VERSION>.
hats_dir = OUTPUT_DIR.joinpath("hats", VERSION)

In [None]:
def get_directory_size(path):
    """Return the human-readable size of `path` as reported by ``du -sh``.

    The size is taken from the first field of the ``du`` output and its
    single-letter unit suffix is expanded to the binary unit name
    (for example ``"1.5G"`` becomes ``"1.5 GiB"``).

    Args:
        path: Location to measure; a string or path-like object.

    Returns:
        A string of the form ``"<number> <unit>"``. A size printed without
        a unit suffix (e.g. ``"0"``) is reported in bytes.

    Raises:
        subprocess.CalledProcessError: If ``du`` exits with a non-zero status.
        ValueError: If the ``du`` output is empty or ends in an unknown unit.
    """
    size_units = {
        "B": "B",
        "K": "KiB",
        "M": "MiB",
        "G": "GiB",
        "T": "TiB",
        "P": "PiB",
        "E": "EiB",
    }
    result = subprocess.run(
        ["du", "-sh", path], capture_output=True, text=True, check=True
    )
    # The size is the first whitespace-delimited field; the path follows it.
    fields = result.stdout.split(maxsplit=1)
    if not fields:
        raise ValueError(f"Unexpected empty output from du for {path!r}")
    size_str = fields[0]

    # Small sizes may be printed as a bare number with no unit suffix;
    # treat those as a byte count instead of failing on the unit lookup.
    if size_str[-1].isdigit():
        return f"{size_str} B"

    unit = size_units.get(size_str[-1])
    if unit is None:
        raise ValueError(f"Unrecognized size unit in du output: {size_str!r}")
    return f"{size_str[:-1]} {unit}"


def generate_json(catalog_path):
    """Build the metadata entry describing the catalog at `catalog_path`.

    The catalog is opened with `lsdb.read_hats`. Labels and descriptions are
    composed from the catalog name and the module-level RUN, VERSION and
    COLLECTION settings.

    Args:
        catalog_path: Location of the catalog; a string or path-like object.

    Returns:
        A dict with the entry's label, name, description, URLs, summary
        metadata (row, column and partition counts, size on disk) and badges.
    """
    hats_catalog = lsdb.read_hats(catalog_path)
    catalog_name = hats_catalog.name

    reference_links = [
        {
            "label": "Column descriptions",
            "url": "https://sdm-schemas.lsst.io/imsim.html",
        },
        {
            "label": "Jira Ticket",
            "url": f"https://rubinobs.atlassian.net/browse/{COLLECTION}",
        },
    ]

    summary = {
        "numRows": len(hats_catalog),
        "numColumns": len(hats_catalog.columns),
        "numPartitions": len(hats_catalog.get_healpix_pixels()),
        "sizeOnDisk": get_directory_size(catalog_path),
    }

    entry = {
        "label": f"{VERSION}/{catalog_name}",
        "name": f"{RUN} {VERSION} {catalog_name}",
        "description": f"DRP/{RUN}/{VERSION}/{COLLECTION} {catalog_name}",
        "urls": {"catalog": str(catalog_path)},
        "other_urls": reference_links,
        "metadata": summary,
        "badges": ["Available only on USDF"],
    }
    return entry

In [None]:
collections_json = []
subproducts_json = []

# Each collection directory holds a base "<prefix>_lc" catalog, with the
# crossmatch catalogs stored next to it under the same name plus a suffix.
for collection_name in ("dia_object_collection", "object_collection"):
    lc_name = collection_name.replace("collection", "lc")
    lc_path = hats_dir / collection_name / lc_name

    # Main Rubin catalog of this collection.
    collections_json.append(generate_json(lc_path))

    # Crossmatch subproducts: PS1 first, then ZTF DR22.
    subproducts_json += [
        generate_json(f"{lc_path}{suffix}") for suffix in ("_x_ps1", "_x_ztf_dr22")
    ]

# Main collections are written ahead of the crossmatch subproducts.
with open(f"{VERSION}.json", "w") as out_file:
    json.dump([*collections_json, *subproducts_json], out_file)