# Generate JSON

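Generate a JSON file with metadata for the collections to be published on `data.lsdb.io`. The notebook is configured through the `INSTRUMENT`, `RUN`, `VERSION`, `COLLECTION` and `OUTPUT_DIR` environment variables and writes the result to `<VERSION>.json` in the current working directory.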

In [None]:
import os
import json
import lsdb
import subprocess

from pathlib import Path

In [None]:
# Run configuration comes from required environment variables; a missing one
# raises KeyError before anything is printed.
INSTRUMENT, RUN, VERSION, COLLECTION = (
    os.environ[key] for key in ("INSTRUMENT", "RUN", "VERSION", "COLLECTION")
)
OUTPUT_DIR = Path(os.environ["OUTPUT_DIR"])

# Echo the configuration, one setting per line.
print(
    f"INSTRUMENT: {INSTRUMENT}",
    f"RUN: {RUN}",
    f"VERSION: {VERSION}",
    f"COLLECTION: {COLLECTION}",
    f"OUTPUT_DIR: {OUTPUT_DIR}",
    sep="\n",
)

# Directory holding the collections of this version.
hats_dir = OUTPUT_DIR.joinpath("hats", VERSION)

In [None]:
def get_directory_size(path):
    """Return the on-disk size of ``path`` as a human-readable string.

    Runs ``du -sh`` on ``path`` and rewrites the one-letter unit suffix it
    prints as a binary-prefix unit, e.g. ``"1.5G"`` becomes ``"1.5 GiB"``.

    Args:
        path: Directory (or file) to measure, as a string or path-like object.

    Returns:
        The size formatted as ``"<number> <unit>"``.

    Raises:
        subprocess.CalledProcessError: If ``du`` exits with a non-zero status.
        ValueError: If the size printed by ``du`` cannot be interpreted.
    """
    SIZE_UNITS = {
        "B": "B",
        "K": "KiB",
        "M": "MiB",
        "G": "GiB",
        "T": "TiB",
        "P": "PiB",
        "E": "EiB",
    }
    # "--" stops option parsing so a path starting with "-" is not taken as a flag.
    result = subprocess.run(
        ["du", "-sh", "--", path], capture_output=True, text=True, check=True
    )
    # du prints "<size>\t<path>"; only the first field is the size.
    size_str = result.stdout.split("\t", 1)[0].strip()
    if size_str and size_str[-1].isdigit():
        # No unit suffix at all (e.g. "0" for an empty directory): plain bytes.
        return f"{size_str} B"
    number, suffix = size_str[:-1], size_str[-1:]
    if not number or suffix not in SIZE_UNITS:
        raise ValueError(f"Unexpected size reported by du: {size_str!r}")
    return f"{number} {SIZE_UNITS[suffix]}"


def generate_json(collection_name):
    """Build the metadata entry for one collection.

    The collection is opened with ``lsdb.read_hats`` from
    ``hats_dir / collection_name``; its row, column and partition counts and
    its size on disk are gathered into the ``"metadata"`` section of the entry.

    Args:
        collection_name: Name of the collection directory under ``hats_dir``.

    Returns:
        A JSON-serializable dict with the keys ``label``, ``name``,
        ``description``, ``urls``, ``other_urls``, ``metadata`` and ``badges``.
    """
    catalog_dir = hats_dir / collection_name
    hats_catalog = lsdb.read_hats(catalog_dir)

    stats = {
        "numRows": len(hats_catalog),
        "numColumns": len(hats_catalog.all_columns),
        "numPartitions": len(hats_catalog.get_healpix_pixels()),
        "sizeOnDisk": get_directory_size(catalog_dir),
    }
    schema_link = {
        "label": "Column descriptions",
        "url": "https://sdm-schemas.lsst.io/imsim.html",
    }
    ticket_link = {
        "label": "Jira Ticket",
        "url": f"https://rubinobs.atlassian.net/browse/{COLLECTION}",
    }

    # Keys are inserted in the order they should appear in the written JSON.
    entry = {}
    entry["label"] = f"{VERSION}/{collection_name}"
    entry["name"] = f"{RUN} {VERSION} {collection_name}"
    entry["description"] = f"DRP/{RUN}/{VERSION}/{COLLECTION} {collection_name}"
    entry["urls"] = {"catalog": str(catalog_dir)}
    entry["other_urls"] = [schema_link, ticket_link]
    entry["metadata"] = stats
    entry["badges"] = ["Available only on USDF"]
    return entry

In [None]:
# Build one metadata entry per published collection ("<base>_collection"),
# then write the whole list to "<VERSION>.json" in the working directory.
collections_json = list(
    map(
        generate_json,
        (f"{base}_collection" for base in ("dia_object", "object")),
    )
)
with open(f"{VERSION}.json", mode="w") as file:
    json.dump(collections_json, fp=file)