In [58]:
import pystac
from pathlib import Path
import shutil
import shlex
import subprocess
OUTPUT_DIR = '../../stac_catalog'

In [44]:
col1 = pystac.Collection.from_file('../../stac_catalog/raster/collection.json')
col2 = pystac.Collection.from_file('../../stac_catalog/vector/collection.json')

In [45]:

catalog = pystac.Catalog(id="main-catalog", description="Tutorial catalog.")


In [46]:
catalog.add_children([col1,col2])

[<Link rel=child target=<Collection id=raster>>,
 <Link rel=child target=<Collection id=vector>>]

In [47]:
col1.describe()

* <Collection id=raster>
  * <Item id=Age_of_Imperviousness>
  * <Item id=Flow_Duration_Index>
  * <Item id=HSPF_Land_Cover_Type>
  * <Item id=Hydrologic_Response_Units>
  * <Item id=Imperviousness>
  * <Item id=Land_Cover>
  * <Item id=Land_Use>
  * <Item id=Population_Density>
  * <Item id=Precipitation_mm>
  * <Item id=Runoff_mm>
  * <Item id=Slope>
  * <Item id=Slope_Categories>
  * <Item id=Soils>
  * <Item id=Total_Copper_Concentration>
  * <Item id=Total_Kjeldahl_Nitrogen_Concentration>
  * <Item id=Total_Phosphorus_Concentration>
  * <Item id=Total_Suspended_Solids_Concentration>
  * <Item id=Total_Zinc_Concentration>
  * <Item id=Traffic>
  * <Item id=copper_concentration_ug_per_L>


In [48]:
catalog.normalize_hrefs(root_href="https://storage.googleapis.com/swhm_data/public/layers/")
#catalog.set_self_href("https://storage.googleapis.com/swhm_data/public/layers/catalog.json")
catalog.save(catalog_type="ABSOLUTE_PUBLISHED", dest_href=OUTPUT_DIR)

# Upload all jsons to GCP 

In [57]:
def upload_stac_assets(root_dir, bucket, prefix, dry_run=False, return_summary=False):
    """
    Finds and uploads STAC asset JSON files to Google Cloud Storage.

    - Uploads all .json files in the root directory (regardless of name).
    - Uploads .json files in subdirectories that match the directory name (e.g. 'foo/foo.json').

    Args:
        root_dir (str): Absolute or relative path to the root directory.
        bucket (str): GCS bucket name.
        prefix (str): Path prefix within the GCS bucket.
        dry_run (bool): If True, simulate uploads without executing them.
        return_summary (bool): If True, return a summary dictionary of results.

    Returns:
        dict (optional): Summary of uploaded, skipped, and failed files.
    """
    root_path = Path(root_dir).resolve()

    if not root_path.is_dir():
        print(f"ERROR: {root_path} is not a valid directory.")
        return

    if not shutil.which("gsutil"):
        print("ERROR: 'gsutil' command not found in PATH.")
        return

    if not prefix.endswith("/"):
        prefix += "/"

    print(f"Scanning: {root_path}")
    print(f"Uploading to: gs://{bucket}/{prefix}")
    print("-" * 40)

    uploaded = []
    skipped = []
    failed = []

    # Helper: Upload a single file
    def upload_file(file_path):
        relative_path = file_path.relative_to(root_path)
        gcs_path = f"gs://{bucket}/{prefix}{relative_path.as_posix()}"

        print(f"Found: {file_path}")
        print(f"  -> GCS Path: {gcs_path}")

        if dry_run:
            print(f"  -> DRY RUN: Skipping actual upload.")
            skipped.append(str(file_path))
            return

        try:
            gc_cmd = ["gsutil", "cp", str(file_path), gcs_path]
            print(f"  -> Executing: {' '.join(shlex.quote(arg) for arg in gc_cmd)}")
            subprocess.run(gc_cmd, capture_output=True, text=True, check=True)
            print("  -> Upload successful!")
            uploaded.append(str(file_path))
        except subprocess.CalledProcessError as e:
            print(f"  -> ERROR: Upload failed for {file_path}")
            print(f"     stderr: {e.stderr}")
            failed.append((str(file_path), e.stderr))

        print("-" * 40)

    # Upload all .json files in root directory
    for json_file in root_path.glob("*.json"):
        upload_file(json_file)

    # Upload matching <dir>/<dir>.json files in subdirectories
    for dir_path in root_path.rglob("*"):
        if dir_path.is_dir():
            expected_json = dir_path / f"{dir_path.name}.json"
            if expected_json.exists():
                upload_file(expected_json)

    if return_summary:
        return {
            "uploaded": uploaded,
            "skipped": skipped,
            "failed": failed,
        }

In [59]:
#  Google Cloud Storage bucket and prefix (folder) where COGs are located.
GCS_BUCKET = "swhm_data"  # e.g., "my-imagery-bucket"
GCS_PREFIX = "public/layers/"   # e.g., "sentinel-2/l2a/" or leave empty for root
upload_stac_assets(bucket=GCS_BUCKET,prefix=GCS_PREFIX,root_dir='../../stac_catalog/')

Scanning: /Users/christiannilsen/Documents/repos/swmh-stac-catalog/catalog/stac_catalog
Uploading to: gs://swhm_data/public/layers/
----------------------------------------
Found: /Users/christiannilsen/Documents/repos/swmh-stac-catalog/catalog/stac_catalog/catalog.json
  -> GCS Path: gs://swhm_data/public/layers/catalog.json
  -> Executing: gsutil cp /Users/christiannilsen/Documents/repos/swmh-stac-catalog/catalog/stac_catalog/catalog.json gs://swhm_data/public/layers/catalog.json
  -> Upload successful!
----------------------------------------
Found: /Users/christiannilsen/Documents/repos/swmh-stac-catalog/catalog/stac_catalog/raster/Traffic/Traffic.json
  -> GCS Path: gs://swhm_data/public/layers/raster/Traffic/Traffic.json
  -> Executing: gsutil cp /Users/christiannilsen/Documents/repos/swmh-stac-catalog/catalog/stac_catalog/raster/Traffic/Traffic.json gs://swhm_data/public/layers/raster/Traffic/Traffic.json
  -> Upload successful!
----------------------------------------
Found: /U