In [7]:
import pystac
from google.cloud import storage
import subprocess

# --- Configuration ---
# The name of your GCS bucket
BUCKET_NAME = 'swhm_data'

# The "folder" or prefix where your STAC items are located
PREFIX = 'public/layers/raster/'

# The desired output filename for your catalog
OUTPUT_FILE = 'catalog.json'
# --- End Configuration ---



In [5]:
"""
    Scans a GCS bucket for STAC item JSON files and returns a list of their public URLs.
    """
def create_gcs_catalog():    # The base public URL for all items in the bucket.
    root_href = f"https://storage.googleapis.com/{BUCKET_NAME}/"

    print(f"🔍 Scanning gs://{BUCKET_NAME}/{PREFIX} for .json files...")
    storage_client = storage.Client()
    blobs = storage_client.list_blobs(BUCKET_NAME, prefix=PREFIX)

    urls = []

    for blob in blobs:
        if blob.name.endswith('.json'):
            item_url = f"{root_href}{blob.name}"
            urls.append(item_url)

    print(f"✅ Found {len(urls)} STAC items.")
    return urls

In [6]:
create_gcs_catalog()

🔍 Scanning gs://swhm_data/public/layers/raster/ for .json files...
✅ Found 21 STAC items.


['https://storage.googleapis.com/swhm_data/public/layers/raster/Age_of_Imperviousness/Age_of_Imperviousness.json',
 'https://storage.googleapis.com/swhm_data/public/layers/raster/Flow_Duration_Index/Flow_Duration_Index.json',
 'https://storage.googleapis.com/swhm_data/public/layers/raster/HSPF_Land_Cover_Type/HSPF_Land_Cover_Type.json',
 'https://storage.googleapis.com/swhm_data/public/layers/raster/Hydrologic_Response_Units/Hydrologic_Response_Units.json',
 'https://storage.googleapis.com/swhm_data/public/layers/raster/Imperviousness/Imperviousness.json',
 'https://storage.googleapis.com/swhm_data/public/layers/raster/Land_Cover/Land_Cover.json',
 'https://storage.googleapis.com/swhm_data/public/layers/raster/Land_Use/Land_Use.json',
 'https://storage.googleapis.com/swhm_data/public/layers/raster/Population_Density/Population_Density.json',
 'https://storage.googleapis.com/swhm_data/public/layers/raster/Precipitation_mm/Precipitation_mm.json',
 'https://storage.googleapis.com/swhm_dat

In [None]:

def create_gcs_catalog():
    """
    Scans a GCS bucket for STAC items and generates a root catalog with absolute URLs.
    """
    # The base public URL for all items in the bucket.
    root_href = f"https://storage.googleapis.com/{BUCKET_NAME}/"

    # 1. Initialize a new STAC Catalog
    root_catalog = pystac.Catalog(
        id='gcp-root-catalog',
        description='Root catalog for all STAC items stored on Google Cloud Storage.'
    )

    # 2. Connect to GCS and list the item files
    print(f"🔍 Scanning gs://{BUCKET_NAME}/{PREFIX} for .json files...")
    storage_client = storage.Client()
    blobs = storage_client.list_blobs(BUCKET_NAME, prefix=PREFIX)

    item_count = 0
    for blob in blobs:
        # We only want to link to the item JSON files
        if blob.name.endswith('.json'):
            item_count += 1
            
            # Construct the full, public URL for the item
            item_url = f"{root_href}{blob.name}"
            print(item_url)
            print(f"  -> Adding link to: {item_url}")
            
            # 3. Add a link to the item in the catalog
            # This does not download the file, it just creates the link
            root_catalog.add_link(
                pystac.Link(rel="item",
                            target=item_url,
                            media_type="application/json")
            )

    if item_count == 0:
        print("⚠️ No .json files found. Please check your BUCKET_NAME and PREFIX.")
        return

    # 4. Save the catalog file
    # CatalogType.ABSOLUTE_PUBLISHED is best for catalogs with absolute online URLs.
    root_catalog.normalize_hrefs(root_href) # Sets the base for any relative links
    root_catalog.save(catalog_type=pystac.CatalogType.ABSOLUTE_PUBLISHED,
                      dest_href=OUTPUT_FILE) 
    
    print(f"\n✅ Success! Catalog with {item_count} items saved to '{OUTPUT_FILE}'.")
    print(f"You should upload '{OUTPUT_FILE}' to gs://{BUCKET_NAME}/")



In [5]:
create_gcs_catalog()

🔍 Scanning gs://swhm_data/public/layers/raster/ for .json files...
https://storage.googleapis.com/swhm_data/public/layers/raster/Age_of_Imperviousness/Age_of_Imperviousness.json
  -> Adding link to: https://storage.googleapis.com/swhm_data/public/layers/raster/Age_of_Imperviousness/Age_of_Imperviousness.json
https://storage.googleapis.com/swhm_data/public/layers/raster/Flow_Duration_Index/Flow_Duration_Index.json
  -> Adding link to: https://storage.googleapis.com/swhm_data/public/layers/raster/Flow_Duration_Index/Flow_Duration_Index.json
https://storage.googleapis.com/swhm_data/public/layers/raster/HSPF_Land_Cover_Type/HSPF_Land_Cover_Type.json
  -> Adding link to: https://storage.googleapis.com/swhm_data/public/layers/raster/HSPF_Land_Cover_Type/HSPF_Land_Cover_Type.json
https://storage.googleapis.com/swhm_data/public/layers/raster/Hydrologic_Response_Units/Hydrologic_Response_Units.json
  -> Adding link to: https://storage.googleapis.com/swhm_data/public/layers/raster/Hydrologic_Res