In [9]:
import pystac
import geopandas as gpd
from shapely.geometry import mapping
from datetime import datetime, timezone
import os


In [5]:
fc_url = "https://storage.googleapis.com/swhm_data/public/cig_grid_wgs.geojson"
print(f"Reading data from {fc_url}...")
gdf = gpd.read_file(fc_url)


Reading data from https://storage.googleapis.com/swhm_data/public/cig_grid_wgs.geojson...


In [None]:

# Ensure the GeoDataFrame is not empty
if gdf.empty:
    print("Input GeoJSON is empty. Aborting.")
    return


In [6]:

# Extract spatial and temporal metadata
bounds = gdf.total_bounds
bbox = [bounds, bounds, bounds, bounds]
footprint_geom = mapping(gdf.unary_union.convex_hull)


  footprint_geom = mapping(gdf.unary_union.convex_hull)


In [7]:

# Use current time as a placeholder for the item's datetime
item_datetime = datetime.now(timezone.utc)

# --- Step 2: Create the root Catalog and Collection ---
# The Catalog is the top-level entry point
catalog_id = "vector-catalog-example"
catalog_description = "A catalog of example vector datasets."
catalog = pystac.Catalog(id=catalog_id, description=catalog_description)

# The Collection groups related items and holds shared metadata
collection_id = "national-parks-collection"
collection_description = "Boundaries of National Parks in the USA."
collection_license = "PDDL-1.0" # Public Domain Dedication and License

# Define the full extent of the collection
spatial_extent = pystac.SpatialExtent(bboxes=[bbox])
temporal_extent = pystac.TemporalExtent(intervals=[[item_datetime, None]])
collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)


In [10]:
catalog = pystac.Catalog.from_file('https://storage.googleapis.com/swhm_data/public/layers/raster/catalog.json')
collection = pystac.Collection(
    id=collection_id,
    description=collection_description,
    extent=collection_extent,
    license=collection_license,
    title="National Parks"
)

# Add the collection as a child of the root catalog
catalog.add_child(collection)


In [12]:

# --- Step 3: Create the STAC Item ---
item_id = "cig_grid_wgs"

item_id

'cig_grid_wgs'

In [13]:

item = pystac.Item(
    id=item_id,
    geometry=footprint_geom,
    bbox=bbox,
    datetime=item_datetime,
    properties={}, # Custom properties can be added here
    collection=collection
)


In [14]:

# --- Step 4: Create the Asset and add Extensions ---
asset_href = os.path.abspath(fc_url)


In [15]:

# Create the main data asset
asset = pystac.Asset(
    href=asset_href,
    media_type='application/geo+json', # pystac provides common media types
    title="National Parks GeoJSON",
    roles=["data"]
)


In [16]:

# Enable and populate the Projection Extension
proj_ext = pystac.extensions.projection.ProjectionExtension.ext(asset, add_if_missing=False)
if gdf.crs:
    proj_ext.epsg = gdf.crs.to_epsg()


In [None]:

# Enable and populate the Table Extension to describe attributes
# table_ext = pystac.extensions.table.TableExtension.ext(asset, add_if_missing=False)
# columns = []
# for col_name, dtype in gdf.dtypes.items():
#     if col_name!= 'geometry': # Exclude the geometry column
#         columns.append(
#             {
# "name": col_name,
# "type": str(dtype),
# "description": f"Attribute column for {col_name}"
# }
#         )
# table_ext.columns = columns


In [18]:

# Add the fully described asset to the item
item.add_asset("GeoJSON_data", asset)


In [24]:

CATALOG_JSON_DEST = "https://storage.googleapis.com/swhm_data/public/layers/raster/"
catalog.normalize_hrefs(root_href=CATALOG_JSON_DEST)

catalog.describe()



* <Catalog id=swhm-catalog>
    * <Collection id=raster>
      * <Item id=Age_of_Imperviousness>
      * <Item id=Flow_Duration_Index>
      * <Item id=HSPF_Land_Cover_Type>
      * <Item id=Hydrologic_Response_Units>
      * <Item id=Imperviousness>
      * <Item id=Land_Cover>
      * <Item id=Land_Use>
      * <Item id=Population_Density>
      * <Item id=Precipitation_mm>
      * <Item id=Runoff_mm>
      * <Item id=Slope>
      * <Item id=Slope_Categories>
      * <Item id=Soils>
      * <Item id=Total_Copper_Concentration>
      * <Item id=Total_Kjeldahl_Nitrogen_Concentration>
      * <Item id=Total_Phosphorus_Concentration>
      * <Item id=Total_Suspended_Solids_Concentration>
      * <Item id=Total_Zinc_Concentration>
      * <Item id=Traffic>
      * <Item id=copper_concentration_ug_per_L>
    * <Collection id=national-parks-collection>


In [26]:

OUTPUT_DIR = "../../stac_catalog"
catalog.save(pystac.CatalogType.ABSOLUTE_PUBLISHED, dest_href=OUTPUT_DIR)
# --- Step 5: Save the static catalog ---
# Set the HREFs to be relative and save to the output directory
print(f"Saving catalog to {output_dir}...")
catalog.normalize_hrefs(output_dir)
catalog.save(catalog_type=pystac.CatalogType.ABSOLUTE_PUBLISHED,dest_href=OUTPUT_DIR)
print("Catalog generation complete.")
catalog.describe()


TypeError: Object of type ndarray is not JSON serializable

In [None]:
#os.makedirs("./temp_data")
output_stac_dir = "./stac_output"

    # Run the creation process
#create_vector_stac_catalog("https://storage.googleapis.com/swhm_data/public/cig_grid_wgs.geojson", output_stac_dir)

In [None]:


if __name__ == '__main__':
    # Create dummy data for the example
    if not os.path.exists("./temp_data"):
        os.makedirs("./temp_data")
    
    dummy_fc_url = "./temp_data/national_parks.gpkg"
    d = {'name': ['Yellowstone', 'Yosemite'],
         'state': ['WY', 'CA'],
         'geometry': [gpd.points_from_xy(, ).buffer(0.5), 
                      gpd.points_from_xy([-1], [-2]).buffer(0.5)]}
    gdf = gpd.GeoDataFrame(d, crs="EPSG:4326")
    gdf.to_file(dummy_fc_url, driver="GPKG")

    # Define output directory
    output_stac_dir = "./stac_output"

    # Run the creation process
    create_vector_stac_catalog(dummy_fc_url, output_stac_dir)

In [None]:
    # --- Step 2: Create the root Catalog and Collection ---
catalog = pystac.Catalog(
        id="vector-catalog-example",
        description="A catalog of example vector datasets."
    )

def create_vector_stac_catalog(fc_url: str, output_dir: str):
    """
    Generates a STAC Catalog for a single GeoJSON vector file.

    Args:
        fc_url (str): The full path to the input GeoJSON file.
        output_dir (str): The directory where the STAC catalog will be saved.
    """
    # --- Step 1: Read vector data and extract core metadata ---
    print(f"Reading data from {fc_url}...")
    gdf = gpd.read_file(fc_url)

    if gdf.empty:
        print("Input GeoJSON is empty. Aborting.")
        return

    bounds = gdf.total_bounds
    bbox = [list(bounds)]
    footprint_geom = mapping(gdf.unary_union.convex_hull)
    item_datetime = datetime.now(timezone.utc)



    spatial_extent = pystac.SpatialExtent(bboxes=bbox)
    temporal_extent = pystac.TemporalExtent(intervals=[[item_datetime, None]])
    collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

    collection = pystac.Collection(
        id="national-parks-collection",
        description="Boundaries of National Parks in the USA.",
        extent=collection_extent,
        license="PDDL-1.0",
        title="National Parks"
    )

    catalog.add_child(collection)

    # --- Step 3: Create the STAC Item ---
    item_id = os.path.splitext(os.path.basename(fc_url))[0]

    item = pystac.Item(
        id=item_id,
        geometry=footprint_geom,
        bbox=list(bounds),
        datetime=item_datetime,
        properties={},
    )

    collection.add_item(item)

    # --- Step 4: Create the Asset and add Extensions ---
    asset_href = os.path.abspath(fc_url)
    asset = pystac.Asset(
        href=asset_href,
        media_type='application/geo+json',
        title="National Parks GeoJSON",
        roles=["data"]
    )

    # First, add the asset to the item
    item.add_asset("GeoJSON_data", asset)

    # Retrieve asset with owner set
    asset = item.assets["GeoJSON_data"]

    # Add projection extension
    proj_ext = projection.ProjectionExtension.ext(asset, add_if_missing=True)
    if gdf.crs:
        epsg = gdf.crs.to_epsg()
        if epsg:
            proj_ext.epsg = epsg

    # Add table extension
    table_ext = table.TableExtension.ext(asset, add_if_missing=True)
    columns = []
    for col_name, dtype in gdf.dtypes.items():
        if col_name != 'geometry':
            columns.append({
                "name": col_name,
                "type": str(dtype),
                "description": f"Attribute column for {col_name}"
            })
    table_ext.columns = columns

    # --- Step 5: Save the catalog ---
    print(f"Saving catalog to {output_dir}...")
    catalog.normalize_hrefs(output_dir)
    catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)
    print("Catalog generation complete.")
    catalog.describe()

In [32]:
from pystac.extensions import projection, table
output_stac_dir = "./stac_output"
create_vector_stac_catalog("https://storage.googleapis.com/swhm_data/public/cig_grid_wgs.geojson", output_stac_dir)

Reading data from https://storage.googleapis.com/swhm_data/public/cig_grid_wgs.geojson...
Saving catalog to ./stac_output...


  footprint_geom = mapping(gdf.unary_union.convex_hull)


Catalog generation complete.
* <Catalog id=vector-catalog-example>
    * <Collection id=national-parks-collection>
      * <Item id=cig_grid_wgs>
