<a href="https://colab.research.google.com/github/ua-datalab/Geospatial_Workshops/blob/main/notebooks/STAC_crawl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## SpatioTemporal Asset Catalog (STAC)
This notebook demonstrates how to use the `pystac_client` Python library to crawl through and access geospatial assets from a STAC-compliant API.

In [None]:
# Install pystac_client. This library is used to crawl SpatioTemporal Asset Catalogs (STAC)
!pip install pystac_client --quiet
!pip install geopandas --quiet
!pip install folium --quiet

In [1]:
#Import the libraries into the current session

import pystac_client
import geopandas
import folium

In [2]:
# Open a client on the CyVerse STAC endpoint; `catalog` is the handle used by
# the collection listing and item searches in the cells that follow.
catalog = pystac_client.Client.open("https://stac.cyverse.org")

In [3]:
# Pull every collection exposed by the root catalog into a list so it can be
# counted here and iterated again by later cells.
collections = [*catalog.get_collections()]

# Report the number of collections found ...
print(f"Number of collections in the base catalog: {len(collections)}")

# ... and then the ID of each one, one per line.
for collection in collections:
    print(f"ID: {collection.id}")

Number of collections in the base catalog: 1
ID: Open Forest Observatory


In [4]:
# Query the "Open Forest Observatory" collection with no spatial or temporal
# filter, fetch all matching items, and display how many there are.
search = catalog.search(
    collections=["Open Forest Observatory"],
)
items = search.item_collection()
len(items)

321

In [None]:
### Show the collection boundaries and the item geometries on a map

# Create a base folium map centered on [0, 0] (change as desired)
m = folium.Map(location=[0, 0], zoom_start=2)

# Loop over each collection and outline each of its spatial extents.
for coll in collections:
    # The loop variable is deliberately NOT called `bbox`: a later cell defines
    # a notebook-level `bbox` search filter, and re-running this cell must not
    # overwrite it.
    for extent_bbox in coll.extent.spatial.bboxes:
        # A STAC bbox is either 2D  [west, south, east, north]
        # or 3D [west, south, min_elev, east, north, max_elev].
        # Taking the first two values of each half handles both layouts.
        half = len(extent_bbox) // 2
        west, south = extent_bbox[:2]
        east, north = extent_bbox[half:half + 2]

        # Closed ring of corners in Leaflet-friendly order: [lat, lon]
        coords = [
            [south, west],
            [south, east],
            [north, east],
            [north, west],
            [south, west],
        ]

        # Draw the extent as an unfilled polygon on the map
        folium.Polygon(locations=coords, fill=False).add_to(m)

# Styling callback handed to folium.GeoJson for the item footprints
def style_function(feature):
    """Return the style dictionary used to draw a GeoJSON feature.

    The same style is returned for every feature; the `feature` argument is
    accepted but not inspected.

    Returns:
        dict: an orange outline, 2 units thick, with a fully transparent fill.
    """
    return dict(
        color="orange",   # outline color for polygons/lines
        weight=2,         # line/polygon edge thickness
        fillOpacity=0.0,  # 0.0 = fully transparent, 1.0 = fully opaque
    )

# Overlay the footprint of every item on the map.
for item in items:
    # Items without a geometry have nothing to draw, so skip them.
    if not item.geometry:
        continue

    # item.geometry is handed to folium as-is (expected to be a GeoJSON
    # geometry such as a polygon or multipolygon); hovering shows the item ID.
    folium.GeoJson(
        item.geometry,
        style_function=style_function,
        tooltip=f"Item ID: {item.id}",
    ).add_to(m)

# Show the map
m



In [None]:
# Custom temporal and spatial filters used to narrow the item search

# Date window written as "start/end"
time_range = "2023-01-01/2024-12-31"

# Bounding box: SW corner longitude/latitude, then NE corner longitude/latitude
bbox = [
    -123.621,  # SW longitude
    38.32,     # SW latitude
    -119.67,   # NE longitude
    40.293,    # NE latitude
]

In [9]:
# Repeat the collection search, this time restricted to the bounding box and
# time range defined above, and display the number of matching items.
search = catalog.search(
    collections=["Open Forest Observatory"],
    bbox=bbox,
    datetime=time_range,
)
items = search.item_collection()
len(items)



167

In [None]:
# Show the filtered search on the map
m = folium.Map(location=[0, 0], zoom_start=2)

# Zoom the view to the search bounding box so the filtered footprints are
# actually visible instead of being lost on a world-scale map.
# `bbox` is [west, south, east, north]; fit_bounds expects [lat, lon] corners
# in the order [southwest, northeast].
m.fit_bounds([[bbox[1], bbox[0]], [bbox[3], bbox[2]]])

for item in items:
    # item.geometry is expected to be a GeoJSON geometry
    # (polygon, multipolygon, etc.); items without one are skipped.
    if item.geometry:
        # Add the geometry directly, with the item ID as a hover tooltip
        folium.GeoJson(
            item.geometry,
            style_function=style_function,
            tooltip=f"Item ID: {item.id}"
        ).add_to(m)

# Show the map
m


In [None]:
# Re-open the catalog so this cell can run on its own.
catalog = pystac_client.Client.open("https://stac.cyverse.org")

# Search the whole collection, explicitly asking the API to return these
# top-level fields for every item. Individual properties could be requested
# instead, e.g. "properties.platform" or "properties.license".
search = catalog.search(
    collections=["Open Forest Observatory"],
    fields=dict(
        include=["properties", "geometry", "bbox", "assets", "links"],
        exclude=[],
    ),
)

items = list(search.item_collection())
print(f"Retrieved {len(items)} items")

# Print each item's ID with its 'platform' and 'license' properties
# (None is printed for a property that is absent).
for item in items:
    print(
        item.id,
        item.properties.get("platform"),
        item.properties.get("license"),
    )




In [5]:
# Search the full collection, requesting the listed top-level fields per item.
search = catalog.search(
    collections=["Open Forest Observatory"],
    fields={
        "include": ["properties", "geometry", "bbox", "assets", "links"],
        "exclude": [],
    },
)
items = list(search.item_collection())
print(len(items), "items total.")

# Inspect the first item's properties, one "key : value" pair per line.
# Nothing is printed when the search returned no items.
if len(items) > 0:
    sample = items[0]
    for k, v in sample.properties.items():
        print(k, ":", v)


321 items total.
title : None
description : None
start_datetime : None
end_datetime : None
created : 2025-03-03T22:25:58.390568+00:00
updated : 2025-03-03T22:25:58.390568+00:00
platform : eBee X
instruments : None
constellation : None
mission : None
providers : None
gsd : None
datetime : 2024-05-17T20:03:07+00:00
license : CC-BY-SA-4.0
