In [102]:
from pystac_client import Client
import pystac
from shapely.geometry import shape
from shapely.ops import unary_union
import pandas as pd
import geopandas as gpd
from datetime import datetime

In [48]:
# from https://gis.stackexchange.com/a/419241
from sklearn.cluster import AgglomerativeClustering

def cluster_shapes_by_distance(geodf, distance):
    """
    Group shapes whose centroids lie within a given distance of each other.

    Groups are built with single-linkage agglomerative clustering on the
    shape centroids: a shape joins a group as soon as its centroid is closer
    than ``distance`` to the centroid of *any* member. Equivalently, a shape
    is only left out of a group when it is at least ``distance`` away from
    *all* members of that group.

    Parameters
    ----------
    geodf : geopandas.GeoDataFrame
        Frame of polygons. Must use a projected CRS whose unit is meters,
        because centroid coordinates are compared as plain Euclidean x/y.
    distance : float
        Maximum distance between elements. In meters.

    Returns
    -------
    np.ndarray
        Integer group label for each row of ``geodf``, in row order. Frames
        with zero or one row get an array of zeros of matching length.

    Raises
    ------
    AssertionError
        If ``geodf`` has no CRS or its CRS is not projected.
    """
    # Function-scope import: numpy is not imported at the top of this notebook.
    import numpy as np

    crs = geodf.crs
    # Guard against a missing CRS too, so the caller gets the message below
    # rather than an AttributeError on None.
    assert crs is not None and crs.is_projected, 'geodf should be a projected crs with meters as the unit'

    centers_xy = [[c.x, c.y] for c in (p.centroid for p in geodf.geometry)]

    # AgglomerativeClustering needs at least two samples; with fewer there is
    # nothing to merge, so every row (if any) is its own group 0.
    if len(centers_xy) < 2:
        return np.zeros(len(centers_xy), dtype=np.int64)

    # The distance metric is deliberately left at its default (Euclidean).
    # The keyword for it was renamed from `affinity` to `metric` in
    # scikit-learn 1.2 and `affinity` was removed in 1.4, so omitting it is
    # the only spelling that works on every version.
    cluster = AgglomerativeClustering(n_clusters=None,
                                      linkage='single',
                                      distance_threshold=distance)
    cluster.fit(centers_xy)

    return cluster.labels_

### Pull all items from the catalog (this can take a few minutes to parse)

In [25]:
# Open the root catalog of the Umbra open-data STAC, then collect every item
# it yields into a list so the items can be iterated more than once.
umbra_catalog = pystac.read_file(
    "https://s3.us-west-2.amazonaws.com/umbra-open-data-catalog/stac/catalog.json"
)
items = [stac_item for stac_item in umbra_catalog.get_all_items()]

In [111]:
# One record per STAC item: its id, its raw 'datetime' property, and its
# footprint converted from GeoJSON into a shapely geometry.
item_records = [
    {
        'id': stac_item.id,
        'datetime': stac_item.properties['datetime'],
        'geometry': shape(stac_item.geometry),
    }
    for stac_item in items
]
item_gdf = gpd.GeoDataFrame(item_records, geometry='geometry', crs='EPSG:4326')

# Strip any leading/trailing 'Z' characters from the timestamp strings, then
# parse them as ISO 8601 into a datetime column.
datetime_text = item_gdf['datetime'].str.strip('Z')
item_gdf['datetime'] = pd.to_datetime(datetime_text, format='ISO8601')



### Group nearby scenes, then compute each group's centroid along with some summary properties

In [112]:
# Label each scene with the cluster it falls into. Clustering runs on a copy
# reprojected to EPSG:4087 so the 500 threshold is applied to projected
# coordinates rather than degrees.
projected_items = item_gdf.to_crs('EPSG:4087')
item_gdf['nearby_group'] = cluster_shapes_by_distance(projected_items, distance=500)

# Summarize every cluster: how many scenes it holds, the span of acquisition
# dates, and the centroid of the union of its footprints.
group_summary = item_gdf.groupby('nearby_group').agg(
    n_scenes=pd.NamedAgg(column='id', aggfunc='count'),
    date_range=pd.NamedAgg(
        column='datetime',
        aggfunc=lambda stamps: '{} - {}'.format(stamps.min().date(), stamps.max().date()),
    ),
    geometry=pd.NamedAgg(
        column='geometry',
        aggfunc=lambda footprints: unary_union(footprints.values).centroid,
    ),
)

# The aggregation returns a plain DataFrame; drop the group label index and
# wrap the result back into a GeoDataFrame in the original lon/lat CRS.
group_points = gpd.GeoDataFrame(
    group_summary.reset_index(drop=True),
    geometry='geometry',
    crs='EPSG:4326',
)

### Plot the map

In [117]:
# The map needs 'datetime' as a str again for plotting. Convert it on a
# plotting-only copy instead of overwriting item_gdf['datetime']: the grouping
# cell above calls .date() on that column, so mutating it in place would make
# that cell fail if it were re-run after this one.
m = item_gdf.assign(datetime=item_gdf['datetime'].astype(str)).explore()

# Overlay one marker per scene group on the same map, colored by scene count.
group_points.explore(m=m, column='n_scenes', marker_kwds=dict(radius=8, fill=True, opacity=1))
m