In [9]:
from pystac_client import Client
from pystac.extensions.eo import EOExtension as eo
import planetary_computer as pc
import geopandas as gpd
import rasterio
from rasterio import warp, windows


In [39]:
def fetch_s2_hrefs(catalog, aoi, time_range, max_nodata=20, max_cloud=0.5, n_scenes=15):
    """Search for Sentinel-2 L2A scenes over an AOI and collect signed band hrefs.

    The "sentinel-2-l2a" collection of ``catalog`` is searched within
    ``aoi.bounds`` and ``time_range``. Only scenes whose
    "s2:nodata_pixel_percentage" is below ``max_nodata`` and whose
    "s2:high_proba_clouds_percentage" is below ``max_cloud`` are requested.
    Of the matches, the ``n_scenes`` with the lowest "eo:cloud_cover" are kept.

    Returns
    -------
    tuple of (dict, dict)
        ``(properties, links)``, both keyed by each scene's "s2:product_uri".
        ``properties`` maps to the item's property dict; ``links`` maps to a
        ``{asset key: signed href}`` dict restricted to asset keys that start
        with "B".

    Raises
    ------
    ValueError
        If the search yields no items.
    """
    filters = {
        "s2:nodata_pixel_percentage": {"lt": max_nodata},
        "s2:high_proba_clouds_percentage": {"lt": max_cloud},
    }
    results = catalog.search(
        collections=["sentinel-2-l2a"],
        bbox=aoi.bounds,
        datetime=time_range,
        query=filters,
    )

    candidates = list(results.get_items())
    if not candidates:
        raise ValueError("No items satisfy query!")

    # Least cloudy scenes first, then cap the selection at n_scenes.
    candidates.sort(key=lambda scene: scene.properties["eo:cloud_cover"])
    selected = candidates[:n_scenes]

    properties, links = {}, {}
    for scene in selected:
        # Only assets whose key starts with "B" are kept; each href is signed.
        signed = {
            name: pc.sign(asset.href)
            for name, asset in scene.assets.items()
            if name.startswith("B")
        }
        scene_id = scene.properties["s2:product_uri"]
        properties[scene_id] = scene.properties
        links[scene_id] = signed

    return properties, links


def write_rasters(scenes, aoi):
    """Write one raster per scene by delegating to ``write_bands``.

    ``scenes`` maps a scene identifier to that scene's band mapping; each
    pair is forwarded, together with ``aoi``, to ``write_bands`` in iteration
    order. Nothing is returned.
    """
    for scene_id, scene_bands in scenes.items():
        write_bands(scene_id, scene_bands, aoi)

def write_bands(id, bands, aoi):
    """Clip every band of one scene to the AOI and stack them into ``{id}.tif``.

    The first entry of ``bands`` is the reference raster: its metadata
    (driver, dtype, nodata, CRS) and pixel grid define the output file. The
    AOI is converted to a whole-pixel window on that grid, and the output's
    ``width``, ``height`` and ``transform`` are set to describe that window,
    so the file is georeferenced as a clip rather than as the full scene.

    Every band is then read through a window computed from its *own*
    geotransform and resampled to the reference window's shape, so bands
    stored on a different pixel grid still cover the same ground area.

    Parameters
    ----------
    id : str
        Scene identifier; the output is written to ``f"{id}.tif"`` in the
        current working directory.
    bands : dict
        Mapping of band name -> href that ``rasterio.open`` can read. Band
        names become the band descriptions of the output, in mapping order.
    aoi : object
        Geometry-like object exposing ``.bounds`` as
        ``(minx, miny, maxx, maxy)``. The bounds are interpreted as
        EPSG:4326 coordinates.

    Raises
    ------
    ValueError
        If ``bands`` is empty.

    Notes
    -----
    Assumes all bands of a scene share the reference band's CRS and that the
    AOI lies inside the scene footprint -- TODO confirm for the data in use.
    """
    if not bands:
        raise ValueError(f"No bands to write for scene {id!r}")

    reference_href = next(iter(bands.values()))
    # Open the reference band in a context manager so its handle is released.
    with rasterio.open(reference_href) as reference:
        meta = reference.meta.copy()
        aoi_bounds = warp.transform_bounds("epsg:4326", reference.crs, *aoi.bounds)
        # Snap to whole pixels: the output raster needs integer dimensions.
        aoi_window = (
            windows.from_bounds(*aoi_bounds, transform=reference.transform)
            .round_offsets()
            .round_lengths()
        )
        out_transform = windows.transform(aoi_window, reference.transform)
        # Map-coordinate bounds of the snapped window; used to locate the same
        # ground area in every other band regardless of its pixel size.
        clip_bounds = windows.bounds(aoi_window, reference.transform)

    out_height, out_width = int(aoi_window.height), int(aoi_window.width)
    meta.update(
        count=len(bands),
        height=out_height,
        width=out_width,
        transform=out_transform,
    )

    with rasterio.open(f"{id}.tif", "w", **meta) as dst:
        for index, (name, href) in enumerate(bands.items(), start=1):
            with rasterio.open(href) as src:
                band_window = windows.from_bounds(*clip_bounds, transform=src.transform)
                # read(1, ...) returns a 2-D array directly; out_shape resamples
                # bands on a different grid onto the reference window's shape.
                data = src.read(
                    1, window=band_window, out_shape=(out_height, out_width)
                )
            dst.write(data, index)
            dst.set_band_description(index, name)

In [68]:
import pandas as pd

# STAC API endpoint of the Microsoft Planetary Computer.
catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
lakes = gpd.read_file("../../data/GL_3basins_2015.shp")
# AOI: envelope of the lake with index label 3, scaled by a factor of 5 in x and y.
# NOTE(review): write_bands treats aoi.bounds as EPSG:4326 -- confirm the
# shapefile's CRS matches.
aoi = lakes.geometry.envelope.scale(5, 5)[3]
# The search helper defined in this notebook is fetch_s2_hrefs; the previous
# call to fetch_hrefs raised NameError on a fresh kernel.
properties, links = fetch_s2_hrefs(catalog, aoi, "2019-01-01/2020-01-01")
write_rasters(links, aoi)

# One row per scene, one column per STAC property.
properties = pd.DataFrame.from_records(properties).T
properties.to_csv("properties.csv")