## Sentinel2 data extraction with STAC

In [None]:
import numpy as np
import xarray as xr
import stackstac
import pystac_client

import hvplot.xarray
import warnings
import panel as pn

In [None]:
# Suppress every warning for the rest of the session to keep cell output quiet.
warnings.simplefilter("ignore")

### Create a Dask cluster

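We're going to process a large amount of data. To cut down on the execution time, we'll use a Dask cluster to do the computation in parallel. The `Client()` call below is created with no arguments, so it does not configure adaptive scaling itself; if you need workers to be added and removed as needed, connect the client to a cluster that supports adaptive scaling instead. See [Scale With Dask](../quickstarts/scale-with-dask.ipynb) for more on using Dask.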

In [None]:
from dask.distributed import Client

In [None]:
# Connect to Dask. NOTE(review): with no arguments, Client() is documented to
# start a local cluster on this machine — confirm that is the intended setup.
client = Client()

In [None]:
# Echo the client so the notebook shows its summary as the cell output.
client

In [None]:
# client.close()  # uncomment to shut down when finished (no separate `cluster` object is defined in this notebook)

### Discover data

In this example, we define our area of interest as a longitude/latitude bounding box, `[min_lon, min_lat, max_lon, max_lat]`.

In [None]:
# Candidate areas of interest, each as [min_lon, min_lat, max_lon, max_lat].
aoi_bboxes = {
    "africa": [40.09, -2.98, 40.61, -2.46],
    "aus_coastsat": [151.2957545, -33.7390216, 151.312234, -33.7012561],
}

# Select the AOI explicitly instead of assigning bbox_lonlat twice and relying
# on the last assignment winning.
# NOTE: the plotting cell at the end of the notebook hard-codes crs=32756, so
# switching to another AOI also requires updating that value.
bbox_lonlat = aoi_bboxes["aus_coastsat"]

In [None]:
# Scenes whose `eo:cloud_cover` is at or above this value are excluded by the
# STAC search below (the query uses a strict "lt" comparison).
cloud_max: int = 60

Using `pystac_client` we can search STAC endpoints for items matching our query parameters.

#### Try AWS Sentinel2 data

In [None]:
# Root URL of the STAC API to query (the AWS-hosted Earth Search endpoint, v0).
stac_api_endpoint = "https://earth-search.aws.element84.com/v0"

# Open a client on that endpoint; the searches below go through it.
stac = pystac_client.Client.open(url=stac_api_endpoint)

##### Each API endpoint has a different collection name for the Sentinel2 level 2a data

In [None]:
# Print every collection the endpoint offers so the right Sentinel-2
# collection id can be picked for the search.
available_collections = stac.get_all_collections()
for collection in available_collections:
    print(collection)

In [None]:
# Search parameters: restrict by bounding box, date range, collection and
# cloud cover.
search_params = dict(
    bbox=bbox_lonlat,
    datetime="2017-12-01/2018-01-01",
    collections=["sentinel-s2-l2a-cogs"],
    limit=500,  # page size: items are fetched in batches of 500
    query={"eo:cloud_cover": {"lt": cloud_max}},
)
search = stac.search(**search_params)

# Materialise the results into a list so they can be counted and reused.
items = [*search.get_items()]
print(len(items))

In [None]:
# Convert each item to a plain dictionary; this list is what gets passed to
# stackstac.stack below.
items_as_dict = [stac_item.to_dict() for stac_item in items]

In [None]:
# Stack the red, green and blue assets of every item into a single DataArray,
# restricted to the area-of-interest bounding box.
stacked = stackstac.stack(
    items_as_dict,
    assets=["B04", "B03", "B02"],  # red, green, blue
    resolution=10,
    chunksize=4096,
    bounds_latlon=bbox_lonlat,
)

# Sentinel-2 uses 0 as nodata, so replace non-positive pixels with NaN.
da = stacked.where(stacked > 0, other=np.nan)

# Optional, currently disabled: relabel the bands by their common names with
# .assign_coords(band=lambda x: x.common_name.rename("band"))
da

In [None]:
# Names of every coordinate other than x, y, time and band; these are the
# extra coordinates to strip from the array.
drop_coords = [
    name for name in da.coords if name not in ("x", "y", "time", "band")
]

In [None]:
# Remove the coordinates listed in drop_coords, leaving x, y, time and band.
da = da.drop_vars(names=drop_coords)

In [None]:
import dask

# Evaluate the global mean and standard deviation in a single compute call so
# the shared input chunks are loaded once, rather than once per statistic as
# happens with two separate float(...) conversions.
mean_result, std_result = dask.compute(da.mean(), da.std())
dmean = float(mean_result)
dstd = float(std_result)

# Two-sigma display limits around the mean; the lower limit is clamped at 0.
vmin = max(dmean - 2 * dstd, 0)
vmax = dmean + 2 * dstd

In [None]:
# Scale so that the mean + 2*std level (vmax) maps to 1.0; vmin is not applied here.
da2 = da/vmax

In [None]:
# RGB preview of the scaled stack, with a Select widget for the time dimension.
# NOTE(review): crs=32756 is hard-coded — presumably the UTM zone of the
# Australian AOI; update it if bbox_lonlat changes.
rgb_plot_opts = dict(
    x="x",
    y="y",
    bands="band",
    crs=32756,
    rasterize=True,
    frame_width=200,
    widgets={"time": pn.widgets.Select},
)
da2.hvplot.rgb(**rgb_plot_opts)