In [1]:
import pystac_client
import pystac
import odc.stac
import geopandas
import xarray 
import rioxarray
import planetary_computer
import pathlib
import pandas
import numpy
import folium
import plotly.express
import cartopy 
import dask.distributed
import branca.element, branca.colormap # Remove whitespace around small folium map
import bokeh
import hvplot.xarray


In [2]:
def geopandas_bounds_to_plot(dataframe, crs=4326):
    """Return the total bounds of ``dataframe`` in leaflet corner notation.

    The frame is reprojected with ``to_crs(crs)`` and its ``total_bounds``
    are reordered: ``(lon1, lat1, lon2, lat2) -> ((lat1, lon1), (lat2, lon2))``.
    """
    min_x, min_y, max_x, max_y = dataframe.to_crs(crs).total_bounds
    first_corner = (min_y, min_x)
    second_corner = (max_y, max_x)
    return (first_corner, second_corner)

In [3]:
def _write_default_nodata(layer):
    """Write the default encoded nodata value on a single DataArray, in place."""
    # NaN cannot be represented by an integer dtype, so every integer dtype
    # (not only uint16) uses 0; everything else is assumed to be float.
    if numpy.issubdtype(layer.data.dtype, numpy.integer):
        layer.rio.write_nodata(0, encoded=True, inplace=True)
    else:
        layer.rio.write_nodata(numpy.nan, encoded=True, inplace=True)


def update_raster_defaults(raster):
    """Re-write the CRS and the default nodata value on a raster, in place.

    Works on both ``xarray.DataArray`` and ``xarray.Dataset`` inputs:

    * Dataset: every data variable gets its own CRS re-written and a nodata
      value recorded, then the CRS is re-written on the Dataset itself.
    * DataArray: the CRS is re-written and a nodata value recorded.

    The nodata value is 0 for integer dtypes and NaN otherwise. Nothing is
    returned; all writes use ``inplace=True``.
    """
    if isinstance(raster, xarray.Dataset):
        for key in raster.data_vars:
            raster[key].rio.write_crs(raster[key].rio.crs, inplace=True)
            _write_default_nodata(raster[key])
    raster.rio.write_crs(raster.rio.crs, inplace=True)
    if isinstance(raster, xarray.DataArray):
        _write_default_nodata(raster)

In [4]:
def normalise_rgb(data):
    """Return a deep copy of ``data`` with red, green and blue min-max scaled.

    ``uint16`` input is first cast to ``float32`` with zero values replaced
    by NaN. Each of the ``"red"``, ``"green"`` and ``"blue"`` entries
    (selected with ``.loc``) is then rescaled independently as
    ``(band - min) / (max - min)``. The input object is not modified.
    """
    scaled = data.copy(deep=True)
    if scaled.data.dtype == 'uint16':
        as_float = scaled.astype("float32")
        scaled = as_float.where(scaled != 0, numpy.nan)
    for colour in ("red", "green", "blue"):
        band = scaled.loc[colour]
        lowest = band.min()
        spread = band.max() - lowest
        scaled.loc[colour] = (band - lowest) / spread
    return scaled

In [5]:
def plot_rgb(data):
    """Return an hvplot RGB plot of ``data``, one subplot per ``time`` value.

    ``data`` is expected to expose the ``hvplot`` accessor with ``x``/``y``
    coordinates, an ``rgb`` band dimension and a ``time`` dimension.
    """
    rgb_options = {
        "x": 'x',
        "y": 'y',
        "bands": 'rgb',
        "data_aspect": 1,
        "xaxis": False,
        "yaxis": None,
        "subplots": True,
        "by": 'time',
        "robust": True,
    }
    return data.hvplot.rgb(**rgb_options)

In [6]:
def plot_layer(layer):
    """Return an hvplot image plot of ``layer``, one subplot per ``time`` value.

    ``layer`` is expected to expose the ``hvplot`` accessor with ``x``/``y``
    coordinates and a ``time`` dimension; a ``"10m"`` coastline is requested.
    """
    image_options = {
        "x": 'x',
        "y": 'y',
        "data_aspect": 1,
        "subplots": True,
        "by": 'time',
        "coastline": "10m",
    }
    return layer.hvplot.image(**image_options)

# Dask for performance

In [7]:
# Start a Dask distributed client and hand it to odc.stac.configure_rio.
# It is named ``dask_client`` (not ``client``) because a later cell binds
# ``client`` to the STAC API client; sharing one name for two different kinds
# of object makes the notebook sensitive to cell execution order.
dask_client = dask.distributed.Client()
odc.stac.configure_rio(cloud_defaults=True, client=dask_client)
display(dask_client)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 62955 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:62955/status,

0,1
Dashboard: http://127.0.0.1:62955/status,Workers: 4
Total threads: 8,Total memory: 31.73 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:62960,Workers: 4
Dashboard: http://127.0.0.1:62955/status,Total threads: 8
Started: Just now,Total memory: 31.73 GiB

0,1
Comm: tcp://127.0.0.1:62992,Total threads: 2
Dashboard: http://127.0.0.1:62993/status,Memory: 7.93 GiB
Nanny: tcp://127.0.0.1:62963,
Local directory: C:\Users\PEARSO~1\AppData\Local\Temp\dask-scratch-space\worker-i70y12j8,Local directory: C:\Users\PEARSO~1\AppData\Local\Temp\dask-scratch-space\worker-i70y12j8

0,1
Comm: tcp://127.0.0.1:62986,Total threads: 2
Dashboard: http://127.0.0.1:62989/status,Memory: 7.93 GiB
Nanny: tcp://127.0.0.1:62967,
Local directory: C:\Users\PEARSO~1\AppData\Local\Temp\dask-scratch-space\worker-mcgpluux,Local directory: C:\Users\PEARSO~1\AppData\Local\Temp\dask-scratch-space\worker-mcgpluux

0,1
Comm: tcp://127.0.0.1:62982,Total threads: 2
Dashboard: http://127.0.0.1:62983/status,Memory: 7.93 GiB
Nanny: tcp://127.0.0.1:62970,
Local directory: C:\Users\PEARSO~1\AppData\Local\Temp\dask-scratch-space\worker-1v7i1679,Local directory: C:\Users\PEARSO~1\AppData\Local\Temp\dask-scratch-space\worker-1v7i1679

0,1
Comm: tcp://127.0.0.1:62985,Total threads: 2
Dashboard: http://127.0.0.1:62987/status,Memory: 7.93 GiB
Nanny: tcp://127.0.0.1:62972,
Local directory: C:\Users\PEARSO~1\AppData\Local\Temp\dask-scratch-space\worker-dql71ozs,Local directory: C:\Users\PEARSO~1\AppData\Local\Temp\dask-scratch-space\worker-dql71ozs


# Path setup for data within the repository

In [8]:
# Data directory, resolved relative to the current working directory
data_path = pathlib.Path.cwd().joinpath("..", "data")

# Coordinate reference systems (presumably EPSG codes - confirm).
# ``crs_wsg`` is used to reproject the AOI geometry; ``crs`` is not referenced
# in the cells visible here.
crs_wsg = 4326
crs = 2193

# Name of the area of interest: selects the vector file and the raster output folder
name = "waikouaiti"

# Upper limit passed to the STAC "eo:cloud_cover" query filter
filter_cloud_percentage = 30
# Upper limit (percent) on the obscured share of ocean pixels for a date to be kept
ocean_cloud_percentage = 10
# strftime pattern used when tabulating dates
date_format = "%Y-%m-%d"

# Make sure the raster output folder for this area exists
data_path.joinpath("rasters", name).mkdir(parents=True, exist_ok=True)

# STAC band names
* Information on the Planetary Computer catalogue - https://stacindex.org/catalogs/microsoft-pc#/
   * [ Sentinel-2 Level-2A ](https://stacindex.org/catalogs/microsoft-pc#/43bjKKcJQfxYaT1ir3Ep6uENfjEoQrjkzhd2)
* Information on the Copernicus DEMs - https://object.cloud.sdsc.edu/v1/AUTH_opentopography/www/metadata/Copernicus_metadata.pdf
* Good example notebook working with the Copernicus Planetary Computer DEM [link](https://github.com/microsoft/PlanetaryComputerExamples/blob/main/datasets/copernicus-dem/copernicus-dem-example.ipynb)
* Notes on baseline change - may need to update normalisation - https://github.com/microsoft/PlanetaryComputerExamples/blob/main/datasets/sentinel-2-l2a/baseline-change.ipynb

In [9]:
# STAC API endpoint and the collection ids used from it
catalogue = {
    "url": "https://planetarycomputer.microsoft.com/api/stac/v1",
    "collections": {"sentinel": "sentinel-2-l2a", "dem": "cop-dem-glo-30"},
}
# Band 05 - Vegetation red edge 1, Band 8A - Vegetation red edge 4
bands = ["red", "green", "blue", "nir", "SCL", "swir16", "B05", "B8A"]
# Defaults passed to the raster load call
raster_defaults = dict(resolution=10, nodata=0, dtype="uint16")
# SCL class label -> class value; labels are listed in ascending value order from 0
scl_dict = {
    label: value
    for value, label in enumerate([
        "no data", "defective", "cast shadow", "cloud shadow",
        "vegetation", "not vegetated", "water", "unclassified",
        "cloud medium probability", "cloud high probability",
        "thin cirrus", "snow",
    ])
}

In [10]:
# Use the publicly available STAC endpoint from ``catalogue``; items returned by
# the client are passed through planetary_computer.sign_inplace
odc.stac.configure_rio(cloud_defaults=True, aws={"aws_unsigned": True})
client = pystac_client.Client.open(
    catalogue["url"],
    modifier=planetary_computer.sign_inplace,
)

# Geometry of AOI: the first feature of the named vector file, reprojected to crs_wsg
geometry_df = geopandas.read_file(data_path.joinpath("vectors", f"{name}.gpkg"))
geometry = geometry_df.to_crs(crs_wsg).iloc[0].geometry
# Land polygons, used later to clip rasters to the ocean
land = geopandas.read_file(data_path.joinpath("vectors", "main_islands.gpkg"))

# Query for the data

In [11]:
# Month to query, written as "YYYY-MM"
date_YYMM = "2020-05" # "2021-12" "2017-02"
# Only keep items whose reported cloud cover is below the configured limit
filters = {"eo:cloud_cover": {"lt": filter_cloud_percentage}}
search_sentinel = client.search(
    collections=[catalogue["collections"]["sentinel"]],
    intersects=geometry,
    datetime=date_YYMM,
    query=filters,
)
search_dem = client.search(
    collections=[catalogue["collections"]["dem"]],
    intersects=geometry,
)

# Tabulate the matching Sentinel items
pandas.DataFrame.from_records(search_sentinel.item_collection_as_dict()['features'])

Unnamed: 0,id,bbox,type,links,assets,geometry,collection,properties,stac_extensions,stac_version
0,S2A_MSIL2A_20200531T223721_R072_T59GMK_2020091...,"[169.70489273664208, -46.139752512926016, 171....",Feature,"[{'rel': 'collection', 'type': 'application/js...",{'AOT': {'href': 'https://sentinel2l2a01.blob....,"{'type': 'Polygon', 'coordinates': [[[171.1178...",sentinel-2-l2a,"{'datetime': '2020-05-31T22:37:21.024000Z', 'p...",[https://stac-extensions.github.io/eo/v1.0.0/s...,1.0.0
1,S2A_MSIL2A_20200528T222551_R029_T59GMK_2020091...,"[169.70489273664208, -46.14152638246479, 171.1...",Feature,"[{'rel': 'collection', 'type': 'application/js...",{'AOT': {'href': 'https://sentinel2l2a01.blob....,"{'type': 'Polygon', 'coordinates': [[[169.7274...",sentinel-2-l2a,"{'datetime': '2020-05-28T22:25:51.024000Z', 'p...",[https://stac-extensions.github.io/eo/v1.0.0/s...,1.0.0
2,S2B_MSIL2A_20200526T223709_R072_T59GMK_2020091...,"[169.70489273664208, -46.13980900577084, 171.1...",Feature,"[{'rel': 'collection', 'type': 'application/js...",{'AOT': {'href': 'https://sentinel2l2a01.blob....,"{'type': 'Polygon', 'coordinates': [[[171.1242...",sentinel-2-l2a,"{'datetime': '2020-05-26T22:37:09.024000Z', 'p...",[https://stac-extensions.github.io/eo/v1.0.0/s...,1.0.0
3,S2A_MSIL2A_20200518T222551_R029_T59GMK_2020091...,"[169.70489273664208, -46.14152638246479, 171.1...",Feature,"[{'rel': 'collection', 'type': 'application/js...",{'AOT': {'href': 'https://sentinel2l2a01.blob....,"{'type': 'Polygon', 'coordinates': [[[169.7274...",sentinel-2-l2a,"{'datetime': '2020-05-18T22:25:51.024000Z', 'p...",[https://stac-extensions.github.io/eo/v1.0.0/s...,1.0.0
4,S2B_MSIL2A_20200516T223709_R072_T59GMK_2020090...,"[169.70489273664208, -46.13981903229499, 171.1...",Feature,"[{'rel': 'collection', 'type': 'application/js...",{'AOT': {'href': 'https://sentinel2l2a01.blob....,"{'type': 'Polygon', 'coordinates': [[[171.1242...",sentinel-2-l2a,"{'datetime': '2020-05-16T22:37:09.024000Z', 'p...",[https://stac-extensions.github.io/eo/v1.0.0/s...,1.0.0
5,S2A_MSIL2A_20200508T222551_R029_T59GMK_2020092...,"[169.70489273664208, -46.14152638246479, 171.1...",Feature,"[{'rel': 'collection', 'type': 'application/js...",{'AOT': {'href': 'https://sentinel2l2a01.blob....,"{'type': 'Polygon', 'coordinates': [[[169.7221...",sentinel-2-l2a,"{'datetime': '2020-05-08T22:25:51.024000Z', 'p...",[https://stac-extensions.github.io/eo/v1.0.0/s...,1.0.0
6,S2B_MSIL2A_20200503T222539_R029_T59GMK_2020092...,"[169.70489273664208, -46.14152638246479, 171.1...",Feature,"[{'rel': 'collection', 'type': 'application/js...",{'AOT': {'href': 'https://sentinel2l2a01.blob....,"{'type': 'Polygon', 'coordinates': [[[169.7274...",sentinel-2-l2a,"{'datetime': '2020-05-03T22:25:39.024000Z', 'p...",[https://stac-extensions.github.io/eo/v1.0.0/s...,1.0.0


# Optional 
### Information about the catalogues

In [None]:
collections = list(client.get_collections())
print(f"Number of collections: {len(collections)}")
print("Collections IDs:")
# Only collections whose id contains "dem" (case-insensitive) are listed
for collection in collections:
    if "dem" not in collection.id.lower():
        continue
    print(f"- {collection.id}")
search_sentinel.item_collection()

### Call below if you want to load a DEM
Currently DEM is not used

In [None]:
# Take the first DEM item from the search and sign its "data" asset
dem_item = list(search_dem.items())[0]
signed_asset = planetary_computer.sign(dem_item.assets["data"])
# Open the raster, squeeze it and drop the "band" variable before saving
dem = rioxarray.open_rasterio(signed_asset.href).squeeze().drop_vars("band")
dem.to_netcdf(data_path.joinpath("rasters", name, "dem.nc"))

# Download and construct Kelp layer

In [12]:
# Load the requested bands for the AOI, grouped by solar day, using the raster
# defaults for resolution, dtype and nodata
data = odc.stac.load(
    search_sentinel.items(),
    geopolygon=geometry,
    bands=bands,
    chunks={},
    groupby="solar_day",
    resolution=raster_defaults["resolution"],
    dtype=raster_defaults["dtype"],
    nodata=raster_defaults["nodata"],
    patch_url=planetary_computer.sign,
)

## Display RGB & SCL 
Display prior to filtering out dates

In [None]:
# Stack the three colour bands along a new "rgb" dimension for plotting
rgb = data[["red", "green", "blue"]].to_array("rgb", name="all images")
update_raster_defaults(rgb)
rgb_plot = plot_rgb(rgb)
scl_plot = plot_layer(data["SCL"])
# SCL on the left, RGB on the right
scl_plot + rgb_plot

## Remove any dates with no valid data or much cloud over the ocean

In [None]:
# Load SCL and clip it with the land polygons inverted, so land pixels are removed
data["SCL"].load()
land_in_scl_crs = land.to_crs(data["SCL"].rio.crs)
data["SCL"] = data["SCL"].rio.clip(land_in_scl_crs.geometry.values, invert=True)
data["SCL"].rio.write_crs(data["SCL"].rio.crs, inplace=True)
# Keep only the dates with at least one pixel that has SCL data (0 == no SCL data)
has_scl_data = (data["SCL"] != scl_dict["no data"]).any(dim=["x", "y"])
data = data.isel(time=has_scl_data)

# Ocean mask: a single time slice filled with ones, clipped with the land polygons inverted
ocean_mask = data["SCL"].isel(time=0).copy(deep=True)
ocean_mask.data[:] = 1
ocean_mask = ocean_mask.rio.clip(land.to_crs(ocean_mask.rio.crs).geometry.values, invert=True)
ocean_pixel_count = int(ocean_mask.sum())

# Mask by time - initially sums of cloud values then true / false by time if less than cloud threshold
obscuring_classes = [
    "cloud high probability", "cloud medium probability", "cloud shadow",
    "cast shadow", "thin cirrus", "defective",
]
cloud_mask = (data["SCL"] == scl_dict[obscuring_classes[0]]).sum(dim=["x", "y"])
for scl_label in obscuring_classes[1:]:
    cloud_mask += (data["SCL"] == scl_dict[scl_label]).sum(dim=["x", "y"])
# No-data pixels are counted too, less the pixels the ocean mask itself marks as no data
no_data_in_scl = (data["SCL"] == scl_dict["no data"]).sum(dim=["x", "y"])
no_data_in_mask = (ocean_mask == scl_dict["no data"]).sum(dim=["x", "y"])
cloud_mask += no_data_in_scl - no_data_in_mask

cloud_fraction = cloud_mask / ocean_pixel_count
cloud_percentages = [f"{value:.2f}%" for value in cloud_fraction.data * 100]
print(f"Ocean cloud percentage {cloud_percentages}")
cloud_mask_time = cloud_fraction < (ocean_cloud_percentage / 100)
data = data.isel(time=cloud_mask_time)

In [None]:
# Rebuild the RGB stack from the filtered dates and save it
rgb = data[["red", "green", "blue"]].to_array("rgb", name="all images")
# NOTE(review): defaults are written on ``data`` here, while the earlier display
# cell passes ``rgb`` - confirm which is intended before the export below
update_raster_defaults(data)
rgb_path = data_path.joinpath("rasters", name, f'rgb_2_{date_YYMM}.nc')
rgb.to_netcdf(rgb_path, format="NETCDF4", engine="netcdf4")

# Display RGB and SCL

In [None]:
# Re-plot after filtering: SCL on the left, RGB on the right
rgb_plot = plot_rgb(rgb)
scl_plot = plot_layer(data["SCL"])
scl_plot + rgb_plot

# Calculate kelp and plot
* Calculate derived indices
* Mask out non-kelp areas

In [None]:
# Convert every variable except SCL to float32, replacing zero values with NaN
for key in data.data_vars:
    if key == "SCL":
        continue
    data[key] = data[key].astype("float32").where(data[key] != 0, numpy.nan)
update_raster_defaults(data)

# Normalised difference indices, each of the form (a - b) / (a + b)
data["ndvi"] = (data.nir - data.red) / (data.nir + data.red)
data["ndwi"] = (data.green - data.nir) / (data.green + data.nir)
data["ndvri"] = (data.B05 - data.red) / (data.B05 + data.red)
# Fixed: this was (swir16 + B05) / (swir16 - B05), i.e. numerator and denominator
# swapped. That ratio never falls inside (-1, 1), so a threshold such as -0.2
# could not act as a normalised-difference cut-off. The sign is unchanged.
data["ndwi2"] = (data.swir16 - data.B05) / (data.swir16 + data.B05)
update_raster_defaults(data)

In [None]:
# Save every band and derived index for this month
all_bands_path = data_path / "rasters" / name / f'all_bands_{date_YYMM}.nc'
data.to_netcdf(all_bands_path, format="NETCDF4", engine="netcdf4")

In [None]:
thresholds = {"min_ndvi": 0.03, "max_ndvi": 0.7, "max_ndwi": 0.1, "min_ndvri": 0.03, "max_ndwi2": -0.2,}

# Start from (nir - red) / (nir + red) and set a cell to NaN wherever a test fails
data["kelp"] = (data.nir - data.red) / (data.nir + data.red)
data["kelp"] = data["kelp"].where(data["ndvi"].data > thresholds["min_ndvi"], numpy.nan)
data["kelp"] = data["kelp"].where(data["ndwi"].data < thresholds["max_ndwi"], numpy.nan)
data["kelp"] = data["kelp"].where(data["ndwi2"].data < thresholds["max_ndwi2"], numpy.nan)
# The max_ndvi and min_ndvri thresholds are currently not applied:
#data["kelp"] = data["kelp"].where(data["ndvi"].data < thresholds["max_ndvi"], numpy.nan)
#data["kelp"] = data["kelp"].where(data["ndvri"].data > thresholds["min_ndvri"], numpy.nan)

# Clip with the land polygons inverted, so land pixels are removed
data["kelp"] = data["kelp"].rio.clip(land.to_crs(data["kelp"].rio.crs).geometry.values, invert=True)

# Drop cells whose SCL class marks them as cloud, shadow or defective
excluded_scl_classes = (
    "cloud high probability", "thin cirrus", "defective",
    "cast shadow", "cloud shadow", "cloud medium probability",
)
for scl_label in excluded_scl_classes:
    data["kelp"] = data["kelp"].where(data["SCL"] != scl_dict[scl_label], numpy.nan)
update_raster_defaults(data)

# Area per date = count of non-null cells * x resolution * y resolution.
# Each time slice is loaded once; the previous one-liner called .load() three
# times per date, repeating the computation of the same slice.
kelp_areas = []
for index in range(len(data["kelp"].time)):
    kelp_slice = data["kelp"].isel(time=index).load()
    cell_area = kelp_slice.x.resolution * kelp_slice.y.resolution
    kelp_areas.append(abs(int(kelp_slice.notnull().sum() * cell_area)))
print(f"Area for thresholds {thresholds} is {kelp_areas} m2")

In [None]:
# Save the kelp layer for this month
kelp_path = data_path / "rasters" / name / f'kelp_{date_YYMM}.nc'
data["kelp"].to_netcdf(kelp_path, format="NETCDF4", engine="netcdf4")

In [None]:
# Overlay the kelp layer on top of the RGB plot
kelp_plot = plot_layer(layer=data["kelp"])
rgb_plot * kelp_plot

# Table of areas

In [None]:
# Re-open the saved kelp layer through the public rioxarray entry point
# (the same call the DEM cell uses) rather than the rioxarray.rioxarray path.
kelp_display = rioxarray.open_rasterio(data_path / "rasters" / name / f'kelp_{date_YYMM}.nc')

# Collect one date and one area per time step. Separate lists are used so
# ``kelp_info`` is only ever bound to the final DataFrame.
table_dates = []
table_areas = []
for index in range(len(kelp_display.time)):
    kelp = kelp_display.isel(time=index).load()
    # Area = count of non-null cells * x resolution * y resolution
    table_areas.append(abs(int(kelp.notnull().sum() * kelp.x.resolution * kelp.y.resolution)))
    # NOTE(review): dates are read from the in-memory ``data`` rather than from
    # ``kelp_display`` - this assumes both hold the same time steps; confirm.
    table_dates.append(pandas.to_datetime(data.time.data[index]).strftime(date_format))
kelp_info = pandas.DataFrame({"date": table_dates, "area": table_areas})
kelp_info