Using the Dask distributed scheduler, what is a good chunk size for computing the mean land surface temperature (LST) over all of Washington State in 2020?

In [15]:
# imports
import pystac_client
import planetary_computer as pc
import xarray as xr
import numpy as np
import dask_gateway
import odc.stac

In [2]:
# Start a Dask Gateway cluster, ask it for a fixed number of workers,
# and attach a client so later Dask computations are sent to it.
n_workers = 4
cluster = dask_gateway.GatewayCluster()
cluster.scale(n_workers)
client = cluster.get_client()

# Print the dashboard URL so task progress can be watched in a browser.
print(cluster.dashboard_link)

https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway/clusters/prod.61b78d0fdcb54e3a8eb9cf8d458591fe/status


In [13]:
# Search the Planetary Computer STAC API for MODIS land-surface-temperature scenes.
# pc.sign_inplace is handed to the client as its result modifier
# (per the planetary_computer docs, it signs asset URLs in place).
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=pc.sign_inplace,
)

# Area of interest, given as [west, south, east, north] in degrees
# (Washington state, per the question this notebook poses).
bbox = [-124.5, 45.9, -116.8, 49.1]

# Full calendar years 2010 through 2020 -- no seasonal filter is applied here.
start = "2010-01-01"
end = "2020-12-31"

# Restrict the search to one collection, the bbox and date range above,
# and items whose "platform" property equals "aqua".
modis_search = catalog.search(
    collections=["modis-11A1-061"],
    bbox=bbox,
    datetime=[start, end],
    query={"platform": {"eq": "aqua"}},
)

# item_collection() is the supported replacement for the deprecated
# get_all_items(); both gather every matching item into an ItemCollection.
modis_items = modis_search.item_collection()
print("Found {} images".format(len(modis_items)))

Found 8164 images


In [14]:
# Build an xarray Dataset from the STAC items found by the search.
# One resampling method per band; the dict keys double as the band list,
# so the two can never drift apart.
band_resampling = {"LST_Day_1km": "bilinear", "QC_Day": "nearest"}

# NOTE(review): the rendered repr for this cell reports chunks of shape
# (1, 384, 1221), i.e. one time step per chunk, even though 365 time steps
# per chunk are requested here -- confirm how the installed odc-stac version
# treats the "time" key in `chunks`.
modis_data = odc.stac.load(
    modis_items,
    bands=list(band_resampling),
    bbox=bbox,
    chunks={"time": 365},
    resampling=band_resampling,
)

modis_data

Unnamed: 0,Array,Chunk
Bytes,3.51 GiB,915.75 kiB
Shape,"(4018, 384, 1221)","(1, 384, 1221)"
Count,1 Graph Layer,4018 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 3.51 GiB 915.75 kiB Shape (4018, 384, 1221) (1, 384, 1221) Count 1 Graph Layer 4018 Chunks Type uint16 numpy.ndarray",1221  384  4018,

Unnamed: 0,Array,Chunk
Bytes,3.51 GiB,915.75 kiB
Shape,"(4018, 384, 1221)","(1, 384, 1221)"
Count,1 Graph Layer,4018 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.75 GiB,457.88 kiB
Shape,"(4018, 384, 1221)","(1, 384, 1221)"
Count,1 Graph Layer,4018 Chunks
Type,uint8,numpy.ndarray
"Array Chunk Bytes 1.75 GiB 457.88 kiB Shape (4018, 384, 1221) (1, 384, 1221) Count 1 Graph Layer 4018 Chunks Type uint8 numpy.ndarray",1221  384  4018,

Unnamed: 0,Array,Chunk
Bytes,1.75 GiB,457.88 kiB
Shape,"(4018, 384, 1221)","(1, 384, 1221)"
Count,1 Graph Layer,4018 Chunks
Type,uint8,numpy.ndarray


In [None]:
# Keep only samples whose QC_Day value is exactly 0; everything else becomes NaN.
# NOTE(review): presumably QC_Day == 0 marks best-quality retrievals -- confirm
# against the product's QC documentation.
qc_is_zero = modis_data["QC_Day"] == 0
modis_masked = modis_data.where(qc_is_zero)

# NaN-aware mean along "time" for every variable in the Dataset, then
# .compute() triggers the actual Dask computation and returns the result.
# No scale factor is applied here, so values stay in the bands' stored units.
mean_lst = modis_masked.reduce(np.nanmean, dim="time").compute()