# Cloud Free Mosaic and Geometric Median

## Import libraries

In [None]:
import os
import dask.config

from dask.distributed import Client, LocalCluster
from datacube import Datacube
from datacube.utils.masking import create_mask_value
from datacube_compute import geomedian_with_mads
from odc.geo.geom import point
from odc.stac import configure_s3_access
from urllib.parse import urlparse

## Configure the environment

In [None]:
# AWS setup: pin the default region used for S3 requests
os.environ["AWS_DEFAULT_REGION"] = "us-west-2"

# Remove any unsigned-request setting left in the environment
# (presumably because requester-pays access needs signed requests — confirm)
os.environ.pop("AWS_NO_SIGN_REQUEST", None)

configure_s3_access(requester_pays=True)

# Datacube handle used for every search and load below
dc = Datacube()

## Pick a study area

In [None]:
# Centre of the study area, given as (latitude, longitude), i.e. Y then X
coords = (-6.11, 105.42)  # Krakatoa
lat, lon = coords

# point() is called with X (longitude) first, hence the swapped order
aoi_point = point(lon, lat, crs="EPSG:4326")
bbox = aoi_point.buffer(0.08).boundingbox

# vmin/vmax stretch shared by the Landsat imshow/explore calls
landsat_stretch = {"vmin": 7500, "vmax": 12000}

# Time filter used when searching for datasets
datetime = "2024"

# Preview the area
bbox.explore(zoom=8)

## Load data

This uses the Datacube library to find and load the data. The `dask_chunks` argument tells
`dc.load` to use Dask to lazy-load the data, so nothing is actually read until it is computed.

In [None]:
# Search the index for Landsat 9 datasets that cover the study area
# during the chosen period
datasets = dc.find_datasets(
    product=["ls9_c2l2_sr"],
    latitude=(bbox.bottom, bbox.top),
    longitude=(bbox.left, bbox.right),
    time=datetime,
)

print(f"Found {len(datasets)} Landsat datasets")

# Describe the load lazily with Dask (see dask_chunks); no pixel data is read yet.
# The time filter reuses `datetime` rather than a second hard-coded year, so the
# load stays consistent with the search above when the study period is changed.
data = dc.load(
    datasets=datasets,
    measurements=["red", "green", "blue", "nir08", "pixel_qa"],
    output_crs="EPSG:32750",
    resolution=30,
    time=datetime,
    longitude=(bbox.left, bbox.right),
    latitude=(bbox.bottom, bbox.top),
    dask_chunks={"time": 1, "x": 1000, "y": 1000},
    group_by="solar_day",
    driver="rio",
)

data

In [None]:
# Take the first six time steps as a small test set for cloud masking,
# and pull them into memory straight away
subset = data.isel(time=slice(6)).compute()

## Visualise data

This step uses `matplotlib` to view the data as a static image. The subset was already loaded
into memory by `compute()` in the previous cell, so this step only has to render the images.

The `to_array()` function is a trick used to be able to visualise the data as a
red, green, blue "true colour" image.

In [None]:
# Stack the three visible bands into a single array so that imshow
# can draw each time step as one true-colour image
subset_rgb = subset[["red", "green", "blue"]].to_array()
subset_rgb.plot.imshow(col="time", col_wrap=2, size=6, **landsat_stretch)

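### Cloud mask

Build a boolean mask from the `pixel_qa` band that flags pixels marked as high-confidence cloud or cloud shadow.
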
In [None]:
# QA categories that make a pixel unusable
categories_to_mask_landsat = dict(
    cloud="high_confidence",
    cloud_shadow="high_confidence",
)

# Flag definitions are read from the product metadata of the first dataset found.
# NOTE(review): the metadata key is "qa_pixel" while the loaded band is "pixel_qa" —
# presumably canonical name vs. alias; confirm against the product definition.
qa_flags = datasets[0].product.measurements["qa_pixel"].flags_definition

# create_mask_value returns two values; only the first (the bit mask) is used here
mask_value, _ = create_mask_value(qa_flags, **categories_to_mask_landsat)

# A pixel is flagged when any of the selected bits is set
mask = (subset["pixel_qa"] & mask_value) != 0

# Plot the result, where white is clouds or cloud shadow and black is clear
mask.plot.imshow(col="time", col_wrap=2, size=6, cmap="gray_r", vmin=0, vmax=1)


In [None]:
# Keep only the pixels that are not flagged by the mask, then preview the result
masked_subset = subset.where(~mask)

masked_rgb = masked_subset[["red", "green", "blue"]].to_array()
masked_rgb.plot.imshow(col="time", col_wrap=2, size=6, **landsat_stretch)

In [None]:
# Set up a dask local cluster for parallel processing of the GeoMAD
cluster = LocalCluster(
    n_workers=2,
    threads_per_worker=2,
    memory_limit="10GB",
)

# Port the dashboard is being served on
dashboard_url = cluster.dashboard_link
port = urlparse(dashboard_url).port

# Under JupyterHub, point the dashboard link at the per-user proxy path.
# When JUPYTERHUB_USER is not set, leave Dask's default link alone instead of
# building a broken "/user/None/proxy/..." URL.
jupyterhub_user = os.environ.get("JUPYTERHUB_USER")
if jupyterhub_user:
    dask.config.set(
        {"distributed.dashboard.link": f"/user/{jupyterhub_user}/proxy/{port}/status"}
    )

client = Client(cluster)
client

In [None]:
# Repeat the cloud masking, this time on the full (lazy) time series
mask = (data["pixel_qa"] & mask_value) != 0
masked_data = data.drop_vars("pixel_qa").where(~mask)

# Build the geomedian and MADs computation
# Note, scale and offset values are specific to Landsat 9 C2 L2 SR data
# See: https://www.usgs.gov/faqs/how-do-i-use-a-scale-factor-landsat-level-2-science-products
lazy_geomad = geomedian_with_mads(
    masked_data,
    scale=0.0000275,
    offset=-0.2,
    work_chunks=[1000, 1000],
)

# Run the computation and bring the result into memory
geomad = lazy_geomad.compute()

geomad

In [None]:
# Show the geomedian with the same vmin/vmax stretch as the earlier plots
geomad_map = geomad.odc.explore(**landsat_stretch)
geomad_map

In [None]:
# Show the three MAD (variance) bands together
mad_bands = ["smad", "emad", "bcmad"]
geomad.odc.explore(bands=mad_bands, robust=True)

In [None]:
# Clean up: uncomment the line below to close the dask client when you are finished
# client.close()