In [None]:
# Import required packages
import pandas as pd
from datacube import Datacube

# Table display settings: let long cell text through (column width 200) and
# lift the cap on the number of rows shown.
display_options = {
    "display.max_colwidth": 200,
    "display.max_rows": None,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [None]:
# Connect to datacube
# `dc` is the handle the following cells use to list products, search for
# datasets and load data.
dc = Datacube()

## Product Discovery

In [None]:
# List Products
# Left as the cell's last expression so the notebook displays the result.
dc.list_products()

In [None]:
# List measurements
# Product whose measurements we want to inspect
product = "s2_l2a"

# Fetch the measurement listing, then keep only the entries labelled with
# `product` (assumes the listing is indexed by product name — TODO confirm)
measurements = dc.list_measurements()
measurements.loc[product]

## Dataset Searching & Querying

### Finding Dataset

In [None]:
# Search the "s2_l2a" product, capping the search at one result (limit=1),
# then display what came back.
datasets = dc.find_datasets(product="s2_l2a", limit=1)
datasets

We can also search for datasets within a specific spatial extent or time period. To do this, we supply a spatiotemporal query (i.e. a range of x- and y-coordinates defining the spatial area to load, and a range of times).

`dc.find_datasets()` will then return a subset of datasets that match this query:

In [None]:
# Search with a spatiotemporal query: the x/y ranges bound the area of
# interest and `time` bounds the period to search.
datasets = dc.find_datasets(
    product="s2_l2a", x=(114, 115), y=(-9, -10), time=("2025-06-01", "2025-06-30")
)

print(f"Found {len(datasets)} datasets")

# Fail with an actionable message instead of a bare IndexError on
# `datasets[0]` when nothing matches the query.
if not datasets:
    raise ValueError(
        "No datasets matched the query; widen the spatial extent or time range."
    )

# Select the first dataset
dataset = datasets[0]
print(f"Here's the STAC URL for the first dataset: {dataset.uri}")

### Inspecting Dataset

In [None]:
# Show the measurements recorded on the selected dataset
dataset.measurements

In [None]:
# Report the coordinate reference system recorded on the selected dataset
print(f"The CRS of the dataset is: {dataset.crs}")

In [None]:
# Attributes and methods that are available
# (`dir` lists every name exposed by the dataset's metadata object)
dir(dataset.metadata)

In [None]:
# Cloud cover is read from the dataset's metadata and shown to 3 decimal places
print(f"Cloud cover for the first dataset is: {dataset.metadata.cloud_cover:.3f}%")

In [None]:
# Let's find the dataset with the least cloud cover.
# `min` with a key returns the first dataset holding the smallest cloud-cover
# value (the same tie-breaking as a strict `<` scan), needs no sentinel
# starting value, and raises ValueError right here if `datasets` is empty
# rather than failing later on a `None` result.
least_cloudy_dataset = min(datasets, key=lambda ds: ds.metadata.cloud_cover)
least = least_cloudy_dataset.metadata.cloud_cover
print(
    f"The dataset with the least cloud cover is: {least_cloudy_dataset.id} with {least:.3f}%"
)

## Load Data
Once you know the products or datasets that you are interested in, you can load data using `dc.load()`.

In [None]:
# Print a header, then one measurement name per line for the selected dataset
print("Available measurements:")
for band_name in dataset.measurements:
    print(band_name)

In [None]:
# Load the red, blue and green measurements of the least cloudy dataset,
# keeping the output in that dataset's own CRS.
load_kwargs = dict(
    datasets=[least_cloudy_dataset],
    measurements=["red", "blue", "green"],
    output_crs=least_cloudy_dataset.crs,
    resolution=10,
    # chunks={"x": 512, "y": 512},  # Uncomment to use Dask for lazy loading
)
data = dc.load(**load_kwargs)
data

We can see that `dc.load` has returned an `xarray.Dataset` containing data from our single input dataset (the least cloudy one selected above).

> This `xarray.Dataset` includes:  
> **Dimensions**  
> This header identifies the number of timesteps returned (time: 1, because a single dataset was loaded) as well as the number of resulting pixels in the `x` and `y` directions.
> 
> **Coordinates**  
> - `time` identifies the time attributed to each returned timestep.
> - `x` and `y` provide coordinates for each pixel within the returned data.  
> - `spatial_ref` provides information about the spatial grid used to load the data.
> 
>**Data variables**  
> These are the measurements available for the loaded product.
> For every timestep (time) returned by the query, the measured value at each pixel (y, x) is returned as an array for each measurement.
> Each data variable is itself an `xarray.DataArray` object.
> 
> **Attributes**  
> Other important metadata or attributes for the loaded data

We can also inspect our loaded data by plotting it:

In [None]:
# Stack the three bands in red-green-blue order, drop any length-1
# dimensions, then draw the result as an image.
rgb = data[["red", "green", "blue"]].to_array().squeeze()
rgb.plot.imshow(robust=True)

In [None]:
# Or plot an interactive map
# NOTE(review): vmin/vmax presumably set the display stretch to 0-1000;
# confirm these bounds suit the loaded bands.
data.odc.explore(vmin=0, vmax=1000)