In [1]:
# Import required packages
from pprint import pprint

import matplotlib.pyplot as plt
import pandas as pd
from datacube import Datacube
from odc.geo import resxy_
from odc.ui import DcViewer
from pyproj import CRS

# Table display settings: allow up to 200 characters per column and
# put no cap on the number of rows shown.
pd.set_option(
    'display.max_colwidth', 200,
    'display.max_rows', None,
)

ModuleNotFoundError: No module named 'odc.loader'

In [None]:
# Connect to datacube; the resulting `dc` handle is used by every
# search and load call further down the notebook
dc = Datacube(app="Products_and_measurements")

## Product Discovery

In [None]:
# List the products of the connected datacube
dc.list_products()

In [None]:
# List the measurements of the connected datacube
dc.list_measurements()

## Dataset Searching & Querying

### Finding Dataset

In [None]:
# Search the "sentinel_2_l2a" product, capping the result at one dataset (limit=1)
datasets = dc.find_datasets(product="sentinel_2_l2a", limit=1)
# Bare name on the last line so the notebook displays the search result
datasets

We can also search for datasets within a specific spatial extent or time period. To do this, we supply a spatiotemporal query (i.e. a range of x- and y-coordinates defining the spatial area to search, and a range of times).

`dc.find_datasets()` will then return a subset of datasets that match this query:

In [None]:
# Restrict the search to an x/y window and a two-day time range
search_area = {"x": (114, 116), "y": (-7, -9)}
search_period = ("2020-01-01", "2020-01-02")
datasets = dc.find_datasets(product="sentinel_2_l2a", time=search_period, **search_area)
datasets

### Inspecting Dataset

In [None]:
# Show the `uris` attribute of the first dataset returned by the search
datasets[0].uris

In [None]:
# Show the `measurements` attribute of the first dataset returned by the search
datasets[0].measurements

In [None]:
# Show the `crs` attribute of the first dataset returned by the search
datasets[0].crs

In [None]:
# Show the `transform` attribute of the first dataset returned by the search
datasets[0].transform

In [None]:
# Keep a short handle on the first dataset, then list the attributes and
# methods that are available on its `metadata` object
ds0 = datasets[0]
dir(ds0.metadata)

In [None]:
# Read one metadata field using plain attribute access
ds0.metadata.cloud_cover

In [None]:
# Same field, looked up by name; useful when the field name is held in a string
getattr(ds0.metadata, 'cloud_cover')

In [None]:
# Show the `id` field of the dataset's metadata
ds0.metadata.id

In [None]:
# Show the `lat` field of the dataset's metadata
ds0.metadata.lat

In [None]:
# `lat` has sub-attributes of its own; show its `begin` value
ds0.metadata.lat.begin

In [None]:
# Optional: uncomment the next line to pretty-print the instance attributes of `ds0`
# pprint(vars(ds0))

## Load Data
Once you know the products or datasets that you are interested in, you can load data using `dc.load()`.

In [None]:
# Same product and dates as the earlier search, over a smaller x/y window
area_x, area_y = (114, 115), (-7, -8)
datasets_2 = dc.find_datasets(
    product="sentinel_2_l2a", x=area_x, y=area_y, time=("2020-01-01", "2020-01-02")
)
datasets_2

In [None]:
# Inspect EPSG:9468: report whether pyproj classifies it as a projected or a
# geographic coordinate reference system, then print the CRS itself.
# `CRS` comes from pyproj and is imported with the other packages in the
# imports cell at the top of the notebook.
crs = CRS("EPSG:9468")
print("Projected:", crs.is_projected, "Geographic:", crs.is_geographic)
print(crs)

In [None]:
# Pretty-print the `measurements` of the first dataset from the narrowed search
pprint(datasets_2[0].measurements)

In [None]:
# Print a header followed by one measurement name per line
print("Available measurements:", *datasets_2[0].measurements.keys(), sep="\n")

In [None]:
# Load the red, blue and green measurements of the datasets found above onto
# an EPSG:4326 grid with 0.01-degree pixels.
#
# `resxy_` takes its arguments in (x, y) order. A north-up grid has a positive
# x step (west -> east) and a negative y step (north -> south). The pair
# (-0.01, 0.01) is the legacy (y, x) ordering and would flip both axes if
# passed to `resxy_` unchanged.
ds2 = dc.load(
    datasets=datasets_2,
    measurements=["red", "blue", "green"],
    output_crs="EPSG:4326",  # because EPSG:9468 doesn't work here
    resolution=resxy_(0.01, -0.01),
)

In [None]:
# Display the object returned by dc.load
ds2

We can see that `dc.load` has returned an `xarray.Dataset` containing data from our two input datasets. 

> This `xarray.Dataset` includes:  
> **Dimensions**  
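> This header identifies the number of timesteps returned (time: 2) as well as the number of resulting pixels along the two spatial dimensions (conventionally `x` and `y`; data loaded in a geographic CRS such as EPSG:4326 typically uses `longitude` and `latitude` instead).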
> 
> **Coordinates**  
> - time identifies the time attributed to each returned timestep.
> - x and y provide coordinates for each pixel within the returned data.  
> - spatial_ref provides information about the spatial grid used to load the data
> 
> **Data variables**  
> These are the measurements available for the loaded product.
> For every timestep (time) returned by the query, the measured value at each pixel (y, x) is returned as an array for each measurement.
> Each data variable is itself an `xarray.DataArray` object.
> 
> **Attributes**  
> Other important metadata or attributes for the loaded data

We can also inspect our loaded data by plotting it:

In [None]:
# Plot the timestep at index 1 as an RGB image: pick that timestep, keep the
# three colour bands in red/green/blue order, stack them into one array and
# draw it with imshow (robust=True).
second_step = ds2.isel(time=1)
rgb_bands = second_step[["red", "green", "blue"]].to_array()
rgb_bands.plot.imshow(robust=True)