Notebook References:

- xarray access: https://github.com/EOPF-Sample-Service/eopf-sample-notebooks/blob/main/notebooks/Sentinel-2/Sentinel-2_L1C_MSI_Zarr_product_exploration.ipynb
- xcube-eopf: https://eopf-sample-service.github.io/eopf-sample-notebooks/introduction-xcube-eopf-plugin
- xcube-stac: https://github.com/xcube-dev/xcube-stac/blob/main/examples/notebooks/cdse_sentinel_2.ipynb

`conda install xcube-stac xcube-eopf`


In [1]:
# standard library
import os

# for benchmarking
from dataclasses import dataclass
from typing import List
from itertools import product
import time

# third-party
import cartopy.crs as ccrs
import dask
import numpy as np
import pandas as pd
import requests
import xarray as xr
#import matplotlib.pyplot as plt
#import xarray_eopf

# xcube and plugins
from xcube.core.store import new_data_store, get_data_store_params_schema
from xcube_eopf.utils import reproject_bbox
#import xcube
#import xcube_eopf
#import xcube_stac

## 1. Read directly from link

In [2]:
# hamburg notebook returns:

# eopf:
# https://stac.browser.user.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316?.language=de
# https://stac.browser.user.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2C_MSIL2A_20250501T104041_N0511_R008_T32UNE_20250501T161558?.language=de
# https://stac.browser.user.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2B_MSIL2A_20250506T103629_N0511_R008_T32UNE_20250506T115207?.language=de

# https://stac.core.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316
# https://stac.core.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2C_MSIL2A_20250501T104041_N0511_R008_T32UNE_20250501T161558
# https://stac.core.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2B_MSIL2A_20250506T103629_N0511_R008_T32UNE_20250506T115207

# cdse equivalents: 
# https://browser.stac.dataspace.copernicus.eu/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316?.language=de
# https://browser.stac.dataspace.copernicus.eu/collections/sentinel-2-l2a/items/S2C_MSIL2A_20250501T104041_N0511_R008_T32UNE_20250501T161558?.language=de
# https://browser.stac.dataspace.copernicus.eu/collections/sentinel-2-l2a/items/S2B_MSIL2A_20250506T103629_N0511_R008_T32UNE_20250506T115207?.language=de

# https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316
# https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a/items/S2C_MSIL2A_20250501T104041_N0511_R008_T32UNE_20250501T161558
# https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a/items/S2B_MSIL2A_20250506T103629_N0511_R008_T32UNE_20250506T115207

In [3]:
# TODO: is it possible to access a file directly via xcube?
# Direct object paths for the product S2A_MSIL2A_20250503T103701_..._T32UNE (EOPF Zarr vs. CDSE SAFE).
# Earlier L1C tutorial sample, kept for reference:
#path_eopf_zarr = "https://objectstore.eodc.eu:2222/e05ab01a9d56408d82ac32d69a5aae2a:sample-data/tutorial_data/cpm_v253/S2B_MSIL1C_20250113T103309_N0511_R108_T32TLQ_20250113T122458.zarr"
path_eopf_zarr = "https://objectstore.eodc.eu:2222/e05ab01a9d56408d82ac32d69a5aae2a:202505-s02msil2a/03/products/cpm_v256/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316.zarr"
path_eopf_safe = ""  # placeholder: a SAFE copy still has to be made available on the same bucket as the Zarr
path_cdse_safe = "s3://eodata/Sentinel-2/MSI/L2A/2025/05/03/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316.SAFE"

## 2. Read via query
Performance depends on both the underlying STAC API (EOPF vs. CDSE) and the implementation of the software used to access it (xcube-stac vs. xcube-eopf).

In [4]:
def create_aoi(bbox, reduction):
    """
    Shrink a bounding box towards its centre by a linear scale factor.

    Every side of the result is ``reduction`` times as long as the matching
    side of ``bbox``; the centre point is unchanged.

    Parameters:
    - bbox: [min_lon, min_lat, max_lon, max_lat]
    - reduction: float in the closed interval [0, 1]
        - 0 collapses the box onto its centroid; the result is still a
          four-element box (with zero width and height), not a point tuple
        - 1 reproduces the input extent (up to floating-point rounding)

    Returns:
    - reduced bounding box as a list [min_lon, min_lat, max_lon, max_lat]

    Raises:
    - ValueError if reduction lies outside [0, 1]
    """
    in_range = 0 <= reduction <= 1
    if not in_range:
        raise ValueError("Reduction must be between 0 and 1.")

    west, south, east, north = bbox

    # Centre of the original box
    mid_lon = (west + east) / 2
    mid_lat = (south + north) / 2

    # Half of the scaled side lengths
    half_width = (east - west) * reduction / 2
    half_height = (north - south) * reduction / 2

    lower_left = [mid_lon - half_width, mid_lat - half_height]
    upper_right = [mid_lon + half_width, mid_lat + half_height]
    return lower_left + upper_right

In [14]:
# this is used for defining inputs 
@dataclass
class BenchmarkConfig:
    """Parameter set for one benchmark run, i.e. one ``open_data`` query against a data store."""
    # collection identifier, passed to ``open_data`` as ``data_id``
    data_id: str
    # area of interest as [min_lon, min_lat, max_lon, max_lat] in lat/lon;
    # it is reprojected to ``crs`` before the query
    bbox: List[float]
    # [start, end] date strings, e.g. ["2025-05-01", "2025-06-01"]
    time_range: List[str]
    # requested output resolution — presumably in units of ``crs``; TODO confirm
    spatial_res: int
    # target CRS code, e.g. "EPSG:32632"
    crs: str
    # band names to load, e.g. ["b02"]
    variables: List[str]


In [18]:
# define data id (collection identifier handed to the data store as ``data_id``)
opt_data_id = "sentinel-2-l2a"

In [7]:
# Pull the bbox from the STAC item of the reference product.
url = "https://stac.core.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316"
# A timeout keeps the cell from hanging forever if the catalog is unreachable.
response = requests.get(url, timeout=30)
# Fail here with the HTTP status instead of a KeyError on an error payload below.
response.raise_for_status()
item = response.json()
bbox = item["bbox"]
print(bbox)

[8.99969379936479, 53.15557577629945, 10.371024273615161, 54.148104103961266]


In [8]:
# Native CRS of the product, read from the "proj:code" property of the STAC item.
crs_native = item["properties"]["proj:code"]  # "EPSG:32632"
print(crs_native)

EPSG:32632


In [23]:
# Areas of interest of increasing size around the centroid of the item bbox.
# All boxes are in lat/lon; reprojection to the chosen crs happens later in the code.
opt_bbox = [
    *(
        create_aoi(bbox, factor)
        for factor in (
            0,            # pixel: box collapsed onto the centroid
            256 / 10980,  # ml patch approx 256*256
            0.125,        # one eighth of each side
            0.25,         # one quarter of each side
        )
    ),
    bbox,  # full
]

In [10]:
# CRS options (a crs is mandatory in xcube).
# Any choice other than the native crs enforces processing (reprojection, resampling).
opt_crs = [crs_native, "EPSG:4326"]

In [11]:
# define times
# each entry is [start, end]; the labels state the span actually covered
opt_time = [
    ["2025-05-01", "2025-06-01"], # one month
    ["2025-05-01", "2025-05-07"], # about one week
    ["2024-01-01", "2025-01-01"] # one year
]

In [12]:
# define spatial resolution
# everything deviating from native resolution enforces processing (resampling)
# NOTE(review): values are presumably metres (units of the UTM crs) — confirm for EPSG:4326 runs
opt_spatial_res = [10, 20, 100]

In [13]:
# define band combinations
# choosing bands with different resolutions enforces processing (resampling)
opt_bands = [
    ["b02"],
    ["b02", "b04"],
    []  # NOTE(review): empty selection — presumably meant as "all bands"; confirm how the stores treat it
]


In [19]:
# TODO: create the BenchmarkConfig objects via a loop over product() of the options

In [24]:
# Create a single BenchmarkConfig by hand, picking one entry from each option list.
custom_cfg = BenchmarkConfig(
    data_id=opt_data_id,
    bbox=opt_bbox[1],  # the "ml patch" sized area
    time_range=opt_time[0],  # first time range in opt_time
    spatial_res=opt_spatial_res[0],  # 10
    crs=opt_crs[0],  # native crs of the product
    variables=opt_bands[0]  # ["b02"]
)

print(custom_cfg)

BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=10, crs='EPSG:32632', variables=['b02'])


## STAC EOPF

In [26]:
# Data store for the EOPF side of the comparison, looked up by its store id "eopf-zarr".
store_zarr = new_data_store("eopf-zarr")

In [21]:
# store_zarr.list_data_ids()
# store_zarr.get_open_data_params_schema(data_id="sentinel-2-l2a")

In [None]:
# TO DO: Integrate in code
# bbox_utm = reproject_bbox(custom_cfg.bbox, "EPSG:4326", custom_cfg.crs)

In [31]:
%%time
# Timed: reproject the bbox, open the dataset through the "eopf-zarr" store and load it.

# custom_cfg.bbox is given in lat/lon (EPSG:4326); convert it to the crs requested for the query
bbox_repr = reproject_bbox(custom_cfg.bbox, "EPSG:4326", custom_cfg.crs)
print(bbox_repr)

ds_zarr = store_zarr.open_data(
    data_id=custom_cfg.data_id,
    bbox=bbox_repr,
    time_range=custom_cfg.time_range,
    spatial_res=custom_cfg.spatial_res,
    crs=custom_cfg.crs,
    variables=custom_cfg.variables,
)
# load() is called inside the timed cell so that the measured time includes reading the data
ds_zarr.load()

(544229.9589484755, 5943707.259587298, 546367.9651581453, 5946302.084300316)
CPU times: user 5.05 s, sys: 252 ms, total: 5.31 s
Wall time: 19.9 s


Unnamed: 0,Array,Chunk
Bytes,5.57 MiB,438.40 kiB
Shape,"(13, 261, 215)","(1, 261, 215)"
Dask graph,13 chunks in 14 graph layers,13 chunks in 14 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 5.57 MiB 438.40 kiB Shape (13, 261, 215) (1, 261, 215) Dask graph 13 chunks in 14 graph layers Data type float64 numpy.ndarray",215  261  13,

Unnamed: 0,Array,Chunk
Bytes,5.57 MiB,438.40 kiB
Shape,"(13, 261, 215)","(1, 261, 215)"
Dask graph,13 chunks in 14 graph layers,13 chunks in 14 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


## STAC CDSE

In [22]:
# S3 credentials for CDSE, read from the environment so that no secret is stored
# in the notebook. Set CDSE_S3_KEY and CDSE_S3_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hardcoded in this cell has been
# saved with the notebook and must be treated as compromised — revoke it.
credentials = {
    "key": os.environ.get("CDSE_S3_KEY"),
    "secret": os.environ.get("CDSE_S3_SECRET"),
}
# Fail fast with a clear message instead of an opaque authentication error later on.
if not all(credentials.values()):
    raise RuntimeError(
        "CDSE credentials missing: set the environment variables "
        "CDSE_S3_KEY and CDSE_S3_SECRET."
    )

In [23]:
# get_data_store_params_schema("stac-cdse")

In [24]:
# Data store for the CDSE side of the comparison, looked up by its store id "stac-cdse".
# If this raises 'Unknown data store "stac-cdse"', the providing xcube plugin is missing from
# the environment (see the `conda install xcube-stac xcube-eopf` note at the top of the notebook).
store_safe = new_data_store("stac-cdse", stack_mode=True, **credentials)

DataStoreError: Unknown data store "stac-cdse" (may be due to missing xcube plugin)

In [25]:
# Band names differ between the two stores, so they are given separately here
# ("B02" instead of the "b02" used in the EOPF query).
bands = ['B02']

In [26]:
%%time
ds_safe = store_safe.open_data(
    data_id="sentinel-2-l2a",
    bbox=bbox_utm,
    time_range=time,
    spatial_res=spat_res,
    crs=crs_utm,
    #variables=bands, # --> different names in xcube-cdse and excube-eopf
    asset_names=bands, 
)
ds_safe.load()

NameError: name 'store_safe' is not defined