Notebook References:

- xarray access: https://github.com/EOPF-Sample-Service/eopf-sample-notebooks/blob/main/notebooks/Sentinel-2/Sentinel-2_L1C_MSI_Zarr_product_exploration.ipynb
- xcube-eopf: https://eopf-sample-service.github.io/eopf-sample-notebooks/introduction-xcube-eopf-plugin
- xcube-stac: https://github.com/xcube-dev/xcube-stac/blob/main/examples/notebooks/cdse_sentinel_2.ipynb

`conda install xcube-stac xcube-eopf`


In [5]:
import cartopy.crs as ccrs
import numpy as np
#import matplotlib.pyplot as plt
import xarray as xr
#import xarray_eopf
import requests

import dask
from xcube.core.store import new_data_store, get_data_store_params_schema
from xcube_eopf.utils import reproject_bbox
#import xcube
#import xcube_eopf
#import xcube_stac

#for direct loading
import rioxarray
import fsspec
import s3fs

# for benchmarking
from dataclasses import dataclass
from typing import List
from itertools import product
import pandas as pd
import time

## 1. Read directly from link

In [2]:
# hamburg notebook returns:

# eopf:
# https://stac.browser.user.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316?.language=de
# https://stac.browser.user.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2C_MSIL2A_20250501T104041_N0511_R008_T32UNE_20250501T161558?.language=de
# https://stac.browser.user.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2B_MSIL2A_20250506T103629_N0511_R008_T32UNE_20250506T115207?.language=de

# https://stac.core.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316
# https://stac.core.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2C_MSIL2A_20250501T104041_N0511_R008_T32UNE_20250501T161558
# https://stac.core.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2B_MSIL2A_20250506T103629_N0511_R008_T32UNE_20250506T115207

# cdse equivalents: 
# https://browser.stac.dataspace.copernicus.eu/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316?.language=de
# https://browser.stac.dataspace.copernicus.eu/collections/sentinel-2-l2a/items/S2C_MSIL2A_20250501T104041_N0511_R008_T32UNE_20250501T161558?.language=de
# https://browser.stac.dataspace.copernicus.eu/collections/sentinel-2-l2a/items/S2B_MSIL2A_20250506T103629_N0511_R008_T32UNE_20250506T115207?.language=de

# https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316
# https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a/items/S2C_MSIL2A_20250501T104041_N0511_R008_T32UNE_20250501T161558
# https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a/items/S2B_MSIL2A_20250506T103629_N0511_R008_T32UNE_20250506T115207

In [3]:
# todo: is it possible to access a file directly via xcube?
#path_eopf_zarr = "https://objectstore.eodc.eu:2222/e05ab01a9d56408d82ac32d69a5aae2a:sample-data/tutorial_data/cpm_v253/S2B_MSIL1C_20250113T103309_N0511_R108_T32TLQ_20250113T122458.zarr"
path_eodc_zarr = "https://objectstore.eodc.eu:2222/e05ab01a9d56408d82ac32d69a5aae2a:202505-s02msil2a/03/products/cpm_v256/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316.zarr"
path_eodc_safe = "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:notebook-data/SAFE/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316.SAFE"
path_cdse_safe = "s3://eodata/Sentinel-2/MSI/L2A/2025/05/03/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316.SAFE"

### EOPF Zarr on EODC

In [4]:
dt = xr.open_datatree(path_eodc_zarr, engine="zarr", chunks={})

  dt = xr.open_datatree(path_eodc_zarr, engine="zarr", chunks={})
  dt = xr.open_datatree(path_eodc_zarr, engine="zarr", chunks={})


In [5]:
%%time
band_eodc_zarr = dt["measurements/reflectance/r10m"]["b04"].load()

CPU times: user 2.2 s, sys: 787 ms, total: 2.99 s
Wall time: 1.38 s


  return self.func(*new_argspec)
  return nan_reducer(block, axis)
  return self.func(*new_argspec)
  return nan_reducer(block, axis)


In [6]:
band_eodc_zarr

### EOPF SAFE on EODC

In [7]:
%%time
# Full URL to the B04 10m band file, from f"{path_eopf_zarr}/manifest.safe"
b04_url = (
    f"{path_eodc_safe}/GRANULE/L2A_T32UNE_A051514_20250503T103937/IMG_DATA/R10m/"
    "T32UNE_20250503T103701_B04_10m.jp2"
)

# Open and read the band
fs = fsspec.filesystem("http")
with fs.open(b04_url) as f:
    band_eodc_safe = rioxarray.open_rasterio(f, masked=True)

band_eodc_safe.name = "B04"
band_eodc_safe

CPU times: user 122 ms, sys: 118 ms, total: 240 ms
Wall time: 335 ms


### SAFE on CDSE S3

In [8]:
# cdse credentials
credentials = {
    "key": "FTE4ZT820RDZTHOU6I8C",
    "secret": "EdSaK2k1DjJm1rTlbucDaaSsmSSawWFz9da9Wemz",
}

In [9]:
%%time

# Your CDSE credentials
fs = s3fs.S3FileSystem(
    key=credentials["key"],
    secret=credentials["secret"],
    client_kwargs={
        "region_name": "eu-central-1",
        "endpoint_url": "https://s3.dataspace.copernicus.eu"
    }
)

# Correct path from manifest.safe
band_path = (
    "eodata/Sentinel-2/MSI/L2A/2025/05/03/"
    "S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316.SAFE/"
    "GRANULE/L2A_T32UNE_A051514_20250503T103937/IMG_DATA/R10m/"
    "T32UNE_20250503T103701_B04_10m.jp2"
)

# Open the file from S3
with fs.open(band_path, mode="rb") as f:
    band = rioxarray.open_rasterio(f, masked=True)

band.name = "B04"
band

CPU times: user 680 ms, sys: 218 ms, total: 898 ms
Wall time: 3.88 s


## 2. Read via querry
The influence of the underlying STAC API (EOPF vs CDSE) and the implementation of the used software (xcube-stac vs xcube-eopf) affects the performance.

### Functions

In [6]:
def create_aoi(bbox, reduction):
    """
    Generate a reduced bounding box or centroid based on a reduction factor.
    Helper function to easily create portions of the original bbox around the centroid.

    Parameters:
    - bbox: [min_lon, min_lat, max_lon, max_lat]
    - reduction: float between 0 and 1
        - 0 returns the centroid as (lon, lat)
        - 0 < reduction < 1 returns a scaled bounding box centered at the centroid

    Returns:
    - reduced bounding box list
    """
    if not (0 <= reduction <= 1):
        raise ValueError("Reduction must be between 0 and 1.")

    min_lon, min_lat, max_lon, max_lat = bbox

    # Compute centroid
    centroid_lon = (min_lon + max_lon) / 2
    centroid_lat = (min_lat + max_lat) / 2

    #if reduction == 0:
     #   return (centroid_lon, centroid_lat)

    # Compute reduced bounding box dimensions
    lat_span = (max_lat - min_lat) * reduction
    lon_span = (max_lon - min_lon) * reduction

    return [
        centroid_lon - lon_span / 2,
        centroid_lat - lat_span / 2,
        centroid_lon + lon_span / 2,
        centroid_lat + lat_span / 2,
    ]

In [7]:
# this is used for defining inputs 
@dataclass
class BenchmarkConfig:
    data_id: str
    bbox: List[float]
    time_range: List[str]
    spatial_res: int
    crs: str
    variables: List[str] 


In [8]:
# access function eopf
def access_eopf(cfg: BenchmarkConfig):
    return store_zarr.open_data(
        data_id=cfg.data_id,
        bbox=reproject_bbox(cfg.bbox, "EPSG:4326", cfg.crs), # has to be done for xcube # TODO: throws error with dfg.crs = EPSG:4326
        time_range=cfg.time_range,
        spatial_res=cfg.spatial_res,
        crs=cfg.crs,
        variables=cfg.variables,
    ).load()


In [9]:
# access function safe
def access_safe(cfg: BenchmarkConfig):
    return store_safe.open_data(
        data_id=cfg.data_id,
        bbox=reproject_bbox(cfg.bbox, "EPSG:4326", cfg.crs), # has to be done for xcube # TODO: throws error with dfg.crs = EPSG:4326
        time_range=cfg.time_range,
        spatial_res=cfg.spatial_res,
        crs=cfg.crs,
        asset_names=[v.upper() for v in cfg.variables],
    ).load()


In [10]:
# benchmark function
# loops through the given configs
def benchmark_data_access(configs, access_fn):
    results = []
    
    for cfg in configs:
        print(f"Running: {cfg}")
        start = time.perf_counter()
        ds = access_fn(cfg)
        end = time.perf_counter()

        n_pixels_xy = ds.sizes['x'] * ds.sizes['y'] # get pixel count
        results.append({
            "data_id": cfg.data_id,
            "bbox": cfg.bbox,
            "time_range": cfg.time_range,
            "spat_res": cfg.spatial_res,
            "crs": cfg.crs,
            "variables": cfg.variables,
            "n_pixels_xy": n_pixels_xy,
            "duration_sec": round(end - start, 4)
        })
        
    return pd.DataFrame(results)


### Parameter Definitions

In [11]:
# pull the bbox from the catalog/object here
url = "https://stac.core.eopf.eodc.eu/collections/sentinel-2-l2a/items/S2A_MSIL2A_20250503T103701_N0511_R008_T32UNE_20250503T173316"
response = requests.get(url)
item = response.json()
bbox = item["bbox"]
print(bbox)

[8.99969379936479, 53.15557577629945, 10.371024273615161, 54.148104103961266]


In [12]:
# pull native crs
crs_native = item['properties']['proj:code'] # "EPSG:32632"
print(crs_native)

EPSG:32632


In [13]:
# define data id
opt_data_id = [
    "sentinel-2-l2a"
]

In [14]:
# define bboxes
# only in lat/lon, reprojection of bbox to chosen crs happens later in code
opt_bbox = [
    create_aoi(bbox, 0), # pixel
    create_aoi(bbox, 256 / 10980), # ml patch approx 256*256
    create_aoi(bbox, 0.125), # eight
    create_aoi(bbox, 0.25), # quarter
    bbox, # full  
]

In [17]:
opt_bbox[1]

[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631]

In [27]:
# define crs
# mandatory in xcube
# if it differs from native crs processing is enforced (reprojection, resampling)
opt_crs = [
    crs_native, 
    #"EPSG:4326", # # TODO: reproject_bbox in access_ throws error with dfg.crs = EPSG:4326
    #"EPSG:3035",
]

In [20]:
# define times
opt_time_range = [
    ["2025-05-01", "2025-06-01"], # day
    ["2025-05-01", "2025-05-07"], # month
    ["2024-01-01", "2025-01-01"] # year
]

In [21]:
# define spatial resolution
# everything deviating from native resolution enforces processing (resampling)
opt_spatial_res = [
    10, 
    #20, 
    100,
]

In [22]:
# define band combinations
# choosing bands with different resolutions enforces processing (resampling)
opt_variables = [
    ["b02"],
    ["b02", "b04"],
    #[]
]


In [28]:
# create all combinations of parameters specified above
configs = [
    BenchmarkConfig(data_id, bbox, time_range, spatial_res, crs, variables)
    for data_id, bbox, time_range, spatial_res, crs, variables in product(
        opt_data_id,
        opt_bbox,
        opt_time_range,
        opt_spatial_res,
        opt_crs,
        opt_variables
    )
]
print(f"Number of configs: {len(configs)}")
print(configs[0])

Number of configs: 36
BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=10, crs='EPSG:32632', variables=['b02'])


In [29]:
# Create custom dataclass object
custom_cfg = BenchmarkConfig(
    data_id=opt_data_id[0],
    bbox=opt_bbox[0],
    time_range=opt_time_range[0],
    spatial_res=opt_spatial_res[0],
    crs=opt_crs[0],
    variables=opt_variables[0]
)
print(custom_cfg)

BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=10, crs='EPSG:32632', variables=['b02'])


## STAC EOPF

In [30]:
store_zarr = new_data_store("eopf-zarr") #  store_zarr.list_data_ids(); store_zarr.get_open_data_params_schema(data_id="sentinel-2-l2a")

In [31]:
df_benchm_eopf = benchmark_data_access(configs[0:5], access_eopf)

Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=10, crs='EPSG:32632', variables=['b02'])
Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=10, crs='EPSG:32632', variables=['b02', 'b04'])
Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=100, crs='EPSG:32632', variables=['b02'])
Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=100, crs='EPSG:32632', variables=['b02', 'b04'])
Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.6693726703056

In [32]:
df_benchm_eopf # IS THERE CACHING GOING ON, TWO BANDS QUICKER THAN ONE? HOW TO DISABLE CACHING? COLD START?

Unnamed: 0,data_id,bbox,time_range,spat_res,crs,variables,n_pixels_xy,duration_sec
0,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-06-01]",10,EPSG:32632,[b02],56115,12.6284
1,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-06-01]",10,EPSG:32632,"[b02, b04]",56115,10.3112
2,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-06-01]",100,EPSG:32632,[b02],621,13.5496
3,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-06-01]",100,EPSG:32632,"[b02, b04]",621,13.8165
4,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-05-07]",10,EPSG:32632,[b02],56115,2.704


In [33]:
%%time

bbox_repr = reproject_bbox(custom_cfg.bbox, "EPSG:4326", custom_cfg.crs)
print(bbox_repr)

ds_zarr = store_zarr.open_data(
    data_id=custom_cfg.data_id,
    bbox=bbox_repr,
    time_range=custom_cfg.time_range,
    spatial_res=custom_cfg.spatial_res,
    crs=custom_cfg.crs,
    variables=custom_cfg.variables,
)
ds_zarr.load()

(544229.9589484755, 5943707.259587298, 546367.9651581453, 5946302.084300316)
CPU times: user 7.75 s, sys: 494 ms, total: 8.24 s
Wall time: 14.1 s


## STAC CDSE

In [34]:
#credentials = {
#    "key": "xxx",
#    "secret": "xxx",
#}

In [37]:
store_safe = new_data_store("stac-cdse", stack_mode=True, **credentials) # get_data_store_params_schema("stac-cdse")

In [49]:
df_benchm_safe = benchmark_data_access(configs[0:5], access_safe)

Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=10, crs='EPSG:32632', variables=['b02'])
Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=10, crs='EPSG:32632', variables=['b02', 'b04'])
Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=100, crs='EPSG:32632', variables=['b02'])
Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.669372670305636, 53.64026948239441, 9.701345402674315, 53.66341039786631], time_range=['2025-05-01', '2025-06-01'], spatial_res=100, crs='EPSG:32632', variables=['b02', 'b04'])
Running: BenchmarkConfig(data_id='sentinel-2-l2a', bbox=[9.6693726703056

In [50]:
df_benchm_safe

Unnamed: 0,data_id,bbox,time_range,spat_res,crs,variables,n_pixels_xy,duration_sec
0,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-06-01]",10,EPSG:32632,[b02],56115,190.4242
1,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-06-01]",10,EPSG:32632,"[b02, b04]",56115,378.9656
2,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-06-01]",100,EPSG:32632,[b02],621,117.8965
3,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-06-01]",100,EPSG:32632,"[b02, b04]",621,231.2503
4,sentinel-2-l2a,"[9.669372670305636, 53.64026948239441, 9.70134...","[2025-05-01, 2025-05-07]",10,EPSG:32632,[b02],56115,37.9513


In [51]:
# bands have to be renamed
bands_cdse = ['B02']

In [53]:
%%time
ds_safe = store_safe.open_data(
    data_id=custom_cfg.data_id,
    bbox=bbox_repr,
    time_range=custom_cfg.time_range,
    spatial_res=custom_cfg.spatial_res,
    crs=custom_cfg.crs,
    asset_names=bands_cdse,
)
ds_safe.load()

CPU times: user 5min 35s, sys: 9.56 s, total: 5min 44s
Wall time: 3min 18s
