# SENTINEL-2  EOPF Zarr TO H3 GRID CONVERSION

This notebook converts Sentinel-2 reflectance data from its UTM projection
to the H3 hexagonal grid system for spatial analysis.

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
from tqdm import tqdm
import h3
from pyproj import Transformer
import xdggs

1. Open data and preparation

In [2]:
# Location of the EOPF Zarr product.
# NOTE(review): this is an absolute, machine-specific path; adjust it before running elsewhere.
zarr_path = "/home/ubuntu/project/eopf-safe-2-healpix/src/notebook/S2B_MSIL2A_20240526T001109_N0510_R073_T56KKB_20240526T013407.zarr"

# Open the whole product as a DataTree. chunks={} keeps the arrays lazy (dask-backed);
# mask_and_scale=False turns off xarray's fill-value masking and scale/offset decoding.
dt = xr.open_datatree(
    zarr_path,
    engine="zarr",
    mask_and_scale=False,
    chunks={},
)

# Select the 60 m reflectance group and display it.
ds = dt["measurements/reflectance/r60m"]
ds

Unnamed: 0,Array,Chunk
Bytes,7.15 kiB,1.19 kiB
Shape,"(1830,)","(305,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 7.15 kiB 1.19 kiB Shape (1830,) (305,) Dask graph 6 chunks in 2 graph layers Data type float32 numpy.ndarray",1830  1,

Unnamed: 0,Array,Chunk
Bytes,7.15 kiB,1.19 kiB
Shape,"(1830,)","(305,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.15 kiB,1.19 kiB
Shape,"(1830,)","(305,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 7.15 kiB 1.19 kiB Shape (1830,) (305,) Dask graph 6 chunks in 2 graph layers Data type float32 numpy.ndarray",1830  1,

Unnamed: 0,Array,Chunk
Bytes,7.15 kiB,1.19 kiB
Shape,"(1830,)","(305,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 25.55 MiB 726.76 kiB Shape (1830, 1830) (305, 305) Dask graph 36 chunks in 2 graph layers Data type float64 numpy.ndarray",1830  1830,

Unnamed: 0,Array,Chunk
Bytes,25.55 MiB,726.76 kiB
Shape,"(1830, 1830)","(305, 305)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


2. Coordinate transformation preparation

In [3]:
# Work on Band 02 ("b02") from the 60 m reflectance group opened above.
band = "b02"

# 1-D projected coordinate axes of the selected band.
band_da = ds[band]
x = band_da['x'].values
y = band_da['y'].values

# Expand the two axes into full 2-D grids holding the x/y position of every pixel.
xx, yy = np.meshgrid(x, y)

print(f"Coordinate grid shape: {xx.shape}")
print(f"X range: {x.min():.0f} to {x.max():.0f}")
print(f"Y range: {y.min():.0f} to {y.max():.0f}")

Coordinate grid shape: (1830, 1830)
X range: 200010 to 309750
Y range: 7590250 to 7699990


In [4]:
# Set up the coordinate transformation from the product's UTM CRS to WGS84 lon/lat.
# The source CRS code is read from the product metadata; always_xy=True makes the
# transformer take (x, y) input and return (lon, lat) output in that order.
utm_crs = dt.other_metadata['horizontal_CRS_code']
transformer = Transformer.from_crs(utm_crs, "EPSG:4326", always_xy=True)

# Convert the UTM coordinate mesh to longitude/latitude.
# This is done once: the transform runs over every pixel of the grid, so the
# previous duplicated call only doubled the runtime without changing the result.
lon, lat = transformer.transform(xx, yy)

3. Lon/lat preparation for H3 conversion

In [5]:
# Collapse the 2-D band and coordinate grids into aligned 1-D vectors
# (one entry per pixel) so they can be processed element by element.
vals = np.ravel(ds[band].values)   # reflectance values
lat = np.ravel(lat)                # latitude of each pixel
lon = np.ravel(lon)                # longitude of each pixel

print(f"Total pixels to process: {len(vals):,}")

# Keep only pixels whose value is not NaN; fewer points means less work downstream.
# NOTE(review): the product is opened with mask_and_scale=False, so fill values are
# presumably not converted to NaN and this filter may remove nothing - confirm.
valid_mask = np.logical_not(np.isnan(vals))
vals, lat, lon = vals[valid_mask], lat[valid_mask], lon[valid_mask]

print(f"Valid pixels (non-NaN): {len(vals):,}")

Total pixels to process: 3,348,900
Valid pixels (non-NaN): 3,348,900


H3 Grid conversion

In [6]:
# H3 resolution used for every pixel.
resolution = 10

# Look up the H3 cell (string index) containing each pixel's lat/lon,
# with a progress bar over the whole point list.
h3_cells = [
    h3.latlng_to_cell(pixel_lat, pixel_lon, resolution)
    for pixel_lat, pixel_lon in tqdm(
        zip(lat, lon), total=len(lat), desc="Mapping to H3 cells"
    )
]

# Convert the string indexes to their integer form.
h3_ids = list(map(h3.str_to_int, h3_cells))

Mapping to H3 cells: 100%|██████████| 3348900/3348900 [00:04<00:00, 720119.36it/s]


Spatial Aggregation

In [7]:
# Pair every pixel value with the integer id of the H3 cell it falls in.
df = pd.DataFrame({'cell_id': h3_ids, 'value': vals})

# Average all pixel values sharing a cell; as_index=False keeps 'cell_id' as a
# regular column, giving one row per H3 cell with columns 'cell_id' and 'value'.
agg = df.groupby('cell_id', as_index=False)['value'].mean()

In [8]:
# Assemble the per-cell averages into a 1-D xarray Dataset: the "b02" variable
# runs along the "cells" dimension, and "cell_ids" carries the matching H3 index.
h3_dataset = xr.Dataset(
    data_vars={
        "b02": xr.DataArray(
            data=agg["value"].values.astype(np.float64),
            dims=["cells"],
            coords={"cell_ids": ("cells", agg["cell_id"].values)},
        )
    },
    attrs={
        "Conventions": "COARDS",
        "title": "Sentinel-2 Band 02 Reflectance (H3 Aggregated)",
        # The two literals are concatenated, so the first must end with a space;
        # without it the text read "...spectral band 02aggregated into...".
        # NOTE(review): the input file name says MSIL2A - confirm that "TOA" is
        # the intended wording for this product level.
        "description": (
            "TOA reflectance from MSI acquisition at spectral band 02 "
            f"aggregated into H3 hexagonal cells at resolution {resolution}."
        ),
    },
)

# Record the grid type and resolution level on the cell-id coordinate.
h3_dataset.coords["cell_ids"].attrs["grid_name"] = "h3"
h3_dataset.coords["cell_ids"].attrs["level"] = resolution

In [9]:
# Save the H3 dataset as a new Zarr store named after the source product
# ("<product>_h3.zarr"), overwriting any previous run (mode="w").
# The store must be written to h3_save_path: that is the path reopened afterwards,
# so writing to a different hard-coded name left it missing or stale.
h3_save_path = zarr_path.replace(".zarr", "_h3.zarr")
h3_dataset.to_zarr(h3_save_path, mode="w")

<xarray.backends.zarr.ZarrStore at 0x7173159e4dc0>

In [10]:
# Re-open the H3 Zarr store that was just written.
original_ds = xr.open_zarr(h3_save_path)

# Decode it with xdggs so the `.dggs` accessor can be used on the result
# (presumably driven by the grid_name/level attributes on cell_ids - verify).
ds = xdggs.decode(original_ds)
ds

Unnamed: 0,Array,Chunk
Bytes,5.53 MiB,353.83 kiB
Shape,"(724626,)","(45290,)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 5.53 MiB 353.83 kiB Shape (724626,) (45290,) Dask graph 16 chunks in 2 graph layers Data type int64 numpy.ndarray",724626  1,

Unnamed: 0,Array,Chunk
Bytes,5.53 MiB,353.83 kiB
Shape,"(724626,)","(45290,)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.53 MiB,353.83 kiB
Shape,"(724626,)","(45290,)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 5.53 MiB 353.83 kiB Shape (724626,) (45290,) Dask graph 16 chunks in 2 graph layers Data type float64 numpy.ndarray",724626  1,

Unnamed: 0,Array,Chunk
Bytes,5.53 MiB,353.83 kiB
Shape,"(724626,)","(45290,)"
Dask graph,16 chunks in 2 graph layers,16 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [11]:
# Query the xdggs accessor of the decoded dataset for the geometry of its cells:
# first the cell centers, then the cell boundaries.
dggs_accessor = ds.dggs
cell_centers = dggs_accessor.cell_centers()
cell_boundaries = dggs_accessor.cell_boundaries()

In [12]:
# Load the aggregated band into memory, then render it as an interactive map
# through the xdggs accessor (semi-transparent cells, "jet" colormap).
b02_in_memory = ds["b02"].compute()
b02_in_memory.dggs.explore(cmap="jet", alpha=0.33)

Map(custom_attribution='', layers=(SolidPolygonLayer(filled=True, get_fill_color=arro3.core.ChunkedArray<Fixed…