In [2]:
import pystac
import xarray as xr
import pandas as pd
from datetime import datetime, date
import pystac_client
from pystac_client import Client
import numpy as np
import copernicusmarine
from copernicusmarine.core_functions import custom_open_zarr

import warnings
warnings.filterwarnings("ignore")


  from .autonotebook import tqdm as notebook_tqdm


### Open the root catalog

Use pystac-client to connect to a STAC API endpoint (https://catalog.dive.edito.eu/). We can also connect to a static STAC JSON catalog to view modelled data from Bio-Oracle, which is also available in Zarr format.

In [3]:
# STAC API URL that the client connects to.
URL = 'https://api.dive.edito.eu/data/collections'
# Alternative: a static STAC catalog (uncomment to use it instead).
# URL = 'https://s3.waw3-1.cloudferro.com/emodnet/bio_oracle/stac/catalog.json'

# Custom HTTP headers sent with every request. Client.open expects a mapping of
# header name -> value, so the "no extra headers" case is an empty dict.
headers = {}

cat = Client.open(URL, headers=headers)
cat

In [4]:
# Print the id of every collection exposed by the catalog, one per line.
colls = cat.get_collections()

for collection_id in (entry.id for entry in colls):
    print(collection_id)

climate_forecast-age_of_sea_ice
climate_forecast-air_density
climate_forecast-air_pressure
climate_forecast-air_pressure_at_mean_sea_level
climate_forecast-air_temperature
climate_forecast-atmosphere_upward_relative_vorticity
climate_forecast-barotropic_eastward_sea_water_velocity
climate_forecast-barotropic_northward_sea_water_velocity
climate_forecast-beaufort_wind_force
climate_forecast-charnock_coefficient_for_surface_roughness_length_for_momentum_in_air
climate_forecast-concentration_of_colored_dissolved_organic_matter_in_sea_water_expressed_as_equivalent_mass_fraction_of_quinine_sulfate_dihydrate
climate_forecast-dew_point_temperature
climate_forecast-divergence_of_wind
climate_forecast-downwelling_photosynthetic_photon_flux_in_sea_water
climate_forecast-eastward_sea_ice_velocity
climate_forecast-eastward_sea_water_velocity
climate_forecast-eastward_sea_water_velocity_assuming_no_tide
climate_forecast-eastward_sea_water_velocity_due_to_ekman_drift
climate_forecast-eastward_wind
c

### Browse the Catalog
Navigate through the root catalog to find sub-catalogs and collections of interest


In [5]:
# For every collection whose id mentions chlorophyll, show the collection id
# followed by the id and assets of its first item (if it has any).
collections = cat.get_collections()
chlorophyll_collections = (entry for entry in collections if 'chlorophyll' in entry.id)

for collection in chlorophyll_collections:
    print(collection.id)
    # Only the first item is needed as a sample; do not walk the whole collection.
    first_item = next(iter(collection.get_all_items()), None)
    if first_item is not None:
        print(first_item.id)
        print(first_item.assets)

climate_forecast-mass_concentration_of_chlorophyll_a_in_sea_water
58a30a8c-f2dd-585f-87b4-61aa1eefc70c
{'arco-time-series': <Asset href=https://s3.waw3-1.cloudferro.com/mdl-arco-geo-001/arco/BALTICSEA_ANALYSISFORECAST_BGC_003_007/cmems_mod_bal_bgc_anfc_7-10days_P1D-m_202411/geoChunked.zarr>, 'wmts': <Asset href=https://wmts.marine.copernicus.eu/teroWmts/BALTICSEA_ANALYSISFORECAST_BGC_003_007/cmems_mod_bal_bgc_anfc_7-10days_P1D-m_202411?layer=BALTICSEA_ANALYSISFORECAST_BGC_003_007/cmems_mod_bal_bgc_anfc_7-10days_P1D-m_202411/chl>, 'arco-time-series-datalab-data-explorer': <Asset href=https://datalab.dive.edito.eu/data-explorer?source=https://s3.waw3-1.cloudferro.com/mdl-arco-geo-001/arco/BALTICSEA_ANALYSISFORECAST_BGC_003_007/cmems_mod_bal_bgc_anfc_7-10days_P1D-m_202411/geoChunked.zarr>}
climate_forecast-mass_concentration_of_chlorophyll_in_sea_water
59afc1f2-24a0-5087-8609-a2374dc6ae08
{'arco-time-series': <Asset href=https://s3.waw3-1.cloudferro.com/mdl-arco-geo-060/arco/INSITU_NWS_PH

### Search for a Collection
Identify a collection based on your variables (e.g., temperature, salinity). You can filter by collection metadata like keywords or spatial/temporal bounds.

In [6]:
# Gather one record per item for every collection whose id contains at least
# one of the keywords below.
all_items = []
collection_selection = ['oxygen', 'habitat', 'eunis', 'elevation', 'temperature']
for collection in cat.get_collections():
    # any() makes a collection that matches several keywords get harvested
    # once, instead of once per matching keyword (which duplicated its rows).
    if not any(selector in collection.id for selector in collection_selection):
        continue
    print(f'Processing {collection.id}')
    try:
        for item in collection.get_all_items():
            properties = item.properties
            # .get() keeps an item that lacks start/end datetimes (recorded as
            # None) rather than aborting the remaining items of the collection.
            all_items.append({  'Collection ID': collection.id,
                                'Item ID': item.id,
                                'Item bounds': item.geometry,
                                'item_starttime': properties.get('start_datetime'),
                                'item_endtime': properties.get('end_datetime'),
                                'Assets': item.assets})
    except Exception as e:
        # Best effort: report the failing collection and move on to the next one.
        print(e)
        print(f'Error with {collection.id}')
oxygen_habitat_temperature_items_df = pd.DataFrame(all_items)
oxygen_habitat_temperature_items_df.head()

Processing climate_forecast-air_temperature
Processing climate_forecast-dew_point_temperature
Processing climate_forecast-fractional_saturation_of_oxygen_in_sea_water
Processing climate_forecast-mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
Processing climate_forecast-moles_of_oxygen_per_unit_mass_in_sea_water
Processing climate_forecast-sea_ice_surface_temperature
Processing climate_forecast-sea_surface_foundation_temperature
Processing climate_forecast-sea_surface_subskin_temperature
Processing climate_forecast-sea_surface_temperature
Processing climate_forecast-sea_water_conservative_temperature
Processing climate_forecast-sea_water_potential_temperature
Processing climate_forecast-sea_water_potential_temperature_at_sea_floor
Processing climate_forecast-sea_water_potential_temperature_expressed_as_heat_content
Processing climate_forecast-sea_water_temperature
Processing climate_forecast-surface_temperature
Processing climate_forecast-volume_fraction_of_oxygen_in_sea_

Unnamed: 0,Collection ID,Item ID,Item bounds,item_starttime,item_endtime,Assets
0,climate_forecast-air_temperature,8ebf3d6d-1a6f-58f8-beb3-e6ff6eae3c90,"{'type': 'Polygon', 'coordinates': [[[-52.9000...",2024-12-21T00:00:00.000000Z,2025-01-20T07:10:00.000000Z,{'arco-time-series': <Asset href=https://s3.wa...
1,climate_forecast-air_temperature,33857744-4f8c-5eb2-acc7-3cf15e6026b7,"{'type': 'Polygon', 'coordinates': [[[-52.9000...",2024-12-21T00:00:00.000000Z,2025-01-20T07:10:00.000000Z,{'arco-geo-series': <Asset href=https://s3.waw...
2,climate_forecast-air_temperature,139f5e61-86d9-562a-b24d-d940bb0b25c7,"{'type': 'Polygon', 'coordinates': [[[-14.4, -...",2024-12-21T00:00:00.000000Z,2025-01-20T06:12:00.000000Z,{'arco-time-series': <Asset href=https://s3.wa...
3,climate_forecast-air_temperature,962e103b-9fec-5b95-ab3a-9f8633c03e38,"{'type': 'Polygon', 'coordinates': [[[-14.4, -...",2024-12-21T00:00:00.000000Z,2025-01-20T06:12:00.000000Z,{'arco-geo-series': <Asset href=https://s3.waw...
4,climate_forecast-air_temperature,99b5c8e6-696f-5512-9f59-2f6e53caf374,"{'type': 'Polygon', 'coordinates': [[[-171, -3...",2024-12-21T00:00:00.000000Z,2025-01-20T07:13:00.000000Z,{'arco-time-series': <Asset href=https://s3.wa...


### Select a Collection and Fetch Items
Choose a collection and list the available items (datasets). Here the items are sorted by start time and only those that start after a chosen date are kept.

In [9]:
def filter_items_by_time(items_df, start_date):
    """
    Keep the items that start strictly after ``start_date``, ordered by start time.

    Parameters
    ----------
    items_df : pandas.DataFrame
        Must contain an 'item_starttime' column holding datetimes or
        datetime-like strings. The frame passed in is left unmodified.
    start_date : str, datetime.date, datetime.datetime or pandas.Timestamp
        Exclusive lower bound. A value without a timezone is interpreted as UTC.

    Returns
    -------
    pandas.DataFrame
        A new frame, sorted by 'item_starttime' (converted to UTC datetimes),
        containing only rows whose start time is later than ``start_date``.
        The original index labels are preserved.
    """
    # Normalise the threshold to a UTC timestamp so the comparison also works
    # for naive datetime/Timestamp inputs, not only for strings.
    threshold = pd.Timestamp(start_date)
    if threshold.tzinfo is None:
        threshold = threshold.tz_localize('UTC')
    else:
        threshold = threshold.tz_convert('UTC')

    # assign() returns a copy, so the caller's DataFrame keeps its original column.
    parsed_df = items_df.assign(
        item_starttime=pd.to_datetime(items_df['item_starttime'], utc=True)
    )
    parsed_df = parsed_df.sort_values(by='item_starttime')
    return parsed_df[parsed_df['item_starttime'] > threshold]

# Lower bound for the item start time (an upper bound is not applied yet).
start_date = '2000-01-01'
# end_date = '2030-12-31'

time_df = filter_items_by_time(
    items_df=oxygen_habitat_temperature_items_df,
    start_date=start_date,
)
time_df.head()

Unnamed: 0,Collection ID,Item ID,Item bounds,item_starttime,item_endtime,Assets
749,climate_forecast-sea_water_temperature,20c3b69e-e60d-540f-9319-33e5f0c1b5b9,"{'type': 'Polygon', 'coordinates': [[[-179.999...",2001-01-04 16:47:52+00:00,2023-12-31T23:59:00.000000Z,{'arco-time-series': <Asset href=https://s3.wa...
750,climate_forecast-sea_water_temperature,a45cc962-398e-52fa-8d57-e36df1882cb3,"{'type': 'Polygon', 'coordinates': [[[-179.999...",2001-01-04 16:47:52+00:00,2023-12-31T23:59:00.000000Z,{'arco-geo-series': <Asset href=https://s3.waw...
705,climate_forecast-sea_water_potential_temperatu...,11b679c0-7053-5ec8-be04-48f68d594214,"{'type': 'Polygon', 'coordinates': [[[-180, 90...",2005-01-01 00:00:00+00:00,2019-01-01T00:00:00.000000Z,{'omi-arco': <Asset href=https://s3.waw3-1.clo...
702,climate_forecast-sea_water_potential_temperatu...,9d3c0920-9bb7-507e-b673-c26d33b6f81e,"{'type': 'Polygon', 'coordinates': [[[-180, 90...",2005-01-01 00:00:00+00:00,2019-01-01T00:00:00.000000Z,{'omi-arco': <Asset href=https://s3.waw3-1.clo...
704,climate_forecast-sea_water_potential_temperatu...,28d17777-1534-5021-ba2d-a98893c5b5b2,"{'type': 'Polygon', 'coordinates': [[[-180, 90...",2005-01-01 00:00:00+00:00,2019-01-01T00:00:00.000000Z,{'omi-arco': <Asset href=https://s3.waw3-1.clo...


### Look for Cloud-Optimized Assets
From each item, find and extract cloud-optimized assets (like Zarr or Parquet) that can be processed further.

In [10]:
# File/store suffixes treated as cloud-optimized assets (Zarr stores, with or
# without a trailing slash, and Parquet files).
CLOUD_OPTIMIZED_SUFFIXES = ('.zarr', '.zarr/', '.parquet')

# Build one record per cloud-optimized asset found in the time-filtered items.
all_items_assets = []
for _, row in time_df.iterrows():
    for asset_key, asset in row['Assets'].items():
        # str.endswith accepts a tuple, so a single call covers every suffix.
        if asset.href.endswith(CLOUD_OPTIMIZED_SUFFIXES):
            all_items_assets.append({
                'Collection ID': row['Collection ID'],
                'Item ID': row['Item ID'],
                'Data Start': row['item_starttime'],
                'Data End': row['item_endtime'],
                'Bounds': row['Item bounds'],
                'Asset Key': asset_key,
                'Asset Href': asset.href,
            })

# Create a DataFrame for assets and persist it for later use.
assets_df = pd.DataFrame(all_items_assets)
assets_df.to_csv('temperature_oxygen_habitat_arco_assets.csv')

# Keep the preview as the last expression so the notebook actually displays it.
assets_df.head()