In [1]:
import pystac
import xarray as xr
import geopandas as gpd
import pandas as pd
from datetime import date, datetime, timezone


stac_endpoint_url = 'https://s3.waw3-1.cloudferro.com/emodnet/bcubed/stac/catalog.json'


Here we query the STAC endpoint for the different parameters of interest and the time period of interest

We get a list of all assets with '.zarr' on the end.  Thus ensure we have zarr links to query and avoid downloading entire datasets

In [1]:
# Define the parameters and their corresponding file extensions
parameter_extensions = {
    'chlorophyll': '.zarr',
    'iron': '.zarr',
    'nitrate': '.zarr',
    'salinity': '.zarr',
    'photosyntheticallyavailableradiation': '.zarr',
    'totalphytoplankton': '.zarr'
}

zarr_assets= []
# Define the start and end datetime objects for the time period
start_datetime = datetime(2020, 1, 1, tzinfo=timezone.utc)
end_datetime = datetime(2090, 12, 31, tzinfo=timezone.utc)

# Create an empty list to store items with '.zarr' assets for each parameter
parameter_items = {}

# Load the catalog
catalog = pystac.Catalog.from_file(stac_endpoint_url)

catalog = pystac.Catalog.from_file('https://s3.waw3-1.cloudferro.com/emodnet/bcubed/stac/catalog.json')

# Iterate over collections in the catalog
for collection in catalog.get_children():
    try:
        # Check if the collection ID contains any of the parameters
        for parameter in parameter_extensions.keys():
            if parameter in collection.id.lower():
                # Access the temporal extent
                temporal_extent = collection.extent.temporal.intervals
                if temporal_extent:
                    start_time, end_time = temporal_extent[0][0], temporal_extent[0][1]
                    if start_datetime <= start_time <= end_datetime or start_datetime <= end_time <= end_datetime:
                        # Check for items with '.zarr' assets
                        for item in collection.get_all_items():
                            for asset_key, asset in item.assets.items():
                                if asset.href.endswith(parameter_extensions[parameter]):
                                    if parameter not in parameter_items:
                                        parameter_items[parameter] = []
                                    parameter_items[parameter].append({
                                        'Item ID': item.id,
                                        'Collection ID': collection.id,
                                        'Start Time': start_time,
                                        'End Time': end_time,
                                        'asset': asset.href
                                    })
                                    zarr_assets.append(asset.href)
    except Exception as e:
        print(f'Problem with collection {collection.id}: {str(e)}')


# Print items with '.zarr' assets for each parameter
for parameter, items in parameter_items.items():
    print(f"Parameter: {parameter}")
    for item in items:
        
        print(f"Item ID: {item['Item ID']},  Start Time: {item['Start Time']}, End Time: {item['End Time']}, \n {asset.href}") 


print(zarr_assets)

Parameter: chlorophyll
Item ID: water_body_chlorophyll_a_masked_using_relative_error_threshold_0.5_2023-03-31_2023-03-31_9.4_53.0_30.9_65.9,  Start Time: 2023-03-19 00:00:00+00:00, End Time: 2023-03-31 00:00:00+00:00, 
 https://s3.waw3-1.cloudferro.com/emodnet/bcubed/data/biooracle/phytoplankton_concentration/phyc_ssp119_2020_2100_depthsurf.zarr
Item ID: water_body_chlorophyll_a_masked_using_relative_error_threshold_0.5_2023-03-19_2023-03-19_26.5_40.0_42.0_48.0,  Start Time: 2023-03-19 00:00:00+00:00, End Time: 2023-03-31 00:00:00+00:00, 
 https://s3.waw3-1.cloudferro.com/emodnet/bcubed/data/biooracle/phytoplankton_concentration/phyc_ssp119_2020_2100_depthsurf.zarr
Item ID: deepest_values_of_water_body_chlorophyll_a_2023-05-16_2023-05-16_-42.0_25.0_-0.1_47.9,  Start Time: 2023-05-16 00:00:00+00:00, End Time: 2023-05-16 00:00:00+00:00, 
 https://s3.waw3-1.cloudferro.com/emodnet/bcubed/data/biooracle/phytoplankton_concentration/phyc_ssp119_2020_2100_depthsurf.zarr
Item ID: Baseline_avera

Here we examine the zarr files within the geographic bounds of interest
For the parameter of interest

In [None]:
TODO
for zarrlink in zarr_assets:


    for var in list(data_vars)