In [None]:
# Install the Beacon Python client into the kernel's environment, pinned to an
# exact (release-candidate) version so the notebook runs against a known API.
%pip install beacon-api==1.0.9rc2

In [None]:
import os

# NOTE(review): the star import supplies every Beacon name used by later cells
# (Client, SelectColumn, SelectLiteral, SelectFunction, Functions, OrFilter,
# IsNotNullFilter); consider replacing it with explicit imports.
from beacon_api import *

# Take the JWT from the BEACON_JWT_TOKEN environment variable so a real token
# never has to be typed into (and saved with) the notebook. When the variable
# is unset this falls back to the empty token the notebook used before.
client = Client(
    "https://beacon-cora-pr.maris.nl",
    jwt_token=os.environ.get("BEACON_JWT_TOKEN", ""),
)

In [None]:
# Ask the client for its tables; later cells look tables up by name in this result.
tables = client.list_tables()
# Bare last expression so the notebook displays the result.
tables

In [None]:
# Display the schema of the 'default' table, the table the query below is built on.
tables['default'].get_table_schema()

In [None]:
# METADATA
# Begin a fresh query on the 'default' table. The following cells keep appending
# selections to this same `query_builder`, so re-running this cell starts over.
query_builder = tables['default'].query().add_select_column("__entry_key")

# Applied strictly in list order, one add_select() call per entry.
metadata_selects = [
    # Identification of the source and of the dataset within it
    SelectLiteral("BEACON_CORA_PR", alias="SOURCE_BDI"),
    SelectColumn("DC_REFERENCE", alias="SOURCE_BDI_DATASET_ID"),
    # Source columns/attributes under their original names
    SelectColumn("PLATFORM_NUMBER"),
    SelectColumn("DC_REFERENCE"),
    SelectColumn("WMO_INST_TYPE"),
    SelectColumn("INSTITUTION_EDMO_CODE"),
    SelectColumn(".bigram"),
    SelectColumn(".featureType"),
    # The same information again under harmonised COMMON_* names
    SelectColumn(".featureType", alias="COMMON_FEATURE_TYPE"),
    SelectColumn("INSTITUTION_EDMO_CODE", alias="COMMON_EDMO_CODE"),
    # Tag built by concatenating the entry key with the dataset reference
    SelectFunction(
        "concat",
        [SelectColumn("__entry_key"), SelectColumn("DC_REFERENCE")],
        "COMMON_ODV_TAG",
    ),
    # Platform code derived from the bigram and the whitespace-trimmed WMO
    # instrument type (mapping performed by the named server-side function)
    SelectFunction(
        "map_cmems_bigram_l06",
        [
            SelectColumn(".bigram"),
            SelectFunction("btrim", [SelectColumn("WMO_INST_TYPE")]),
        ],
        alias="COMMON_PLATFORM_L06",
    ),
]
for select_expr in metadata_selects:
    query_builder = query_builder.add_select(select_expr)

In [None]:
# TIME AND POSITION
# Every source column is selected under its own name and once more under a
# COMMON_* alias; units, standard names and vocabulary codes for the COMMON_*
# columns are attached as literals. Entries are applied in list order.
position_selects = [
    SelectColumn("TIME"),
    SelectColumn("TIME_QC"),
    SelectColumn("TIME", alias="COMMON_TIME"),
    SelectColumn("TIME_QC", alias="COMMON_TIME_QC"),

    SelectColumn("LATITUDE"),
    SelectColumn("LATITUDE.standard_name"),
    SelectColumn("LATITUDE.units"),
    SelectColumn("LATITUDE", alias="COMMON_LATITUDE"),
    SelectLiteral("degrees_north", "COMMON_LATITUDE_UNITS"),
    SelectLiteral("Latitude", "COMMON_LATITUDE_STANDARD_NAME"),
    SelectLiteral("SDN:P01::ALATZZ01", "COMMON_LATITUDE_P01"),
    SelectLiteral("SDN:P06::DEGN", "COMMON_LATITUDE_P06"),

    SelectColumn("LONGITUDE"),
    SelectColumn("LONGITUDE.standard_name"),
    SelectColumn("LONGITUDE.units"),
    SelectColumn("LONGITUDE", alias="COMMON_LONGITUDE"),
    SelectLiteral("degrees_east", "COMMON_LONGITUDE_UNITS"),
    SelectLiteral("Longitude", "COMMON_LONGITUDE_STANDARD_NAME"),
    SelectLiteral("SDN:P01::ALONZZ01", "COMMON_LONGITUDE_P01"),
    SelectLiteral("SDN:P06::DEGE", "COMMON_LONGITUDE_P06"),

    SelectColumn("POSITION_QC"),
    SelectColumn("POSITION_QC", alias="COMMON_POSITION_QC"),
]
for select_expr in position_selects:
    query_builder = query_builder.add_select(select_expr)

# DEPTH
# Pressure (PRES) and depth (DEPH) columns plus their attributes, passed
# through under their original names.
depth_source_columns = [
    "PRES",
    "PRES_ADJUSTED",
    "PRES.standard_name",
    "PRES_ADJUSTED.standard_name",
    "PRES.units",
    "PRES_ADJUSTED.units",
    "PRES_QC",
    "PRES_ADJUSTED_QC",
    "DEPH",
    "DEPH_ADJUSTED",
    "DEPH_QC",
    "DEPH_ADJUSTED_QC",
    "DEPH.standard_name",
    "DEPH_ADJUSTED.standard_name",
    "DEPH.units",
    "DEPH_ADJUSTED.units",
]
for column_name in depth_source_columns:
    query_builder = query_builder.add_select_column(column_name)

# COALESCED DEPTH
# Origin value, QC flag, units and standard name, each coalesced over the
# candidate columns in the listed order (adjusted pressure, pressure, depth).
origin_depth_coalesces = [
    (["PRES_ADJUSTED", "PRES", "DEPH"], "COMMON_ORIGIN_DEPTH"),
    (["PRES_ADJUSTED_QC","PRES_QC","DEPH_QC"], "COMMON_ORIGIN_DEPTH_QC"),
    (["PRES_ADJUSTED.units", "PRES.units", "DEPH.units"], "COMMON_ORIGIN_DEPTH_UNITS"),
    (["PRES_ADJUSTED.standard_name", "PRES.standard_name", "DEPH.standard_name"], "COMMON_ORIGIN_DEPTH_STANDARD_NAME"),
]
for candidate_columns, alias in origin_depth_coalesces:
    query_builder = query_builder.add_select_coalesced(candidate_columns, alias)

# Vocabulary code for the origin depth: one label is paired with PRES and one
# with DEPH and handed to the server-side "coalesce_label" function.
origin_depth_labels = [
    ("SDN:P01::PRESPR01", "SDN:P01::ADEPZZ01", "COMMON_ORIGIN_DEPTH_P01"),
    ("SDN:P06::UPDB", "SDN:P06::ULAA", "COMMON_ORIGIN_DEPTH_P06"),
]
for pres_label, deph_label, alias in origin_depth_labels:
    query_builder = query_builder.add_select(
        SelectFunction(
            "coalesce_label",
            [SelectColumn("PRES"), SelectLiteral(pres_label), SelectColumn("DEPH"), SelectLiteral(deph_label)],
            alias,
        )
    )

# Harmonised depth: the coalesced pressure is passed through
# map_pressure_to_depth together with LATITUDE, and that result is coalesced
# with DEPH.
best_pressure = Functions.coalesce(["PRES_ADJUSTED", "PRES"],"pres")
depth_from_pressure = Functions.map_pressure_to_depth(best_pressure, "LATITUDE", "pres_harmonized")
query_builder = query_builder.add_select(
    Functions.coalesce([depth_from_pressure, "DEPH"], "COMMON_DEPTH")
)
query_builder = query_builder.add_select_coalesced(["PRES_ADJUSTED_QC","PRES_QC","DEPH_QC"], "COMMON_DEPTH_QC")

# Fixed descriptors of the harmonised depth column.
common_depth_literals = [
    SelectLiteral("m", "COMMON_DEPTH_UNITS"),
    SelectLiteral("depth", "COMMON_DEPTH_STANDARD_NAME"),
    SelectLiteral("SDN:P01::ADEPZZ01", "COMMON_DEPTH_P01"),
    SelectLiteral("SDN:P06::ULAA", "COMMON_DEPTH_P06"),
]
for select_expr in common_depth_literals:
    query_builder = query_builder.add_select(select_expr)

In [None]:
# map temperature
# Raw and adjusted temperature columns with their attributes, passed through
# under their original names.
temperature_source_columns = [
    "TEMP",
    "TEMP_QC",
    "TEMP.units",
    "TEMP.standard_name",

    "TEMP_ADJUSTED",
    "TEMP_ADJUSTED_QC",
    "TEMP_ADJUSTED.units",
    "TEMP_ADJUSTED.standard_name",
]
for column_name in temperature_source_columns:
    query_builder = query_builder.add_select_column(column_name)

# Origin temperature: value, QC, units and standard name coalesced over the
# candidate columns in the listed order.
# NOTE(review): the depth cell lists the *_ADJUSTED column first, whereas here
# the raw TEMP column comes before TEMP_ADJUSTED — confirm the intended precedence.
origin_temperature_coalesces = [
    (["TEMP", "TEMP_ADJUSTED"], "COMMON_ORIGIN_TEMPERATURE"),
    (["TEMP_QC", "TEMP_ADJUSTED_QC"], "COMMON_ORIGIN_TEMPERATURE_QC"),
    (["TEMP.units", "TEMP_ADJUSTED.units"], "COMMON_ORIGIN_TEMPERATURE_UNITS"),
    (["TEMP.standard_name", "TEMP_ADJUSTED.standard_name"], "COMMON_ORIGIN_TEMPERATURE_STANDARD_NAME"),
]
for candidate_columns, alias in origin_temperature_coalesces:
    query_builder = query_builder.add_select_coalesced(candidate_columns, alias)

for select_expr in [
    SelectLiteral("SDN:P01::TEMPPR01", "COMMON_ORIGIN_TEMPERATURE_P01"),
    SelectLiteral("SDN:P06::UPAA", "COMMON_ORIGIN_TEMPERATURE_P06"),
]:
    query_builder = query_builder.add_select(select_expr)

# Harmonised temperature: same coalesced value and QC, with fixed descriptors.
common_temperature_coalesces = [
    (["TEMP", "TEMP_ADJUSTED"], "COMMON_TEMPERATURE"),
    (["TEMP_QC", "TEMP_ADJUSTED_QC"], "COMMON_TEMPERATURE_QC"),
]
for candidate_columns, alias in common_temperature_coalesces:
    query_builder = query_builder.add_select_coalesced(candidate_columns, alias)

for select_expr in [
    SelectLiteral("degree_C", "COMMON_TEMPERATURE_UNITS"),
    SelectLiteral("sea_water_temperature", "COMMON_TEMPERATURE_STANDARD_NAME"),
    SelectLiteral("SDN:P01::TEMPPR01", "COMMON_TEMPERATURE_P01"),
    SelectLiteral("SDN:P06::UPAA", "COMMON_TEMPERATURE_P06"),
]:
    query_builder = query_builder.add_select(select_expr)

# Set instrument type
# L05 is coalesced over three candidates, in this order: the direct mapping of
# the trimmed WMO instrument type, the WMO -> L22 -> L05 mapping, and the
# bigram mapping.
temperature_l05_candidates = [
    SelectFunction("map_wmo_instrument_type_l05", [SelectFunction("btrim",[SelectColumn("WMO_INST_TYPE")])]),
    SelectFunction("map_l22_l05", [SelectFunction("map_wmo_instrument_type_l22", [SelectFunction("btrim",[SelectColumn("WMO_INST_TYPE")])])]),
    SelectFunction("map_cmems_bigram_l05", [SelectColumn(".bigram")]),
]
query_builder = query_builder.add_select(
    SelectFunction("coalesce", temperature_l05_candidates, "COMMON_TEMPERATURE_L05")
)
# L22 comes from the trimmed WMO instrument type alone.
query_builder = query_builder.add_select(
    SelectFunction(
        "map_wmo_instrument_type_l22",
        [SelectFunction("btrim",[SelectColumn("WMO_INST_TYPE")])],
        "COMMON_TEMPERATURE_L22",
    )
)

In [None]:
# map salinity
# Raw and adjusted practical-salinity columns with their attributes, passed
# through under their original names.
salinity_source_columns = [
    "PSAL",
    "PSAL_QC",
    "PSAL.units",
    "PSAL.standard_name",

    "PSAL_ADJUSTED",
    "PSAL_ADJUSTED_QC",
    "PSAL_ADJUSTED.units",
    "PSAL_ADJUSTED.standard_name",
]
for column_name in salinity_source_columns:
    query_builder = query_builder.add_select_column(column_name)

# Origin salinity: value, QC, units and standard name coalesced over the
# candidate columns in the listed order.
# NOTE(review): the depth cell lists the *_ADJUSTED column first, whereas here
# the raw PSAL column comes before PSAL_ADJUSTED — confirm the intended precedence.
origin_salinity_coalesces = [
    (["PSAL", "PSAL_ADJUSTED"], "COMMON_ORIGIN_SALINITY"),
    (["PSAL_QC", "PSAL_ADJUSTED_QC"], "COMMON_ORIGIN_SALINITY_QC"),
    (["PSAL.units", "PSAL_ADJUSTED.units"], "COMMON_ORIGIN_SALINITY_UNITS"),
    (["PSAL.standard_name", "PSAL_ADJUSTED.standard_name"], "COMMON_ORIGIN_SALINITY_STANDARD_NAME"),
]
for candidate_columns, alias in origin_salinity_coalesces:
    query_builder = query_builder.add_select_coalesced(candidate_columns, alias)

for select_expr in [
    SelectLiteral("SDN:P01::PSLTZZ01", "COMMON_ORIGIN_SALINITY_P01"),
    SelectLiteral("SDN:P06::UUUU", "COMMON_ORIGIN_SALINITY_P06"),
]:
    query_builder = query_builder.add_select(select_expr)

# Harmonised salinity: same coalesced value and QC, with fixed descriptors.
common_salinity_coalesces = [
    (["PSAL", "PSAL_ADJUSTED"], "COMMON_SALINITY"),
    (["PSAL_QC", "PSAL_ADJUSTED_QC"], "COMMON_SALINITY_QC"),
]
for candidate_columns, alias in common_salinity_coalesces:
    query_builder = query_builder.add_select_coalesced(candidate_columns, alias)

for select_expr in [
    SelectLiteral("sea_water_salinity", "COMMON_SALINITY_STANDARD_NAME"),
    SelectLiteral("1e-3", "COMMON_SALINITY_UNITS"),
    SelectLiteral("SDN:P01::PSLTZZ01", "COMMON_SALINITY_P01"),
    SelectLiteral("SDN:P06::UUUU", "COMMON_SALINITY_P06"),
]:
    query_builder = query_builder.add_select(select_expr)

# Set instrument type
# L05 is coalesced over three candidates, in this order: the direct mapping of
# the trimmed WMO instrument type, the WMO -> L22 -> L05 mapping, and the
# bigram mapping.
salinity_l05_candidates = [
    SelectFunction("map_wmo_instrument_type_l05", [SelectFunction("btrim",[SelectColumn("WMO_INST_TYPE")])]),
    SelectFunction("map_l22_l05", [SelectFunction("map_wmo_instrument_type_l22", [SelectFunction("btrim",[SelectColumn("WMO_INST_TYPE")])])]),
    SelectFunction("map_cmems_bigram_l05", [SelectColumn(".bigram")]),
]
query_builder = query_builder.add_select(
    SelectFunction("coalesce", salinity_l05_candidates, "COMMON_SALINITY_L05")
)
# L22 comes from the trimmed WMO instrument type alone.
query_builder = query_builder.add_select(
    SelectFunction(
        "map_wmo_instrument_type_l22",
        [SelectFunction("btrim",[SelectColumn("WMO_INST_TYPE")])],
        "COMMON_SALINITY_L22",
    )
)

In [None]:
import os
import json

# Years to export, both ends INCLUDED. One parquet file is written per year.
FIRST_YEAR = 1950
LAST_YEAR = 2025

# Make output dir
output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)

# Years whose export raised; reported together once the loop has finished so
# that failures are not lost among the per-year progress lines.
failed_years = []

# range() excludes its stop value, so LAST_YEAR + 1 is needed for the loop to
# actually reach LAST_YEAR (the previous range(1950, 2025) stopped at 2024).
for year in range(FIRST_YEAR, LAST_YEAR + 1):
    print(f"Processing year {year}")
    # Discard the filters added in the previous iteration; without this reset
    # the year ranges would accumulate on the shared builder.
    query_builder.filters = []
    # NOTE(review): the upper bound is the last whole second of the year, so a
    # timestamp with a fractional part inside that final second would be
    # missed if the range filter is inclusive — confirm TIME resolution.
    query_builder = query_builder.add_range_filter("TIME", f"{year}-01-01T00:00:00Z", f"{year}-12-31T23:59:59Z")
    # Keep only rows where at least one of TEMP / PSAL is present.
    query_builder = (
        query_builder
        .add_filter(OrFilter([
            IsNotNullFilter("TEMP"),
            IsNotNullFilter("PSAL"),
        ]))
    )
    output_file = os.path.join(output_dir, f"cora_pr_{year}.parquet")
    try:
        query_builder.to_parquet(output_file)
        print(f"Saved to {output_file}")
    except Exception as e:
        # Deliberately best effort: one failing year must not abort the rest
        # of the export. The year is recorded for the summary below.
        print(f"Failed to process year {year}: {e}")
        failed_years.append(year)

# Final summary so a partially failed export is obvious at a glance.
if failed_years:
    print(f"Export finished with {len(failed_years)} failed year(s): {failed_years}")
else:
    print("Export finished: all years exported")