In [None]:
%pip install beacon-api==1.0.9rc2

In [None]:
import os

from beacon_api import *
# Connect to the Beacon instance at beacon-cdi.maris.nl.
# The JWT is read from the BEACON_JWT_TOKEN environment variable so a real
# token never has to be pasted into (and saved with) the notebook. When the
# variable is unset, the previously hard-coded empty string is used.
client = Client(
    "https://beacon-cdi.maris.nl",
    jwt_token=os.environ.get("BEACON_JWT_TOKEN", ""),
)

In [None]:
# Ask the client for its tables; later cells index the result by table
# name (tables['default']).
tables = client.list_tables()
# Bare expression so the notebook renders the listing as the cell output.
tables

In [None]:
# Display the schema of the 'default' table -- presumably the reference for
# the column names used in the select cells below (verify against output).
tables['default'].get_table_schema()

In [None]:
# METADATA
# Start a new query on the 'default' table and register the dataset-level
# identification columns, one select per statement. The selects are added in
# a fixed order (presumably the order of the output columns).
query_builder = tables['default'].query()

# Source / dataset identification
query_builder = query_builder.add_select_column("__entry_key")
query_builder = query_builder.add_select(
    SelectLiteral("BEACON_SEADATANET", alias="SOURCE_BDI")
)
query_builder = query_builder.add_select(
    SelectFunction(
        "concat",
        [SelectColumn("SDN_EDMO_CODE"), SelectColumn("SDN_LOCAL_CDI_ID")],
        alias="SOURCE_BDI_DATASET_ID",
    )
)
query_builder = query_builder.add_select(SelectColumn("SDN_LOCAL_CDI_ID"))
query_builder = query_builder.add_select(SelectColumn("SDN_EDMO_CODE"))

# Platform code: raw column plus its COMMON_CSR alias
query_builder = query_builder.add_select_column("csr_platform_code")
query_builder = query_builder.add_select_column("csr_platform_code", "COMMON_CSR")

# Instrument and feature type
query_builder = query_builder.add_select(SelectColumn("instrument___gear_type"))
query_builder = query_builder.add_select(SelectColumn(".featureType"))
query_builder = query_builder.add_select(
    SelectColumn(".featureType", alias="COMMON_FEATURE_TYPE")
)

# EDMO code alias and ODV tag (same EDMO + local CDI id concatenation as
# SOURCE_BDI_DATASET_ID above)
query_builder = query_builder.add_select(
    SelectColumn("SDN_EDMO_CODE", alias="COMMON_EDMO_CODE")
)
query_builder = query_builder.add_select(
    SelectFunction(
        "concat",
        [SelectColumn("SDN_EDMO_CODE"), SelectColumn("SDN_LOCAL_CDI_ID")],
        "COMMON_ODV_TAG",
    )
)

# Platform type: raw attribute plus its L06 mapping
query_builder = query_builder.add_select(SelectColumn(".platform_type"))
query_builder = query_builder.add_select(
    SelectFunction(
        "map_seadatanet_platform_l06",
        [SelectColumn(".platform_type")],
        alias="COMMON_PLATFORM_L06",
    )
)

# C17 platform reference: "SDN:C17::<code>" or "" depending on
# csr_platform_code. nvl2 presumably returns its second argument when the
# first is non-null and the third otherwise -- confirm against Beacon docs.
platform_code = SelectColumn("csr_platform_code")
prefixed_platform_code = SelectFunction(
    "concat",
    [SelectLiteral("SDN:C17::"), SelectColumn("csr_platform_code")],
)
missing_platform_code = SelectLiteral("")
query_builder = query_builder.add_select(
    SelectFunction(
        "nvl2",
        [platform_code, prefixed_platform_code, missing_platform_code],
        alias="COMMON_PLATFORM_C17",
    )
)

In [None]:
# TIME / POSITION / DEPTH
# Adds the original coordinate columns, their COMMON_* aliases and the
# constant units / vocabulary literals, in a fixed order.

# Time: original columns followed by the COMMON_* aliases.
query_builder = query_builder.add_select(SelectColumn("TIME"))
query_builder = query_builder.add_select(SelectColumn("TIME_SEADATANET_QC"))
query_builder = query_builder.add_select(SelectColumn("TIME", alias="COMMON_TIME"))
query_builder = query_builder.add_select(
    SelectColumn("TIME_SEADATANET_QC", alias="COMMON_TIME_QC")
)

# Latitude: raw column and attributes, alias, then constant metadata.
# NOTE(review): the CF standard name is lowercase "latitude"; the literal
# "Latitude" is kept exactly as it was -- confirm which one consumers expect.
for latitude_column in ("LATITUDE", "LATITUDE.standard_name", "LATITUDE.units"):
    query_builder = query_builder.add_select(SelectColumn(latitude_column))
query_builder = query_builder.add_select(
    SelectColumn("LATITUDE", alias="COMMON_LATITUDE")
)
for literal_value, literal_alias in (
    ("degrees_north", "COMMON_LATITUDE_UNITS"),
    ("Latitude", "COMMON_LATITUDE_STANDARD_NAME"),
    ("SDN:P01::ALATZZ01", "COMMON_LATITUDE_P01"),
    ("SDN:P06::DEGN", "COMMON_LATITUDE_P06"),
):
    query_builder = query_builder.add_select(SelectLiteral(literal_value, literal_alias))

# Longitude: same layout as latitude (same capitalisation note applies).
for longitude_column in ("LONGITUDE", "LONGITUDE.standard_name", "LONGITUDE.units"):
    query_builder = query_builder.add_select(SelectColumn(longitude_column))
query_builder = query_builder.add_select(
    SelectColumn("LONGITUDE", alias="COMMON_LONGITUDE")
)
for literal_value, literal_alias in (
    ("degrees_east", "COMMON_LONGITUDE_UNITS"),
    ("Longitude", "COMMON_LONGITUDE_STANDARD_NAME"),
    ("SDN:P01::ALONZZ01", "COMMON_LONGITUDE_P01"),
    ("SDN:P06::DEGE", "COMMON_LONGITUDE_P06"),
):
    query_builder = query_builder.add_select(SelectLiteral(literal_value, literal_alias))

# Position quality flag, raw and aliased.
query_builder = query_builder.add_select(SelectColumn("POSITION_QC"))
query_builder = query_builder.add_select(
    SelectColumn("POSITION_QC", alias="COMMON_POSITION_QC")
)

# DEPTH
# Pressure and depth source columns, listed in coalescing priority order.
depth_source_columns = ["PRES", "PRESPR01", "PRESPS02", "DEPTH", "ADEPZZ01", "DEPH", "DEPHPR01"]
# Matching quality-flag columns, same order as the source columns.
depth_qc_columns = [f"{source_column}_SEADATANET_QC" for source_column in depth_source_columns]

# Keep every source column individually in the output.
for source_column in depth_source_columns:
    query_builder = query_builder.add_select_column(source_column)

# COMMON_ORIGIN_DEPTH coalesces the pressure and depth columns as they are
# (no pressure-to-depth conversion is applied in this select).
query_builder = query_builder.add_select_coalesced(
    list(depth_source_columns), alias="COMMON_ORIGIN_DEPTH"
)
query_builder = query_builder.add_select_coalesced(
    list(depth_qc_columns), alias="COMMON_ORIGIN_DEPTH_QC"
)

# COALESCED DEPTH
# The first candidate is the pressure columns passed through
# map_pressure_to_depth together with LATITUDE; the depth columns follow.
pressure = Functions.coalesce(["PRES", "PRESPR01", "PRESPS02"], alias='PRESSURE')
depth_from_pressure = Functions.map_pressure_to_depth(
    pressure,
    "LATITUDE",
    alias="depth_from_pres",
)
query_builder = query_builder.add_select(
    Functions.coalesce(
        [depth_from_pressure, "DEPTH", "ADEPZZ01", "DEPH", "DEPHPR01"],
        alias="COMMON_DEPTH",
    )
)
query_builder = query_builder.add_select_coalesced(
    list(depth_qc_columns), alias="COMMON_DEPTH_QC"
)

# Constant depth metadata.
for literal_value, literal_alias in (
    ("m", "COMMON_DEPTH_UNITS"),
    ("depth", "COMMON_DEPTH_STANDARD_NAME"),
    ("SDN:P01::ADEPZZ01", "COMMON_DEPTH_P01"),
    ("SDN:P06::ULAA", "COMMON_DEPTH_P06"),
):
    query_builder = query_builder.add_select(SelectLiteral(literal_value, literal_alias))

In [None]:
# TEMPERATURE
# Temperature source columns, listed in coalescing priority order.
temperature_columns = [
    "PSSTTS01",
    "TEMPBT01",
    "TEMPCC01",
    "TEMPCC02",
    "TEMPCU01",
    "TEMPCU02",
    "TEMPDTNX",
    "TEMPET01",
    "TEMPHG01",
    "TEMPHG02",
    "TEMPHGNX",
    "TEMPHU01",
    "TEMPMT01",
    "TEMPP901",
    "TEMPPR01",
    "TEMPRTNX",
    "TEMPS601",
    "TEMPS901",
    "TEMPST01",
    "TEMPST02",
    "TEMPSU01",
    "TEMPSZ01",
    "TEMPTC01",
    "TEMPTC02",
    "TMCLCCI1",
    "TMCLCCL1",
]
# Derive the quality-flag columns from the value columns so the two lists
# cannot drift apart. The hand-written QC list was missing
# TEMPBT01_SEADATANET_QC although TEMPBT01 is a value column.
# NOTE(review): assumes TEMPBT01_SEADATANET_QC exists in the table schema,
# as it does for every other parameter here -- confirm with get_table_schema().
temperature_qc_columns = [f"{column}_SEADATANET_QC" for column in temperature_columns]

query_builder = (
    query_builder
    # Aliases carry the COMMON_ prefix like every other harmonised column;
    # the export filter refers to "COMMON_TEMPERATURE", so the former
    # "TEMPERATURE" alias left that filter pointing at an unselected name.
    .add_select_coalesced(temperature_columns, alias="COMMON_TEMPERATURE")
    .add_select_coalesced(temperature_qc_columns, alias="COMMON_TEMPERATURE_QC")
    # Constant metadata describing the harmonised temperature column.
    .add_select(SelectLiteral("degree_C", "COMMON_TEMPERATURE_UNITS"))
    .add_select(SelectLiteral("sea_water_temperature", "COMMON_TEMPERATURE_STANDARD_NAME"))
    .add_select(SelectLiteral("SDN:P01::TEMPPR01", "COMMON_TEMPERATURE_P01"))
    .add_select(SelectLiteral("SDN:P06::UPAA", "COMMON_TEMPERATURE_P06"))
    # Instrument (L05) mapping from the gear-type column.
    .add_select(SelectFunction("map_seadatanet_instrument_l05", [SelectColumn("instrument___gear_type")], alias="COMMON_TEMPERATURE_L05"))
)

In [None]:
# SALINITY
# Salinity source columns, listed in coalescing priority order.
salinity_columns = [
    "ASLTZZ01", "ODSDM021", "PSALBSTX", "PSALCC01", "PSALCC02",
    "PSALCU01", "PSALCU02", "PSALPR01", "PSALPR02", "PSALST01",
    "PSALSU01", "PSALSZ01", "PSALTC01", "PSALTC02", "PSALZZXX",
    "PSCLCCI1", "PSCLCCL1", "PSLTZZ01", "SSALAGT1", "SSALBSTX",
    "SSALPR01", "SSALSG01", "SSALST01",
]
# Quality-flag column for each source column, in the same order.
salinity_qc_columns = [f"{column}_SEADATANET_QC" for column in salinity_columns]

# Harmonised salinity value and its quality flag.
query_builder = query_builder.add_select_coalesced(salinity_columns, alias="COMMON_SALINITY")
query_builder = query_builder.add_select_coalesced(salinity_qc_columns, alias="COMMON_SALINITY_QC")

# Constant metadata describing the harmonised salinity column.
for literal_value, literal_alias in (
    ("sea_water_salinity", "COMMON_SALINITY_STANDARD_NAME"),
    ("1e-3", "COMMON_SALINITY_UNITS"),
    ("SDN:P01::PSLTZZ01", "COMMON_SALINITY_P01"),
    ("SDN:P06::UUUU", "COMMON_SALINITY_P06"),
):
    query_builder = query_builder.add_select(SelectLiteral(literal_value, literal_alias))

# Instrument (L05) mapping from the gear-type column.
query_builder = query_builder.add_select(
    SelectFunction(
        "map_seadatanet_instrument_l05",
        [SelectColumn("instrument___gear_type")],
        alias="COMMON_SALINITY_L05",
    )
)

In [None]:
import os 
import json

# Export one parquet file per year into ./output.
# Make output dir
output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)

# Year span to export. LAST_YEAR is inclusive: the loop used to be
# range(1950, 2025), which stopped at 2024 although 2025 was intended.
FIRST_YEAR = 1950
LAST_YEAR = 2025

# Years whose export raised, so they can be re-run without scanning the log.
failed_years = []

for year in range(FIRST_YEAR, LAST_YEAR + 1):
    print(f"Processing year {year}")
    # Drop the filters left over from the previous iteration; otherwise the
    # per-year filters added below would pile up on the shared query object.
    query_builder.filters = []
    # Restrict to the current calendar year.
    query_builder = query_builder.add_range_filter("COMMON_TIME", f"{year}-01-01T00:00:00Z", f"{year}-12-31T23:59:59Z")
    # Keep rows that have a temperature or a salinity value. These names must
    # match aliases selected earlier in the query.
    query_builder = (
        query_builder.add_filter(OrFilter([
            IsNotNullFilter("COMMON_TEMPERATURE"),
            IsNotNullFilter("COMMON_SALINITY"),
        ]))
    )

    output_file = os.path.join(output_dir, f"seadatanet_{year}.parquet")
    try:
        query_builder.to_parquet(output_file)
        print(f"Saved to {output_file}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next year.
        print(f"Failed to process year {year}: {e}")
        failed_years.append(year)

# Summarise failures in one place at the end of the run.
if failed_years:
    print(f"Years that failed and need a re-run: {failed_years}")