In [None]:
# Install the pinned beacon-api client release (1.0.9rc2) used by this notebook.
%pip install beacon-api==1.0.9rc2

In [None]:
import os

# Import only the names this notebook uses instead of `import *`, so it is
# clear where each query-building class comes from.
from beacon_api import (
    Client,
    IsNotNullFilter,
    OrFilter,
    SelectColumn,
    SelectFunction,
    SelectLiteral,
)

# Client for the Beacon WOD node. The JWT token is read from the
# BEACON_JWT_TOKEN environment variable so it never has to be pasted into the
# notebook; when the variable is unset the token is the empty string, exactly
# as before.
client = Client(
    "https://beacon-wod.maris.nl",
    jwt_token=os.environ.get("BEACON_JWT_TOKEN", ""),
)

In [None]:
# Fetch the tables available on the node; later cells look one up by name
# (tables['default']). The bare expression on the last line displays the result.
tables = client.list_tables()
tables

In [None]:
# Display the schema of the 'default' table, the table the query below is built on.
tables['default'].get_table_schema()

In [None]:
# Numeric fill value (-1e10). The Temperature and Salinity selects further
# down pass this same value to `nullif` when building the COMMON_* columns.
WOD_FILL_VALUE = -1e10

In [None]:
def _add_literals(builder, literals):
    """Append one SelectLiteral per (value, alias) pair, in order, and return the builder."""
    for literal_value, literal_alias in literals:
        builder = builder.add_select(SelectLiteral(literal_value, literal_alias))
    return builder


# Base query on the 'default' table. Selects are added one coordinate group at
# a time, in exactly the same order as a single long chain would add them:
# raw columns first, then the COMMON_* alias, then the literal metadata.
query_builder = tables['default'].query()

# Entry key and time (raw column plus COMMON_TIME alias).
query_builder = query_builder.add_select_column("__entry_key")
query_builder = query_builder.add_select_column("time")
query_builder = query_builder.add_select_column("time", "COMMON_TIME")

# Longitude.
query_builder = query_builder.add_select_column("lon")
query_builder = query_builder.add_select_column("lon.units")
query_builder = query_builder.add_select_column("lon", "COMMON_LONGITUDE")
query_builder = _add_literals(query_builder, [
    ("degrees_east", "COMMON_LONGITUDE_UNITS"),
    ("Longitude", "COMMON_LONGITUDE_STANDARD_NAME"),
    ("SDN:P01::ALONZZ01", "COMMON_LONGITUDE_P01"),
    ("SDN:P06::DEGE", "COMMON_LONGITUDE_P06"),
])

# Latitude.
query_builder = query_builder.add_select_column("lat")
query_builder = query_builder.add_select_column("lat.units")
query_builder = query_builder.add_select_column("lat", "COMMON_LATITUDE")
query_builder = _add_literals(query_builder, [
    ("degrees_north", "COMMON_LATITUDE_UNITS"),
    ("Latitude", "COMMON_LATITUDE_STANDARD_NAME"),
    ("SDN:P01::ALATZZ01", "COMMON_LATITUDE_P01"),
    ("SDN:P06::DEGN", "COMMON_LATITUDE_P06"),
])

# Depth as delivered: raw z, its units and WOD flag, then the
# COMMON_ORIGIN_DEPTH aliases (value and flag are passed through unchanged).
query_builder = query_builder.add_select_column("z")
query_builder = query_builder.add_select_column("z.units")
query_builder = query_builder.add_select_column("z_WODflag")
query_builder = query_builder.add_select_column("z", "COMMON_ORIGIN_DEPTH")
query_builder = query_builder.add_select_column("z_WODflag", "COMMON_ORIGIN_DEPTH_QC")
query_builder = _add_literals(query_builder, [
    ("m", "COMMON_ORIGIN_DEPTH_UNITS"),
    ("Depth", "COMMON_ORIGIN_DEPTH_STANDARD_NAME"),
    ("SDN:P01::ADEPZZ01", "COMMON_ORIGIN_DEPTH_P01"),
    ("SDN:P06::ULAA", "COMMON_ORIGIN_DEPTH_P06"),
])

# Common depth: same z values, with the WOD flag passed through the
# map_wod_quality_flag function for the QC column.
query_builder = query_builder.add_select_column("z", "COMMON_DEPTH")
query_builder = query_builder.add_select(
    SelectFunction("map_wod_quality_flag", [SelectColumn("z_WODflag")], "COMMON_DEPTH_QC")
)
query_builder = _add_literals(query_builder, [
    ("m", "COMMON_DEPTH_UNITS"),
    ("depth", "COMMON_DEPTH_STANDARD_NAME"),
    ("SDN:P01::ADEPZZ01", "COMMON_DEPTH_P01"),
    ("SDN:P06::ULAA", "COMMON_DEPTH_P06"),
])

In [None]:
# Metadata selects, listed in output order and then appended one by one.
metadata_selects = [
    # Codes that are emitted as empty strings.
    SelectLiteral("", "COMMON_EDMERP_CODE"),
    SelectLiteral("", "COMMON_EDMO_CODE"),
    # Platform: C17 code derived from the Platform column, and the L06 code
    # derived from that C17 code.
    SelectFunction(
        "map_c17_l06",
        [SelectFunction("map_wod_platform_c17", [SelectColumn("Platform")])],
        "COMMON_PLATFORM_L06",
    ),
    SelectFunction("map_wod_platform_c17", [SelectColumn("Platform")], "COMMON_PLATFORM_C17"),
    # Cast identifier reused as ODV tag and as source dataset id.
    SelectColumn("wod_unique_cast", "COMMON_ODV_TAG"),
    SelectLiteral("BEACON_WOD", "SOURCE_BDI"),
    SelectColumn("wod_unique_cast", "SOURCE_BDI_DATASET_ID"),
    # Feature type: raw column and COMMON_FEATURE_TYPE alias.
    SelectColumn(".featureType"),
    SelectColumn(".featureType", alias="COMMON_FEATURE_TYPE"),
]
for select_expr in metadata_selects:
    query_builder = query_builder.add_select(select_expr)

# Source columns passed through under their own names.
passthrough_columns = (
    "dataset",
    ".institution",
    "Platform",
    "country",
    "WOD_cruise_identifier",
    "wod_unique_cast",
)
for column_name in passthrough_columns:
    query_builder = query_builder.add_select_column(column_name)

In [None]:
# Temperature
# Adds, in order: the raw Temperature columns, the COMMON_ORIGIN_TEMPERATURE_*
# pass-through columns, the COMMON_TEMPERATURE_* mapped columns and the
# instrument vocabulary columns.
query_builder = (
    query_builder
    .add_select(SelectColumn("Temperature"))
    .add_select(SelectColumn("Temperature_WODflag"))
    .add_select(SelectColumn("Temperature.units"))

    # Origin columns: value, flag, standard name and units exactly as delivered.
    .add_select(SelectColumn("Temperature", "COMMON_ORIGIN_TEMPERATURE"))
    .add_select(SelectColumn("Temperature_WODflag", "COMMON_ORIGIN_TEMPERATURE_QC"))
    .add_select(SelectColumn("Temperature.standard_name", "COMMON_ORIGIN_TEMPERATURE_STANDARD_NAME"))
    .add_select(SelectColumn("Temperature.units", "COMMON_ORIGIN_TEMPERATURE_UNITS"))
    .add_select(SelectLiteral("SDN:P01::TEMPPR01", "COMMON_ORIGIN_TEMPERATURE_P01"))
    .add_select(SelectLiteral("SDN:P06::UPAA", "COMMON_ORIGIN_TEMPERATURE_P06"))
    # MAP COMMON Temperature
    # Use the shared WOD_FILL_VALUE constant (defined in an earlier cell)
    # rather than repeating the number, so the fill value has one definition.
    # Presumably `nullif` is SQL-style (null when both arguments are equal) —
    # confirm against the Beacon function reference.
    .add_select(SelectFunction("nullif", [SelectColumn("Temperature"), SelectLiteral(WOD_FILL_VALUE)], "COMMON_TEMPERATURE"))
    .add_select(SelectFunction("map_wod_quality_flag", [SelectColumn("Temperature_WODflag")], "COMMON_TEMPERATURE_QC"))
    .add_select(SelectLiteral("sea_water_temperature", "COMMON_TEMPERATURE_STANDARD_NAME"))
    .add_select(SelectLiteral("degree_C", "COMMON_TEMPERATURE_UNITS"))
    .add_select(SelectLiteral("SDN:P01::TEMPPR01", "COMMON_TEMPERATURE_P01"))
    .add_select(SelectLiteral("SDN:P06::UPAA", "COMMON_TEMPERATURE_P06"))
    # MAP COMMON Temperature INSTRUMENTS
    # L05 is a `coalesce` over two candidates: the direct instrument -> L05
    # mapping, then the instrument -> L22 -> L05 mapping.
    .add_select(
        SelectFunction("coalesce", [
            SelectFunction("map_wod_instrument_l05", [SelectColumn("Temperature_Instrument")]),
            SelectFunction("map_l22_l05", [SelectFunction("map_wod_instrument_l22", [SelectColumn("Temperature_Instrument")])])
            ], "COMMON_TEMPERATURE_L05"))
    .add_select(SelectFunction("map_wod_instrument_l22", [SelectColumn("Temperature_Instrument")], "COMMON_TEMPERATURE_L22"))
    .add_select(SelectFunction("map_wod_instrument_l33", [SelectColumn("Temperature_Instrument")], "COMMON_TEMPERATURE_L33"))
    .add_select(SelectColumn("Temperature_Instrument"))
)

In [None]:
# Salinity
# Adds, in order: the raw Salinity columns, the COMMON_ORIGIN_SALINITY_*
# pass-through columns, the COMMON_SALINITY_* mapped columns and the
# instrument vocabulary columns.
query_builder = (
    query_builder
    .add_select(SelectColumn("Salinity"))
    .add_select(SelectColumn("Salinity_WODflag"))
    # Origin columns: value, flag and standard name as delivered; the origin
    # units column is emitted as an empty string.
    .add_select(SelectColumn("Salinity", "COMMON_ORIGIN_SALINITY"))
    .add_select(SelectColumn("Salinity_WODflag", "COMMON_ORIGIN_SALINITY_QC"))
    .add_select(SelectColumn("Salinity.standard_name", "COMMON_ORIGIN_SALINITY_STANDARD_NAME"))
    .add_select(SelectLiteral("", "COMMON_ORIGIN_SALINITY_UNITS"))
    .add_select(SelectLiteral("SDN:P01::PSLTZZ01", "COMMON_ORIGIN_SALINITY_P01"))
    .add_select(SelectLiteral("SDN:P06::UUUU", "COMMON_ORIGIN_SALINITY_P06"))
    # MAP COMMON Salinity
    # Use the shared WOD_FILL_VALUE constant (defined in an earlier cell)
    # rather than repeating the number, so the fill value has one definition.
    # Presumably `nullif` is SQL-style (null when both arguments are equal) —
    # confirm against the Beacon function reference.
    .add_select(SelectFunction("nullif", [SelectColumn("Salinity"), SelectLiteral(WOD_FILL_VALUE)], "COMMON_SALINITY"))
    .add_select(SelectFunction("map_wod_quality_flag", [SelectColumn("Salinity_WODflag")], "COMMON_SALINITY_QC"))
    .add_select(SelectLiteral("sea_water_salinity", "COMMON_SALINITY_STANDARD_NAME"))
    .add_select(SelectLiteral("1e-3", "COMMON_SALINITY_UNITS"))
    .add_select(SelectLiteral("SDN:P01::PSLTZZ01", "COMMON_SALINITY_P01"))
    .add_select(SelectLiteral("SDN:P06::UUUU", "COMMON_SALINITY_P06"))
    # Instruments
    # L05 is a `coalesce` over two candidates: the direct instrument -> L05
    # mapping, then the instrument -> L22 -> L05 mapping.
    .add_select(
        SelectFunction("coalesce", [
            SelectFunction("map_wod_instrument_l05", [SelectColumn("Salinity_Instrument")]),
            SelectFunction("map_l22_l05", [SelectFunction("map_wod_instrument_l22", [SelectColumn("Salinity_Instrument")])])
            ], "COMMON_SALINITY_L05"))
    .add_select(SelectFunction("map_wod_instrument_l22", [SelectColumn("Salinity_Instrument")], "COMMON_SALINITY_L22"))
    .add_select(SelectFunction("map_wod_instrument_l33", [SelectColumn("Salinity_Instrument")], "COMMON_SALINITY_L33"))
    .add_select(SelectColumn("Salinity_Instrument"))
)

In [None]:
import os 
import json

# Export the query to one Parquet file per calendar year under ./output.
output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)

# Years to export, both ends inclusive. This is the same set of years as
# range(1950, 2025), whose end bound is exclusive and therefore stops at 2024.
# NOTE(review): the previous comment read "between 1950 and 2025" — raise
# LAST_YEAR to 2025 if that year is meant to be exported as well.
FIRST_YEAR = 1950
LAST_YEAR = 2024

# Years whose export raised; reported once after the loop so failures are not
# lost among the per-year progress lines.
failed_years = []

for year in range(FIRST_YEAR, LAST_YEAR + 1):
    print(f"Processing year {year}")

    # Drop the filters added for the previous year before adding this year's.
    query_builder.filters = []
    query_builder = query_builder.add_range_filter(
        "COMMON_TIME",
        f"{year}-01-01T00:00:00Z",
        f"{year}-12-31T23:59:59Z",
    )
    # Keep rows where at least one of the two parameters is not null.
    # NOTE(review): this tests the raw Temperature/Salinity columns; if
    # missing values arrive as the fill value rather than null they pass
    # this filter — confirm whether the COMMON_* columns should be used.
    query_builder = query_builder.add_filter(
        OrFilter([
            IsNotNullFilter("Temperature"),
            IsNotNullFilter("Salinity"),
        ])
    )

    output_file = os.path.join(output_dir, f"wod_{year}.parquet")
    try:
        query_builder.to_parquet(output_file)
    except Exception as e:
        # Best effort: one failing year must not stop the remaining years.
        failed_years.append(year)
        print(f"Failed to process year {year}: {e}")
    else:
        print(f"Saved to {output_file}")

if failed_years:
    print(f"Export finished with {len(failed_years)} failed year(s): {failed_years}")