In [None]:
import requests
import json
import os
import pandas
import util

In [None]:
# Bearer token sent in the Authorization header of every Beacon request.
# It is read from the BEACON_TOKEN environment variable so the credential never
# has to be pasted into (and saved with) the notebook; when the variable is not
# set the token falls back to the empty string, as before.
TOKEN = os.environ.get("BEACON_TOKEN", "")
# Beacon endpoint that the download loop POSTs its query to.
BEACON_QUERY_URL = "https://beacon-wod.maris.nl/api/query"

In [None]:
# Select list of the Beacon query. Every entry is produced by a `util` helper:
# `column` selects a source column (optionally under an alias), `value` supplies
# a fixed literal under an alias, `function_call` applies a named function to
# the listed inputs. Later cells extend this list, so re-run this cell first
# whenever the selection has to be rebuilt from scratch.
query_parameters = []

# TIME: the raw column plus the result of `to_timestamp_nanos` as COMMON_TIME.
query_parameters += [
    util.column("time"),
    util.function_call("to_timestamp_nanos", ["time"], "COMMON_TIME"),
]

# LONGITUDE: origin column and its units attribute.
query_parameters += [
    util.column("lon"),
    util.column("lon.units"),
]
# COMMON LONGITUDE: same column under the common alias, with fixed metadata.
query_parameters += [
    util.column("lon", "COMMON_LONGITUDE"),
    util.value("degrees_east", "COMMON_LONGITUDE_UNITS"),
    util.value("Longitude", "COMMON_LONGITUDE_STANDARD_NAME"),
    util.value("SDN:P01::ALONZZ01", "COMMON_LONGITUDE_P01"),
    util.value("SDN:P06::DEGE", "COMMON_LONGITUDE_P06"),
]

# LATITUDE: origin column and its units attribute.
query_parameters += [
    util.column("lat"),
    util.column("lat.units"),
]
# COMMON LATITUDE: same column under the common alias, with fixed metadata.
query_parameters += [
    util.column("lat", "COMMON_LATITUDE"),
    util.value("degrees_north", "COMMON_LATITUDE_UNITS"),
    util.value("Latitude", "COMMON_LATITUDE_STANDARD_NAME"),
    util.value("SDN:P01::ALATZZ01", "COMMON_LATITUDE_P01"),
    util.value("SDN:P06::DEGN", "COMMON_LATITUDE_P06"),
]

# DEPTH: origin column, its units attribute and its WOD flag column.
query_parameters += [
    util.column("z"),
    util.column("z.units"),
    util.column("z_WODflag"),
]

# COMMON ORIGIN DEPTH: origin values and origin units under common aliases;
# the flag goes through `wod_flag_to_sdn`.
query_parameters += [
    util.column("z", "COMMON_ORIGIN_DEPTH"),
    util.function_call("wod_flag_to_sdn", ["z_WODflag"], "COMMON_ORIGIN_DEPTH_QC"),
    util.column("z.units", "COMMON_ORIGIN_DEPTH_UNITS"),
    util.value("Depth", "COMMON_ORIGIN_DEPTH_STANDARD_NAME"),
    util.value("SDN:P01::ADEPZZ01", "COMMON_ORIGIN_DEPTH_P01"),
    util.value("SDN:P06::ULAA", "COMMON_ORIGIN_DEPTH_P06"),
]

# COMMON DEPTH: same source column, but with the units fixed to the literal "m".
query_parameters += [
    util.column("z", "COMMON_DEPTH"),
    util.function_call("wod_flag_to_sdn", ["z_WODflag"], "COMMON_DEPTH_QC"),
    util.value("m", "COMMON_DEPTH_UNITS"),
    util.value("Depth", "COMMON_DEPTH_STANDARD_NAME"),
    util.value("SDN:P01::ADEPZZ01", "COMMON_DEPTH_P01"),
    util.value("SDN:P06::ULAA", "COMMON_DEPTH_P06"),
]

In [None]:
# DATASET METADATA
# NOTE: this cell extends `query_parameters` in place; running it twice adds
# every entry twice.

query_parameters.extend(
    [
        # CSR
        # COMMON_ODV TAG: `concat` of the literal "BEACON_WOD" and "@identifier".
        util.function_call(
            "concat", [util.value("BEACON_WOD"), "@identifier"], "COMMON_ODV_TAG"
        ),
        # BEACON SOURCE REFERENCE: fixed source name plus "@identifier" under
        # the SOURCE_BDI_DATASET_ID alias.
        util.value("BEACON_WOD", "SOURCE_BDI"),
        util.column("@identifier", "SOURCE_BDI_DATASET_ID"),
    ]
)

# ORIGIN METADATA FORWARDING: these columns are selected as-is, without alias.
metadata = [
    "dataset",
    ".institution",
    "Platform",
    "country",
    "WOD_cruise_identifier",
    "wod_unique_cast",
]
query_parameters.extend(util.column(name) for name in metadata)

In [None]:
# MAP Temperature
# NOTE: this cell appends to `query_parameters` in place; running it twice adds
# every entry twice.

# Origin columns, selected without alias.
query_parameters.append(util.column("Temperature"))
query_parameters.append(util.column("Temperature_WODflag"))

# COMMON_ORIGIN_TEMPERATURE: origin values with origin metadata under common
# aliases; the flag goes through `wod_flag_to_sdn`.
query_parameters.append(util.column("Temperature", "COMMON_ORIGIN_TEMPERATURE"))
query_parameters.append(
    util.function_call(
        "wod_flag_to_sdn", ["Temperature_WODflag"], "COMMON_ORIGIN_TEMPERATURE_QC"
    )
)
query_parameters.append(
    util.column("Temperature.standard_name", "COMMON_ORIGIN_TEMPERATURE_STANDARD_NAME")
)
# The origin units come from the `Temperature.units` attribute, like
# `z.units` for depth and `Temperature.standard_name` above. This used to be
# `util.value(...)`, which emitted the literal text "Temperature.units".
query_parameters.append(
    util.column("Temperature.units", "COMMON_ORIGIN_TEMPERATURE_UNITS")
)
query_parameters.append(
    util.value("SDN:P01::TEMPPR01", "COMMON_ORIGIN_TEMPERATURE_P01")
)
query_parameters.append(util.value("SDN:P06::UPAA", "COMMON_ORIGIN_TEMPERATURE_P06"))

# MAP COMMON Temperature: same source column, with fixed literal metadata.
query_parameters.append(util.column("Temperature", "COMMON_TEMPERATURE"))
query_parameters.append(
    util.function_call(
        "wod_flag_to_sdn", ["Temperature_WODflag"], "COMMON_TEMPERATURE_QC"
    )
)
query_parameters.append(
    util.value(
        "Temperature of the water body",
        "COMMON_TEMPERATURE_STANDARD_NAME",
    )
)
query_parameters.append(util.value("degrees_celsius", "COMMON_TEMPERATURE_UNITS"))
query_parameters.append(util.value("SDN:P01::TEMPPR01", "COMMON_TEMPERATURE_P01"))
query_parameters.append(util.value("SDN:P06::UPAA", "COMMON_TEMPERATURE_P06"))

# MAP COMMON Temperature INSTRUMENTS: the instrument column is passed through
# three mapping functions (L05, L22, L33) and also selected as-is.
query_parameters.append(
    util.function_call(
        "map_wod_instrument_l05", ["Temperature_Instrument"], "COMMON_TEMPERATURE_L05"
    )
)
query_parameters.append(
    util.function_call(
        "map_wod_instrument_l22", ["Temperature_Instrument"], "COMMON_TEMPERATURE_L22"
    )
)
query_parameters.append(
    util.function_call(
        "map_wod_instrument_l33", ["Temperature_Instrument"], "COMMON_TEMPERATURE_L33"
    )
)
query_parameters.append(util.column("Temperature_Instrument"))

In [None]:
# MAP Salinity
# NOTE: this cell extends `query_parameters` in place; running it twice adds
# every entry twice.

# Origin columns, selected without alias.
query_parameters += [
    util.column("Salinity"),
    util.column("Salinity_WODflag"),
]

# COMMON_ORIGIN_SALINITY: origin values under common aliases; the flag goes
# through `wod_flag_to_sdn`.
query_parameters += [
    util.column("Salinity", "COMMON_ORIGIN_SALINITY"),
    util.function_call(
        "wod_flag_to_sdn", ["Salinity_WODflag"], "COMMON_ORIGIN_SALINITY_QC"
    ),
    util.column("Salinity.standard_name", "COMMON_ORIGIN_SALINITY_STANDARD_NAME"),
    # NOTE(review): the origin units are an empty literal here rather than a
    # `Salinity.units` column - presumably deliberate; confirm.
    util.value("", "COMMON_ORIGIN_SALINITY_UNITS"),
    util.value("SDN:P01::PSLTZZ01", "COMMON_ORIGIN_SALINITY_P01"),
    util.value("SDN:P06::UUUU", "COMMON_ORIGIN_SALINITY_P06"),
]

# MAP COMMON Salinity: same source column, with fixed literal metadata.
query_parameters += [
    util.column("Salinity", "COMMON_SALINITY"),
    util.function_call("wod_flag_to_sdn", ["Salinity_WODflag"], "COMMON_SALINITY_QC"),
    util.value("Salinity of the water body", "COMMON_SALINITY_STANDARD_NAME"),
    util.value("Dimensionless", "COMMON_SALINITY_UNITS"),
    util.value("SDN:P01::PSLTZZ01", "COMMON_SALINITY_P01"),
    util.value("SDN:P06::UUUU", "COMMON_SALINITY_P06"),
]

# MAP COMMON Salinity INSTRUMENTS: one mapping function per vocabulary
# (l05, l22, l33), each applied to the instrument column.
query_parameters += [
    util.function_call(
        f"map_wod_instrument_{vocab}",
        ["Salinity_Instrument"],
        f"COMMON_SALINITY_{vocab.upper()}",
    )
    for vocab in ("l05", "l22", "l33")
]
# The instrument column itself, selected as-is.
query_parameters += [util.column("Salinity_Instrument")]

In [None]:
def build_query(start_time, end_time) -> dict:
    """Assemble the Beacon request body for one time window.

    Args:
        start_time: Lower bound placed in the "min" field of the `time` filter.
        end_time: Upper bound placed in the "max" field of the `time` filter.

    Returns:
        A dict with three keys: "select" (the module-level `query_parameters`
        list, passed by reference), "filters" (the time range plus an "or" of
        is-not-null checks on Temperature and Salinity) and "output"
        (parquet format).
    """
    time_window = {"column": "time", "min": start_time, "max": end_time}
    # Keep a row when at least one of the two measurements is present.
    has_measurement = {
        "or": [
            {"is_not_null": {"column": name}} for name in ("Temperature", "Salinity")
        ]
    }
    return dict(
        select=query_parameters,
        filters=[time_window, has_measurement],
        output={"format": "parquet"},
    )

In [None]:
# Download one parquet file per calendar year (2000 through 2023) into ./data.
os.makedirs("data", exist_ok=True)

for year in range(2000, 2024):
    start_time = f"{year}-01-01T00:00:00.000"
    end_time = f"{year}-12-31T23:59:59.999"
    # Build the request body once and reuse it for logging and for the POST.
    query = build_query(start_time, end_time)
    print(query)
    print(f"Downloading {start_time} - {end_time}")

    target_path = f"data/WOD_{year}.parquet"
    # Stream into a side file first so an interrupted transfer never leaves a
    # truncated file under the final name.
    partial_path = f"{target_path}.part"

    # Download data
    with requests.post(
        BEACON_QUERY_URL,
        json=query,
        headers={"Authorization": f"Bearer {TOKEN}"},
        stream=True,
    ) as response:
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            print(response.text)
            # Raise instead of calling exit(): the cell stops with a traceback
            # and the kernel (with all variables) stays alive.
            response.raise_for_status()
            # raise_for_status() only raises for 4xx/5xx; any other non-200
            # answer is still not a usable download.
            raise RuntimeError(
                f"Unexpected HTTP status {response.status_code} for year {year}"
            )
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=4096 * 128):
                if chunk:
                    f.write(chunk)

    # Only a fully streamed response is moved to its final name.
    os.replace(partial_path, target_path)

In [None]:
import pyarrow.parquet as pq

# Open the file downloaded for the year 2000 without loading it completely.
parquet_file = pq.ParquetFile("data/WOD_2000.parquet")

# Show how many row groups the file holds, then load a single one as a preview.
print(parquet_file.num_row_groups)
# Row groups are numbered from 0. Index 0 is read because it exists in every
# file that has at least one row group; the previous index 1 skipped the first
# group and raised when the file contained only one.
table = parquet_file.read_row_group(0)
df = table.to_pandas()
df