In [16]:
import requests
import json
import os
import pandas
import util

In [17]:
# Bearer token sent to the Beacon API. It is read from the BEACON_TOKEN
# environment variable so the secret does not have to be saved in the notebook;
# when the variable is unset the value is an empty string, as before.
TOKEN = os.environ.get("BEACON_TOKEN", "")
# Endpoint that receives the JSON query via POST.
BEACON_QUERY_URL = "https://beacon-argo.maris.nl/api/query"

In [18]:
# Ordered list of select entries for the Beacon query. Each entry is built by
# the project's `util` helpers; the order here is the column order requested.
query_parameters = []


def _argo_instrument_l05(alias):
    """Return the select entry that feeds WMO_INST_TYPE to map_argo_instrument_l05.

    The inner call applies "btrim" to WMO_INST_TYPE with try_cast="Int64";
    its result is the single argument of "map_argo_instrument_l05". The
    outer call is aliased as `alias`.
    """
    return util.function_call(
        "map_argo_instrument_l05",
        [util.function_call("btrim", ["WMO_INST_TYPE"], try_cast="Int64")],
        alias,
    )


# METADATA
query_parameters += [
    util.value("BEACON_ARGO", alias="SOURCE_BDI"),
    util.column("@identifier"),
    util.column("@identifier", alias="SOURCE_BDI_DATASET_ID"),
    util.column("PLATFORM_NUMBER"),
    util.column("CYCLE_NUMBER"),
    util.column("WMO_INST_TYPE"),
]

# Common ODV Tag
query_parameters += [
    util.function_call("concat", ["@identifier", "CYCLE_NUMBER"], "COMMON_ODV_TAG"),
]

# Time
query_parameters += [
    util.column("JULD"),
    util.column("JULD_QC"),
    util.function_call("to_timestamp_nanos", ["JULD"], "COMMON_TIME"),
    util.column("JULD_QC", alias="COMMON_TIME_QC"),
]

# Location
query_parameters += [
    util.column("LONGITUDE"),
    util.column("LATITUDE"),
    util.column("POSITION_QC"),
    util.column("LONGITUDE", alias="COMMON_LONGITUDE"),
    util.column("LONGITUDE.standard_name", alias="COMMON_LONGITUDE_STANDARD_NAME"),
    util.column("LONGITUDE.units", alias="COMMON_LONGITUDE_UNITS"),
    util.value("SDN:P01::ALONZZ01", alias="COMMON_LONGITUDE_P01"),
    util.value("SDN:P06::DEGE", alias="COMMON_LONGITUDE_P06"),
    util.column("LATITUDE.standard_name", alias="COMMON_LATITUDE_STANDARD_NAME"),
    util.column("LATITUDE.units", alias="COMMON_LATITUDE_UNITS"),
    util.column("LATITUDE", alias="COMMON_LATITUDE"),
    util.value("SDN:P01::ALATZZ01", alias="COMMON_LATITUDE_P01"),
    util.value("SDN:P06::DEGN", alias="COMMON_LATITUDE_P06"),
    util.column("POSITION_QC", alias="COMMON_POSITION_QC"),
]

# Depth
query_parameters += [
    util.column("PRES"),
    util.column("PRES_QC"),
    util.function_call("pressure_to_depth", ["PRES", "LATITUDE"], "COMMON_DEPTH"),
    # P01 codes are 8 characters like the other P01 values in this cell;
    # the previous value "SDN:P01::ADEPZZ" was truncated.
    util.value("SDN:P01::ADEPZZ01", alias="COMMON_DEPTH_P01"),
    util.value("SDN:P06::ULAA", alias="COMMON_DEPTH_P06"),
    util.value("depth", alias="COMMON_DEPTH_STANDARD_NAME"),
    util.value("m", alias="COMMON_DEPTH_UNITS"),
    util.column("PRES_QC", alias="COMMON_DEPTH_QC"),
]

# TEMPERATURE
query_parameters += [
    util.column("TEMP"),
    util.column("TEMP_QC"),
    util.column("TEMP_ADJUSTED"),
    util.column("TEMP_ADJUSTED_QC"),
    # The adjusted column is listed first, the raw column second.
    util.coalesce_columns(["TEMP_ADJUSTED", "TEMP"], "COMMON_TEMPERATURE"),
    util.coalesce_columns(["TEMP_ADJUSTED_QC", "TEMP_QC"], "COMMON_TEMPERATURE_QC"),
    util.value("SDN:P01::TEMPPR01", alias="COMMON_TEMPERATURE_P01"),
    util.value("SDN:P06::UPAA", alias="COMMON_TEMPERATURE_P06"),
    util.value(
        "Temperature of the water body", alias="COMMON_TEMPERATURE_STANDARD_NAME"
    ),
    util.value("degrees_celsius", alias="COMMON_TEMPERATURE_UNITS"),
    _argo_instrument_l05("COMMON_TEMPERATURE_L05"),
]

# SALINITY
query_parameters += [
    util.column("PSAL"),
    util.column("PSAL_QC"),
    util.column("PSAL_ADJUSTED"),
    util.column("PSAL_ADJUSTED_QC"),
    # The adjusted column is listed first, the raw column second.
    util.coalesce_columns(["PSAL_ADJUSTED", "PSAL"], "COMMON_SALINITY"),
    util.coalesce_columns(["PSAL_ADJUSTED_QC", "PSAL_QC"], "COMMON_SALINITY_QC"),
    util.value("SDN:P01::PSLTZZ01", alias="COMMON_SALINITY_P01"),
    util.value("SDN:P06::UUUU", alias="COMMON_SALINITY_P06"),
    util.value("Salinity of the water body", alias="COMMON_SALINITY_STANDARD_NAME"),
    util.value("Dimensionless", alias="COMMON_SALINITY_UNITS"),
    _argo_instrument_l05("COMMON_SALINITY_L05"),
]

In [19]:
def build_query(start_time, end_time, select=None) -> dict:
    """Assemble the JSON body for a Beacon query restricted to a JULD range.

    Args:
        start_time: Lower bound placed in the "min" field of the JULD filter.
        end_time: Upper bound placed in the "max" field of the JULD filter.
        select: Optional list of select entries. When omitted (None), the
            notebook-level ``query_parameters`` list is used, which is the
            original behaviour.

    Returns:
        A dict with "select", "filters" (one JULD min/max filter) and
        "output" (parquet format) keys.
    """
    # `is None` rather than truthiness so an explicitly passed empty list is kept.
    selected = query_parameters if select is None else select
    juld_filter = {
        "column": "JULD",
        "min": start_time,
        "max": end_time,
    }
    return {
        "select": selected,
        "filters": [juld_filter],
        "output": {"format": "parquet"},
    }

In [20]:
# Download one parquet file per calendar year into ./data.
os.makedirs("data", exist_ok=True)

for year in range(1990, 2024):
    start_time = f"{year}-01-01T00:00:00.000"
    end_time = f"{year}-12-31T23:59:59.999"
    # Build the request body once so the logged query is exactly what is sent.
    query = build_query(start_time, end_time)
    print(f"Downloading {start_time} - {end_time}")
    print(json.dumps(query))

    target_path = f"data/ARGO_{year}.parquet"
    # Stream into a temporary name first; the final name only appears after the
    # whole body has been written, so an interrupted download is not mistaken
    # for a complete file.
    partial_path = f"{target_path}.part"

    # Download data
    with requests.post(
        BEACON_QUERY_URL,
        json=query,
        headers={"Authorization": f"Bearer {TOKEN}"},
        stream=True,
    ) as response:
        if response.status_code != 200:
            # Raise instead of calling exit(): the cell stops with a traceback
            # and the session stays usable for inspection.
            raise RuntimeError(
                f"Beacon query for {year} failed with HTTP "
                f"{response.status_code}: {response.text}"
            )
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=4096 * 128):
                if chunk:
                    f.write(chunk)

    os.replace(partial_path, target_path)

Downloading 2000-01-01T00:00:00.000 - 2000-12-31T23:59:59.999
{"select": [{"value": "BEACON-ARGO", "alias": "SOURCE_BDI"}, {"column": "@identifier", "alias": null}, {"column": "@identifier", "alias": "SOURCE_BDI_DATASET_ID"}, {"column": "PLATFORM_NUMBER", "alias": null}, {"column": "CYCLE_NUMBER", "alias": null}, {"column": "WMO_INST_TYPE", "alias": null}, {"function": "concat", "args": ["@identifier", "CYCLE_NUMBER"], "alias": "COMMON_ODV_TAG"}, {"column": "JULD", "alias": null}, {"column": "JULD_QC", "alias": null}, {"function": "to_timestamp_nanos", "args": ["JULD"], "alias": "COMMON_TIME"}, {"column": "JULD_QC", "alias": "COMMON_TIME_QC"}, {"column": "LONGITUDE", "alias": null}, {"column": "LATITUDE", "alias": null}, {"column": "POSITION_QC", "alias": null}, {"column": "LONGITUDE", "alias": "COMMON_LONGITUDE"}, {"column": "LONGITUDE.standard_name", "alias": "COMMON_LONGITUDE_STANDARD_NAME"}, {"column": "LONGITUDE.units", "alias": "COMMON_LONGITUDE_UNITS"}, {"value": "SDN:P01::ALONZ

In [26]:
import pyarrow.parquet as pq

# Open the parquet file that the download loop wrote for the year 2000.
parquet_file = pq.ParquetFile("data/ARGO_2000.parquet")

# Print how many row groups the file contains, then load ONLY the first row
# group (index 0) and convert it to a pandas DataFrame for inspection.
# NOTE(review): if the file ever has more than one row group, `df` will not
# contain the remaining groups.
print(parquet_file.num_row_groups)
table = parquet_file.read_row_group(0)
df = table.to_pandas()
df

1


Unnamed: 0,SOURCE_BDI,@identifier,SOURCE_BDI_DATASET_ID,PLATFORM_NUMBER,CYCLE_NUMBER,WMO_INST_TYPE,COMMON_ODV_TAG,JULD,JULD_QC,COMMON_TIME,...,PSAL_QC,PSAL_ADJUSTED,PSAL_ADJUSTED_QC,COMMON_SALINITY,COMMON_SALINITY_QC,COMMON_SALINITY_P01,COMMON_SALINITY_P06,COMMON_SALINITY_STANDARD_NAME,COMMON_SALINITY_UNITS,COMMON_SALINITY_L05
0,BEACON-ARGO,aoml/59025/59025_prof.nc,aoml/59025/59025_prof.nc,59025,1,852,aoml/59025/59025_prof.nc1,972785697,8,2000-10-29 02:14:57,...,4,,4,36.054001,4,SDN:P01::PSLTZZ01,SDN:P06::UUUU,Salinity of the water body,Dimensionless,SDN:L05::130
1,BEACON-ARGO,aoml/59025/59025_prof.nc,aoml/59025/59025_prof.nc,59025,1,852,aoml/59025/59025_prof.nc1,972785697,8,2000-10-29 02:14:57,...,4,,4,36.060001,4,SDN:P01::PSLTZZ01,SDN:P06::UUUU,Salinity of the water body,Dimensionless,SDN:L05::130
2,BEACON-ARGO,aoml/59025/59025_prof.nc,aoml/59025/59025_prof.nc,59025,1,852,aoml/59025/59025_prof.nc1,972785697,8,2000-10-29 02:14:57,...,4,,4,35.886002,4,SDN:P01::PSLTZZ01,SDN:P06::UUUU,Salinity of the water body,Dimensionless,SDN:L05::130
3,BEACON-ARGO,aoml/59025/59025_prof.nc,aoml/59025/59025_prof.nc,59025,1,852,aoml/59025/59025_prof.nc1,972785697,8,2000-10-29 02:14:57,...,4,,4,35.987000,4,SDN:P01::PSLTZZ01,SDN:P06::UUUU,Salinity of the water body,Dimensionless,SDN:L05::130
4,BEACON-ARGO,aoml/59025/59025_prof.nc,aoml/59025/59025_prof.nc,59025,1,852,aoml/59025/59025_prof.nc1,972785697,8,2000-10-29 02:14:57,...,4,,4,35.976002,4,SDN:P01::PSLTZZ01,SDN:P06::UUUU,Salinity of the water body,Dimensionless,SDN:L05::130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
231159,BEACON-ARGO,aoml/39013/39013_prof.nc,aoml/39013/39013_prof.nc,39013,32,845,aoml/39013/39013_prof.nc32,977974483,1,2000-12-28 03:34:43,...,,,,,,SDN:P01::PSLTZZ01,SDN:P06::UUUU,Salinity of the water body,Dimensionless,SDN:L05::130
231160,BEACON-ARGO,aoml/39013/39013_prof.nc,aoml/39013/39013_prof.nc,39013,32,845,aoml/39013/39013_prof.nc32,977974483,1,2000-12-28 03:34:43,...,,,,,,SDN:P01::PSLTZZ01,SDN:P06::UUUU,Salinity of the water body,Dimensionless,SDN:L05::130
231161,BEACON-ARGO,aoml/39013/39013_prof.nc,aoml/39013/39013_prof.nc,39013,32,845,aoml/39013/39013_prof.nc32,977974483,1,2000-12-28 03:34:43,...,,,,,,SDN:P01::PSLTZZ01,SDN:P06::UUUU,Salinity of the water body,Dimensionless,SDN:L05::130
231162,BEACON-ARGO,aoml/39013/39013_prof.nc,aoml/39013/39013_prof.nc,39013,32,845,aoml/39013/39013_prof.nc32,977974483,1,2000-12-28 03:34:43,...,,,,,,SDN:P01::PSLTZZ01,SDN:P06::UUUU,Salinity of the water body,Dimensionless,SDN:L05::130
