In [1]:
# Installing Python packages
# %pip install pyarrow
# %pip install pandas
# %pip install xarray

In [2]:
# Importing necessary libraries
import pyarrow as pa
import pandas as pd
import xarray as xr
import os
import requests
import json
from io import BytesIO

In [3]:
# Set your Beacon Blue Cloud Token
Token = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwczpcL1wvZGF0YS5ibHVlLWNsb3VkLm9yZyIsImF1ZCI6Imh0dHBzOlwvXC9kYXRhLmJsdWUtY2xvdWQub3JnIiwiaWF0IjoxNzIyNTgxMTQwLCJleHAiOjE3NTQxMTcxNDAsInVzciI6ODMsImlkIjoibnJleWVzc3VhcmV6QG9ncy5pdCIsImVwX29yZ2FuaXNhdGlvbiI6Ik5hdGlvbmFsIEluc3RpdHV0ZSBvZiBPY2Vhbm9ncmFwaHkgYW5kIEFwcGxpZWQgR2VvIn0.PHfvGOMLTt_pF3cz-5kIRJR2SVnCzIE4pmhwhHr9rv4"


In [4]:
# Fetching available columns from the Beacon API
responseinfo = requests.get("https://beacon-wb2-eutrophication.maris.nl/api/query/available-columns", headers = {"Authorization" : f"Bearer {Token}"}) 
params = responseinfo.json()

#### Below you can search through the available columns by entering text between the brackets of search_columns(" ").

In [5]:
# Search function to find columns based on a search term
# This function searches for columns in the params list that match the search term
def search_columns(search_term):
    search_term = search_term.lower()
    matches = [col for col in params if search_term in col.lower()]
    
    if matches:
        print("Matching columns:")
        for match in matches:
            print(match)
    else:
        print("No matching columns found.")

search_columns("L05") #Enter your search term here

Matching columns:
COMMON_CHLOROPHYLL_L05
COMMON_OXYGEN_L05
COMMON_PHOSPHATE_L05
COMMON_SILICATE_L05
COMMON_SALINITY_L05
COMMON_TEMPERATURE_L05
COMMON_OXYGEN_SATURATION_L05
COMMON_NITRATE_L05
COMMON_AMMONIUM_L05


In [None]:
# You JSON query request
query = {
    "query_parameters": [
        {
            "column_name": "COMMON_CHLOROPHYLL_PER_VOLUME",
        },
        {
            "column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_QC",
        },
        {
            "column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_UNITS",
        },
        {
            "column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P01",
        },
        {
            "column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P06",
        },
        
        # metadata for clone wars -- EMODNET (temporary solution)
        # {"column_name": "Instrument Info"},
        # {"column_name": "Instrument / gear type"},
        # {"column_name": "CSR platform code"},
        # {"column_name": "CSR platform name"},
        # {"column_name": "Measuring area type"},
        # {"column_name": "Platform type"},
        # {"column_name": "LOCAL_CDI_ID"},
        # {"column_name": "EDMO_code"},
        # {"column_name": "Cruise name"},
        # {"column_name": "Cruise"},

        # metadata for clone wars -- WOD
        {"column_name": "WOD_cruise_identifier"},
        
        
        # metadata for clone wars -- CMEMS
        # {"column_name": ".platform_code"},
        # {"column_name": ".platform_name"},
        # {"column_name": ".instrument"},
        # {"column_name": ".instrument_name"},
        # {"column_name": ".platform_type"},
        # {"column_name": ".platform_type_name"},
        # {"column_name": ".cdm_data_type"},
        # {"column_name": "WMO_istrument_type"},
        # bigram not available yet


        {"column_name": "COMMON_TIME", "alias": "TIME-Marker"}, # ONLY FOR PARQUET
        {"function": "to_timestamp_nanos", "args": [
            "COMMON_TIME"], "alias": "TIME"},
        {"column_name": "COMMON_TIME_QC", "alias": "TIME_QC"},
        {"column_name": "COMMON_DEPTH"},
        {"column_name": "COMMON_DEPTH_QC"},
        {"column_name": "COMMON_LONGITUDE"},
        {"column_name": "COMMON_LATITUDE"},

        # COMMON INSTRUMENTS
        {"column_name": "COMMON_CHLOROPHYLL_L05"},
        {"column_name": "COMMON_CHLOROPHYLL_L06"},
        {"column_name": "COMMON_CHLOROPHYLL_L22"},
        {"column_name": "COMMON_CHLOROPHYLL_L35"},

        # COMMON PLATFORMS
        {"column_name": "COMMON_PLATFORM_L06"},
        {"column_name": "COMMON_PLATFORM_B76"},
        {"column_name": "COMMON_PLATFORM_C17"},

        # COMMON SOURCES
        {"column_name": "SOURCE_BDI"},
        {"column_name": "SOURCE_BDI_DATASET_ID"},
        {"column_name": "COMMON_EDMO_CODE"},
        {"column_name": "COMMON_EDMERP_CODE"},
        {"column_name": "COMMON_CSR"}  
        
    ],
    "filters": [
        {
            "for_query_parameter": "TIME",
            "min": "2003-01-01T00:00:00",
            "max": "2003-12-01T00:00:00",
        },
        {
            "for_query_parameter": "COMMON_DEPTH",
            "min": 0,
            "max": 100,
        },
        # {"for_query_parameter": "COMMON_LONGITUDE", "min": -8, "max": 37},
        # {"for_query_parameter": "COMMON_LATITUDE", "min": 30, "max": 46},
        {
            "is_not_null": {"for_query_parameter": "COMMON_CHLOROPHYLL_PER_VOLUME"},
        },
    ],
    "output": {"format": "parquet"},
}

In [7]:
response = requests.post("https://beacon-wb2-eutrophication.maris.nl/api/query", json.dumps(query), headers = {
    "Authorization" : f"Bearer {Token}",
    "Content-type": "application/json"
})

if response.status_code == 204:
    print("No data has been found for your query, please update your input fields above and run the notebook again.")
elif response.status_code != 200:
    # Print error message if the request was not successful
    print(f"Error: {response.status_code}")
    print(response.text)

In [8]:
response = requests.post(
    "https://beacon-wb2-eutrophication.maris.nl/api/query",
    json.dumps(query),
    headers={"Authorization": f"Bearer {Token}", "Content-type": "application/json"},
)

if response.status_code == 204:
    print(
        "No data has been found for your query, please update your input fields above and run the notebook again."
    )
elif response.status_code != 200:
    print(response.text)

In [9]:
# Create output directory if it doesn't exist
if not os.path.exists("./beacon_output"):
    os.makedirs("beacon_output")
    
# Save the response content to a file
with open("beacon_output/merged_subset.parquet", "wb") as f:
    f.write(response.content)

In [10]:
df = pd.read_parquet(f"./Beacon_V1.0.0_Output/WB1-Merged.parquet")
# df = df.set_index("TIME").sort_index()
df

ArrowInvalid: Could not open Parquet input source '<Buffer>': Parquet file size is 0 bytes