In [1]:
# Installing Python packages
# %pip install pyarrow
# %pip install pandas
# %pip install xarray

In [2]:
# Importing necessary libraries
import pyarrow as pa
import pandas as pd
import xarray as xr
import os
import requests
import json
from io import BytesIO

In [3]:
# Set your Beacon Blue Cloud Token.
# The token is a personal credential, so it is read from the BEACON_TOKEN
# environment variable instead of being stored in the notebook, where it would
# leak to everyone the notebook is shared with.
# NOTE(review): any token that was previously saved in this notebook should be
# treated as compromised and rotated.
def load_beacon_token(env_var="BEACON_TOKEN"):
    """Return the Beacon API token stored in the environment variable `env_var`.

    Args:
        env_var: Name of the environment variable holding the token.

    Returns:
        The token with surrounding whitespace removed, or an empty string
        (after printing a warning) when the variable is unset or blank.
    """
    token = os.environ.get(env_var, "").strip()
    if not token:
        print(
            f"Warning: environment variable {env_var} is not set; "
            "requests to the Beacon API will be rejected until it is."
        )
    return token


TOKEN = load_beacon_token()

In [4]:
# Fetch the list of queryable columns from the Beacon API.
responseinfo = requests.get(
    "https://beacon-wb2-eutrophication.maris.nl/api/query/available-columns",
    headers={"Authorization": f"Bearer {TOKEN}"},
    timeout=60,  # fail instead of waiting indefinitely if the server does not answer
)
# Stop on an HTTP error (e.g. a rejected token) so that `params` never ends up
# holding an error payload instead of the column names.
responseinfo.raise_for_status()
params = responseinfo.json()

#### Below you can search through the available columns by entering text between the brackets of search_columns(" ").

In [34]:
# Search helper for the column names returned by the Beacon API.
def search_columns(search_term, columns=None):
    """Print every column name that contains `search_term`, ignoring case.

    Args:
        search_term: Text to look for inside the column names.
        columns: Iterable of column-name strings to search. When omitted, the
            global `params` list (the available columns fetched earlier in
            this notebook) is searched.

    Returns:
        None. The matches are printed one per line under a
        "Matching columns:" header, or a "no match" message is printed.
    """
    if columns is None:
        columns = params

    needle = search_term.lower()
    matches = [col for col in columns if needle in col.lower()]

    if matches:
        print("Matching columns:")
        for match in matches:
            print(match)
    else:
        print("No matching columns found.")

search_columns("L05")  # Enter your search term here; matching ignores upper/lower case

Matching columns:
COMMON_CHLOROPHYLL_L05
COMMON_OXYGEN_L05
COMMON_PHOSPHATE_L05
COMMON_SILICATE_L05
COMMON_SALINITY_L05
COMMON_TEMPERATURE_L05
COMMON_OXYGEN_SATURATION_L05
COMMON_NITRATE_L05
COMMON_NITRATE_NITRITE_L05
COMMON_AMMONIUM_L05


Build the query here:

In [8]:

# Columns requested from Beacon. Entries with an "alias" are referred to by
# that alias in the filters and ODV settings used elsewhere in this notebook.
query_parameters = [
    # --- Provenance / identification ---
    {"column_name": "SOURCE_BDI"},
    {"column_name": "SOURCE_BDI_DATASET_ID"},
    {"column_name": "COMMON_ODV_TAG"},

    # --- Chlorophyll value, quality flag and descriptive metadata ---
    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME", "alias": "CHLOROPHYLL"},
    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_QC", "alias": "CHLOROPHYLL_QC"},
    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_UNITS"},
    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_STANDARD_NAME"},
    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P01"},
    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P06"},

    # --- Time and depth ---
    {"column_name": "COMMON_TIME"},
    {"column_name": "COMMON_DEPTH", "alias": "DEPTH"},
    {"column_name": "COMMON_DEPTH_QC", "alias": "DEPTH_QC"},

    # --- Position ---
    {"column_name": "COMMON_LONGITUDE", "alias": "LONGITUDE"},
    {"column_name": "COMMON_LATITUDE", "alias": "LATITUDE"},
]

In [37]:
import ipywidgets as widgets
from IPython.display import display

# Switch between the two ways of describing the requested output:
#   False -> pick Parquet / Arrow IPC / netCDF from the dropdown shown below
#   True  -> request a zipped ODV export described by the dictionary further down
ODV_OUTPUT = False

if not ODV_OUTPUT:
    # Dropdown whose current value is the format string placed in `output`.
    format_selector = widgets.Dropdown(
        description='Output Format:',
        options=[('Parquet', 'parquet'), ('Arrow IPC', 'ipc'), ('netCDF', 'netcdf')],
        value='parquet',
    )
    display(format_selector)

    def set_output(change):
        """Rebind the global `output` whenever the dropdown value changes."""
        global output
        output = {"format": change['new']}

    format_selector.observe(set_output, names='value')

    # Initial value, used when the dropdown is never touched.
    output = {"format": format_selector.value}
else:
    # ODV export settings. Column names are those selected in
    # `query_parameters`, using the alias where one was given.
    output = {
        "format": {
            "odv": {
                "longitude_column": {"column_name": "LONGITUDE"},
                "latitude_column": {"column_name": "LATITUDE"},
                "time_column": {"column_name": "COMMON_TIME"},
                "depth_column": {
                    "column_name": "DEPTH",
                    "unit": "m",
                    "comment": "Codes: SDN:P01::ADEPZZ01 SDN:P06::ULAA",
                    "qf_column": "DEPTH_QC",
                },
                "data_columns": [
                    {
                        "column_name": "CHLOROPHYLL",
                        "unit": "mg/m^3",
                        "comment": "Codes: SDN:P01::CHLTVOLU SDN:P06::UMMC",
                        "qf_column": "CHLOROPHYLL_QC",
                    },
                ],
                "metadata_columns": [
                    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_UNITS"},
                    # {"column_name": "DEPTH_UNITS"},  # left disabled, as in the original cell
                    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P01"},
                    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P06"},
                    {"column_name": "SOURCE_BDI"},
                    {"column_name": "SOURCE_BDI_DATASET_ID"},
                ],
                "qf_schema": "SEADATANET",
                "key_column": "COMMON_ODV_TAG",
                "archiving": "zip_deflate",
            },
        },
    }

Dropdown(description='Output Format:', options=(('Parquet', 'parquet'), ('Arrow IPC', 'ipc'), ('netCDF', 'netc…

In [38]:
# Your JSON query request
query = {
    # Columns to return (defined in the "Build the query" cell).
    "query_parameters": query_parameters,

    # Row filters; each one refers to a query parameter by its alias,
    # or by its column name when no alias was given.
    "filters": [
        {
            "for_query_parameter": "COMMON_TIME",
            "min": "2015-01-01T00:00:00",
            "max": "2015-03-01T23:00:00",
        },
        {"for_query_parameter": "DEPTH", "min": 0, "max": 5},
        {"for_query_parameter": "LONGITUDE", "min": -44, "max": 5},
        {"for_query_parameter": "LATITUDE", "min": 15, "max": 60},
        {"is_not_null": {"for_query_parameter": "CHLOROPHYLL"}},
    ],

    # The output description comes from the output-format cell. Its possible values are:
    #   1) Parquet: {"format": "parquet"}
    #   2) Arrow:   {"format": "ipc"}
    #   3) netCDF:  {"format": "netcdf"}
    #   4) ODV:     the ODV settings dictionary built when ODV_OUTPUT is True
    "output": output,
}


In [39]:
# Send the query to the Beacon API; the request body is `query` serialised as JSON.
response = requests.post(
    "https://beacon-wb2-eutrophication.maris.nl/api/query",
    data=json.dumps(query),
    headers={
        "Authorization": f"Bearer {TOKEN}",
        "Content-type": "application/json",
    },
)

# Report anything other than a successful (200) answer; 204 means the query matched nothing.
if response.status_code not in (200, 204):
    # Print error message if the request was not successful
    print(f"Error: {response.status_code}")
    print(response.text)
elif response.status_code == 204:
    print("No data has been found for your query, please update your input fields above and run the notebook again.")

In [40]:
# Create the output directory; exist_ok=True makes this a no-op when the
# directory is already there, so the cell can be re-run safely.
os.makedirs("beacon_output", exist_ok=True)

### Uncomment and run for parquet output

In [42]:
# Keep a copy of the raw Parquet payload on disk.
with open("beacon_output/merged_subset.parquet", "wb") as parquet_file:
    parquet_file.write(response.content)

# Parse the same bytes in memory (no need to re-read the file just written).
response_content = BytesIO(response.content)
df = (
    pd.read_parquet(response_content, engine='pyarrow')
    # Temporary fix: the merged instance delivers COMMON_TIME as seconds since
    # 1970, so it is converted to datetimes here (to be addressed by Beacon
    # in the future).
    .assign(COMMON_TIME=lambda frame: pd.to_datetime(frame['COMMON_TIME'], unit='s'))
)
df

Unnamed: 0,SOURCE_BDI,SOURCE_BDI_DATASET_ID,COMMON_ODV_TAG,CHLOROPHYLL,CHLOROPHYLL_QC,COMMON_CHLOROPHYLL_PER_VOLUME_UNITS,COMMON_CHLOROPHYLL_PER_VOLUME_STANDARD_NAME,COMMON_CHLOROPHYLL_PER_VOLUME_P01,COMMON_CHLOROPHYLL_PER_VOLUME_P06,COMMON_TIME,DEPTH,DEPTH_QC,LONGITUDE,LATITUDE
0,BEACON_WOD,wod_021754693O,BEACON_WODwod_021754693O,0.1800,1,mg/m^3,Concentration of chlorophyll per unit volume o...,SDN:P01::CHLTVOLU,SDN:P06::UMMC,2015-02-28 19:46:52,0.188211,1,-39.763130,59.839478
1,BEACON_WOD,wod_021754693O,BEACON_WODwod_021754693O,0.1656,1,mg/m^3,Concentration of chlorophyll per unit volume o...,SDN:P01::CHLTVOLU,SDN:P06::UMMC,2015-02-28 19:46:52,0.515103,1,-39.763130,59.839478
2,BEACON_WOD,wod_017813090O,BEACON_WODwod_017813090O,1.2000,1,mg/m^3,Concentration of chlorophyll per unit volume o...,SDN:P01::CHLTVOLU,SDN:P06::UMMC,2015-02-17 09:02:59,1.000000,1,-5.597167,54.466667
3,BEACON_WOD,wod_017813065O,BEACON_WODwod_017813065O,10.2300,1,mg/m^3,Concentration of chlorophyll per unit volume o...,SDN:P01::CHLTVOLU,SDN:P06::UMMC,2015-02-12 08:25:00,1.000000,1,-6.684500,55.148666
4,BEACON_WOD,wod_017813034O,BEACON_WODwod_017813034O,0.5500,1,mg/m^3,Concentration of chlorophyll per unit volume o...,SDN:P01::CHLTVOLU,SDN:P06::UMMC,2015-02-10 08:41:59,1.000000,1,-5.806667,54.668167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11320,BEACON_WOD,wod_021754434O,BEACON_WODwod_021754434O,0.1656,1,mg/m^3,Concentration of chlorophyll per unit volume o...,SDN:P01::CHLTVOLU,SDN:P06::UMMC,2015-01-29 21:16:52,0.287266,1,-39.690025,59.964863
11321,BEACON_WOD,wod_021754434O,BEACON_WODwod_021754434O,0.1800,1,mg/m^3,Concentration of chlorophyll per unit volume o...,SDN:P01::CHLTVOLU,SDN:P06::UMMC,2015-01-29 21:16:52,0.297172,1,-39.690025,59.964863
11322,BEACON_WOD,wod_021754434O,BEACON_WODwod_021754434O,0.1872,1,mg/m^3,Concentration of chlorophyll per unit volume o...,SDN:P01::CHLTVOLU,SDN:P06::UMMC,2015-01-29 21:16:52,0.376418,1,-39.690025,59.964863
11323,BEACON_WOD,wod_021754434O,BEACON_WODwod_021754434O,0.1800,1,mg/m^3,Concentration of chlorophyll per unit volume o...,SDN:P01::CHLTVOLU,SDN:P06::UMMC,2015-01-29 21:16:52,0.465569,1,-39.690025,59.964863


### Uncomment and run for ipc arrow output

In [10]:
# # Save the response content to a file
# with open("beacon_output/merged_subset.arrow", "wb") as f:
#     f.write(response.content)

# # Read the Arrow IPC file into a Pandas DataFrame
# df = pd.read_feather(f"./beacon_output/merged_subset.arrow")
# df = df.assign(datetime=pd.to_datetime(df['datetime'])).set_index('datetime').sort_index()
# df

### Uncomment and run for netCDF output

In [14]:
# # Save the response content to a file
# with open("beacon_output/merged_subset.nc", "wb") as f:
#     f.write(response.content)


# # Read the netCDF file into an xarray Dataset
# df =xr.open_dataset(f"./beacon_output/merged_subset.nc").to_dataframe()
# df = df.assign(datetime=pd.to_datetime(df['datetime'])).set_index('datetime').sort_index()
# df

### Uncomment and run for the ODV output

In [27]:
# Save the response content as an ODV zip archive.
# The response only holds an ODV archive when ODV_OUTPUT was True while the
# query was built and the request succeeded; otherwise writing it would leave
# a file named .zip that contains another format or an error message.
if not ODV_OUTPUT:
    print("Skipping ODV export: ODV_OUTPUT is False, so the response is not an ODV archive.")
elif response.status_code != 200:
    print(f"Skipping ODV export: the query returned status {response.status_code}.")
else:
    with open("beacon_output/merged_subset_ODV.zip", "wb") as f:
        f.write(response.content)
