In [1]:
# Installing Python packages
# %pip install pyarrow
# %pip install pandas
# %pip install xarray
# %pip install requests

In [1]:
# Importing necessary libraries
import pyarrow as pa
import pandas as pd
import xarray as xr
import os
import requests
import json
from io import BytesIO

In [2]:
# Set your Beacon Blue Cloud Token
# The token is a personal credential, so it is read from the BEACON_TOKEN
# environment variable instead of being stored in the notebook, where it would
# be shared with everyone who receives or can see the file.
TOKEN = os.environ.get("BEACON_TOKEN", "").strip()
if not TOKEN:
    # Keep the notebook importable, but tell the user why the requests below will be rejected.
    print("BEACON_TOKEN is not set. Set it to your Beacon Blue Cloud token in your environment before starting Jupyter, then re-run this cell.")

In [3]:
# Fetching available columns from the Beacon API
responseinfo = requests.get(
    "https://beacon-wb2-eutrophication.maris.nl/api/query/available-columns",
    headers={"Authorization": f"Bearer {TOKEN}"},
    timeout=60,  # fail with an exception instead of hanging if the server does not answer
)
# Stop on an HTTP error (for example a rejected token) instead of parsing an
# error body as if it were the list of column names.
responseinfo.raise_for_status()
params = responseinfo.json()

#### Below you can search through the available columns by entering a search term between the quotes of search_columns(" ").

In [15]:
# Search function to find columns based on a search term
def search_columns(search_term, columns=None):
    """Print every column name that contains ``search_term``.

    The comparison is case-insensitive: both the search term and each column
    name are lower-cased before the substring test.

    Parameters
    ----------
    search_term : str
        Text to look for inside the column names.
    columns : iterable of str, optional
        Column names to search. When omitted, the notebook-level ``params``
        list (fetched from the Beacon API in an earlier cell) is used.

    Returns
    -------
    None
        The matches, or a "no match" message, are printed.
    """
    if columns is None:
        # Default to the columns fetched earlier in the notebook.
        columns = params

    needle = search_term.lower()
    matches = [col for col in columns if needle in col.lower()]

    if matches:
        print("Matching columns:")
        for match in matches:
            print(match)
    else:
        print("No matching columns found.")

# Enter your search term between the quotes (upper/lower case does not matter)
search_columns("common_CHL")

Matching columns:
COMMON_CHLOROPHYLL_PER_VOLUME
COMMON_CHLOROPHYLL_PER_VOLUME_QC
COMMON_CHLOROPHYLL_PER_VOLUME_UNITS
COMMON_CHLOROPHYLL_PER_VOLUME_STANDARD_NAME
COMMON_CHLOROPHYLL_PER_VOLUME_P01
COMMON_CHLOROPHYLL_PER_VOLUME_P06
COMMON_CHLOROPHYLL_L05
COMMON_CHLOROPHYLL_L06
COMMON_CHLOROPHYLL_L22
COMMON_CHLOROPHYLL_L35
COMMON_CHLOROPHYLL_L33


In [35]:
# Your JSON query request
query = {
    # Columns to retrieve. When a column is given an "alias", the filters and
    # the output section below refer to it by that alias.
    "query_parameters": [
        {"column_name": "SOURCE_BDI"},
        {"column_name": "SOURCE_BDI_DATASET_ID"},
        # {"column_name": "COMMON_EDMO_CODE"},
        # {"column_name": "COMMON_EDMERP_CODE"},
        # {"column_name": "COMMON_CSR"},
        # {"column_name": "COMMON_PLATFORM_L06"},
        # {"column_name": "COMMON_PLATFORM_B76"},
        # {"column_name": "COMMON_PLATFORM_C17"},
        {"column_name": "COMMON_ODV_TAG"},

        {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME", "alias": "CHLOROPHYLL"},
        {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_QC", "alias": "CHLOROPHYLL_QC"},
        {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_UNITS"},
        {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_STANDARD_NAME"},
        {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P01"},
        {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P06"},
        # {"column_name": "COMMON_CHLOROPHYLL_L05"},
        # {"column_name": "COMMON_CHLOROPHYLL_L06"},
        # {"column_name": "COMMON_CHLOROPHYLL_L22"},
        # {"column_name": "COMMON_CHLOROPHYLL_L33"},
        # {"column_name": "COMMON_CHLOROPHYLL_L35"},

        {"column_name": "COMMON_TIME", "alias": "time_ISO8601"},
        {"column_name": "COMMON_DEPTH", "alias": "DEPTH"},
        {"column_name": "COMMON_DEPTH_QC", "alias": "DEPTH_QC"},

        {"column_name": "COMMON_LONGITUDE", "alias": "LONGITUDE"},
        {"column_name": "COMMON_LATITUDE", "alias": "LATITUDE"},
    ],
    # Restrict the result: time window, depth range, bounding box, and only
    # rows that have a chlorophyll value.
    "filters": [
        {"for_query_parameter": "time_ISO8601", "min": "2015-01-01T00:00:00", "max": "2015-03-01T23:00:00"},
        {"for_query_parameter": "DEPTH", "min": 0, "max": 5},
        {"for_query_parameter": "LONGITUDE", "min": -44, "max": 5},
        {"for_query_parameter": "LATITUDE", "min": 15, "max": 60},
        {"is_not_null": {"for_query_parameter": "CHLOROPHYLL"}},
    ],

    # Request one data format: keep exactly ONE "output" entry active and leave
    # the others commented out (a Python dict literal silently keeps only the
    # last occurrence of a repeated key).
    # 1) PARQUET
        # "output": {"format": "parquet"}
    # 2) ARROW
        # "output": {"format": "ipc"}
    # 3) netCDF
        # "output": {"format": "netcdf"}
    # 4) ODV
    "output": {
        "format": {
            "odv": {
                "longitude_column": {"column_name": "LONGITUDE"},
                "latitude_column": {"column_name": "LATITUDE"},
                "time_column": {"column_name": "time_ISO8601"},
                "depth_column": {
                    "column_name": "DEPTH",
                    # "unit": "m",
                    "comment": "Codes: SDN:P01::ADEPZZ01 SDN:P06::ULAA",
                    "qf_column": "DEPTH_QC",
                },
                "data_columns": [
                    {
                        "column_name": "CHLOROPHYLL",
                        # "unit": "mg/m^3",
                        "comment": "Codes: SDN:P01::CHLTVOLU SDN:P06::UMMC",
                        "qf_column": "CHLOROPHYLL_QC",
                    }
                ],
                "metadata_columns": [
                    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_UNITS"},
                    # {"column_name": "DEPTH_UNITS"},
                    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P01"},
                    {"column_name": "COMMON_CHLOROPHYLL_PER_VOLUME_P06"},
                    {"column_name": "SOURCE_BDI"},
                    {"column_name": "SOURCE_BDI_DATASET_ID"},
                ],
                "qf_schema": "SEADATANET",
                "key_column": "COMMON_ODV_TAG",
                "archiving": "zip_deflate",
            }
        }
    },
}


In [36]:
# Send the query to the Beacon API as a JSON request body
request_headers = {
    "Authorization" : f"Bearer {TOKEN}",
    "Content-type": "application/json"
}
response = requests.post(
    "https://beacon-wb2-eutrophication.maris.nl/api/query",
    data=json.dumps(query),
    headers=request_headers,
)

# Report anything other than a successful (200) answer
if response.status_code not in (200, 204):
    # The request failed: show the status code and the server's message
    print(f"Error: {response.status_code}")
    print(response.text)
elif response.status_code == 204:
    print("No data has been found for your query, please update your input fields above and run the notebook again.")

In [37]:
# Create output directory if it doesn't exist
# exist_ok=True makes the cell safe to re-run and removes the gap between
# checking for the directory and creating it.
os.makedirs("beacon_output", exist_ok=True)

### Uncomment and run for parquet output

In [9]:
# # Save the response content to a file
# with open("beacon_output/merged_subset.parquet", "wb") as f:
#     f.write(response.content)

# # Wrap the response content in a BytesIO object
# response_content = BytesIO(response.content)
# # Read the Parquet file into a Pandas DataFrame
# df = pd.read_parquet(response_content, engine='pyarrow')
# # This is a temporary fix as the merged instance works with seconds since 1970 which isn't supported by parquet (but will be addressed by beacon in the future)
# df['datetime'] = pd.to_datetime(df['datetime'], unit='s')
# df

### Uncomment and run for ipc arrow output

In [10]:
# # Save the response content to a file
# with open("beacon_output/merged_subset.arrow", "wb") as f:
#     f.write(response.content)

# # Read the Arrow IPC file into a Pandas DataFrame
# df = pd.read_feather(f"./beacon_output/merged_subset.arrow")
# df = df.assign(datetime=pd.to_datetime(df['datetime'])).set_index('datetime').sort_index()
# df

### Uncomment and run for netCDF output

In [14]:
# # Save the response content to a file
# with open("beacon_output/merged_subset.nc", "wb") as f:
#     f.write(response.content)


# # Read the netCDF file into an xarray Dataset
# df =xr.open_dataset(f"./beacon_output/merged_subset.nc").to_dataframe()
# df = df.assign(datetime=pd.to_datetime(df['datetime'])).set_index('datetime').sort_index()
# df

### Uncomment and run for the ODV output

In [38]:
# Save the response content to a file, but only when the query succeeded.
# For any other status the body is not an ODV archive (it is empty or holds an
# error message), so writing it would leave a broken .zip file behind.
if response.status_code == 200:
    with open("beacon_output/merged_subset_ODV.zip", "wb") as f:
        f.write(response.content)
else:
    print(f"Nothing saved: the query returned HTTP status {response.status_code}.")
