# This is a notebook for querying the World Ocean Database instance for the 1.0.0 Beacon release.

-   You can run each cell individually by pressing "shift + enter".
-   For more information, questions, or bug reports, please contact us on Slack:
    -   https://join.slack.com/t/beacontechnic-wwa5548/shared_invite/zt-2dp1vv56r-tj_KFac0sAKNuAgUKPPDRg

# Documentation for querying Beacon can be found here:

-   https://maris-development.github.io/beacon/docs/1.0.1/query-docs/querying/json.html


#### To get access to the Beacon endpoint, fill in your unique personal token between the quotation marks (`""`) in the cell below.


In [None]:
# Personal Beacon access token. It is sent as a Bearer token in the
# Authorization header of the requests made in this notebook.
# Do not share or commit the notebook with a real token filled in.
TOKEN = ""
# Base URL of the Beacon instance; the API paths (/api/health, /api/query, ...)
# used in the cells below are appended to it.
BEACON_INSTANCE_URL = "https://beacon-wod.maris.nl"

In [12]:
# Installing the required libraries
%pip install requests
%pip install pandas
%pip install matplotlib
%pip install xarray
%pip install netCDF4
%pip install pandas
%pip install --upgrade cartopy
%pip install scipy
%pip install pyarrow

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip






[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


#### Import the required packages


In [13]:
import requests
import json
from io import BytesIO
import xarray as xr
import datetime
import pandas as pd
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import warnings

### Beacon Status Check


In [14]:
# Check that the Beacon instance responds before running any query.
try:
    response = requests.get(
        f"{BEACON_INSTANCE_URL}/api/health",
        headers={"Authorization": f"Bearer {TOKEN}"},
        # Seconds; without a timeout an unreachable server can block this cell indefinitely.
        timeout=30,
    )
except requests.exceptions.RequestException as error:
    # Connection failures (DNS error, refused connection, timeout) never yield a status code.
    raise RuntimeError("Beacon is down or not reachable.") from error

if response.status_code == 200:
    print("Beacon is up and running.")
else:
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, which throws away every variable defined so far. An
    # exception stops "Run All" at this cell and keeps the kernel alive.
    raise RuntimeError(
        f"Beacon is down or not reachable (HTTP status {response.status_code})."
    )

Beacon is up and running.


In [15]:
# Fetch the list of columns that can be queried on this Beacon instance.
responseinfo = requests.get(
    f"{BEACON_INSTANCE_URL}/api/query/available-columns",
    headers={"Authorization": f"Bearer {TOKEN}"},
)
# Fail here on an HTTP error (for example a missing or invalid token) instead of
# parsing the error body and using it as if it were the column list.
responseinfo.raise_for_status()
params = responseinfo.json()

#### Below you can search through the available columns by entering a search term between the quotation marks in `search_columns("")`.


In [21]:
def search_columns(search_term, columns=None):
    """Print every column name that contains ``search_term`` (case-insensitive).

    Parameters
    ----------
    search_term : str
        Text to look for inside the column names.
    columns : iterable of str, optional
        Column names to search. Defaults to the notebook-level ``params``
        list that was loaded from the Beacon instance in an earlier cell.

    Returns
    -------
    None
        The matches are printed, one per line, after a "Matching columns:"
        header; a single "No matching columns found." line is printed when
        nothing matches.
    """
    if columns is None:
        # Fall back to the globally fetched column list so existing
        # one-argument calls keep working.
        columns = params

    needle = search_term.lower()
    matches = [col for col in columns if needle in col.lower()]

    if matches:
        print("Matching columns:")
        for match in matches:
            print(match)
    else:
        print("No matching columns found.")

search_columns("time") # Enter your search term between the quotation marks

Matching columns:
time
time.standard_name
time.long_name
time.units
time.axis
GMT_time
GMT_time.long_name
real_time
real_time.long_name
real_time.comment
GMT_sample_start_time
GMT_sample_start_time.long_name
GMT_sample_start_time.units
GMT_sample_start_time.comment
PrimaryProd_Incubation Time
PrimaryProd_Incubation Time.units
PrimaryProd_Incubation Time.comment
I_Incubation_time
I_Incubation_time.long_name
I_Incubation_time.comment
I_start_time
I_start_time.long_name
I_start_time.units
I_start_time.comment
Chlorophyll_Incubation Time
Chlorophyll_Incubation Time.units
Chlorophyll_Incubation Time.comment


#### Define your input parameters here


In [22]:
# User-editable inputs for the query that is built in the next code cell.
# `parameter` is requested together with its quality-flag column named
# "<parameter>_WODflag"; use search_columns(...) above to find valid names.
parameter = "Temperature" # column name to query
# Time window. "T00:00:00" is appended to both dates when the query is built,
# so the upper bound sent to Beacon is midnight at the START of maxdate.
mindate = "2010-01-01" # yyyy-mm-dd
maxdate = "2010-03-01" # yyyy-mm-dd
# Bounding box, applied as min/max filters on the "lon" and "lat" columns.
minlon = -180
maxlon = 180
minlat = -90
maxlat = 90
# Depth range, applied as a min/max filter on the "z" column
# (presumably metres: the query aliases that column as "Depth [m]").
mindepth = 0
maxdepth = 500

# Output toggles. NOTE(review): not read by the query or save cells directly
# below; presumably used by export cells further down the notebook (confirm).
create_netcdf = True #True or False
create_parquet = True #True or False

#### This cell builds the query body from your input parameters. You can add other "query_parameters" and "filters" to suit your needs.

-   For more query examples and explanations, you can take a look at https://maris-development.github.io/beacon/.


In [23]:
# Request body for the Beacon query endpoint, built from the input parameters above.
query = {
    # Columns to retrieve. Each "alias" is the name that the filters and the
    # output settings below use to refer to that column.
    "query_parameters": [
        {"column_name": parameter, "alias": parameter},
        {"column_name": f"{parameter}_WODflag", "alias": f"{parameter}_QC"},
        {"column_name": "time", "alias": "time_ISO8601"},
        {"column_name": "z", "alias": "Depth [m]"},
        {"column_name": "z_WODflag", "alias": "DEPTH_QC"},
        {"column_name": "lon", "alias": "LONGITUDE"},
        {"column_name": "lat", "alias": "LATITUDE"},
        {"column_name": "dataset", "alias": "dataset"},
        {"column_name": "@identifier", "alias": "file_name"},
    ],
    # Row filters: time window, depth range, bounding box, and removal of
    # missing values for the requested parameter.
    "filters": [
        {
            "for_query_parameter": "time_ISO8601",
            "min": f"{mindate}T00:00:00",
            "max": f"{maxdate}T00:00:00",
        },
        {"for_query_parameter": "Depth [m]", "min": mindepth, "max": maxdepth},
        {"for_query_parameter": "LONGITUDE", "min": minlon, "max": maxlon},
        {"for_query_parameter": "LATITUDE", "min": minlat, "max": maxlat},
        {"is_not_null": {"for_query_parameter": parameter}},
        # -10000000000 is the value used for missing data in the WOD, as they
        # don't store a fill attribute in their original data.
        {"for_query_parameter": parameter, "neq": -10000000000},
    ],
    "output": {
        # To learn more about setting different output formats, see
        # https://maris-development.github.io/beacon/docs/1.0.1/query-docs/querying/json.html#output-format
        "format": {
            "odv": {
                "longitude_column": {"column_name": "LONGITUDE"},
                "latitude_column": {"column_name": "LATITUDE"},
                "time_column": {"column_name": "time_ISO8601"},
                "depth_column": {"column_name": "Depth [m]", "qf_column": "DEPTH_QC"},
                "data_columns": [
                    {"column_name": f"{parameter}", "qf_column": f"{parameter}_QC"},
                ],
                "metadata_columns": [{"column_name": "dataset"}],
                "qf_schema": "WOD",
                "key_column": "file_name",
                # Ask for the result as a zip archive using deflate compression.
                "archiving": "zip_deflate",
            }
        },
    },
}

#### This is the post request that is sent to Beacon with the above specified body.


In [24]:
# Send the query body to Beacon. The payload is serialised to JSON by hand,
# so the Content-type header is set explicitly.
response = requests.post(
    f"{BEACON_INSTANCE_URL}/api/query",
    data=json.dumps(query),
    headers={
        "Authorization": f"Bearer {TOKEN}",
        "Content-type": "application/json",
    },
)

if response.status_code != 200:
    # Show the server's explanation first, then stop. Raising (rather than only
    # printing) halts "Run All" here, so the following cell cannot write the
    # error body to disk as if it were the requested zip archive.
    print(response.text)
    raise RuntimeError(
        "An error occurred while processing your request. Please check your input fields and try again."
    )

### Create output files with the results of the query


In [25]:
# Create the output folder next to the notebook. exist_ok=True makes this a
# no-op when the folder is already there, so no separate existence check is needed.
os.makedirs("./Beacon_V1.0.0_Output", exist_ok=True)

def generate_file_name(parameter, mindate, maxdate, minlon, maxlon, minlat, maxlat, mindepth, maxdepth, extension):
    """Build an output file name that encodes the query settings.

    The result has the form
    ``<parameter>_[<minlat>_<minlon>]_[<maxlat>_<maxlon>]_<mindate>_<maxdate>_[<mindepth>_<maxdepth>m].<extension>``.

    All arguments are inserted with their default string formatting;
    ``extension`` is given without the leading dot.
    """
    name_parts = [
        f"{parameter}",
        f"[{minlat}_{minlon}]",  # south-west corner of the region
        f"[{maxlat}_{maxlon}]",  # north-east corner of the region
        f"{mindate}",
        f"{maxdate}",
        f"[{mindepth}_{maxdepth}m]",
    ]
    return "_".join(name_parts) + f".{extension}"

# Save the zip file
if response.status_code != 200:
    # A failed query returns an error message instead of an archive; writing
    # that to disk would leave a corrupt .zip behind and still report success.
    raise RuntimeError(
        f"Beacon query failed with HTTP status {response.status_code}; no file was saved."
    )

odv_file_name = generate_file_name(parameter, mindate, maxdate, minlon, maxlon, minlat, maxlat, mindepth, maxdepth, "zip")
# Binary mode: the response body is written to disk unchanged.
with open(f"./Beacon_V1.0.0_Output/{odv_file_name}", "wb") as f:
    f.write(response.content)
print(f"Data saved to {odv_file_name}")

Data saved to Temperature_[-90_-180]_[90_180]_2010-01-01_2010-03-01_[0_500m].zip
