# NITRATE NOTEBOOK

This notebook queries the merged BEACON instance for harmonized and standardized nitrate from different BDIs, together with its conversion to per-mass and per-volume units, and retrieves the output in different formats, including ODV.

In [None]:
# Installing Python packages
# %pip install pyarrow
# %pip install pandas
# %pip install xarray

In [1]:
# Importing necessary libraries
import pyarrow as pa
import pandas as pd
import xarray as xr
import os
import requests
import json
from io import BytesIO
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import Button, HBox, VBox

In [2]:
# Set your Beacon Blue Cloud Token
# The token is a personal credential, so it is read from the environment instead
# of being written into the notebook. Set it before starting Jupyter, e.g.
#   export BEACON_TOKEN="<your token>"
def load_beacon_token(env=None, env_var="BEACON_TOKEN"):
    """Return the Beacon token found in ``env`` under ``env_var``.

    Parameters
    ----------
    env : mapping, optional
        Mapping to read from; defaults to ``os.environ``.
    env_var : str
        Name of the variable holding the token.

    Returns
    -------
    str
        The token with surrounding whitespace removed, or ``""`` when unset.
    """
    source = os.environ if env is None else env
    return source.get(env_var, "").strip()


TOKEN = load_beacon_token()
if not TOKEN:
    print("WARNING: BEACON_TOKEN is not set; requests will be sent without a valid token.")


Swagger page here: https://beacon-wb2-eutrophication.maris.nl/swagger/

In [3]:
# Fetching available columns from the Beacon API
responseinfo = requests.get(
    "https://beacon-wb2-eutrophication.maris.nl/api/query/available-columns",
    headers={"Authorization": f"Bearer {TOKEN}"},
)
# Stop here with the HTTP status if the call failed, instead of letting an
# error payload be treated as the list of column names further down.
responseinfo.raise_for_status()
params = responseinfo.json()

#### Below you can search through the available columns by entering text between the brackets of search_columns(" ").

In [4]:
# Search function to find columns based on a search term
def search_columns(search_term, columns=None):
    """Print every column name that contains ``search_term``, ignoring case.

    Parameters
    ----------
    search_term : str
        Text to look for inside the column names.
    columns : iterable of str, optional
        Column names to search. Defaults to the global ``params`` list
        fetched from the Beacon API.

    Returns
    -------
    None
        Results are printed, one column name per line.
    """
    if columns is None:
        columns = params
    needle = search_term.lower()
    matches = [col for col in columns if needle in col.lower()]

    if matches:
        print("Matching columns:")
        for match in matches:
            print(match)
    else:
        print("No matching columns found.")

search_columns("COMMON_PLATFORM")  # Enter your search term here; matching is case-insensitive

Matching columns:
COMMON_PLATFORM_L06
COMMON_PLATFORM_C17


In [5]:
# Polygon used for spatial filtering in the query.
# Each vertex is a two-element pair (presumably [longitude, latitude], as in
# GeoJSON - confirm against the Beacon documentation). The last vertex repeats
# the first one, so the ring is closed.
MED_POLYGON = [
    [-48.50, 28.37], [-46.04, 64.97], [-1.75, 79.96], [65.01, 68.27],
    [1.9217, 46.309], [-5.62, 38.88], [-5.62, 31.72], [-48.50, 28.37],
]

Here I'll query all the nitrate (NO3) columns to test what has been imported into the merged Beacon instance.
* EMODNET: Water body nitrate
* WOD: Nitrate
* CMEMS: NTRA NTAW
* COMMON_ORIGIN_NITRATE
* COMMON_NITRATE_PER_MASS
* COMMON_NITRATE_PER_VOLUME


Build the query here:

In [6]:

def _with_alias(pairs):
    """Turn (column_name, alias) pairs into Beacon query-parameter dicts."""
    return [{"column_name": column, "alias": alias} for column, alias in pairs]


def _as_is(names):
    """Turn column names into query-parameter dicts that keep the original name."""
    return [{"column_name": name} for name in names]


# Columns requested from Beacon, in output order. Aliased columns are referred
# to by their alias in the filters and in the output definition.
query_parameters = (
    # MERGED ORIGIN NITRATE
    _with_alias([
        ("COMMON_ORIGIN_NITRATE", "ORIGIN_NITRATE"),
        ("COMMON_ORIGIN_NITRATE_QC", "ORIGIN_NITRATE_QC"),
        ("COMMON_ORIGIN_NITRATE_UNITS", "ORIGIN_NITRATE_UNITS"),
        ("COMMON_ORIGIN_NITRATE_STANDARD_NAME", "ORIGIN_NITRATE_STANDARD_NAME"),
        ("COMMON_ORIGIN_NITRATE_P01", "ORIGIN_NITRATE_P01"),
        ("COMMON_ORIGIN_NITRATE_P06", "ORIGIN_NITRATE_P06"),
    ])
    # CONVERTED NITRATE per volume FROM MERGED
    + _with_alias([
        ("COMMON_NITRATE_PER_VOLUME", "NITRATE_PER_VOLUME"),
        ("COMMON_NITRATE_PER_VOLUME_QC", "NITRATE_PER_VOLUME_QC"),
        ("COMMON_NITRATE_PER_VOLUME_UNITS", "NITRATE_PER_VOLUME_UNITS"),
        ("COMMON_NITRATE_PER_VOLUME_STANDARD_NAME", "NITRATE_PER_VOLUME_STANDARD_NAME"),
        ("COMMON_NITRATE_PER_VOLUME_P01", "NITRATE_PER_VOLUME_P01"),
        ("COMMON_NITRATE_PER_VOLUME_P06", "NITRATE_PER_VOLUME_P06"),
    ])
    # CONVERTED NITRATE per mass FROM MERGED
    + _with_alias([
        ("COMMON_NITRATE_PER_MASS", "NITRATE_PER_MASS"),
        ("COMMON_NITRATE_PER_MASS_QC", "NITRATE_PER_MASS_QC"),
        ("COMMON_NITRATE_PER_MASS_UNITS", "NITRATE_PER_MASS_UNITS"),
        ("COMMON_NITRATE_PER_MASS_STANDARD_NAME", "NITRATE_PER_MASS_STANDARD_NAME"),
        ("COMMON_NITRATE_PER_MASS_P01", "NITRATE_PER_MASS_P01"),
        ("COMMON_NITRATE_PER_MASS_P06", "NITRATE_PER_MASS_P06"),
    ])
    # NTRA in mmol m-3 from CMEMS
    + _with_alias([
        ("NTRA", "CMEMS_NTRA"),
        ("NTRA_QC", "CMEMS_NTRA_QC"),
        ("NTRA.units", "CMEMS_NTRA_UNITS"),
        ("NTRA.standard_name", "CMEMS_NTRA_STANDARD_NAME"),
    ])
    # NTAW in µmol kg-1 from CMEMS
    + _with_alias([
        ("NTAW", "CMEMS_NTAW"),
        ("NTAW_QC", "CMEMS_NTAW_QC"),
        ("NTAW.units", "CMEMS_NTAW_UNITS"),
        ("NTAW.standard_name", "CMEMS_NTAW_STANDARD_NAME"),
    ])
    # NITRATE from WOD
    + _with_alias([
        ("Nitrate", "WOD_NITRATE"),
        ("Nitrate_WODflag", "WOD_NITRATE_QC"),
        ("Nitrate.units", "WOD_NITRATE_UNITS"),
    ])
    # Water body nitrate from EMODnet
    + _with_alias([
        ("Water body nitrate", "EMODNET_WATER_BODY_NITRATE"),
        ("Water body nitrate_qc", "EMODNET_WATER_BODY_NITRATE_QC"),
        ("Water body nitrate.units", "EMODNET_WATER_BODY_NITRATE_UNITS"),
    ])
    # COMMON NITRATE INSTRUMENTS
    + _with_alias([
        ("COMMON_NITRATE_L05", "NITRATE_L05"),
        ("COMMON_NITRATE_L22", "NITRATE_L22"),
        ("COMMON_NITRATE_L33", "NITRATE_L33"),
    ])
    # COMMON PLATFORMS
    + _as_is([
        "COMMON_PLATFORM_L06",
        "COMMON_PLATFORM_C17",
    ])
    # COMMON SOURCES
    + _as_is([
        "SOURCE_BDI",
        "SOURCE_BDI_DATASET_ID",
        "COMMON_EDMO_CODE",
        "COMMON_FEATURE_TYPE",
        "COMMON_CSR",
        "COMMON_ODV_TAG",
    ])
    # DEPTH (harmonized)
    + _with_alias([
        ("COMMON_DEPTH", "DEPTH"),
        ("COMMON_DEPTH_QC", "DEPTH_QC"),
        ("COMMON_DEPTH_UNITS", "DEPTH_UNITS"),
        ("COMMON_DEPTH_STANDARD_NAME", "DEPTH_STANDARD_NAME"),
        ("COMMON_DEPTH_P01", "DEPTH_P01"),
        ("COMMON_DEPTH_P06", "DEPTH_P06"),
    ])
    # DEPTH (as delivered by the origin)
    + _with_alias([
        ("COMMON_ORIGIN_DEPTH", "ORIGIN_DEPTH"),
        ("COMMON_ORIGIN_DEPTH_QC", "ORIGIN_DEPTH_QC"),
        ("COMMON_ORIGIN_DEPTH_UNITS", "ORIGIN_DEPTH_UNITS"),
        ("COMMON_ORIGIN_DEPTH_STANDARD_NAME", "ORIGIN_DEPTH_STANDARD_NAME"),
        ("COMMON_ORIGIN_DEPTH_P01", "ORIGIN_DEPTH_P01"),
        ("COMMON_ORIGIN_DEPTH_P06", "ORIGIN_DEPTH_P06"),
    ])
    # TIME AND POSITION
    + _with_alias([
        ("COMMON_TIME", "TIME"),
        ("COMMON_LONGITUDE", "LONGITUDE"),
        ("COMMON_LATITUDE", "LATITUDE"),
    ])
)

Select the output format:

In [7]:
# Dropdown offering False/True for "deliver the result as ODV".
# 'initial' description width keeps the full label visible.
style = dict(description_width='initial')
ODV_OUTPUT = widgets.Dropdown(
    description='Get ODV Output format:',
    options=[False, True],
    style=style,
)
display(ODV_OUTPUT)

Dropdown(description='Get ODV Output format:', options=(False, True), style=DescriptionStyle(description_width…

In [8]:
# Echo the current dropdown selection; the next cell branches on this value.
print("ODV_OUTPUT: ", ODV_OUTPUT.value)

ODV_OUTPUT:  False


In [None]:

# Build the "output" section of the query.
# - ODV selected: describe which aliased columns act as position, time, depth,
#   data and metadata columns. Every name used here must be an alias (or a
#   column name without alias) defined in query_parameters.
# - otherwise: let the user pick parquet / Arrow IPC / netCDF from a dropdown.
if ODV_OUTPUT.value:
    output = {
        "format": {
            "odv": {
                "longitude_column": {"column_name": "LONGITUDE"},
                "latitude_column": {"column_name": "LATITUDE"},
                "time_column": {"column_name": "TIME"},
                "depth_column": {
                    "column_name": "DEPTH",
                    "unit": "m",
                    "comment": "Codes: SDN:P01::ADEPZZ01 SDN:P06::ULAA",
                    "qf_column": "DEPTH_QC"
                },
                "data_columns": [
                    {
                        "column_name": "ORIGIN_NITRATE",
                        "unit": "",
                        "comment": "",
                        "qf_column": "ORIGIN_NITRATE_QC"
                    },
                    {
                        "column_name": "NITRATE_PER_VOLUME",
                        "unit": "umol/l",
                        "comment": "Codes: SDN:P01::NTRAZZXX SDN:P06::UPOX",
                        "qf_column": "NITRATE_PER_VOLUME_QC"
                    },
                    {
                        "column_name": "NITRATE_PER_MASS",
                        "unit": "umol/kg",
                        "comment": "Codes: SDN:P01::MDMAP005 SDN:P06::KGUM",
                        "qf_column": "NITRATE_PER_MASS_QC"
                    },
                    {
                        "column_name": "CMEMS_NTRA",
                        "unit": "mmol m-3",
                        "comment": "",
                        # Quality flags of NTRA itself (alias of NTRA_QC)
                        "qf_column": "CMEMS_NTRA_QC"
                    },
                    {
                        "column_name": "CMEMS_NTAW",
                        "unit": "umol/kg",
                        "comment": "",
                        "qf_column": "CMEMS_NTAW_QC"
                    },
                    {
                        "column_name": "WOD_NITRATE",
                        "unit": "",
                        "comment": "",
                        "qf_column": "WOD_NITRATE_QC"
                    },
                    {
                        # Alias of "Water body nitrate" in query_parameters
                        "column_name": "EMODNET_WATER_BODY_NITRATE",
                        "unit": "",
                        "comment": "",
                        "qf_column": "EMODNET_WATER_BODY_NITRATE_QC"
                    }
                ],
                "metadata_columns": [
                    # COMMON SOURCES
                    {"column_name": "COMMON_EDMO_CODE"},
                    {"column_name": "SOURCE_BDI"},
                    {"column_name": "SOURCE_BDI_DATASET_ID"},
                    {"column_name": "COMMON_CSR"},
                    {"column_name": "COMMON_ODV_TAG"},
                    {"column_name": "COMMON_FEATURE_TYPE"},

                    # DEPTH
                    {"column_name": "DEPTH_UNITS"},

                    # COMMON PLATFORMS
                    {"column_name": "COMMON_PLATFORM_L06"},
                    {"column_name": "COMMON_PLATFORM_C17"},

                    # COMMON NITRATE PER VOLUME
                    {"column_name": "NITRATE_PER_VOLUME_UNITS"},
                    {"column_name": "NITRATE_PER_VOLUME_STANDARD_NAME"},
                    {"column_name": "NITRATE_PER_VOLUME_P01"},
                    {"column_name": "NITRATE_PER_VOLUME_P06"},

                    # COMMON NITRATE PER MASS
                    {"column_name": "NITRATE_PER_MASS_UNITS"},
                    {"column_name": "NITRATE_PER_MASS_STANDARD_NAME"},
                    {"column_name": "NITRATE_PER_MASS_P01"},
                    {"column_name": "NITRATE_PER_MASS_P06"},

                    # COMMON NITRATE INSTRUMENTS
                    {"column_name": "NITRATE_L05"},
                    {"column_name": "NITRATE_L22"},
                    {"column_name": "NITRATE_L33"},

                    # extra test for units (per-volume and per-mass units are
                    # already listed above, so they are not repeated here)
                    {"column_name": "ORIGIN_NITRATE_UNITS"},
                    {"column_name": "CMEMS_NTRA_UNITS"},
                    {"column_name": "CMEMS_NTAW_UNITS"},
                    {"column_name": "WOD_NITRATE_UNITS"},
                    {"column_name": "EMODNET_WATER_BODY_NITRATE_UNITS"}
                ],
                "qf_schema": "SEADATANET",
                "key_column": "COMMON_ODV_TAG",
                "archiving": "zip_deflate",
            }
        }
    }
else:

    format_selector = widgets.Dropdown(
        options=[('Parquet', 'parquet'), ('Arrow IPC', 'ipc'), ('netCDF', 'netcdf')],
        value='parquet',
        description='Output Format:',
    )
    display(format_selector)

    def set_output(change):
        """Widget callback: rebind the global ``output`` to the newly selected format."""
        global output
        output = {"format": change['new']}

    format_selector.observe(set_output, names='value')
    # Initial value, used when the dropdown is never touched (e.g. Run All).
    output = {"format": format_selector.value}

In [None]:
# Your JSON query request, assembled from named groups of filters.

# Range filters for time, depth, longitude and latitude
range_filters = [
    {"for_query_parameter": "TIME", "min": "2015-01-01T00:00:00", "max": "2015-01-31T23:00:00"},
    {"for_query_parameter": "DEPTH", "min": 0, "max": 10},
    {"for_query_parameter": "LONGITUDE", "min": -44, "max": 5},
    {"for_query_parameter": "LATITUDE", "min": 15, "max": 60},
    # {"for_query_parameter": "NITRATE_PER_MASS","min": -200, "max": 600},
    # {"for_query_parameter": "NITRATE_PER_VOLUME","min": -200, "max": 600},
]

# Polygon filter on the position columns
polygon_filter = {
    "longitude_query_parameter": "LONGITUDE",
    "latitude_query_parameter": "LATITUDE",
    "geometry": {"coordinates": [MED_POLYGON], "type": "Polygon"},
}

# Keep a row when at least one of the nitrate parameters is not null
any_nitrate_present = {
    "or": [
        {"is_not_null": {"for_query_parameter": name}}
        for name in ("ORIGIN_NITRATE", "NITRATE_PER_MASS", "NITRATE_PER_VOLUME")
    ]
}

query = {
    "query_parameters": query_parameters,
    "filters": [*range_filters, polygon_filter, any_nitrate_present],
    "output": output,
}


In [None]:
# Send the query to Beacon as a JSON body, authenticated with the bearer token.
request_headers = {
    "Authorization" : f"Bearer {TOKEN}",
    "Content-type": "application/json"
}
response = requests.post(
    "https://beacon-wb2-eutrophication.maris.nl/api/query",
    json.dumps(query),
    headers=request_headers,
)

# 200 carries the data and needs no message; 204 means an empty result;
# anything else is reported together with the server's response text.
if response.status_code != 200:
    if response.status_code == 204:
        print("No data has been found for your query, please update your input fields above and run the notebook again.")
    else:
        print(f"Error: {response.status_code}")
        print(response.text)

In [None]:
# Create output directory if it doesn't exist.
# exist_ok=True makes this a single call that is safe to re-run.
os.makedirs("beacon_output", exist_ok=True)

### Run for parquet output (this cell is active by default)

In [None]:
# Treat the response as parquet only when the query succeeded and parquet is the
# format that was actually sent in the query. Otherwise an error body or another
# format would be written to a .parquet file and fail to parse.
if response.status_code == 200 and query["output"]["format"] == "parquet":
    # Save the response content to a file
    with open("beacon_output/EOV_NITRATE_merged_subset.parquet", "wb") as f:
        f.write(response.content)

    # Read the parquet bytes straight from memory into a Pandas DataFrame
    df = pd.read_parquet(BytesIO(response.content), engine='pyarrow')
    # This is a temporary fix as the merged instance works with seconds since 1970 which isn't supported by parquet (but will be addressed by beacon in the future)
    df['TIME'] = pd.to_datetime(df['TIME'], unit='s')
else:
    print("Skipping parquet output: the query did not return a parquet result.")

### Uncomment and run for ipc arrow output

In [None]:
# # Save the response content to a file
# with open("beacon_output/EOV_NITRATE_merged_subset.arrow", "wb") as f:
#     f.write(response.content)

# # Read the Arrow IPC (Feather) file into a Pandas DataFrame
# # NOTE(review): the query aliases COMMON_TIME to TIME, so 'COMMON_TIME' below
# # probably needs to be 'TIME' - confirm before uncommenting.
# df = pd.read_feather(f"./beacon_output/EOV_NITRATE_merged_subset.arrow")
# df = df.assign(datetime=pd.to_datetime(df['COMMON_TIME'])).set_index('COMMON_TIME').sort_index()
# df

### Uncomment and run for netCDF output

In [None]:
# # Save the response content to a file
# with open("beacon_output/EOV_NITRATE_merged_subset.nc", "wb") as f:
#     f.write(response.content)


# # Read the netCDF file into an xarray Dataset and convert it to a Pandas DataFrame
# # NOTE(review): the query aliases COMMON_TIME to TIME, so 'COMMON_TIME' below
# # probably needs to be 'TIME' - confirm before uncommenting.
# df =xr.open_dataset(f"./beacon_output/EOV_NITRATE_merged_subset.nc").to_dataframe()
# df = df.assign(datetime=pd.to_datetime(df['COMMON_TIME'])).set_index('COMMON_TIME').sort_index()
# df

### Run for the ODV output

In [None]:
# Save the response as a zip archive only when the query succeeded and ODV was
# the format sent in the query. Without this check a parquet/Arrow/netCDF
# payload or an error body would be stored under a .zip name.
requested_format = query["output"]["format"]
if response.status_code == 200 and isinstance(requested_format, dict) and "odv" in requested_format:
    # Save the response content to a file
    with open("beacon_output/EOV_NITRATE_merged_subset_ODV.zip", "wb") as f:
        f.write(response.content)
else:
    print("Skipping ODV output: the query did not return an ODV result.")


# Here you can run some quick tests on your parquet output

In [None]:
# Distinct provenance values present in the result
for column in ('SOURCE_BDI', 'COMMON_FEATURE_TYPE', 'COMMON_EDMO_CODE', 'COMMON_CSR'):
    print(df[column].unique())

# check units
# The names below are the aliases defined in query_parameters.
for column in (
    'CMEMS_NTRA_UNITS',
    'CMEMS_NTAW_UNITS',
    'WOD_NITRATE_UNITS',
    'EMODNET_WATER_BODY_NITRATE_UNITS',
    'NITRATE_PER_VOLUME_UNITS',
    'NITRATE_PER_MASS_UNITS',
    'ORIGIN_NITRATE_UNITS',
):
    print(df[column].unique())



In [None]:
# This should have been fixed by Robin, I leave it just in case....
# Filter out values that are -10000000000. This is the value used for missing data in the WOD, as they don't store a fill attribute in their original data.
# NOTE(review): uses the nitrate aliases from query_parameters; the comparison also drops rows where any of the three columns is NaN - confirm that is intended before uncommenting.

# df = df[(df['NITRATE_PER_VOLUME'] > -10000000000) & (df['NITRATE_PER_MASS'] > -10000000000) & (df['ORIGIN_NITRATE'] > -10000000000)]

In [None]:
# Summary statistics of every nitrate value column in the result.
# The names are the aliases defined in query_parameters.
df[[
    'NITRATE_PER_VOLUME',
    'NITRATE_PER_MASS',
    'ORIGIN_NITRATE',
    'CMEMS_NTRA',
    'CMEMS_NTAW',
    'WOD_NITRATE',
    'EMODNET_WATER_BODY_NITRATE',
]].describe()

In [None]:
# Print the minimum and maximum of each merged nitrate column
for column in ('NITRATE_PER_VOLUME', 'NITRATE_PER_MASS', 'ORIGIN_NITRATE'):
    values = df[column]
    print(f"Range of {column}: ", values.min(), "to", values.max())

In [None]:
# Scatter every nitrate series against time on one shared axis.
# The column names are the aliases defined in query_parameters.
series_colours = [
    ('NITRATE_PER_VOLUME', 'blue'),
    ('NITRATE_PER_MASS', 'green'),
    ('ORIGIN_NITRATE', 'red'),
    ('CMEMS_NTRA', 'orange'),
    ('CMEMS_NTAW', 'gold'),
    ('WOD_NITRATE', 'purple'),
    ('EMODNET_WATER_BODY_NITRATE', 'brown'),
]

# The first series creates the figure; the remaining ones are drawn onto its axis.
first_column, first_colour = series_colours[0]
ax = df.plot.scatter(x='TIME', y=first_column, color=first_colour, label=first_column, alpha=0.5, figsize=(12, 6))
for column, colour in series_colours[1:]:
    df.plot.scatter(x='TIME', y=column, color=colour, label=column, alpha=0.5, ax=ax)

# Replace the y label left by the last plotted column; the legend names each series.
ax.set_ylabel("Nitrate (units differ per series)")
ax.legend()

In [None]:
import geopandas as gpd
# Wrap the DataFrame in a GeoDataFrame with one point per row (EPSG:4326 coordinates)
point_geometry = gpd.points_from_xy(df.LONGITUDE, df.LATITUDE)
gdf = gpd.GeoDataFrame(df, geometry=point_geometry, crs="EPSG:4326")

import plotly.graph_objects as go
import plotly.express as px
# Make sure TIME has a datetime dtype before grouping on it
gdf["TIME"] = pd.to_datetime(gdf["TIME"])

# Group by calendar day and SOURCE_BDI, then take the mean per-volume nitrate of each group
daily_by_source = gdf.groupby([pd.Grouper(key="TIME", freq="D"), "SOURCE_BDI"])
mean_daily = daily_by_source["NITRATE_PER_VOLUME"].mean().reset_index()

# One line per SOURCE_BDI
fig = px.line(mean_daily, x="TIME", y="NITRATE_PER_VOLUME", color="SOURCE_BDI")
fig.show()

In [None]:

# Calculate differences
# NOTE(review): kept commented out. Uses the nitrate aliases from query_parameters;
# the three series do not share one unit, so check that a plain difference is
# meaningful before uncommenting.
# df['diff_volume_origin'] = df['NITRATE_PER_VOLUME'] - df['ORIGIN_NITRATE']
# df['diff_mass_origin'] = df['NITRATE_PER_MASS'] - df['ORIGIN_NITRATE']
# df['diff_volume_mass'] = df['NITRATE_PER_VOLUME'] - df['NITRATE_PER_MASS']

# # Plot the differences over time
# ax = df.plot.scatter(x='TIME', y='diff_volume_mass', color='purple', label='NITRATE_PER_VOLUME - NITRATE_PER_MASS', alpha=0.5, figsize=(12, 6))
# # ax = df.plot.scatter(x='TIME', y='diff_volume_origin', color='purple', label='NITRATE_PER_VOLUME - ORIGIN_NITRATE', alpha=0.5, figsize=(12, 6))
# # df.plot.scatter(x='TIME', y='diff_mass_origin', color='orange', label='NITRATE_PER_MASS - ORIGIN_NITRATE', alpha=0.5, ax=ax)
# # df.plot.scatter(x='TIME', y='diff_volume_mass', color='teal', label='NITRATE_PER_VOLUME - NITRATE_PER_MASS', alpha=0.5, ax=ax)
# ax.legend()