# This is a notebook for querying the merged Eutro WB for Chl, Oxygen and Nutrients instance for the 1.0.0 Beacon release.
* You can run each cell individually by pressing "shift + enter".
* For more information, questions, bugs, please contact us on Slack:
    + https://join.slack.com/t/beacontechnic-wwa5548/shared_invite/zt-2dp1vv56r-tj_KFac0sAKNuAgUKPPDRg.

### 1. DD&AS token
In order to get access to the Beacon endpoint, you need to fill in your unique personal token between the " " in the cell below.

The token can be requested by writing an email to paul@maris.nl and robin@maris.nl. The token will then be available in the DD&AS: https://data.blue-cloud.org/search

![DDAS](images/ddas.png "DD&AS")
![token](images/beacon_token.png "token")

In [1]:
# Personal DD&AS access token. It is sent as the Bearer token in the
# Authorization header of the Beacon requests below. Paste it between the
# quotes and avoid sharing the notebook with the token filled in.
Token = ""

### 2. Install and import the necessary packages
Install the following packages, if you have not already installed them in your environment:
* pip install requests
* pip install xarray
* pip install ipywidgets
* pip install cartopy
* pip install h5netcdf
* pip install netcdf4
* pip install scipy
* pip install packaging
* pip install plotly
* pip install numpy

Import the required packages

In [2]:
import requests
import json
import xarray as xr
import datetime
import pandas as pd
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import numpy as np

### 3. Querying BEACON

#### 3.1 Retrieve the available columns from the Merged Eutrophication EOV's endpoint

Swagger page here: https://beacon-wb2-eutrophication.maris.nl/swagger/ 

In [3]:
# Ask the Beacon instance which columns can be queried. The token is sent as
# a Bearer token; the timeout keeps the cell from hanging forever when the
# server cannot be reached.
responseinfo = requests.get(
    "https://beacon-wb2-eutrophication.maris.nl/api/query/available-columns",
    headers={"Authorization": f"Bearer {Token}"},
    timeout=60,
)
# Stop here with the HTTP error (for example a missing or invalid token)
# instead of storing an error payload in `params`.
responseinfo.raise_for_status()
params = responseinfo.json()

#### 3.2 Available parameters
Below you can search through the available columns by entering text between the brackets of search_columns(" "). For the merged instance we need to look for the parameters under "COMMON"

In [None]:
def search_columns(search_term, columns=None):
    """Print every column name that contains ``search_term``.

    The comparison is case-insensitive.

    Args:
        search_term: Text to look for inside the column names.
        columns: Optional iterable of column names to search. When omitted,
            the notebook-level ``params`` (the parsed available-columns
            response) is searched, as before.

    Returns:
        None. The matching names, or a "not found" message, are printed.
    """
    if columns is None:
        columns = params
    needle = search_term.lower()
    matches = [col for col in columns if needle in col.lower()]

    if matches:
        print("Matching columns:")
        for match in matches:
            print(match)
    else:
        print("No matching columns found.")

# The columns of the merged instance are the ones listed under "COMMON".
search_columns("common") #Enter your search term here

#### 3.3 Define your input parameters 
This test uses the Marine Regions delimitation for the North East Atlantic area (General Sea Area): https://www.marineregions.org/gazetteer.php?p=details&id=5664

In [5]:
# Input parameters for the query. They are passed to query() below and are
# also used in the output file name and in the plot titles.
parameter = "COMMON_OXYGEN" #column name 
mindate = "2001-01-01" #yyyy-mm-dd
maxdate = "2001-01-31" #yyyy-mm-dd
# Bounding box of the region (presumably decimal degrees - confirm).
minlon = -44
maxlon = 5
minlat = 15
maxlat = 60
# Depth range; the output file name and plot titles label it with "m".
mindepth = 0
maxdepth = 1000

#### 3.4 Query body
Here you will create the query body based on your input parameters. You can add other "query_parameters" and "filters" to suit your needs.
* For more query examples and explanations, you can take a look at https://github.com/maris-development/beacon-blue-cloud/.  

In [6]:
def _column(column_name, alias, optional=False):
    """Build one ``query_parameters`` entry; ``optional`` is only added when set."""
    entry = {"column_name": column_name, "alias": alias}
    if optional:
        entry["optional"] = True
    return entry


def _range_filter(alias, minimum, maximum):
    """Build a min/max filter on the query parameter with the given alias."""
    return {"for_query_parameter": alias, "min": minimum, "max": maximum}


# Suffixes of the columns requested for each merged chlorophyll / nitrate
# variable ("" is the value column itself).
_COMPANION_SUFFIXES = ("", "_P01", "_P06", "_QC", "_STANDARD_NAME", "_UNITS")


def query(parameter, mindate, maxdate, minlon, maxlon, minlat, maxlat, mindepth, maxdepth):
    """Build the JSON body of a Beacon query for the merged instance.

    Args:
        parameter: Name of the main column to retrieve (e.g. "COMMON_OXYGEN").
            Its ``_UNITS``, ``_QC``, ``_P01``, ``_P06`` and ``_STANDARD_NAME``
            companion columns are requested as optional columns.
        mindate: First day of the time range, formatted ``yyyy-mm-dd``.
        maxdate: Last day of the time range, formatted ``yyyy-mm-dd``. The
            whole day is included (the filter runs up to 23:59:59).
        minlon, maxlon: Bounds of the filter on the "LONGITUDE" alias.
        minlat, maxlat: Bounds of the filter on the "LATITUDE" alias.
        mindepth, maxdepth: Bounds of the filter on the "DEPTH" alias.

    Returns:
        A dict with the keys "query_parameters", "filters" and "output",
        ready to be serialised to JSON and posted to the query endpoint.
    """
    query_parameters = [
        # Selected parameter and its companion columns.
        _column(parameter, parameter, optional=True),
        _column(f"{parameter}_UNITS", "Unit", optional=True),
        _column("COMMON_TIME", "datetime"),
        _column(f"{parameter}_QC", f"{parameter}_qf", optional=True),
        _column(f"{parameter}_P01", f"{parameter}_P01", optional=True),
        _column(f"{parameter}_P06", f"{parameter}_P06", optional=True),
        _column(f"{parameter}_STANDARD_NAME", f"{parameter}_STANDARD_NAME", optional=True),
        # Merged depth.
        _column("COMMON_DEPTH", "DEPTH"),
        _column("COMMON_DEPTH_UNITS", "DEPTH_UNITS", optional=True),
        # Position.
        _column("COMMON_LONGITUDE", "LONGITUDE"),
        _column("COMMON_LATITUDE", "LATITUDE"),
    ]

    # Merged chlorophyll and nitrate: the alias is the column name without
    # its "COMMON_" prefix.
    for variable in ("CHLOROPHYLL", "NITRATE"):
        query_parameters.extend(
            _column(f"COMMON_{variable}{suffix}", f"{variable}{suffix}", optional=True)
            for suffix in _COMPANION_SUFFIXES
        )

    # Metadata.
    query_parameters.append(_column("SOURCE_BDI", "SOURCE_BDI"))
    query_parameters.append(_column("SOURCE_BDI_DATASET_ID", "SOURCE_BDI_DATASET_ID"))

    filters = [
        {
            "for_query_parameter": "datetime",
            "min": f"{mindate}T00:00:00",
            # End of day: a T00:00:00 upper bound would drop every
            # observation taken during the last requested day.
            "max": f"{maxdate}T23:59:59",
            "cast": "timestamp",
        },
        _range_filter("DEPTH", mindepth, maxdepth),
        _range_filter("LONGITUDE", minlon, maxlon),
        _range_filter("LATITUDE", minlat, maxlat),
    ]

    return {
        "query_parameters": query_parameters,
        "filters": filters,
        "output": {"format": "netcdf"},
    }

# Build the request body from the input parameters defined above.
query_body = query(parameter, mindate, maxdate, minlon, maxlon, minlat, maxlat, mindepth, maxdepth)

#### 3.5 Retrieving the queried data
 This is the post request that is sent to Beacon with the above specified body.

In [7]:
# Post the JSON-encoded query body to the Beacon query endpoint; the token
# authenticates the request.
request_headers = {
    "Authorization" : f"Bearer {Token}",
    "Content-type": "application/json",
}
response = requests.post(
    "https://beacon-wb2-eutrophication.maris.nl/api/query",
    data=json.dumps(query_body),
    headers=request_headers,
)

# 204: the query matched nothing. Any other non-200 status: show the
# server's message.
if response.status_code == 204:
    print("No data has been found for your query, please update your input fields above and run the notebook again.")
elif response.status_code != 200:
    print(response.text)

In [None]:
# Region label used in the output file name and in the plot titles.
regionname = f"[{minlat},{minlon}],[{maxlat},{maxlon}]"

# Do not save an empty or error response as a NetCDF file: opening it would
# only fail later with a much less helpful error.
if response.status_code != 200:
    raise RuntimeError(
        f"Beacon query returned HTTP status {response.status_code}; no NetCDF data to save."
    )

output_dir = "./Beacon_V1.0.0_Output"
os.makedirs(output_dir, exist_ok=True)

output_path = f"{output_dir}/EWB_merged_{parameter}_{regionname}_{mindate}-{maxdate}_[{mindepth}-{maxdepth}m].nc"

# Write the response to disk and close the file before reading it back.
with open(output_path, "wb") as nc_file:
    nc_file.write(response.content)

# Convert to a DataFrame while the dataset is open, then release the file.
with xr.open_dataset(output_path) as dataset:
    df = dataset.to_dataframe()

# Index the rows by observation time, in chronological order.
df = df.assign(datetime=pd.to_datetime(df["datetime"])).set_index("datetime").sort_index()

print(df["SOURCE_BDI"].unique())
df.head()

Optionally apply a filter on your parameter for quick removal of outliers. Note that this can also be achieved within your Beacon request by applying a filter on your parameter. You can uncomment this code block by using Ctrl + /.

In [None]:
# Optional outlier filter, disabled by default: uncomment the four lines
# directly below and adjust the bounds to keep only the rows whose value lies
# strictly between lowbound and highbound.
# highbound = 40
# lowbound = -2
# df = df.loc[(df[f"{parameter}"] < highbound) & (df[f"{parameter}"] > lowbound)]
# df
# Show the distinct values of the NITRATE_UNITS column.
print(df["NITRATE_UNITS"].unique())

Check the BDIs contributing to this Beacon instance for the input parameters selected above, and compare the counts of the merged collection with EMODnet:

In [None]:
# Distinct SOURCE_BDI values present in the query result.
print("BDI's contributing to the merged instance:", df["SOURCE_BDI"].unique())

# Disabled count comparison between the EMODnet column and the merged oxygen
# column (kept for reference).
# print("EMODnet chemistry count is", np.sum(df['EMODnet Water body dissolved oxygen concentration'].count()))
# print("BEACON merged BDI's count is", np.sum(df['COMMON_OXYGEN'].count()))

### 4. Plotting the results

In [11]:
import plotly.express as px

# Interactive map of the result: colour shows the selected parameter, marker
# size shows the depth.
lon = df['LONGITUDE']
lat = df['LATITUDE']
field = df[parameter]
depth = df['DEPTH']

# The hover column and the point count follow `parameter`, so the cell also
# works when a parameter other than COMMON_OXYGEN is selected.
fig = px.scatter_mapbox(
    df,
    lon=lon,
    lat=lat,
    color=field,
    size=depth,
    size_max=10,
    hover_data=[f"{parameter}_qf"],
    hover_name=df['SOURCE_BDI'],
    color_continuous_scale=px.colors.sequential.Rainbow,
    zoom=3,
    height=800,
)

point_count = len(df[parameter])
fig.update_layout(
    title=dict(text=f'EWB_Merged_Beacon_instance {parameter} {regionname} {mindate}-{maxdate} [{mindepth}-{maxdepth}m] #Points = {point_count}'),
    mapbox_style="white-bg",
    mapbox_layers=[
        {
            # Draw the USGS imagery tiles underneath the data points.
            "below": 'traces',
            "sourcetype": "raster",
            "sourceattribution": "United States Geological Survey",
            "source": [
                "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}"
            ]
        }
    ])

fig.show()

In [12]:
# lon = df['LONGITUDE']
# lat = df['LATITUDE']
# field = df['EMODnet Water body dissolved oxygen concentration']
# depth = df['DEPTH']

# # fig = px.scatter_mapbox(df, lon=lon, lat=lat, color=field, hover_name=df['Depth'], color_continuous_scale = px.colors.sequential.Rainbow, zoom=3, height=500)
# fig = px.scatter_mapbox(df, lon=lon, lat=lat, color=field, size=depth, size_max=10, hover_data='EMODnet oxygen qc', hover_name=df['SOURCE_BDI'], color_continuous_scale = px.colors.sequential.Rainbow, zoom=3, height=800)
# fig.update_layout(
#     title=dict(text=f'Emodnet_Beacon_instance {parameter} {regionname} {mindate}-{maxdate} [{mindepth}-{maxdepth}m] #Points = {np.sum(df['EMODnet Water body dissolved oxygen concentration'].count())}'),
#     mapbox_style="white-bg",
#     mapbox_layers=[
#         {
#             "below": 'traces',
#             "sourcetype": "raster",
#             "sourceattribution": "United States Geological Survey",
#             "source": [
#                 "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}"
#             ]
#         }
#       ]
#     )

# fig.show()

In [None]:
# Actual bounding box of the returned points. The pandas reductions are
# vectorised and skip missing values, whereas the builtin min()/max() loop
# over the Series in Python and give order-dependent results when NaN is
# present.
print(lon.min())
print(lon.max())
print(lat.min())
print(lat.max())

In [None]:
# Static map of the result on a Plate Carree projection.
fig = plt.figure(figsize=(23, 18))
ax = plt.axes(projection=ccrs.PlateCarree())

# Frame the map on the queried region, with some extra longitude on both
# sides, so the view follows the input parameters instead of a fixed box.
lon_margin = 5
ax.set_extent(
    [max(minlon - lon_margin, -180), min(maxlon + lon_margin, 180), minlat, maxlat],
    crs=ccrs.PlateCarree(),
)  # (west, east, south, north)

ax.coastlines(resolution="10m")
ax.gridlines(draw_labels=True)

sc = ax.scatter(df["LONGITUDE"], df["LATITUDE"], c=df[parameter], cmap="viridis", s=15, transform=ccrs.PlateCarree())

ax.add_feature(cfeature.LAND)
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.RIVERS)
ax.add_feature(cfeature.BORDERS)
ax.add_feature(cfeature.LAKES, alpha=0.1)

# Read the unit outside the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12.
unit = df["Unit"].iloc[0]
cbar = plt.colorbar(sc, ax=ax, orientation="vertical", shrink=0.6, label=f"{parameter} [{unit}]")

plt.title(f"{parameter} {regionname} {mindate}-{maxdate} [{mindepth}-{maxdepth}m]")

plt.show()