#### Compare Biolume Proxy Calculations Accessed from STOQS Database stoqs_auv_compare

This Notebook is part of the auv-python project (private repository at https://github.com/mbari-org/auv-python). It demonstrates how to read and make interactive plots of millions of data points accessed from a STOQS database.

To execute it (for example):

```bash
    cd GitHub  # Or other appropriate directory on your computer
    git clone https://github.com/mbari-org/auv-python.git
    cd auv-python
    poetry install
    poetry shell
    cd notebooks
    jupyter notebook
    # Open this notebook and run it from your browser - interactive zooming does not work in VS Code
```

The URLs in the two cells below were each generated by going to https://stoqs.shore.mbari.org/stoqs_auv_compare/ and clicking the buttons of the Measured Parameters to be included in the Parquet file, then opening the "Measured Parameter Data Access" section and clicking the "Estimate requirements" button to verify that the estimated values are within the available values of the server. The cells below load the data into two Pandas DataFrames and make interactive zoomable plots allowing for systematic comparisons between Matlab and auv-python generated proxies.

In [None]:
import pandas as pd
import pooch

# Download the Matlab-generated proxies as a Parquet file.
# The first retrieval takes several minutes; after that the file is read from a local cache.
# The query string is split across adjacent literals (one parameter per line) purely for
# readability - Python joins them into the same single URL.
matlab_proxies = pooch.retrieve(
    url=(
        "https://stoqs.shore.mbari.org/stoqs_auv_compare/api/measuredparameter.parquet"
        "?parameter__name=adinos"
        "&parameter__name=bg_biolum+%28ph+L%5E%7B-1%7D%29"
        "&parameter__name=diatoms"
        "&parameter__name=hdinos"
        "&parameter__name=intflash+%28ph+s%5E%7B-1%7D%29"
        "&parameter__name=nbflash_high+%28L%5E%7B-1%7D%29"
        "&parameter__name=nbflash_low+%28L%5E%7B-1%7D%29"
        "&collect=name"
    ),
    known_hash=None,  # no checksum is supplied for the downloaded file
)
dfm = pd.read_parquet(matlab_proxies)
dfm.describe()

In [None]:
# Download the auv-python-generated proxies as a Parquet file.
# The first retrieval takes several minutes; after that the file is read from a local cache.
# The query string is split across adjacent literals (one parameter per line) purely for
# readability - Python joins them into the same single URL.
python_proxies = pooch.retrieve(
    url=(
        "https://stoqs.shore.mbari.org/stoqs_auv_compare/api/measuredparameter.parquet"
        "?parameter__name=biolume_bg_biolume+%28photons%2Fliter%29"
        "&parameter__name=biolume_flow+%28mL%2Fs%29"
        "&parameter__name=biolume_intflash+%28photons%2Fs%29"
        "&parameter__name=biolume_nbflash_high+%28flashes%2Fliter%29"
        "&parameter__name=biolume_nbflash_low+%28flashes%2Fliter%29"
        "&parameter__name=biolume_proxy_adinos"
        "&parameter__name=biolume_proxy_diatoms"
        "&parameter__name=biolume_proxy_hdinos"
        "&collect=name"
        "&include=activity__name"
    ),
    known_hash=None,  # no checksum is supplied for the downloaded file
)
dfp = pd.read_parquet(python_proxies)
dfp.describe()

In [None]:
# Peek at the first five rows of the Matlab proxies frame
dfm.head(n=5)

In [None]:
# Peek at the first five rows of the auv-python proxies frame
dfp.head(n=5)

In [None]:
import holoviews as hv
import hvplot.pandas
hv.extension("bokeh")

# The cells that follow make time series comparison plots of all of the diamond mission
# data, one parameter per cell, in this order:
# 'adinos', 'bg_biolum', 'diatoms', 'hdinos', 'intflash', 'nbflash_high', 'nbflash_low', and 'profile'
# Do not commit the outputs of those cells to the repository - they are too big!

# Index levels that are dropped from the DataFrames before plotting
non_time_indx = [
    "platform",
    "depth",
    "latitude",
    "longitude",
]

In [None]:
# Overlay the Matlab 'adinos' series with the auv-python 'biolume_proxy_adinos' series
adinos_m_plot = (
    dfm.droplevel(non_time_indx)["adinos"]
    .hvplot(width=800, height=300)
)
adinos_p_plot = (
    dfp.droplevel([*non_time_indx, "activity__name"])["biolume_proxy_adinos"]
    .hvplot()
)
adinos_m_plot * adinos_p_plot

In [None]:
# Overlay the Matlab 'bg_biolum' series with the auv-python 'biolume_bg_biolume' series
bg_biolum_m_plot = (
    dfm.droplevel(non_time_indx)["bg_biolum (ph L^{-1})"]
    .hvplot(width=800, height=300)
)
bg_biolum_p_plot = (
    dfp.droplevel([*non_time_indx, "activity__name"])["biolume_bg_biolume (photons/liter)"]
    .hvplot()
)
bg_biolum_m_plot * bg_biolum_p_plot

In [None]:
# Overlay the Matlab 'diatoms' series with the auv-python 'biolume_proxy_diatoms' series
diatoms_m_plot = (
    dfm.droplevel(non_time_indx)["diatoms"]
    .hvplot(width=800, height=300)
)
diatoms_p_plot = (
    dfp.droplevel([*non_time_indx, "activity__name"])["biolume_proxy_diatoms"]
    .hvplot()
)
diatoms_m_plot * diatoms_p_plot

In [None]:
# Overlay the Matlab 'hdinos' series with the auv-python 'biolume_proxy_hdinos' series
hdinos_m_plot = (
    dfm.droplevel(non_time_indx)["hdinos"]
    .hvplot(width=800, height=300)
)
hdinos_p_plot = (
    dfp.droplevel([*non_time_indx, "activity__name"])["biolume_proxy_hdinos"]
    .hvplot()
)
hdinos_m_plot * hdinos_p_plot

In [None]:
# Overlay the Matlab 'intflash' series with the auv-python 'biolume_intflash' series
intflash_m_plot = (
    dfm.droplevel(non_time_indx)["intflash (ph s^{-1})"]
    .hvplot(width=800, height=300)
)
intflash_p_plot = (
    dfp.droplevel([*non_time_indx, "activity__name"])["biolume_intflash (photons/s)"]
    .hvplot()
)
intflash_m_plot * intflash_p_plot

In [None]:
# Overlay the Matlab 'nbflash_high' series with the auv-python 'biolume_nbflash_high' series
nbflash_high_m_plot = (
    dfm.droplevel(non_time_indx)["nbflash_high (L^{-1})"]
    .hvplot(width=800, height=300)
)
nbflash_high_p_plot = (
    dfp.droplevel([*non_time_indx, "activity__name"])["biolume_nbflash_high (flashes/liter)"]
    .hvplot()
)
nbflash_high_m_plot * nbflash_high_p_plot

In [None]:
# Overlay the Matlab 'nbflash_low' series with the auv-python 'biolume_nbflash_low' series.
# The auv-python plot gets its own 'nbflash_low_p_plot' name so that it does not
# overwrite the 'nbflash_high_p_plot' object created for the high-threshold comparison.
nbflash_low_m_plot = dfm.droplevel(non_time_indx)['nbflash_low (L^{-1})'].hvplot(width=800, height=300)
nbflash_low_p_plot = dfp.droplevel(non_time_indx + ['activity__name'])['biolume_nbflash_low (flashes/liter)'].hvplot()
nbflash_low_m_plot * nbflash_low_p_plot

In [None]:
# Overlay the Matlab 'profile' series with the auv-python 'profile_number' series
# NOTE(review): neither retrieval URL in this notebook lists 'profile' or 'profile_number'
# as a requested parameter - confirm that these columns are present in dfm and dfp.
profile_m_plot = (
    dfm.droplevel(non_time_indx)["profile"]
    .hvplot(width=800, height=300)
)
profile_p_plot = (
    dfp.droplevel([*non_time_indx, "activity__name"])["profile_number"]
    .hvplot()
)
profile_m_plot * profile_p_plot

In [None]:
import colorcet
from holoviews.operation.datashader import datashade

# The following cells make biplots of all of the diamond mission data, in order:
# 'adinos', 'bg_biolum', 'diatoms', 'hdinos', 'intflash', 'nbflash_high', 'nbflash_low', and 'profile'
# There should be a slope of 1.0 for all of the plots
#
# The Matlab frame is averaged into 2-second bins and then inner-joined on its index
# with the auv-python frame, which is joined as-is (no resampling).
# The lowercase "2s" alias is used because the uppercase "S" seconds alias is
# deprecated as of pandas 2.2; older pandas versions accept the lowercase form too.
df = pd.merge(
    dfm.droplevel(non_time_indx).resample("2s").mean(),
    dfp.droplevel(non_time_indx + ["activity__name"]),
    how="inner",
    left_index=True,
    right_index=True,
)
df.head()

In [None]:
# Datashaded biplot: Matlab 'adinos' (x) against auv-python 'biolume_proxy_adinos' (y)
pts_adinos = hv.Points(df, kdims=["adinos", "biolume_proxy_adinos"])
datashade(
    pts_adinos,
    cmap=colorcet.linear_blue_5_95_c73,
).opts(
    width=800,
    height=600,
    aspect="equal",
    title="adinos",
)

In [None]:
# Datashaded biplot: Matlab 'bg_biolum' (x) against auv-python 'biolume_bg_biolume' (y)
pts_bg_biolum = hv.Points(
    df,
    kdims=["bg_biolum (ph L^{-1})", "biolume_bg_biolume (photons/liter)"],
)
datashade(
    pts_bg_biolum,
    cmap=colorcet.linear_blue_5_95_c73,
).opts(
    width=800,
    height=600,
    aspect="equal",
    title="bg_biolum",
)

In [None]:
# Datashaded biplot: Matlab 'diatoms' (x) against auv-python 'biolume_proxy_diatoms' (y)
pts_diatoms = hv.Points(df, kdims=["diatoms", "biolume_proxy_diatoms"])
datashade(
    pts_diatoms,
    cmap=colorcet.linear_blue_5_95_c73,
).opts(
    width=800,
    height=600,
    aspect="equal",
    title="diatoms",
)

In [None]:
# Datashaded biplot: Matlab 'hdinos' (x) against auv-python 'biolume_proxy_hdinos' (y)
pts_hdinos = hv.Points(df, kdims=["hdinos", "biolume_proxy_hdinos"])
datashade(
    pts_hdinos,
    cmap=colorcet.linear_blue_5_95_c73,
).opts(
    width=800,
    height=600,
    aspect="equal",
    title="hdinos",
)

In [None]:
# Datashaded biplot: Matlab 'intflash' (x) against auv-python 'biolume_intflash' (y)
pts_intflash = hv.Points(
    df,
    kdims=["intflash (ph s^{-1})", "biolume_intflash (photons/s)"],
)
datashade(
    pts_intflash,
    cmap=colorcet.linear_blue_5_95_c73,
).opts(
    width=800,
    height=600,
    aspect="equal",
    title="intflash",
)

In [None]:
# Datashaded biplot: Matlab 'nbflash_high' (x) against auv-python 'biolume_nbflash_high' (y)
pts_nbflash_high = hv.Points(
    df,
    kdims=["nbflash_high (L^{-1})", "biolume_nbflash_high (flashes/liter)"],
)
datashade(
    pts_nbflash_high,
    cmap=colorcet.linear_blue_5_95_c73,
).opts(
    width=800,
    height=600,
    aspect="equal",
    title="nbflash_high",
)

In [None]:
# Datashaded biplot: Matlab 'nbflash_low' (x) against auv-python 'biolume_nbflash_low' (y)
pts_nbflash_low = hv.Points(
    df,
    kdims=["nbflash_low (L^{-1})", "biolume_nbflash_low (flashes/liter)"],
)
datashade(
    pts_nbflash_low,
    cmap=colorcet.linear_blue_5_95_c73,
).opts(
    width=800,
    height=600,
    aspect="equal",
    title="nbflash_low",
)

In [None]:
# Datashaded biplot: Matlab 'profile' (x) against auv-python 'profile_number' (y)
pts_profile = hv.Points(df, kdims=["profile", "profile_number"])
datashade(
    pts_profile,
    cmap=colorcet.linear_blue_5_95_c73,
).opts(
    width=800,
    height=600,
    aspect="equal",
    title="profile",
)