#### Explore relationships between parameters from stoqs_auv_compare Parquet output

This Notebook is part of the auv-python project (https://github.com/mbari-org/auv-python). It demonstrates how to read and make interactive plots of millions of data points accessed from a STOQS database.

The URLs in the pooch.retrieve() calls below were generated by going to https://stoqs.shore.mbari.org/stoqs_auv_compare/ and clicking the buttons of the Measured Parameters to be included in the Parquet file, then opening the "Measured Parameter Data Access" section and clicking the "Estimate requirements" button to verify that the estimated values are within the available values of the server.

In [None]:
# Do all the imports here and then load the data so that we can randomly execute
# any of the plotting cells below

import colorcet
import holoviews as hv
import hvplot.pandas  # imported for its side effect: registers the .hvplot accessor on pandas objects
import os
import ipywidgets as widgets
import numpy as np
import pandas as pd
import panel as pn
import pooch
import statsmodels.api as sm
from bokeh.models.formatters import PrintfTickFormatter
from holoviews.operation.datashader import datashade

# Select Bokeh as the HoloViews rendering backend for the plots in this notebook
hv.extension("bokeh")


In [None]:
# The first retrieval takes several minutes; afterwards pooch serves the file
# from its local cache instead of downloading it again.
#
# The request URL is written as adjacent string literals (one query argument
# per line) so the selected parameters are easy to read. The pieces concatenate
# to exactly the URL produced by the STOQS user interface.
parquet_url = (
    "https://stoqs.shore.mbari.org/stoqs_auv_compare/api/measuredparameter.parquet"
    "?parameter__name=altitude"
    "&parameter__name=biolume_intflash+%28photons%2Fs%29"
    "&parameter__name=biolume_nbflash_high+%28flashes%2Fliter%29"
    "&parameter__name=biolume_nbflash_low+%28flashes%2Fliter%29"
    "&parameter__name=biolume_proxy_adinos"
    "&parameter__name=biolume_proxy_diatoms"
    "&parameter__name=biolume_proxy_hdinos"
    "&parameter__name=hs2_bb420+%28m-1%29"
    "&parameter__name=hs2_fl700"
    "&parameter__name=profile_number"
    "&collect=name"
    "&include=activity__name"
)

# (connect, read) timeouts in seconds
downloader = pooch.HTTPDownloader(timeout=(60, 600))

# pooch checks the retrieved file against known_hash before returning its local path
data_file = pooch.retrieve(
    url=parquet_url,
    known_hash="298849c655674377c3494ba4c31a7ed97ebeb3bea2d3dffdd29a5251cfd68d09",
    downloader=downloader,
)

df = pd.read_parquet(data_file)
df.describe()

In [None]:
# Preview the rows selected by the 'dorado' index label, keeping only rows
# that have a value in every column. Per the original note, this keeps the
# Gulper Activities out of the display.
(
    df.loc[['dorado']]
    .dropna(how='any')
)

In [None]:
# Build dfm ("data frame modified"), a cleaned-up copy of the loaded data:
#   1. select the rows under the 'dorado' index label,
#   2. drop rows that are missing a value in any column,
#   3. replace negative hs2_fl700 values with NaN.
# df itself is left untouched, so this cell can be re-run safely.
dfm = (
    df.loc[['dorado']]
    .dropna()
    .assign(
        hs2_fl700=lambda frame: np.where(
            frame['hs2_fl700'] < 0, np.nan, frame['hs2_fl700']
        )
    )
)
dfm.describe()

In [None]:
# Plot 'hs2_bb420 (m-1)' (x) against 'hs2_fl700' (y) from the cleaned frame.
# datashade() renders the points as an aggregated image rather than as
# individual glyphs.
pts = hv.Points(dfm, kdims=['hs2_bb420 (m-1)', 'hs2_fl700'])
(
    datashade(pts, cmap=colorcet.m_rainbow)
    .opts(width=600, height=600)
)