#### Cross calibrate co-located Dorado and Pontus fluorescence and backscatter data

This Notebook is part of the auv-python project (private repository at https://github.com/mbari-org/auv-python).

To execute it (for example):

```bash
    cd GitHub  # Or other appropriate directory on your computer
    git clone https://github.com/mbari-org/auv-python.git
    cd auv-python
    poetry install
    poetry shell
    cd notebooks
    jupyter lab
    # Open this notebook and run it from your browser - note that bokeh interactivity does not work in VS Code
```


In [None]:
import holoviews as hv
import hvplot.pandas  # Imported for its side effect: makes the .hvplot accessor available on DataFrames
import numpy as np
import pandas as pd
import pooch
import statsmodels.api as sm
from scipy.interpolate import griddata
from holoviews.operation.datashader import rasterize

# Select Bokeh as the HoloViews plotting backend for the plots in this notebook
hv.extension("bokeh")

The July 2022 Dorado Diamond mission sampled the same water as a Pontus deployment that month. Here is the "Share this view" link for the data in STOQS: http://stoqs.mbari.org/p/wa8_j4M. On the NW leg of the diamond, Dorado and Pontus sampled about 3 km apart from each other:

In [None]:
from IPython.display import Image

# Display the saved image of the July 2022 Dorado and Pontus deployments;
# the path is relative to the directory the notebook is run from
Image(filename="images/dorado_pontus_july_2022.png")

Extract 6.5 hours of data from STOQS for this time period and explore the data from Dorado and Pontus. (If you get a ValueError about a hash mismatch, update the `known_hash` value with the hash reported in the error message.)

In [None]:
%%time
# Construct the url to retrieve the data from the STOQS database:
parms = [
    "ctd1_salinity",
    "ctd1_temperature (degree_Celsius)",
    "ecopuck_bbp700 (m^-1 sr^-1)",
    "ecopuck_chl (ug/l)",
    "hs2_bb420 (m-1)",
    "hs2_bb700 (m-1)",
    "hs2_fl700",
    "salinity (psu)",
    "sigmat",
    "temperature (degC)",
    "volumescatcoeff117deg470nm (1/m/sr)",
    "volumescatcoeff117deg650nm (1/m/sr)",
    "chlorophyll (ug/l)",
]
stoqs_url = "https://tethysviz.shore.mbari.org/stoqs_lrauv_jul2022/api/measuredparameter.parquet?"
stoqs_url += "parameter__name=" + "&parameter__name=".join(parms)
stoqs_url += "&measurement__instantpoint__activity__platform__name=pontus"
stoqs_url += "&measurement__instantpoint__activity__platform__name=dorado"
stoqs_url += "&measurement__instantpoint__timevalue__gt=2022-07-21+03:30:00"
stoqs_url += "&measurement__instantpoint__timevalue__lt=2022-07-21+10:00:00"
stoqs_url += "&collect=name&include=activity__name"

# Takes a minute or so to retrieve the data the first time, thereafter it's read from a local cache
data_file = pooch.retrieve(
    url=stoqs_url,
    known_hash="bb73ab87c29c9cd6bbff1579256c5b661a77cc1a881f6a14fa823fe06e4be86d",
)
df = pd.read_parquet(data_file)
df.describe()

In [None]:
# dfm is df with its index levels converted to regular columns, so that values
# such as latitude and depth can be selected by column name when plotting:
dfm = df.reset_index()

In [None]:
def scatter_plot(df, platform, parameter, height=200):
    """Scatter one parameter of one platform as latitude versus depth.

    Args:
        df: DataFrame with "platform", "latitude" and "depth" columns and a
            column named by ``parameter``.
        platform: Value of the "platform" column selecting the rows to plot.
        parameter: Column whose values color the points.
        height: Plot height handed to hvplot (default 200).

    Returns:
        The hvplot scatter with an inverted y (depth) axis, titled
        "<platform> <parameter>".
    """
    platform_rows = df[df["platform"] == platform]
    scatter_kwargs = dict(
        x="latitude",
        y="depth",
        c=parameter,
        cmap="viridis",
        colorbar=True,
        width=800,
        height=height,
    )
    points = platform_rows.hvplot.scatter(**scatter_kwargs)
    # Depth increases downward, so flip the y axis
    return points.opts(invert_yaxis=True, title=f"{platform} {parameter}")


# Backscatter at 700nm/650nm is a good proxy for particulate matter
dorado_plot = scatter_plot(dfm, "dorado", "ecopuck_bbp700 (m^-1 sr^-1)")
pontus_plot = scatter_plot(dfm, "pontus", "volumescatcoeff117deg650nm (1/m/sr)")
# Stack the two platform plots in a single column so they can be compared by eye
(dorado_plot + pontus_plot).cols(1)

In [None]:
# Fluorescence is a good proxy for chlorophyll
dorado_plot = scatter_plot(dfm, "dorado", "ecopuck_chl (ug/l)")
pontus_plot = scatter_plot(dfm, "pontus", "chlorophyll (ug/l)")
# Stack the two platform plots in a single column so they can be compared by eye
(dorado_plot + pontus_plot).cols(1)

In [None]:
def grid_plot(
    df,
    platform,
    parameter,
    height=200,
    lat_range=(36.75, 36.9),
    depth_range=(0.0, 150.0),
    grid_shape=(300, 3000),
):
    """Interpolate one parameter onto a regular latitude/depth grid and image it.

    Args:
        df: DataFrame holding a ``parameter`` column; after ``reset_index()`` it
            must expose "latitude" and "depth" columns.
        platform: Platform name. When a "platform" column is present after
            ``reset_index()`` only that platform's samples are gridded; the name
            is also used in the plot title.
        parameter: Name of the column to grid.
        height: Height given to the image options (default 200).
        lat_range: (start, end) latitude of the grid.
        depth_range: (start, end) depth of the grid.
        grid_shape: (number of depth rows, number of latitude columns).

    Returns:
        Tuple ``(image, g_z)``: the ``hv.Image`` and the gridded 2-D array it
        shows. Row 0 of ``g_z`` is the last (deepest) grid level; cells that the
        linear interpolation cannot fill are NaN.

    Raises:
        ValueError: If there are no non-null samples of ``parameter`` for
            ``platform``.
    """
    dfp = df[parameter].dropna().reset_index()
    # Only grid the requested platform's samples, as scatter_plot() does
    if "platform" in dfp.columns:
        dfp = dfp[dfp["platform"] == platform]
    if dfp.empty:
        raise ValueError(f"No '{parameter}' data found for platform '{platform}'")

    n_depths, n_lats = grid_shape
    lats = np.linspace(lat_range[0], lat_range[1], n_lats)
    depths = np.linspace(depth_range[0], depth_range[1], n_depths)
    g_z = griddata(
        dfp[["latitude", "depth"]].values,
        dfp[parameter].values,
        (lats[None, :], depths[:, None]),
        method="linear",
        rescale=True,
    )
    # hv.Image places row 0 at the top bound (largest depth), so reverse the depth axis
    g_z = np.flip(g_z, axis=0)

    # Bounds are (left, bottom, right, top) and must describe the grid that was
    # interpolated onto, not the extent of the raw samples
    image = hv.Image(
        g_z,
        bounds=(lats[0], depths[0], lats[-1], depths[-1]),
    ).opts(
        width=800,
        height=height,
        title=f"{platform} {parameter}",
        colorbar=True,
        cmap="viridis",
        invert_yaxis=True,
        logz=True,
    )
    return image, g_z


# Backscatter at 700nm/650nm - _bsr
# (the gridded arrays returned alongside the plots are kept in the *_data_bsr variables)
dorado_plot, dorado_data_bsr = grid_plot(df, "dorado", "ecopuck_bbp700 (m^-1 sr^-1)")
pontus_plot, pontus_data_bsr = grid_plot(
    df, "pontus", "volumescatcoeff117deg650nm (1/m/sr)"
)
(dorado_plot + pontus_plot).cols(1)

In [None]:
# Chlorophyll fluorescence - _chl
# (the gridded arrays returned alongside the plots are kept in the *_data_chl variables)
dorado_plot, dorado_data_chl = grid_plot(df, "dorado", "ecopuck_chl (ug/l)")
pontus_plot, pontus_data_chl = grid_plot(df, "pontus", "chlorophyll (ug/l)")
(dorado_plot + pontus_plot).cols(1)

In [None]:
def biplot(df, x_plat, y_plat, x_parm, y_parm, log=False):
    """Regress one platform's gridded parameter on another's and plot the result.

    Both parameters are gridded with ``grid_plot()``; grid cells where either
    value is missing (or, in log mode, not finite) are discarded before an
    ordinary least squares fit of ``y`` on ``x`` with an intercept.

    Args:
        df: DataFrame passed through to ``grid_plot()``.
        x_plat: Platform supplying the independent (x) variable.
        y_plat: Platform supplying the dependent (y) variable.
        x_parm: Parameter name gridded for ``x_plat``.
        y_parm: Parameter name gridded for ``y_plat``.
        log: If True, fit and plot log10 of both variables.

    Returns:
        Overlay of the rasterized point density and the fitted regression line.
        The OLS summary and the regression equation are printed as a side effect.

    Raises:
        ValueError: If no grid cell has usable values for both variables.
    """
    x_data = grid_plot(df, x_plat, x_parm)[1].flatten()
    y_data = grid_plot(df, y_plat, y_parm)[1].flatten()
    if log:
        # log10 gives -inf for zero and nan for negative values; silence the
        # warnings here and drop those cells below
        with np.errstate(divide="ignore", invalid="ignore"):
            x_data = np.log10(x_data)
            y_data = np.log10(y_data)
        X = f"log10({x_plat} {x_parm})"
        Y = f"log10({y_plat} {y_parm})"
    else:
        X = f"{x_plat} {x_parm}"
        Y = f"{y_plat} {y_parm}"

    dfg = pd.DataFrame({x_plat: x_data, y_plat: y_data})
    # dropna() alone keeps +/-inf, which would break the regression
    dfa = dfg.replace([np.inf, -np.inf], np.nan).dropna()
    if dfa.empty:
        raise ValueError(
            f"No overlapping finite data for {x_plat} {x_parm} and {y_plat} {y_parm}"
        )

    results = sm.OLS(dfa[y_plat], sm.add_constant(dfa[x_plat])).fit()
    print(results.summary())

    # Look the coefficients up by label; positional [0]/[1] access on a
    # label-indexed Series is deprecated in pandas
    slope = float(results.params[x_plat])
    intercept = float(results.params["const"])
    title = f"{Y} = \n\t{slope:.4f} * {X} + {intercept:.4f}"
    title = title + f" (r-squared={results.rsquared:.4f})"
    print(f"\nRegression: {title}\n")

    # Draw the line from the OLS fit itself so the plot matches the printed equation
    slope_plot = hv.Slope(slope, intercept).opts(line_width=1, color="red")
    pts = hv.Points(dfa, [x_plat, y_plat])
    density_plot = (
        rasterize(pts)
        .opts(width=700, height=700, title=title, colorbar=True, cmap="viridis")
        .opts(tools=["hover"])
    )
    return density_plot * slope_plot

In [None]:
# Linear backscatter at 700nm/650nm - prediction for pontus (y) from dorado (x),
# fitted on the gridded values without any transformation
biplot(
    df,
    "dorado",
    "pontus",
    "ecopuck_bbp700 (m^-1 sr^-1)",
    "volumescatcoeff117deg650nm (1/m/sr)",
    log=False,
)

In [None]:
# Linear chlorophyll - prediction for pontus (y) from dorado (x),
# fitted on the gridded values without any transformation
biplot(df, "dorado", "pontus", "ecopuck_chl (ug/l)", "chlorophyll (ug/l)", log=False)

In [None]:
# Logarithmic backscatter at 700nm/650nm - prediction for pontus (y) from dorado (x),
# fitted on log10 of the gridded values
biplot(
    df,
    "dorado",
    "pontus",
    "ecopuck_bbp700 (m^-1 sr^-1)",
    "volumescatcoeff117deg650nm (1/m/sr)",
    log=True,
)

In [None]:
# Logarithmic chlorophyll - prediction for pontus (y) from dorado (x),
# fitted on log10 of the gridded values
biplot(df, "dorado", "pontus", "ecopuck_chl (ug/l)", "chlorophyll (ug/l)", log=True)