# track decoding

With the model parameter $\sigma$ estimated, we can proceed to estimating tracks.

There are several ways to do this:
- mean track
- mode track
- most probable track

Of all of these, the most probable track is the most meaningful one.

imports

In [None]:
import xarray as xr
import fsspec
import json

from pangeo_fish.hmm.estimator import EagerScoreEstimator
from pangeo_fish import tracks
from pangeo_fish.pdf import combine_emission_pdf

## parametrize with [papermill](https://papermill.readthedocs.io/en/latest/)

In [None]:
# Notebook specification
# For the moment limiting the plot only mean and mode
track_modes: str | list = ["mean", "mode"]  # , "viterbi"]
# Defining if we re-use computed state for plotting mean and mode
state_modes: str | list = ["mean", "mode"]  # , "viterbi"]
additional_track_quantities: str | list = ["speed", "distance"]


# Dask parameters (Machine, and configuration dependent)
# cluster_size: int | None = None
cluster_name: str = "datarmor-local"
cluster_overrides: dict = {}


# Run specific
working_path: str = "/home/datawork-taos-s/public/fish/"
tag_name: str = "A18832"
tag_base_path: str = "/home/datawork-lops-iaocea/data/fish-intel/tag/nc/"
tag_db_path: str = (
    "/home/datawork-lops-iaocea/data/fish-intel/acoustic/FishIntel_tagging_France.csv"
)
detections_path: str = "/home/datawork-lops-iaocea/data/fish-intel/acoustic/detections_recaptured_fishintel.csv"
ref_model_name: str = "copernicus"
nside: int = 4096  # healpix resolution
# we can set the parameter acoustic to use acoustic information or not

acoustic: str = "/acoustic"

## set paths using the parameters


In [None]:
# All inputs and outputs of this run share one directory:
#   <working_path><tag_name>/<ref_model_name><acoustic>
run_root = f"{working_path}{tag_name}/{ref_model_name}{acoustic}"

tag_url = f"{tag_base_path}{tag_name}.nc"

# Per-resolution artefacts; the healpix `nside` is part of each file name.
emission_path = f"{run_root}/emission_{nside}.zarr"
states_path = f"{run_root}/state_{nside}.zarr"
parameter_path = f"{run_root}/sigma_{nside}.json"

# Keeps its trailing slash: later cells append file names to it.
tracks_root = f"{run_root}/"

## Start Dask cluster

In [None]:
import dask_hpcconfig
from distributed import Client

In [None]:
domainname = !domainname

if domainname == ["nisdatarmor"]:
    cluster = dask_hpcconfig.cluster(cluster_name, **cluster_overrides)
    if cluster_size is not None:
        cluster.scale(cluster_size)
else:
    cluster = dask_hpcconfig.cluster("local")

client = Client(cluster)
client

open emission probabilities

In [None]:
# Open the emission store, then merge its contents with
# `combine_emission_pdf` and drop the "resolution" variable.
emission = xr.open_dataset(
    emission_path, engine="zarr", chunks={}, inline_array=True
)
emission = combine_emission_pdf(emission).drop_vars("resolution")
emission

open state probabilities

In [None]:
%%time
# Open the precomputed state probabilities, or use None when no path is set
# (the decoding loop then falls back to the emission probabilities).
states = (
    None
    if states_path is None
    else xr.open_dataset(states_path, engine="zarr", chunks={}, inline_array=True)
)
states

read the estimated parameters

In [None]:
# Read the estimated parameters from the JSON file at `parameter_path`.
with fsspec.open(parameter_path, mode="r") as f:
    parameters = json.loads(f.read())

# Drop "tolerance" if present; the remaining entries are passed to the
# estimator constructor as keyword arguments.
# NOTE(review): presumably the estimator does not accept it - confirm.
parameters.pop("tolerance", None)
parameters

create the estimator

In [None]:
# Build the estimator, passing every entry of `parameters` as a keyword argument.
estimator = EagerScoreEstimator(**parameters)
# Bare name as the last statement so the notebook displays the estimator.
estimator

compute the tracks

In [None]:
# Resolve the filesystem that backs the output location; only the filesystem
# object of the returned pair is needed here.
fs = fsspec.core.url_to_fs(tracks_root)[0]
# Create the output directory; an already existing directory is fine.
fs.mkdirs(tracks_root, exist_ok=True)

In [None]:
# The mode parameters are declared as `str | list`. A bare string (e.g. a
# single mode injected by papermill) would be iterated character by character
# and turn `mode in state_modes` into a substring test, so wrap it in a list.
# Rebinding the globals keeps the later cells, which iterate `track_modes`
# again, consistent with what is decoded here.
if isinstance(track_modes, str):
    track_modes = [track_modes]
if isinstance(state_modes, str):
    state_modes = [state_modes]

# Import the helper directly rather than going through the name `tracks`:
# the visualization cell further down rebinds `tracks` to a dict, after which
# `tracks.additional_quantities` fails if this cell is run again.
from pangeo_fish.tracks import additional_quantities

for mode in track_modes:
    # Decode from the precomputed state probabilities when they are available
    # and this mode is listed in `state_modes`; otherwise from the emission
    # probabilities.
    reuse_states = states is not None and mode in state_modes
    source = states if reuse_states else emission
    raw_track = estimator.decode(source, mode=mode, is_states=reuse_states)

    # Attach the requested extra quantities, then write one parquet file per mode.
    track = additional_quantities(raw_track, additional_track_quantities)
    track.df.to_parquet(f"{tracks_root}/{mode}.parquet")

visualization

In [None]:
import cmocean
import hvplot.xarray
import movingpandas as mpd
import geopandas as gpd
import xarray as xr
import holoviews as hv

In [None]:
# One parquet file per decoded mode, as written by the decoding loop.
track_paths = [f"{tracks_root}/{mode}.parquet" for mode in track_modes]

# Load every track as a trajectory, keyed and identified by its mode.
# NOTE(review): this rebinds `tracks`, which until here referred to the
# `pangeo_fish.tracks` module; code that needs the module after this cell
# must not rely on the name `tracks`.
tracks = {
    mode: mpd.Trajectory(gpd.read_parquet(path), traj_id=mode)
    for mode, path in zip(track_modes, track_paths)
}

In [None]:
# One map per track, coloured by speed and titled with the track's mode.
track_maps = [
    track.hvplot(c="speed", tiles="CartoLight", title=name, cmap="cmo.speed")
    for name, track in tracks.items()
]
# Show the maps side by side, two per row.
hv.Layout(track_maps).cols(2)

In [None]:
# Plot options shared by every saved map; only the title differs per track.
map_options = {
    "c": "speed",
    "tiles": "CartoLight",
    "cmap": "cmo.speed",
    "width": 500,
    "height": 400,
}

# Save each track map as <tracks_root><name>.html.
for name, track in tracks.items():
    plot = track.hvplot(title=name, **map_options)
    hvplot.save(plot, f"{tracks_root}{name}.html")