# emission probability from acoustic ranges

imports

In [None]:
import io
import pathlib

import cf_xarray
import dask
import flox.xarray
import numpy as np
import pandas as pd
import xarray as xr
from pangeo_fish import utils
from pangeo_fish.acoustic import (
    count_detections,
    extract_receivers,
    search_acoustic_tag_id,
)
from pangeo_fish.healpy import (
    astronomic_to_cartesian,
    astronomic_to_cell_ids,
    buffer_points,
    geographic_to_astronomic,
)

# keep attributes through xarray operations and collapse the data section in
# object reprs
xr.set_options(keep_attrs=True, display_expand_data=False)

## parametrize with [papermill](https://papermill.readthedocs.io/en/latest/)

In [None]:
# Notebook specification
# radius around each receiver passed as `buffer_size` to `buffer_points`
receiver_buffer: float = 1000.0  # in [m]


# Dask parameters (machine and configuration dependent)
# number the cluster is scaled to on Datarmor; `None` skips the scaling
cluster_size: int | None = None
# name and keyword overrides handed to `dask_hpcconfig.cluster` on Datarmor
cluster_name: str = "datarmor-local"
cluster_overrides: dict = {}

# Run specific
# root directory below which the input and output stores are located
working_path: str = "/home/datawork-taos-s/public/fish/"
# tag identifier; used in the tag file name and in the store paths
tag_name: str = "A18832"
tag_base_path: str = "/home/datawork-lops-iaocea/data/fish-intel/tag/nc/"
# CSV files read with pandas: the tag database and the detections
tag_db_path: str = "/home/datawork-lops-iaocea/data/fish-intel/acoustic/FishIntel_tagging_France.csv"
detections_path: str = "/home/datawork-lops-iaocea/data/fish-intel/acoustic/detections_recaptured_fishintel.csv"

# name of the reference model; part of the store paths
ref_model_name: str = "copernicus"
# only used to build the store names; the computations below read `nside`
# from the attributes of the opened dataset
nside: int = 4096  # healpix resolution

## set path using the parameters


In [None]:
# location of the tag's `.nc` file
tag_url = f"{tag_base_path}{tag_name}.nc"

# both stores live below <working_path><tag_name>/<ref_model_name>/
input_path = f"{working_path}{tag_name}/{ref_model_name}/emission_{nside}.zarr"
output_path = (
    f"{working_path}{tag_name}/{ref_model_name}/acoustic/emission_{nside}.zarr"
)

## Specify machine dependent parameters



In [None]:
domainname=!domainname

if domainname == ["nisdatarmor"]:
    # Datarmor
    catalog = "/home/datawork-taos-s/intranet/kerchunk/ref-copernicus.yaml"
else:
    # local PC
    catalog = "https://data-taos.ifremer.fr/kerchunk/ref-copernicus.yaml"

## Start Dask cluster

In [None]:
import dask_hpcconfig
from distributed import Client

In [None]:
# anywhere but Datarmor a local cluster is used; on Datarmor the configured
# cluster is created and, if a size was given, scaled to it
if domainname != ["nisdatarmor"]:
    cluster = dask_hpcconfig.cluster("local")
else:
    cluster = dask_hpcconfig.cluster(cluster_name, **cluster_overrides)
    if cluster_size is not None:
        cluster.scale(cluster_size)

client = Client(cluster)
client

## open data

tag database

In [None]:
# the file is comma separated, which is the default of `read_csv`
tag_database = pd.read_csv(tag_db_path)
tag_database.head(n=2)

detections

In [None]:
# Work around the weird quoting of the detections file by stripping every
# double quote before handing the text to pandas.
# The replacement is done on the whole text so the original line breaks are
# kept: the lines yielded by the file object are already newline-terminated,
# and joining them with "\n" would insert an empty line after every record.
with open(detections_path, mode="r") as f:
    data = f.read().replace('"', "")
content = io.StringIO(data)

detection_database = (
    # column 1 is parsed as dates (presumably the `date_time` column — confirm);
    # `date_time` is then renamed to `time` and used as the index
    pd.read_csv(content, parse_dates=[1])
    .rename(columns={"date_time": "time"})
    .set_index("time")
)
detection_database.head(2)

base grid

In [None]:
# open the emission store with dask, using a single chunk along `x` and `y`
ds = xr.open_dataset(
    input_path,
    engine="zarr",
    chunks={"x": -1, "y": -1},
)
ds

## select detections

extract receiver locations

In [None]:
# receiver information taken from the detection database, as an xarray object
receivers = (
    extract_receivers(detection_database)
    .to_xarray()
)
receivers

In [None]:
# search the tag database using the `tag_id` attribute of the dataset
acoustic_tag_id = search_acoustic_tag_id(
    tag_database,
    ds.attrs["tag_id"],
)
acoustic_tag_id

In [None]:
# keep only the detections of the selected acoustic tag: index the table by
# tag id, pick the tag's rows, then index those by time again
detections = (
    detection_database.reset_index()[["time", "deployment_id", "acoustic_tag_id"]]
    .set_index("acoustic_tag_id")
    .loc[acoustic_tag_id]
    .set_index("time")
    .to_xarray()
)
detections

## detection weights

count detections

In [None]:
# one interval per time step: add bounds to the time coordinate, convert the
# bounds to vertices and use those as the breaks of the interval index
time_intervals = pd.IntervalIndex.from_breaks(
    cf_xarray.bounds_to_vertices(
        ds[["time"]].cf.add_bounds(keys="time")["time_bounds"],
        bounds_dim="bounds",
    )
)
time_intervals

In [None]:
# count the detections per time interval, put the counts on the dataset's time
# axis, attach the receiver information and normalize over the receivers
weights = (
    count_detections(detections, by=time_intervals)
    .swap_dims({"time_bins": "time"})
    .assign_coords(time=ds.time)
    .pipe(
        lambda counts: counts.merge(
            receivers.sel(deployment_id=counts["deployment_id"])
        )
    )
    .pipe(utils.normalize, dim="deployment_id")
    # missing values are replaced by a weight of 0
    .fillna(0)
    .rename_vars({"count": "weights"})["weights"]
)
weights

## detection maps

The stations have a detection range given by `receiver_buffer`. For the maps, we select all pixels within that range and set those to `1`.

In [None]:
# collect the `rot_*` dataset attributes, stripped of their prefix
rot = {
    name.removeprefix("rot_"): value
    for name, value in ds.attrs.items()
    if name.startswith("rot_")
}

# receiver positions: geographic -> astronomic -> cartesian
phi, theta = geographic_to_astronomic(
    lon=receivers.deploy_longitude,
    lat=receivers.deploy_latitude,
    rot=rot,
)
cartesian_positions = astronomic_to_cartesian(
    theta=theta,
    phi=phi,
    dim="deployment_id",
)
cartesian_positions

Unfortunately, the cell ids stored as a coordinate are not correct, so we have to recompute them from the latitude and longitude of the grid.

In [None]:
# recompute the cell ids from the grid's own latitude / longitude
phi, theta = geographic_to_astronomic(
    lat=ds.latitude,
    lon=ds.longitude,
    rot=rot,
)
cell_ids = astronomic_to_cell_ids(
    nside=ds.attrs["nside"],
    phi=phi,
    theta=theta,
)
cell_ids

In [None]:
# one mask per receiver (`deployment_id`): the cells of `cell_ids` within
# `receiver_buffer` (in [m]) of the receiver's position
# NOTE(review): the meaning of `factor` and `intersect` is defined by
# `pangeo_fish.healpy.buffer_points` — confirm against its documentation
masks = buffer_points(
    cell_ids,
    cartesian_positions,
    nside=ds.attrs["nside"],
    buffer_size=receiver_buffer,
    factor=2**16,
    intersect=True,
)
masks

In [None]:
# a cell is part of the combined mask when the sum over the receivers' masks
# is non-zero
combined_mask = (
    masks
    .sum(dim="deployment_id")
    .astype(bool)
)
combined_mask

In [None]:
# select the latitude / longitude variables through the cf accessor
grid = ds.cf[
    ["latitude", "longitude"]
]
grid

## apply weights

In [None]:
# align the weights with the dataset's time axis (weight 0 for time steps
# missing from the weights), one chunk per time step
reindexed = (
    weights
    .reindex(time=ds.time, fill_value=0)
    .chunk({"time": 1})
)
reindexed

In [None]:
# value used outside of the combined mask: 1 minus the total weight of the
# time step
fill_values = 1 - reindexed.sum(dim="deployment_id")
fill_values

In [None]:
%%time
# acoustic emission per time step: within the combined receiver mask use the
# weighted sum of the receivers' masks, everywhere else use `fill_values`
acoustic_pdfs = (
    # weight each receiver's mask (cast to float) by its detection weight
    (reindexed * masks.astype(float))
    .sum(dim="deployment_id")
    .where(combined_mask, fill_values)
    .chunk()
)
acoustic_pdfs

## assign and store to disk

In [None]:
%%time
combined = ds.assign(acoustic=acoustic_pdfs)
combined

write to disk

In [None]:
%%time
combined.drop_vars(["time_bins"]).to_zarr(output_path, mode="w", consolidated=True)

## visualization

In [None]:
# read the store that was just written back from disk
emission_ = xr.open_zarr(
    output_path,
)
emission_

In [None]:
# map of the acoustic emission at the first time step
(
    emission_["acoustic"]
    .isel(time=0)
    .plot(x="longitude", y="latitude")
)