# preparations for the optimization

From the temperature differences, we can compute:
- the emission probability matrix
- the initial probability
- the final probability
- the ocean mask
- the maximum sigma

In [None]:
import pint_xarray
import xarray as xr

In [None]:
from pangeo_fish.distributions import create_covariances, normal_at
from pangeo_fish.pdf import normal
from pangeo_fish.utils import temporal_resolution

## parametrize with [papermill](https://papermill.readthedocs.io/en/latest/)

In [None]:
# Notebook specification
# Standard deviation passed as `std` to the normal pdf of the temperature differences.
diff_std: float = 0.75
# Standard deviation for the recapture position; it is squared to build the
# covariance of the final probability.
recapture_std: float = 1e-2
# Divisor applied to the maximum grid displacement when deriving `max_sigma`.
truncate: float = 4.0


# Dask parameters (machine and configuration dependent)
# Size passed to `cluster.scale`; `None` skips the scaling step.
cluster_size: int | None = None
# Cluster name passed to `dask_hpcconfig.cluster` when the domain name is "nisdatarmor".
cluster_name: str = "datarmor-local"
# Extra keyword arguments forwarded to `dask_hpcconfig.cluster` alongside `cluster_name`.
cluster_overrides: dict = {}

# Run specific
# Root of the input / output zarr stores (concatenated as-is: keep the trailing slash).
working_path: str = "/home/datawork-taos-s/public/fish/"
# Tag identifier: names the tag file and the per-tag directory below `working_path`.
tag_name: str = "A18832"
# Directory prefix of the tag NetCDF files (concatenated as-is: keep the trailing slash).
tag_base_path: str = "/home/datawork-lops-iaocea/data/fish-intel/tag/nc/"
# NOTE(review): the two paths below are not referenced in the visible part of this
# notebook; presumably kept so all workflow notebooks share one parameter set. Confirm.
tag_db_path: str = "/home/datawork-lops-iaocea/data/fish-intel/acoustic/FishIntel_tagging_France.csv"
detections_path: str = "/home/datawork-lops-iaocea/data/fish-intel/acoustic/Acoustic_Data/detections_recaptured_fishintel.csv"

# Reference model name: used as a directory component of the input / output paths.
ref_model_name: str = "copernicus"
nside: int = 4096  # healpix resolution


## set paths using the parameters


In [None]:
# NetCDF file of the tag
tag_url = f"{tag_base_path}{tag_name}.nc"

# The store that is read (input) and the store that is written (output) live in
# the same per-tag, per-model directory and differ only in their name prefix.
input_path = f"{working_path}{tag_name}/{ref_model_name}/diff_{nside}.zarr"
output_path = f"{working_path}{tag_name}/{ref_model_name}/emission_{nside}.zarr"

## Specify machine-dependent parameters



In [None]:
domainname=!domainname

if domainname == ["nisdatarmor"]:
    # Datarmor
    catalog = "/home/datawork-taos-s/intranet/kerchunk/ref-copernicus.yaml"
else:
    # local PC
    catalog = "https://data-taos.ifremer.fr/kerchunk/ref-copernicus.yaml"

## Start Dask cluster

In [None]:
import dask_hpcconfig
from distributed import Client

In [None]:
if domainname != ["nisdatarmor"]:
    # anywhere but Datarmor: use the "local" cluster configuration
    cluster = dask_hpcconfig.cluster("local")
else:
    # Datarmor: named configuration plus any overrides
    cluster = dask_hpcconfig.cluster(cluster_name, **cluster_overrides)
    # only rescale when a size was explicitly requested
    if cluster_size is not None:
        cluster.scale(cluster_size)

client = Client(cluster)
client

## open the data

In [None]:
# Open the tag's NetCDF file located at `tag_url`.
tag = xr.open_dataset(tag_url, engine="netcdf4")
tag

In [None]:
# Open the zarr store at `input_path`; `chunks={}` makes xarray load the variables
# as dask arrays using the engine's preferred chunk sizes instead of reading eagerly.
differences = xr.open_dataset(input_path, engine="zarr", chunks={})
differences

## ocean mask

In [None]:
# The mask is True wherever the `H0` variable is not null.
ocean_mask = differences["H0"].notnull()
ocean_mask

## emission probability matrix

In [None]:
%%time
# Evaluate `pangeo_fish.pdf.normal` on the `diff` variable with zero mean and a
# standard deviation of `diff_std`.
# NOTE(review): `dims` presumably names the spatial dimensions the pdf is
# normalized over; confirm against the `normal` implementation.
emission_pdf = normal(
    differences["diff"],
    mean=0,
    std=diff_std,
    dims=["x", "y"],
)
emission_pdf

## initial and final probability

In [None]:
%%time
# Pull only the latitude / longitude variables out of `differences` and load
# them with `.compute()`; this `grid` is reused for the initial and final
# probabilities below.
grid = differences[["latitude", "longitude"]].compute()
grid

initial probability (release position)

In [None]:
%%time
# covariance for the release position, built from a fixed value of 1e-6
cov = create_covariances(1e-6, coord_names=["latitude", "longitude"])

# position (and time) of the release event
initial_position = tag[["latitude", "longitude", "time"]].sel(events="release")

# normalized distribution on the grid, evaluated at the release position
initial_probability = normal_at(
    grid,
    pos=initial_position,
    cov=cov,
    normalize=True,
    axes=["latitude", "longitude"],
)
initial_probability

final probability (recapture position)

In [None]:
%%time
# Position (and time) of the recapture event. The variable names match the ones
# used for the release event: "time", not "times", which the release selection
# never reads from this dataset and which would raise a KeyError if absent.
final_position = tag[["latitude", "longitude", "time"]].sel(events="recapture")

# covariance built from the squared recapture standard deviation
cov = create_covariances(recapture_std**2, coord_names=["latitude", "longitude"])

# normalized distribution on the grid, evaluated at the recapture position
final_probability = normal_at(
    grid, pos=final_position, cov=cov, normalize=True, axes=["latitude", "longitude"]
)
# display the result, as done for the initial probability
final_probability

## maximum sigma

In [None]:
%%time
# physical constants, as unit-aware scalars
earth_radius = xr.DataArray(6371, dims=None).pint.quantify("km")
max_speed = xr.DataArray(60, dims=None).pint.quantify("km / day").pint.to("km / h")

# to make the search a bit more fuzzy
adjustment_factor = 10

# time step of the data (in hours) and size of a grid cell
# NOTE(review): `resolution` is presumably an angle in radians, making the
# product with the earth radius a length; confirm against the stored units.
timedelta = temporal_resolution(differences.time).pint.quantify().pint.to("h")
grid_resolution = earth_radius * differences["resolution"].pint.quantify()

# largest displacement per time step, expressed in grid cells
max_grid_displacement = (
    max_speed * timedelta * adjustment_factor / grid_resolution
)

# strip the units and scale by the truncation factor
max_sigma = max_grid_displacement.pint.to("dimensionless").pint.magnitude / truncate
max_sigma

## pull everything together and write to disk

In [None]:
%%time
# Bundle every derived quantity into one dataset; the attributes of
# `differences` are carried over and extended with `max_sigma`.
emission = xr.Dataset(
    data_vars={
        "pdf": emission_pdf,
        "mask": ocean_mask,
        "initial": initial_probability,
        "final": final_probability,
    },
    attrs={**differences.attrs, "max_sigma": max_sigma},
)
# convert all variables to dask arrays
emission = emission.chunk()
emission

In [None]:
%%time
# Write the dataset to `output_path`: mode="w" replaces an existing store,
# the metadata is consolidated, and compute=True performs the write right away.
emission.to_zarr(output_path, mode="w", consolidated=True, compute=True)

## visualize the results

In [None]:
# Re-open the store that was just written to inspect what ended up on disk.
emission_ = xr.open_zarr(output_path)
emission_

In [None]:
# Plot the first time step of the emission pdf against longitude / latitude.
emission_["pdf"].isel(time=0).plot(x="longitude", y="latitude")