# preparations for the optimization

From the temperature differences, we can compute:
- the emission probability matrix
- the initial probability
- the final probability
- the ocean mask
- the maximum sigma

In [None]:
import pint_xarray
import xarray as xr

In [None]:
from pangeo_fish.distributions import create_covariances, normal_at
from pangeo_fish.pdf import normal
from pangeo_fish.utils import temporal_resolution

parameters

In [None]:
# User-tunable settings for this notebook; the cells below read them as globals.
# scheduler_address: str | None = None


# Standard deviation handed to `normal` when building the emission pdf.
diff_std: float = 0.75
# Standard deviation at the recapture position; it is squared to build the
# covariance of the final probability.
recapture_std: float = 1e-2
# Divisor applied to the maximum grid displacement to obtain `max_sigma`.
truncate: float = 4.0


# Tag identifier: used as the NetCDF file stem and as a sub-directory name.
tag_name: str = "A19124"
# Root directory under which the per-tag input and output zarr stores live.
working_path: str = "/home/datawork-taos-s/public/fish/"
# working_path: str = "/Users/todaka/python/git/pangeo-fish/data_local/fish-intel/"
# Name of the reference model: used as a sub-directory name.
ref_model_name: str = "copernicus"
# Directory prefix of the tag NetCDF file (`tag_base_path + tag_name + ".nc"`).
tag_base_path: str = "/home/datawork-taos-s/intranet/data/tag/Archival_Data/NetCDF/"
# NOTE(review): the two paths below are not referenced in the visible part of
# this notebook — confirm whether they are still needed.
tag_db_path: str = "/home/datawork-taos-s/intranet/data/tag/Acoustic_Data/FishIntel_tagging_France.csv"
detections_path: str = "/home/datawork-taos-s/intranet/data/tag/Acoustic_Data/pollack_filtered_detections.csv"


nside: int = 4096  # healpix resolution

# Value passed to `cluster.scale` when running on Datarmor.
cluster_size: int = 4

In [None]:
domainname = !domainname

if domainname == ["nisdatarmor"]:
    # Datarmor
#    tag_base_path = "/home/datawork-lops-iaocea/data/fish-intel/"
    catalog = "/home/datawork-taos-s/intranet/kerchunk/ref-copernicus.yaml"
    cluster_name = "datarmor"
else:
    # local PC
#    tag_base_path: str = "/Users/todaka/python/git/pangeo-fish/data_local/fish-intel/"
    catalog = "https://data-taos.ifremer.fr/kerchunk/ref-copernicus.yaml"
    cluster_name = "local"

tag_url = tag_base_path +   tag_name + ".nc"

input_path = (
    working_path + tag_name + "/" + ref_model_name + "/diff_" + str(nside) + ".zarr"
)
output_path = (
    working_path + tag_name + "/" + ref_model_name + "/emission_" + str(nside) + ".zarr"
)

## set up Dask



In [None]:
import dask_hpcconfig
from distributed import Client

# Create the Dask cluster that matches the current machine, then connect a
# client to it.
if domainname != ["nisdatarmor"]:
    cluster = dask_hpcconfig.cluster("local")
else:
    # Optional configuration overrides, for example:
    # overrides = {"cluster.cores": 28, "cluster.processes": 6}
    overrides = {}
    cluster = dask_hpcconfig.cluster("datarmor-local", **overrides)
    cluster.scale(cluster_size)

client = Client(cluster)
client

## open the data

In [None]:
# Read the tag's NetCDF file into an xarray dataset and display it.
tag = xr.open_dataset(
    tag_url,
    engine="netcdf4",
)
tag

In [None]:
# Open the zarr store holding the temperature differences and display it.
# NOTE(review): `chunks={}` presumably requests dask-backed arrays with the
# store's own chunking — confirm against the xarray documentation.
differences = xr.open_dataset(
    input_path,
    engine="zarr",
    chunks={},
)
differences

## ocean mask

In [None]:
# Boolean mask: True wherever the "H0" variable holds a value, False where it
# is missing.
ocean_mask = differences["H0"].notnull()
ocean_mask

## emission probability matrix

In [None]:
# Evaluate a zero-mean normal pdf with standard deviation `diff_std` on the
# temperature differences, over the "x" and "y" dimensions.
emission_pdf = normal(differences["diff"], mean=0, std=diff_std, dims=["x", "y"])
emission_pdf

## initial and final probability

In [None]:
# Keep only the latitude / longitude variables and compute them up front;
# both the initial and the final probability are evaluated on this grid.
grid = differences[["latitude", "longitude"]].compute()
grid

initial probability: a narrow normal distribution centred on the release position of the tag

In [None]:
# Covariance built from a fixed variance of 1e-6 for both coordinates.
cov = create_covariances(1e-6, coord_names=["latitude", "longitude"])

# Position recorded for the "release" event; passed as `pos` below.
initial_position = tag[["latitude", "longitude", "time"]].sel(events="release")

initial_probability = normal_at(
    grid,
    pos=initial_position,
    cov=cov,
    normalize=True,
    axes=["latitude", "longitude"],
)
initial_probability

final probability: a normal distribution centred on the recapture position of the tag

In [None]:
# Position recorded for the "recapture" event; passed as `pos` below.
# The time variable is selected as "time", the same name the release cell
# uses, so both cells read the same variables from `tag`.
final_position = tag[["longitude", "latitude", "time"]].sel(events="recapture")

# Covariance built from the recapture uncertainty (variance = std ** 2).
cov = create_covariances(recapture_std**2, coord_names=["latitude", "longitude"])
final_probability = normal_at(
    grid, pos=final_position, cov=cov, normalize=True, axes=["latitude", "longitude"]
)

## maximum sigma

In [None]:
# Upper bound for sigma: the distance covered at `max_speed` during one time
# step, widened by `adjustment_factor`, divided by the grid resolution
# (made dimensionless) and finally by `truncate`.
max_speed = (
    xr.DataArray(60, dims=None)
    .pint.quantify("km / day")
    .pint.to("km / h")
)
earth_radius = xr.DataArray(6371, dims=None).pint.quantify("km")
# to make the search a bit more fuzzy
adjustment_factor = 10

# Time step of the differences dataset, converted to hours.
timedelta = (
    temporal_resolution(differences.time)
    .pint.quantify()
    .pint.to("h")
)
# Earth radius multiplied by the dataset's "resolution" variable.
grid_resolution = earth_radius * differences["resolution"].pint.quantify()

max_grid_displacement = (max_speed * timedelta * adjustment_factor) / grid_resolution

max_sigma = (
    max_grid_displacement.pint.to("dimensionless").pint.magnitude / truncate
)
max_sigma

## pull everything together and write to disk

In [None]:
%%time
emission = xr.Dataset(
    {
        "pdf": emission_pdf,
        "mask": ocean_mask,
        "initial": initial_probability,
        "final": final_probability,
    },
    attrs=differences.attrs | {"max_sigma": max_sigma},
).chunk()
emission

In [None]:
%%time
emission.to_zarr(output_path, mode="w", consolidated=True, compute=True)

## visualize the results

In [None]:
# Re-open the store that was just written, to inspect what ended up on disk.
emission_ = xr.open_zarr(
    output_path,
)
emission_

In [None]:
# Plot the emission pdf of the first time step on a longitude / latitude map.
(
    emission_["pdf"]
    .isel(time=0)
    .plot(x="longitude", y="latitude")
)