# optimization using `scipy.optimize.minimize`

imports

In [None]:
import json

import dask
import fsspec
import xarray as xr

In [None]:
from pangeo_fish.hmm.estimator import EagerScoreEstimator
from pangeo_fish.hmm.optimize import EagerBoundsSearch
from pangeo_fish.pdf import combine_emission_pdf

## parametrize with [papermill](https://papermill.readthedocs.io/en/latest/)

In [None]:
# Notebook specification
# `tolerance` is handed to the optimizer as its `xtol` option and is stored
# next to the optimized parameters in the output JSON file.
tolerance: float = 1e-2
cluster_size: int = 1
# Keep cluster_size at 1: the original author notes this is important for
# this step.
# NOTE(review): cluster_size is not referenced by the cells of this notebook.


# Dask parameters (machine and configuration dependent)
# NOTE(review): the cluster-start cell hard-codes the cluster names and builds
# its own overrides; confirm whether these two parameters should be used there.
cluster_name: str = "datarmor-local"
cluster_overrides: dict = {}

# Run-specific parameters
working_path: str = "/home/datawork-taos-s/public/fish/"
tag_name: str = "A18832"
tag_base_path: str = "/home/datawork-lops-iaocea/data/fish-intel/tag/nc/"
tag_db_path: str = "/home/datawork-lops-iaocea/data/fish-intel/acoustic/FishIntel_tagging_France.csv"
detections_path: str = "/home/datawork-lops-iaocea/data/fish-intel/acoustic/detections_recaptured_fishintel.csv"

ref_model_name: str = "copernicus"
# `acoustic` is a path segment appended after the reference-model name when
# the input and output paths are built. Use "/acoustic" to work with the
# acoustic information, or "" to work without it:
# acoustic: str = ""
acoustic: str = "/acoustic"

nside: int = 4096  # healpix resolution

## set paths using the parameters


In [None]:
# Location of the tag's netCDF file.
tag_url = "".join((tag_base_path, tag_name, ".nc"))

# Directory shared by the emission input and the optimized-parameter output:
# <working_path><tag_name>/<ref_model_name><acoustic>
_run_dir = "".join((working_path, tag_name, "/", ref_model_name, acoustic))

# Both file names carry the healpix resolution (`nside`).
input_path = _run_dir + f"/emission_{nside!s}.zarr"
output_path = _run_dir + f"/sigma_{nside!s}.json"

## Specify machine dependent parameters



In [None]:
# IPython shell capture (not plain Python): runs the `domainname` command and
# keeps its output, which the following code compares against a list of lines.
domainname=!domainname

# Choose the catalog of the reference model depending on the machine:
# a file on the shared filesystem on Datarmor, the public URL on a local PC.
catalog = (
    "/home/datawork-taos-s/intranet/kerchunk/ref-copernicus.yaml"
    if domainname == ["nisdatarmor"]
    else "https://data-taos.ifremer.fr/kerchunk/ref-copernicus.yaml"
)

## Start Dask cluster
### Specific for this notebook

In [None]:
import dask_hpcconfig
from distributed import Client

# Choose the dask-hpcconfig cluster definition and the settings to override,
# depending on the machine the notebook runs on.
if domainname == ["nisdatarmor"]:
    # Datarmor: a single worker with 28 threads
    n_worker_per_node = 1
    n_threads_per_worker = 28
    print(n_worker_per_node, n_threads_per_worker)
    cluster_definition = "datarmor-local"
    overrides = {
        "cluster.threads_per_worker": n_threads_per_worker,
        "cluster.n_workers": n_worker_per_node,
    }
else:
    # local PC: a single worker
    n_worker_per_node = 1
    print(n_worker_per_node)
    cluster_definition = "local"
    overrides = {
        "cluster.n_workers": n_worker_per_node,
    }

# Forward the overrides for both machines. Previously the local branch built
# `overrides` but never passed it on, so the requested single worker was
# silently ignored there.
cluster = dask_hpcconfig.cluster(cluster_definition, **overrides)

client = Client(cluster)
client

## open the data

In [None]:
%%time
# Open the emission zarr store lazily, keeping the "x" and "y" dimensions in
# a single chunk each, then merge the emission pdfs with
# `combine_emission_pdf`.
data = combine_emission_pdf(
    xr.open_dataset(
        input_path,
        engine="zarr",
        chunks={"x": -1, "y": -1},
        inline_array=True,
    )
)
data

In [None]:
%%time
# Load the lazily opened dataset into memory, so that the following cells
# work on in-memory arrays instead of dask-backed ones.
data = data.compute()
data

## verify the data

In [None]:
import hvplot.xarray

In [None]:
# Number of non-missing pdf values remaining after reducing over "x" and "y".
valid_counts = data["pdf"].count(["x", "y"])
valid_counts.hvplot(title="count of valid values")

## select the estimator

In [None]:
%%time
# Create the estimator with its default constructor arguments; it is handed
# to `EagerBoundsSearch` in the optimization cell.
estimator = EagerScoreEstimator()

## optimize the parameters

In [None]:
%%time
# Search interval: from 1e-4 up to the `max_sigma` attribute stored on the
# emission dataset.
search_bounds = (1e-4, data.attrs["max_sigma"])

# Options forwarded to the underlying optimizer: `disp` sets its verbosity
# and `xtol` receives the notebook's `tolerance` parameter.
search_options = {"disp": 3, "xtol": tolerance}

optimizer = EagerBoundsSearch(
    estimator,
    search_bounds,
    optimizer_kwargs=search_options,
)
optimized = optimizer.fit(data)
optimized

## store the optimized parameters to disk

In [None]:
%%time
# Record the tolerance used for the search next to the optimized parameters,
# then write everything as JSON to `output_path`.
params = optimized.to_dict() | dict(tolerance=tolerance)
with fsspec.open(output_path, mode="w") as sink:
    sink.write(json.dumps(params))