# optimization using `scipy.optimize.minimize`

imports

In [None]:
import json

import dask
import fsspec
import xarray as xr

In [None]:
from pangeo_fish.hmm.estimator import EagerScoreEstimator
from pangeo_fish.hmm.optimize import EagerBoundsSearch
from pangeo_fish.pdf import combine_emission_pdf

parametrize with [papermill](https://papermill.readthedocs.io/en/latest/)

In [None]:
# tolerance forwarded to the optimizer as `xtol` and stored with the optimized parameters
tolerance: float = 1e-2

# scheduler_address: str | None = None
# tag identifier; used to build the tag file name and the input / output paths
tag_name: str = "A19124"
# root directory containing the emission input and receiving the optimized parameters
working_path: str = "/home/datawork-taos-s/public/fish/"
# alternative (local PC):
# working_path: str = "/Users/todaka/python/git/pangeo-fish/data_local/fish-intel/"
# name of the reference model; becomes a path component below `tag_name`
ref_model_name: str = "copernicus"

nside: int = 4096  # healpix resolution

# `acoustic` is appended to the path right after the reference model name:
# set it to "/acoustic" to use acoustic information, or to "" to work without it
# acoustic: str = ""
acoustic: str = "/acoustic"

# For this step it is important to choose cluster_size as one.
# cluster_size: int = 1

In [None]:
domainname=!domainname

if domainname == ["nisdatarmor"]:
    # Datarmor
    tag_base_path = "/home/datawork-lops-iaocea/data/fish-intel/"
    tag_base_path = "/home/datawork-taos-s/intranet/data/tag/"
    catalog = "/home/datawork-taos-s/intranet/kerchunk/ref-copernicus.yaml"
    cluster_name="datarmor"
else:
    # local PC
    tag_base_path: str = "/Users/todaka/python/git/pangeo-fish/data_local/fish-intel/"
    catalog = "https://data-taos.ifremer.fr/kerchunk/ref-copernicus.yaml"
    cluster_name="local"

tag_url = tag_base_path +   tag_name + ".nc"

input_path = working_path + tag_name + "/" + ref_model_name + acoustic + "/emission_"+ str(nside) +".zarr"
output_path = working_path + tag_name + "/" + ref_model_name + acoustic + "/sigma_"+ str(nside) +".json"

### Set up Dask



In [None]:
import dask_hpcconfig
from distributed import Client

# Both environments use a single worker; only the Datarmor profile also pins
# the number of threads of that worker.
if domainname == ["nisdatarmor"]:
    n_worker_per_node = 1
    n_threads_per_worker = 28
    print(n_worker_per_node, n_threads_per_worker)
    overrides = {
        "cluster.threads_per_worker": n_threads_per_worker,
        "cluster.n_workers": n_worker_per_node,
    }
    cluster = dask_hpcconfig.cluster("datarmor-local", **overrides)
else:
    n_worker_per_node = 1
    print(n_worker_per_node)
    overrides = {
        "cluster.n_workers": n_worker_per_node,
    }
    # The overrides have to be passed explicitly, otherwise the requested
    # worker count is ignored and the profile defaults are used instead.
    cluster = dask_hpcconfig.cluster("local", **overrides)

client = Client(cluster)
client

## open the data

In [None]:
%%time
data = xr.open_dataset(
    input_path, engine="zarr", chunks={"x": -1, "y": -1}, inline_array=True
).pipe(combine_emission_pdf)
data

## verify the data

In [None]:
import hvplot.xarray

In [None]:
# count the `pdf` values along `x` and `y` and plot the result
data["pdf"].count(dim=["x", "y"]).hvplot(title="count of valid values")

## select the estimator

In [None]:
# estimator constructed without arguments; it is handed to `EagerBoundsSearch`
# in the optimization cell
estimator = EagerScoreEstimator()

## optimize the parameters

In [None]:
%%time
optimizer = EagerBoundsSearch(
    estimator,
    (1e-4, data.attrs["max_sigma"]),
    optimizer_kwargs={"disp": 3, "xtol": tolerance},
)
optimized = optimizer.fit(data)
optimized

## store the optimized parameters to disk

In [None]:
# record the tolerance that was used alongside the optimized parameters
params = {**optimized.to_dict(), "tolerance": tolerance}
with fsspec.open(output_path, mode="w") as f:
    f.write(json.dumps(params))