# optimization using `scipy.optimize.minimize`

imports

In [None]:
import json

import dask
import fsspec
import xarray as xr

In [None]:
from pangeo_fish.hmm.estimator import EagerScoreEstimator
from pangeo_fish.hmm.optimize import EagerBoundsSearch
from pangeo_fish.pdf import combine_emission_pdf

parametrize using papermill

In [None]:
# Notebook parameters, meant to be supplied through papermill.
# `input_path` and `output_path` are declared without a default, so a value
# has to be assigned before they are used.

# Location of the emission dataset; it is opened with the zarr engine.
input_path: str
# Destination of the JSON file holding the optimized parameters.
output_path: str

# Passed to the optimizer as `xtol` and stored alongside the result.
tolerance: float = 1e-2

# Address of an existing dask scheduler; when None, a local single-worker
# cluster is started instead.
scheduler_address: str | None = None

In [None]:
# Concrete input / output locations for a single dataset.
# NOTE(review): these assignments run unconditionally, so they replace any
# `input_path` / `output_path` value set before this cell executes — confirm
# how this cell is ordered relative to the parameters injected by papermill.
root = "/home/jmagin/work/data/fish-intel"

# Identifier shared by the input store and the output file name.
name = "A18832-f1_e2500-hp4096"
input_path = f"{root}/emission/{name}.zarr"
output_path = f"{root}/optimized/{name}.json"

set up the dask cluster

In [None]:
from distributed import Client, LocalCluster

# Attach to the scheduler that was passed in; without an address, fall back
# to a single-worker cluster running on this machine.
if scheduler_address is not None:
    client = Client(scheduler_address)
else:
    cluster = LocalCluster(n_workers=1)
    client = cluster.get_client()

# Last expression of the cell: display the client.
client

## optimization

load the data

In [None]:
# Open the zarr store at `input_path` with explicit chunking on `x` and `y`,
# then hand the dataset to `combine_emission_pdf`. The result is what the
# rest of the notebook works on (the next step reads its `pdf` variable).
data = combine_emission_pdf(
    xr.open_dataset(
        input_path,
        engine="zarr",
        chunks={"x": -1, "y": -1},
        inline_array=True,
    )
)

# Last expression of the cell: display the dataset.
data

verify the data is good:

In [None]:
import hvplot.xarray

In [None]:
# Count the non-missing `pdf` values over the spatial dimensions and plot
# the result.
data["pdf"].count(dim=["x", "y"]).hvplot(title="count of valid values")

instantiate the estimator

In [None]:
# Built with the constructor's default arguments; the instance is handed to
# the bounds search in the optimization step.
estimator = EagerScoreEstimator()

optimize the parameters

In [None]:
%%time
# Search interval: a small positive lower bound up to the `max_sigma` value
# recorded in the dataset attributes.
search_bounds = (1e-4, data.attrs["max_sigma"])
# Options forwarded to the optimizer; `tolerance` is the notebook parameter.
search_options = {"disp": 3, "xtol": tolerance}

optimizer = EagerBoundsSearch(
    estimator, search_bounds, optimizer_kwargs=search_options
)
optimized = optimizer.fit(data)

# Last expression of the cell: display the fitted result.
optimized

store the parameters to disk

In [None]:
# Record the tolerance used for the search next to the fitted parameters,
# then write everything to `output_path` as JSON.
params = {**optimized.to_dict(), "tolerance": tolerance}
with fsspec.open(output_path, mode="w") as out:
    json.dump(params, out)