# optimization using `scipy.optimize.minimize`

## imports

In [1]:
import json

import dask
import fsspec
import xarray as xr

In [2]:
from pangeo_fish.hmm.estimator import EagerScoreEstimator
from pangeo_fish.hmm.optimize import EagerBoundsSearch
from pangeo_fish.pdf import combine_emission_pdf

## parametrize with [papermill](https://papermill.readthedocs.io/en/latest/)

In [3]:
# Input/output locations. They are declared without a default, so the names only
# exist once a value is assigned (e.g. injected by papermill).
# NOTE(review): neither name is referenced by the cells visible in this notebook — confirm they are still needed.
input_path: str
output_path: str

# Tolerance forwarded to the optimizer as `xtol` and stored next to the optimized parameters.
tolerance: float = 1e-2

# Address of an existing dask scheduler; with `None` a local cluster is started instead.
scheduler_address: str | None = None

In [4]:
tag_url: str

catalog: str
catalog_parameters: dict = {}

scheduler_address: str | None = None

relative_depth_threshold: float = 0.8

diff_path: str

# local PC
tag_url="/Users/todaka/python/git/pangeo-fish/data_local/fish-intel/tag/nc/A18832.nc"
catalog = "https://data-taos.ifremer.fr/kerchunk/ref-copernicus.yaml"
scheduler_address: str | None = None
catalog = "https://data-taos.ifremer.fr/kerchunk/ref-marc.yaml"
diff_path="/Users/todaka/python/git/pangeo-fish/data_local/fish-intel/A18832-f1_e2500/diff.zarr"
tag_db_path= "/Users/todaka/python/git/pangeo-fish/data_local/fish-intel/acoustic/FishIntel_tagging_France.csv"
detections_path= "/Users/todaka/python/git/pangeo-fish/data_local/fish-intel/acoustic/detections_recaptured_fishintel.csv"

# mars
catalog = "/home/datawork-taos-s/intranet/kerchunk/ref-marc.yaml"
catalog_parameters: dict = {  "region": "f1_e2500",  "year": "2022"}
diff_path="/home/datawork-taos-s/public/fish/A18832-f1_e2500/diff.zarr"
diff_healpix_path="/home/datawork-taos-s/public/fish/A18832-f1_e2500/diff_healpix.zarr"

# Datarmor
tag_url="/home/datawork-lops-iaocea/data/fish-intel/tag/nc/A18832.nc"
# copernicus
catalog="/home/datawork-taos-s/intranet/kerchunk/ref-copernicus.yaml"
#catalog_parameters: dict = {  "type": ["2022_3D","2022_2D","mdt"]}
diff_path="/home/datawork-taos-s/public/fish/A18832-copernicus/diff.zarr"
diff_healpix_path="/home/datawork-taos-s/public/fish/A18832-copernicus/diff_healpix.zarr"
emission_path="/home/datawork-taos-s/public/fish/A18832-copernicus/emission.zarr"
tag_db_path="/home/datawork-lops-iaocea/data/fish-intel/acoustic/FishIntel_tagging_France.csv"
detections_path="/home/datawork-lops-iaocea/data/fish-intel/acoustic/detections_recaptured_fishintel.csv"
receiver_buffer= 1000.0
emission_acoustic_path="/home/datawork-taos-s/public/fish/A18832-copernicus/emission_acoustic.zarr"
state_path="/home/datawork-taos-s/public/fish/A18832-copernicus/state.zarr"
state_json_path="/home/datawork-taos-s/public/fish/A18832-copernicus/state.json"



## set up the dask cluster

In [5]:
from distributed import Client, LocalCluster

# Connect to the configured scheduler when an address is given; otherwise start
# a single-worker local cluster and take the client from it.
if scheduler_address is not None:
    client = Client(scheduler_address)
else:
    cluster = LocalCluster(n_workers=1)
    client = cluster.get_client()
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 1
Total threads: 56,Total memory: 120.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:38897,Workers: 1
Dashboard: http://127.0.0.1:8787/status,Total threads: 56
Started: Just now,Total memory: 120.00 GiB

0,1
Comm: tcp://127.0.0.1:42167,Total threads: 56
Dashboard: http://127.0.0.1:39939/status,Memory: 120.00 GiB
Nanny: tcp://127.0.0.1:51100,
Local directory: /home1/datawork/todaka/git/github-iaocea/pangeo-fish/notebooks/workflow/$TMPDIR/dask-scratch-space/worker-_62fhlqh,Local directory: /home1/datawork/todaka/git/github-iaocea/pangeo-fish/notebooks/workflow/$TMPDIR/dask-scratch-space/worker-_62fhlqh


## open the data

In [6]:
# Open the zarr store at `emission_acoustic_path` with a single chunk along
# "x" and "y", then pass the dataset through `combine_emission_pdf`.
data = combine_emission_pdf(
    xr.open_dataset(
        emission_acoustic_path,
        engine="zarr",
        chunks={"x": -1, "y": -1},
        inline_array=True,
    )
)
data

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 1.87 MiB 1.87 MiB Shape (527, 466) (527, 466) Dask graph 1 chunks in 4 graph layers Data type int64 numpy.ndarray",466  527,

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.87 MiB 1.87 MiB Shape (527, 466) (527, 466) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",466  527,

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.87 MiB 1.87 MiB Shape (527, 466) (527, 466) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",466  527,

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.87 MiB 1.87 MiB Shape (527, 466) (527, 466) Dask graph 1 chunks in 1 graph layer Data type float64 numpy.ndarray",466  527,

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.87 MiB 1.87 MiB Shape (527, 466) (527, 466) Dask graph 1 chunks in 1 graph layer Data type float64 numpy.ndarray",466  527,

Unnamed: 0,Array,Chunk
Bytes,1.87 MiB,1.87 MiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,239.83 kiB,239.83 kiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 239.83 kiB 239.83 kiB Shape (527, 466) (527, 466) Dask graph 1 chunks in 1 graph layer Data type bool numpy.ndarray",466  527,

Unnamed: 0,Array,Chunk
Bytes,239.83 kiB,239.83 kiB
Shape,"(527, 466)","(527, 466)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.32 GiB,1.87 MiB
Shape,"(1814, 527, 466)","(1, 527, 466)"
Dask graph,1814 chunks in 13 graph layers,1814 chunks in 13 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 3.32 GiB 1.87 MiB Shape (1814, 527, 466) (1, 527, 466) Dask graph 1814 chunks in 13 graph layers Data type float64 numpy.ndarray",466  527  1814,

Unnamed: 0,Array,Chunk
Bytes,3.32 GiB,1.87 MiB
Shape,"(1814, 527, 466)","(1, 527, 466)"
Dask graph,1814 chunks in 13 graph layers,1814 chunks in 13 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


## verify the data

In [7]:
import hvplot.xarray

In [8]:
# Count the `pdf` values over "x" and "y" and plot the resulting counts.
data["pdf"].count(dim=["x", "y"]).hvplot(title="count of valid values")

## select the estimator

In [9]:
# Estimator created with its default arguments; it is handed to
# `EagerBoundsSearch` in the optimization step below.
estimator = EagerScoreEstimator()

## optimize the parameters

In [None]:
%%time
optimizer = EagerBoundsSearch(
    estimator,
    (1e-4, data.attrs["max_sigma"]),
    optimizer_kwargs={"disp": 3, "xtol": tolerance},
)
optimized = optimizer.fit(data)
optimized

## store the optimized parameters to disk

In [None]:
# Store the optimized parameters together with the tolerance used to find them.
params = {**optimized.to_dict(), "tolerance": tolerance}
with fsspec.open(state_json_path, mode="w") as out_file:
    json.dump(params, out_file)