# regrid the dataset to a HEALPix grid

The default geographic rectilinear grid has non-uniform cell sizes and non-uniform distances between cells, and is thus not suitable for this application. We therefore have to transform the dataset to a grid with equal distances between the cell centers. HEALPix cells satisfy this condition, and can be used for spatial convolutions when reshaped to a 2D array according to the "nested" cell numbering.

As with any interpolation, the linear interpolation supported by the `healpy` library is split into the computation of weights and the application of those weights to the data.

In [None]:
import xarray as xr
from pangeo_fish.healpy import create_grid, HealpyRegridder

## parametrize with [papermill](https://papermill.readthedocs.io/en/latest/)

In [None]:
# Notebook specification
# (this cell only holds plain assignments so the values can be overridden when
# the notebook is parametrized)
nside: int = 4096  # healpix resolution; passed to `create_grid` and embedded in the output store name
rot = {"lat": 0, "lon": 30}  # passed to `create_grid` as `rot`; presumably a grid rotation in degrees — TODO confirm


# Dask parameters (machine- and configuration-dependent)
cluster_size: int | None = None  # when not None, the Datarmor cluster is scaled to this size
cluster_name: str = "datarmor-local"  # cluster name given to `dask_hpcconfig.cluster` on Datarmor
cluster_overrides: dict = {}  # extra keyword arguments forwarded to `dask_hpcconfig.cluster` on Datarmor

# Run-specific parameters
working_path: str = "/home/datawork-taos-s/public/fish/"  # prefix of the input/output zarr stores; must end with "/"
tag_name: str = "A18832"  # tag identifier; used as a path component and as the `.nc` file stem
tag_base_path: str = "/home/datawork-lops-iaocea/data/fish-intel/tag/nc/"  # prefix of the tag file; must end with "/"
tag_db_path: str = "/home/datawork-lops-iaocea/data/fish-intel/acoustic/FishIntel_tagging_France.csv"  # not referenced by the cells visible here
detections_path: str = "/home/datawork-lops-iaocea/data/fish-intel/acoustic/Acoustic_Data/detections_recaptured_fishintel.csv"  # not referenced by the cells visible here

ref_model_name: str = "copernicus"  # path component between the tag name and the zarr store name


## set path using the parameters


In [None]:
# Derive the per-run locations from the notebook parameters.
# The pieces are joined verbatim (no separator is inserted after the base
# paths), so `tag_base_path` and `working_path` must already end with "/".
tag_url = f"{tag_base_path}{tag_name}.nc"
# Store read by this notebook.
input_path = f"{working_path}{tag_name}/{ref_model_name}/diff.zarr"
# Store written by this notebook; the healpix resolution is part of its name.
output_path = f"{working_path}{tag_name}/{ref_model_name}/diff_{nside}.zarr"

## Specify machine-dependent parameters



In [None]:
domainname=!domainname

if domainname == ["nisdatarmor"]:
    # Datarmor
    catalog = "/home/datawork-taos-s/intranet/kerchunk/ref-copernicus.yaml"
else:
    # local PC
    catalog = "https://data-taos.ifremer.fr/kerchunk/ref-copernicus.yaml"

## Start Dask cluster

In [None]:
import dask_hpcconfig
from distributed import Client

In [None]:
# Pick the Dask cluster definition that matches the machine detected earlier.
if domainname != ["nisdatarmor"]:
    # Anywhere but Datarmor: use the "local" cluster definition as-is.
    cluster = dask_hpcconfig.cluster("local")
else:
    # Datarmor: named cluster definition plus any user-supplied overrides.
    cluster = dask_hpcconfig.cluster(cluster_name, **cluster_overrides)
    # Scaling is only requested when a size was explicitly given.
    if cluster_size is not None:
        cluster.scale(cluster_size)

# Connect a client to the cluster and display it as the cell output.
client = Client(cluster)
client

## read the data

In [None]:
%%time
# Open the zarr store at `input_path`. `chunks={}` asks xarray for dask-backed
# (lazy) arrays using the chunking preferred by the storage backend.
# NOTE: the `%%time` cell magic has to stay on the first line of the cell.
ds = xr.open_dataset(input_path, engine="zarr", chunks={})
# Display the dataset as the cell output.
ds

## fill the coast with neighboring values


In [None]:
# Settings shared by both interpolation passes; they are handed unchanged to
# `interpolate_na` as `max_gap`, `limit` and `method`.
max_gap = 2
limit = 1
method = "nearest"

# Fill missing values along "lon" first, then along "lat" on that result.
# NOTE: `fill_value="extrapolate"` was left disabled in the original call.
ds = ds.interpolate_na(dim="lon", method=method, limit=limit, max_gap=max_gap)
ds = ds.interpolate_na(dim="lat", method=method, limit=limit, max_gap=max_gap)


## define the target grid

In [None]:
%%time
# Build the target grid from the `nside` and `rot` notebook parameters.
# NOTE: the `%%time` cell magic has to stay on the first line of the cell.
grid = create_grid(nside=nside, rot=rot)
# Display the grid as the cell output.
grid

## compute the weights

In [None]:
%%time
# Construct the regridder from the (gap-filled) source dataset and the target
# grid. The notebook text calls this the "compute the weights" step; the
# computation itself lives inside `HealpyRegridder` and is not visible here.
# NOTE: the `%%time` cell magic has to stay on the first line of the cell.
regridder = HealpyRegridder(ds, grid)
# Display the regridder as the cell output.
regridder

## apply the weights

In [None]:
%%time
# Regrid the source dataset onto the target grid with the regridder built above.
# NOTE(review): presumably this stays lazy until the store is written, since
# the input was opened with dask chunks — confirm against `regrid_ds`.
# NOTE: the `%%time` cell magic has to stay on the first line of the cell.
regridded = regridder.regrid_ds(ds)
# Display the regridded dataset as the cell output.
regridded

## compute and save to disk

In [None]:
%%time
# Write the regridded dataset to the zarr store at `output_path`.
# `mode="w"` replaces an existing store, `consolidated=True` also writes
# consolidated metadata, and `compute=True` performs the write immediately.
# NOTE: the `%%time` cell magic has to stay on the first line of the cell.
regridded.to_zarr(output_path, mode="w", consolidated=True, compute=True)

## visualize the results

In [None]:
# Re-open the store that was just written (instead of reusing `regridded`) so
# the visualization below reflects what is actually on disk.
regridded_ = xr.open_dataset(output_path, engine="zarr", chunks={})
# Display the re-opened dataset as the cell output.
regridded_

In [None]:
# Plot the first time step of the "diff" variable, using the "longitude" and
# "latitude" coordinates as the x and y axes.
regridded_["diff"].isel(time=0).plot(x="longitude", y="latitude")