### This notebook shows how we implement the higher variability near the coasts.

In [None]:
import hvplot.xarray
import numpy as np
import xarray as xr

In [None]:
# Open the distance-to-coast dataset (a zarr store on S3).
coastal_distance = xr.open_dataset(
    "s3://gfts-ifremer/tags/distance2coast.zarr", engine="zarr"
)

In [None]:
# Restrict the dataset to the study area (latitude 40..56, longitude -13..5).
# The latitude bounds are given from 56 down to 40; presumably the latitude
# coordinate of this store is in descending order — TODO confirm.
coastal_distance = coastal_distance.sel(
    {"lat": slice(56, 40), "lon": slice(-13, 5)}
)

In [None]:
# Display the subset dataset as the cell output.
coastal_distance

In [None]:
# Plot the `dist` variable before the transformation is applied.
coastal_distance.dist.hvplot(
    title="Carte des distances", x="lon", y="lat", cmap="viridis", coastline="10m"
)

In [None]:
# Apply the transformation 1 + exp(-0.01 * dist**2).
# The result equals 2 where dist == 0 and decays towards 1 as dist grows;
# the factor 0.01 sets how quickly the curve falls off with distance.
data = 1 + np.exp(-(coastal_distance["dist"] * coastal_distance["dist"]) * 0.01)

In [None]:
# Plot the map after the transformation.
# `data` is already a DataArray (it was computed from coastal_distance["dist"]),
# so it is plotted directly: it has no `dist` variable to look up.
data.hvplot(
    title="Carte des distances transformée",
    x="lon",
    y="lat",
    cmap="viridis",
    coastline="10m",
)

The map that has been obtained will divide the difference map. See the notebook at papermill/pangeo-fish_coastal_distance.ipynb to see how it's implemented in the algorithm.
The parameter 0.01 controls the horizontal width of the curve; after a few trials, it was set to this value, which proved sufficient.
___
Below, you will find the first three steps of the algorithm implementing this functionality.

In [None]:
# Import necessary libraries and modules.
import hvplot.xarray
import intake
import pandas as pd
import xarray as xr
from pint_xarray import unit_registry as ureg

from pangeo_fish.io import open_tag

In [None]:
#
# Set up execution parameters for the analysis.
#
# Note: This cell is tagged as parameters, allowing automatic updates when configuring with papermill.

# tag_name corresponds to the name of the biologging tag name (DST identification number),
# which is also a path for storing all the information for the specific fish tagged with tag_name.

tag_name = "LT_A11338"

# cloud_root is the common S3 prefix from which tag_root and scratch_root are built.
cloud_root = "s3://gfts-ifremer/tags/bargip"

# tag_root specifies the root URL for tag data used for this computation.
tag_root = f"{cloud_root}/cleaned"

# catalog_url specifies the URL for the catalog for reference data used.
catalog_url = "s3://gfts-ifremer/copernicus_catalogs/master.yml"

# scratch_root specifies the root directory for storing output files.
scratch_root = f"{cloud_root}/tracks"


# storage_options specifies options for the filesystem storing output files.
storage_options = {
    "anon": False,
    # 'profile' : "gfts",
    "client_kwargs": {
        "endpoint_url": "https://s3.gra.perf.cloud.ovh.net",
        "region_name": "gra",
    },
}

# Local file system override (currently ACTIVE): the three lines below replace the
# settings above, so storage_options becomes None and scratch_root points to a local
# directory. Comment them out to keep the S3 values defined above.
folder_name = "../toto"
storage_options = None
scratch_root = f"/home/jovyan/notebooks/papermill/{folder_name}"

# Default chunk value for time dimension.  This value depends on the configuration of your dask cluster.
chunk_time = 24

#
# Parameters for step 2. **Compare Reference Model with DST Information:**
#
# bbox, bounding box, defines the latitude and longitude range for the analysis area.
bbox = {"latitude": [40, 56], "longitude": [-13, 5]}

# relative_depth_threshold defines the acceptable fish depth relative to the maximum tag depth.
# It determines whether the fish can be considered to be in a certain location based on depth.
relative_depth_threshold = 0.8

#
# Parameters for step 3. **Regrid the Grid from Reference Model Grid to Healpix Grid:**
#
# Distance filepath is the path to the coastal distance file.
distance_filepath = "s3://gfts-ifremer/tags/distance2coast.zarr"

# distance_scale_factor scales the squared distance in the exponential decay function.
distance_scale_factor = 0.01

# nside defines the resolution of the healpix grid used for regridding.
# It is converted to a grid level with log2 later on, so it should be a power of two.
nside = 4096  # *2

# rot defines the rotation angles for the healpix grid.
rot = {"lat": 0, "lon": 30}

# min_vertices sets the minimum number of vertices for a valid transcription for regridding.
min_vertices = 1

In [None]:
# Define target root directories for storing analysis results.
target_root = f"{scratch_root}/{tag_name}"

# Default chunk sizes used when persisting intermediate results.
# Later cells read both names (`diff.chunk(default_chunk)` and `default_chunk_xy["x"]`),
# so they must exist before those cells run. -1 keeps a dimension in a single chunk.
default_chunk = {"time": chunk_time, "lat": -1, "lon": -1}
default_chunk_xy = {"time": chunk_time, "x": -1, "y": -1}

In [None]:
# Display where the outputs for this tag will be written.
target_root

In [None]:
# Display where the tag data is read from.
tag_root

In [None]:
# Set up a local cluster for distributed computing.
from distributed import LocalCluster

# The cluster is created with its default settings; the client obtained from it
# is displayed as the cell output.
cluster = LocalCluster()
client = cluster.get_client()
client

In [None]:
# Open and retrieve the tag data required for the analysis.
# Later cells read the "dst" and "tagging_events/time" entries of `tag`.
tag = open_tag(tag_root, tag_name)
tag

## 2. **Compare Reference Model with DST Tag Information:** Analyze and compare data from the reference model with information from the biologging data of the species in question. 

In this step, we compare the reference model data with Data Storage Tag information.
The process involves reading and cleaning the reference model, aligning time, converting depth units, subtracting tag data from the model, and saving the results.

In [None]:
# Import necessary libraries
import intake

from pangeo_fish.cf import bounds_to_bins
from pangeo_fish.diff import diff_z
from pangeo_fish.io import open_copernicus_catalog
from pangeo_fish.tags import adapt_model_time, reshape_by_bins, to_time_slice

# Keep only the tag records that fall inside the tagging-events interval.
time_slice = to_time_slice(tag["tagging_events/time"])
time = tag["dst"].ds.time
cond = (time >= time_slice.start) & (time <= time_slice.stop)

tag_log = tag["dst"].ds.where(cond, drop=True)

# Rebuild the slice from the first and last timestamps actually present in the log.
min_, max_ = tag_log.time[0], tag_log.time[-1]
time_slice = slice(min_.data, max_.data)

In [None]:
def get_copernicus_zarr(product_id="IBI_MULTIYEAR_PHY_005_002"):
    """Open the Copernicus reference model and prepare it for the tag comparison.

    The catalog at ``catalog_url`` is opened with ``intake``; the temperature
    (``thetao``), sea-surface height (``zos``) and bathymetry (``deptho``)
    variables of the product are combined into a single dataset.

    This function relies on the notebook-level names ``intake``, ``catalog_url``
    and ``broadcast_variables`` being defined when it is called.

    Parameters
    ----------
    product_id : str, default "IBI_MULTIYEAR_PHY_005_002"
        Catalog entry to open. Only the default product is supported.

    Returns
    -------
    ds
        Dataset with variables ``TEMP`` (from ``thetao``), ``XE`` (from ``zos``),
        ``H0`` (from ``deptho``), ``mask`` (where ``deptho`` is null),
        ``dynamic_depth`` (``depth + XE``) and ``dynamic_bathymetry``
        (``H0 + XE``), with ``latitude``/``longitude``/``elevation`` renamed to
        ``lat``/``lon``/``depth``, passed through ``broadcast_variables``.

    Raises
    ------
    ValueError
        If ``product_id`` is not a supported product.
    """
    supported_products = ("IBI_MULTIYEAR_PHY_005_002",)
    # Fail early with a clear message: without this check an unsupported product
    # would only surface later as an unbound-variable error.
    if product_id not in supported_products:
        raise ValueError(
            f"unsupported product_id {product_id!r}; "
            f"expected one of {supported_products}"
        )

    master_cat = intake.open_catalog(catalog_url)
    sub_cat = master_cat[product_id]

    # Open necessary datasets. thetao and zos live in the same daily entry,
    # so that entry is opened once and both variables are taken from it.
    daily = sub_cat["cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m"](chunk="time").to_dask()
    thetao = daily[["thetao"]]
    zos = daily.zos
    deptho = sub_cat["cmems_mod_ibi_phy_my_0.083deg-3D_static"].to_dask().deptho

    # Overwrite deptho's latitude and longitude with those of thetao
    # (per the original note, to deal with a shift in positions).
    deptho["latitude"] = thetao["latitude"]
    deptho["longitude"] = thetao["longitude"]

    # Create mask for deptho: True where the bathymetry is missing.
    mask = deptho.isnull()

    # Merge datasets and assign relevant variables
    ds = (
        thetao.rename({"thetao": "TEMP"}).assign(
            {
                "XE": zos,
                "H0": deptho,
                "mask": mask,
            }
        )
    ).rename({"latitude": "lat", "longitude": "lon", "elevation": "depth"})

    # Ensure depth is positive
    ds["depth"] = abs(ds["depth"])

    # Reverse the order of the depth axis, then add the dynamic depth and
    # dynamic bathymetry (both offset by the sea-surface height XE).
    ds = (
        ds.isel(depth=slice(None, None, -1))
        .assign(
            {
                "dynamic_depth": lambda ds: (ds["depth"] + ds["XE"]).assign_attrs(
                    {"units": "m", "positive": "down"}
                ),
                "dynamic_bathymetry": lambda ds: (ds["H0"] + ds["XE"]).assign_attrs(
                    {"units": "m", "positive": "down"}
                ),
            }
        )
        .pipe(broadcast_variables, {"lat": "latitude", "lon": "longitude"})
    )
    return ds

In [None]:
# Verify the data
import cmocean
import hvplot.xarray

from pangeo_fish.io import save_html_hvplot

# Overlay the full tag record (blue lines) with the records kept in tag_log
# (red points), for pressure (negated) and for temperature, then save the
# figure as HTML under target_root.
plot = (
    (-tag["dst"].pressure).hvplot(width=1000, height=500, color="blue")
    * (-tag_log).hvplot.scatter(
        x="time", y="pressure", color="red", size=5, width=1000, height=500
    )
    * (
        (tag["dst"].temperature).hvplot(width=1000, height=500, color="blue")
        * (tag_log).hvplot.scatter(
            x="time", y="temperature", color="red", size=5, width=1000, height=500
        )
    )
)
filepath = f"{target_root}/tags.html"

save_html_hvplot(plot, filepath, storage_options)

# Uncomment to display the figure inline as well.
# plot

In [None]:
from pangeo_fish.io import broadcast_variables

In [None]:
# Open the reference model with the default product.
# broadcast_variables must already be imported, since get_copernicus_zarr calls it.
model = get_copernicus_zarr()

In [None]:
# Subset the reference model in two steps:
# 1. align the model time with the time span of tag_log and crop to the
#    latitude/longitude ranges of bbox;
# 2. drop the depth levels below the largest observed pressure (offset by the
#    minimum sea-surface height XE of the cropped model), since they are
#    unlikely given the tag observations.
reference_model = model.sel(time=adapt_model_time(time_slice)).sel(
    lat=slice(*bbox["latitude"]), lon=slice(*bbox["longitude"])
)
reference_model = reference_model.sel(
    depth=slice(
        None,
        (tag_log["pressure"].max() - reference_model["XE"].min()).compute(),
    )
)

In [None]:
%%time
# Reshape the tag log, so that it bins to the time step of reference_model.
# The bins are derived from the time bounds of reference_model; the result is
# then chunked along time with chunk_time.
reshaped_tag = reshape_by_bins(
    tag_log,
    dim="time",
    bins=(
        reference_model.cf.add_bounds(["time"], output_dim="bounds")
        .pipe(bounds_to_bins, bounds_dim="bounds")
        .get("time_bins")
    ),
    bin_dim="bincount",
    other_dim="obs",
).chunk({"time": chunk_time})

In [None]:
# Subtract the time-binned tag_log from the reference_model.
# For each time bin, the observed values are compared with the corresponding
# depth of reference_model using the diff_z function.
diff = diff_z(
    reference_model.chunk({"depth": -1}),
    reshaped_tag,
    depth_threshold=relative_depth_threshold,
)

# Tag the result with the tag id and carry over the bathymetry and ocean mask.
diff = diff.assign_attrs(tag_id=tag_name)
diff = diff.assign(
    H0=reference_model["H0"],
    ocean_mask=reference_model["H0"].notnull(),
)

# Persist the diff data
diff = diff.chunk(default_chunk).persist()
# diff

In [None]:
%%time
# Optional check (left disabled): count the `diff` values over lat/lon and plot the result.
# diff["diff"].count(["lat","lon"]).plot()

In [None]:
# Keep the lat/lon coordinates of the diff grid: they are used later as the
# interpolation targets for the coastal-distance dataset.
target_lat, target_lon = diff["lat"], diff["lon"]

In [None]:
%%time
# Save snapshot to disk (mode="w" overwrites an existing store at that path).
diff.to_zarr(f"{target_root}/diff.zarr", mode="w", storage_options=storage_options)

# Cleanup: drop the names of the large intermediates; the next step re-opens diff from disk.
del tag_log, model, reference_model, reshaped_tag, diff

## 3. **Regrid the Grid from Reference Model Grid to Healpix Grid:** Transform the grid from the reference model to the Healpix grid for further analysis.

In this step, we regrid the data from the reference model grid to a Healpix grid. This process involves defining the Healpix grid, creating the target grid, computing interpolation weights, performing the regridding, and saving the regridded data.


In [None]:
# Import necessary libraries
import numpy as np
import s3fs
from xhealpixify import HealpyGridInfo, HealpyRegridder

from pangeo_fish.grid import center_longitude

In [None]:
%%time

# Open the diff data and performs cleaning operations to prepare it for regridding.
# - latitude/longitude are computed (loaded) and merged back into the dataset;
# - the lat/lon dimensions are swapped for yi/xi.

ds = (
    xr.open_dataset(
        f"{target_root}/diff.zarr",
        engine="zarr",
        chunks={},
        storage_options=storage_options,
    )
    .pipe(lambda ds: ds.merge(ds[["latitude", "longitude"]].compute()))
    .swap_dims({"lat": "yi", "lon": "xi"})
)
ds

In [None]:
# Create an authenticated (anon=False) S3 filesystem handle for the OVH endpoint.
# NOTE(review): `s3` is not referenced by any other cell visible in this notebook — confirm it is still needed.
s3 = s3fs.S3FileSystem(
    anon=False,
    client_kwargs={
        "endpoint_url": "https://s3.gra.perf.cloud.ovh.net",
    },
)

In [None]:
# Open the coastal-distance dataset, crop it to the analysis bounding box and
# sort the latitude in ascending order.
# The crop follows the `bbox` parameter so that it stays consistent with the
# reference-model subset when bbox is overridden. The latitude bounds are
# passed in reverse (max, min), matching the original slice(56, 40).
coastal_distance = (
    xr.open_zarr(distance_filepath)
    .sel(
        lat=slice(*bbox["latitude"][::-1]),
        lon=slice(*bbox["longitude"]),
    )
    .sortby("lat")
)

In [None]:
# Linearly interpolate the coastal distance onto the lat/lon coordinates saved
# from the diff dataset (target_lat / target_lon), so both share the same grid.
coastal_distance = coastal_distance.interp(
    lat=target_lat, lon=target_lon, method="linear"
)

In [None]:
# Apply the transformation 1 + exp(-distance_scale_factor * dist**2):
# the result equals 2 where dist == 0 and decays towards 1 as dist grows.
# NOTE: `dist` is overwritten in place, so re-running this cell applies the
# transformation a second time to the already-transformed values.
coastal_distance["dist"] = 1 + np.exp(
    -(coastal_distance.dist * coastal_distance.dist) * distance_scale_factor
)

In [None]:
# Use yi/xi as dimensions (as done for the diff dataset) and drop the lat/lon variables.
coastal_distance = coastal_distance.swap_dims({"lat": "yi", "lon": "xi"}).drop_vars(
    ["lat", "lon"]
)

In [None]:
%%time
# Define the target Healpix grid information.
# The grid level is log2(nside) truncated to an integer.
grid = HealpyGridInfo(level=int(np.log2(nside)), rot=rot)
target_grid = grid.target_grid(ds).pipe(center_longitude, 0)
target_grid

In [None]:
%%time
# Compute the interpolation weights for regridding the diff data.
# The weights are built from the longitude/latitude/ocean_mask variables of ds,
# with ocean_mask passed as the interpolation mask.
regridder = HealpyRegridder(
    ds[["longitude", "latitude", "ocean_mask"]],
    target_grid,
    method="bilinear",
    interpolation_kwargs={"mask": "ocean_mask", "min_vertices": min_vertices},
)
regridder

In [None]:
%%time
# Perform the regridding operation using the computed interpolation weights.
regridded = regridder.regrid_ds(ds)
regridded

In [None]:
# Regrid the transformed coastal distance with the same regridder as the diff data.
regridded_coastal = regridder.regrid_ds(coastal_distance)

In [None]:
%%time
# Reshape the regridded data to 2D, then persist it.
reshaped = grid.to_2d(regridded).pipe(center_longitude, 0)
reshaped = reshaped.persist()
reshaped

In [None]:
# Reshape the regridded coastal map to 2D in the same way as the diff data.
reshaped_coastal = grid.to_2d(regridded_coastal).pipe(center_longitude, 0)

In [None]:
# This cell verifies the regridded data by plotting the count of non-NaN values.
# reshaped["diff"].count(["x", "y"]).plot()

In [None]:
# Chunk sizes along x and y, taken from default_chunk_xy (which must be defined before this cell runs).
# NOTE(review): coastal_chunk is not used by any other cell visible in this notebook — confirm it is still needed.
coastal_chunk = {"x": default_chunk_xy["x"], "y": default_chunk_xy["y"]}

In [None]:
# Plot the first time step of the difference map before it is divided by the coastal map.
reshaped["diff"].isel(time=0).hvplot.quadmesh(
    title="Carte des différences avant l'ajout de l'incertitude",
    x="longitude",
    y="latitude",
    cmap="cool",
    coastline="10m",
    xlim=bbox["longitude"],
    ylim=bbox["latitude"],
)

In [None]:
# Divide the diff by the transformed coastal map to insert the uncertainty measurement.
# The divisor lies between 1 and 2, so the diff is reduced most where dist is smallest.
# NOTE: `diff` is overwritten in place, so re-running this cell divides it a second time.
reshaped["diff"] = reshaped["diff"] / reshaped_coastal["dist"]

In [None]:
# Plot the first time step of the difference map after it has been divided by the coastal map.
reshaped["diff"].isel(time=0).hvplot.quadmesh(
    title="Carte des différences après l'ajout de l'incertitude",
    x="longitude",
    y="latitude",
    cmap="cool",
    coastline="10m",
    xlim=bbox["longitude"],
    ylim=bbox["latitude"],
)