# Set up

In [2]:
import calendar
import copernicusmarine
import dask
from datetime import datetime, timedelta
import exactextract as ee
from exactextract import exact_extract
import gc
import geopandas as gpd
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from datetime import datetime, timedelta
import rasterio
from rasterio.mask import mask
from rasterio.features import geometry_mask
import rioxarray as rxr
from shapely.geometry import mapping, shape
from shapely.geometry import mapping, Point
from scipy.spatial import cKDTree
import time 
from tqdm import tqdm
import xarray as xr



os.chdir("/media/marieke/Shared/Chap-1/Model/Scripts/Chap_1_2018-2024")

# Get Copernicus data

### Chlorophyll 1km

In [None]:
# Set parameters
data_request = {
   "dataset_id" : "cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["CHL"]
}

# Load xarray dataset
chl = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
chl.to_netcdf("./data/raw_data/predictors/Chl/cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D_20130101-20250101.nc")


### pH, oxygen 4.2 km

#### pH

In [None]:
# Set parameters
data_request = {
   "dataset_id" : "med-ogs-bio-rean-d",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["02"]
}

# Load xarray dataset
ox = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
ph.to_netcdf("./data/raw_data/predictors/oxygen/med-ogs-bio-rean-d _20130101-20250101.nc")


#### oxygen

In [None]:
# Set parameters
data_request = {
   "dataset_id" : "cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["CHL"]
}

# Load xarray dataset
chl = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
chl.to_netcdf("./data/raw_data/predictors/Chl/cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D_20130101-20250101.nc")


### SST 1km (2008-2025)

In [None]:
# Set parameters
data_request = {
   "dataset_id" : "SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["analysed_sst"]
}

# Load xarray dataset
sst = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
sst.to_netcdf("./data/raw_data/predictors/SST/SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2_SST_20130101-20250101.nc")


### Ocean mixed layer thickness 4.2km (1987-2025)

In [None]:
# Set parameters
data_request = {
   "dataset_id" : "SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["analysed_sst"]
}

# Load xarray dataset
sst = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
sst.to_netcdf("./data/raw_data/predictors/SST/SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2_SST_20130101-20250101.nc")


# Extraction

## Functions

In [24]:
# Pipeline with exactextract

def get_dates(date, time_step):
    """
    Calculate the range of dates for a given time step relative to the provided date.
    """
    from datetime import datetime, timedelta

    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")

    end_date = date - timedelta(days=1)

    time_deltas = {
        'day': 1,
        'week': 7,
        'month': 30,
        'year': 365,
        '5years': 5 * 365 + 1,
    }

    if time_step not in time_deltas:
        raise ValueError("Unsupported time step. Choose from 'day', 'week', 'month', 'year', or '5years'.")

    start_date = date - timedelta(days=time_deltas[time_step])
    return start_date, end_date






def compute_stats(data_array, shape_geometry):
    """
    Extract values and compute weighted mean - automatically with exactextract- , min and max.
    """
    
    try:
        feature = {"type": "Feature", "geometry": mapping(shape_geometry), "properties": {}}
        res = exact_extract(data_array, [feature], ["mean", "min", "max"])


        if not res or len(res) == 0:
            return None, None, None

        props = res[0]["properties"]

        # Multi-band keys: band_1_mean, band_2_mean, etc.
        mean_vals = [v for k, v in props.items() if k.endswith("_mean")]
        min_vals  = [v for k, v in props.items() if k.endswith("_min")]
        max_vals  = [v for k, v in props.items() if k.endswith("_max")]

         # Single-band keys: mean, min, max
        if not mean_vals:
            if "mean" in props:
                mean_vals = [props["mean"]]
        if not min_vals:
            if "min" in props:
                min_vals = [props["min"]]
        if not max_vals:
            if "max" in props:
                max_vals = [props["max"]]
   

        if not mean_vals or not min_vals or not max_vals:
            return None, None, None

        mean_val = float(np.nanmean(mean_vals))
        min_val  = float(np.nanmin(min_vals))
        max_val  = float(np.nanmax(max_vals))

        return mean_val, min_val, max_val

    except Exception as e:
        print(f"compute_stats ERROR: {e}")
        return None, None, None







def open_nc(shape_geometry, date, netcdf_path, variable="CHL"):
    """
    Compute NCDF statistics for a given geometry and date using a netCDF file.
    """
    results = {}

    try:
        if isinstance(date, str):
            date = datetime.strptime(date, "%Y-%m-%d")

        ds = xr.open_dataset(netcdf_path)
        ds = ds.rio.write_crs("EPSG:4326", inplace=True)


        target_date = date - timedelta(days=1)
        time_steps = ["day", "week", "month", "year", "5years"]
        date_ranges = {label: get_dates(date, label) for label in time_steps}


        for label, (start_date, end_date) in date_ranges.items():
            ds_time_range = ds.sel(time=slice(start_date, end_date))

            if ds_time_range.time.size == 0:
                results[label] = (None, None, None, 0)
                continue

            chl_data = ds_time_range[variable]
            valid_data = chl_data.dropna(dim="time", how="all")


            if valid_data.size > 0:                 
                mean_val, min_val, max_val = compute_stats(valid_data, shape_geometry)                
                results[label] = (mean_val, min_val, max_val)
            else:
                print("valid_data.size == 0")
                results[label] = (None, None, None)

        return results

    except Exception as e:
        print(f"Error processing shape with target date: {date}: {e}")
        return {}







def process_geojson(geojson_path, netcdf_path, output_path, variable="CHL"):
    """
    Process the GeoJSON file and compute statistics for each shape using a netCDF file.
    """
    shapes = gpd.read_file(geojson_path)
    shapes = shapes.set_crs("EPSG:4326", allow_override=True)

    results = []

    for _, row in tqdm(shapes.iterrows(), total=shapes.shape[0], desc="Processing shapes"):
        shape_geometry = row.geometry
        date = row["date"]
        polygon_id = row.get("replicates", None)

        nc_stats = open_nc(shape_geometry, date, netcdf_path, variable)

        gc.collect()

        result_entry = {"replicates": polygon_id}
        for label, (mean, min_val, max_val) in nc_stats.items():
            result_entry[f"Cop_{variable}_{label}_mean"] = mean
            result_entry[f"Cop_{variable}_{label}_min"] = min_val
            result_entry[f"Cop_{variable}_{label}_max"] = max_val

        results.append(result_entry)

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_path, index=False)

In [25]:
# NA extraction (on 3 nearest pixels) 

def _detect_xy_coords(da):
    """
    Returns names of the 2 spatial coordinate dims and the 2 coordinate variables (lon/x, lat/y),
    in the order (y_dim, x_dim, y_coord_name, x_coord_name).
    """
    # Candidate names (y_dim, x_dim)
    candidates = [
        ("y", "x", "y", "x"),
        ("lat", "lon", "lat", "lon"),
        ("latitude", "longitude", "latitude", "longitude"),
        ("latitude", "lon", "latitude", "lon"),
        ("y", "x", "lat", "lon")
    ]
    dims = da.dims
    coords = da.coords

    for ydim, xdim, ycoord, xcoord in candidates:
        if ydim in dims and xdim in dims:
            # prefer explicit coords if present
            if ycoord in coords and xcoord in coords:
                return ydim, xdim, ycoord, xcoord
            # else still OK (coords might be named same as dims)
            return ydim, xdim, ydim, xdim

    # Fallback: pick the last two non-time dims as spatial dims
    non_time_dims = [d for d in dims if d.lower() != "time"]
    if len(non_time_dims) >= 2:
        ydim = non_time_dims[-2]
        xdim = non_time_dims[-1]
        # pick coordinates with same name if exist, else use dims
        ycoord = ydim if ydim in coords else None
        xcoord = xdim if xdim in coords else None
        if ycoord and xcoord:
            return ydim, xdim, ycoord, xcoord

    # Nothing detected
    return None, None, None, None


def _find_nearest_pixel_indices(da, lon, lat, max_pixels=3):
    """
    Given an xarray DataArray (2D or 3D with spatial dims), return up to max_pixels
    nearest (iy, ix) index tuples to the point (lon, lat).
    """
    ydim, xdim, ycoord_name, xcoord_name = _detect_xy_coords(da)
    if ydim is None:
        raise RuntimeError("Could not detect spatial dims in data_array. Expected dims like ('lat','lon') or ('y','x').")

    # Get coordinate arrays (make 2D grid of coordinates)
    try:
        xcoords = da.coords[xcoord_name]
        ycoords = da.coords[ycoord_name]
    except Exception:
        # try fallback: use coordinate values constructed from indices
        xcoords = np.arange(da.sizes[xdim])
        ycoords = np.arange(da.sizes[ydim])

    # Convert to 1D arrays (if they are 2D, flatten their unique values)
    if getattr(xcoords, "ndim", 1) == 2:
        # assume regular grid: take first row/col
        xvals = np.asarray(xcoords[0, :]).ravel()
    else:
        xvals = np.asarray(xcoords)

    if getattr(ycoords, "ndim", 1) == 2:
        yvals = np.asarray(ycoords[:, 0]).ravel()
    else:
        yvals = np.asarray(ycoords)

    # Build meshgrid of points and compute squared distance
    xv, yv = np.meshgrid(xvals, yvals)
    # Note: coordinates might be (lon, lat) or (x, y) depending on naming; user expects lon, lat order.
    # We'll assume x axis corresponds to 'lon'-like coords and y axis to 'lat'-like coords.
    dist2 = (xv - lon) ** 2 + (yv - lat) ** 2
    flat_idx = np.argsort(dist2.ravel())
    n_available = dist2.size
    n_pick = min(max_pixels, n_available)

    indices = []
    for i in range(n_pick):
        fi = flat_idx[i]
        iy, ix = np.unravel_index(fi, dist2.shape)
        indices.append((iy, ix))
    return indices, (yvals, xvals)


def compute_stats_na(data_array, shape_geometry):
    """
    Extract values and compute weighted mean (via exact_extract), min and max.
    If exact_extract finds no intersecting pixels, find the 3 nearest pixel centers
    to the polygon centroid and compute mean/min/max of those pixels (across space and time).
    """
    try:
        feature = {"type": "Feature", "geometry": mapping(shape_geometry), "properties": {}}
        res = exact_extract(data_array, [feature], ["mean", "min", "max"])

        if res and len(res) > 0:
            props = res[0].get("properties", {})

            # Multi-band/time keys: band_1_mean, band_2_mean, etc., or single-band keys: mean, min, max
            mean_vals = [v for k, v in props.items() if k.endswith("_mean")]
            min_vals = [v for k, v in props.items() if k.endswith("_min")]
            max_vals = [v for k, v in props.items() if k.endswith("_max")]

            # Single-band fallback
            if not mean_vals and "mean" in props:
                mean_vals = [props["mean"]]
            if not min_vals and "min" in props:
                min_vals = [props["min"]]
            if not max_vals and "max" in props:
                max_vals = [props["max"]]

            if not mean_vals or not min_vals or not max_vals:
                # go to fallback sampling if some stats missing
                raise ValueError("exact_extract returned incomplete stats, falling back to nearest-pixel sampling")

            mean_val = float(np.nanmean(mean_vals))
            min_val = float(np.nanmin(min_vals))
            max_val = float(np.nanmax(max_vals))

            return mean_val, min_val, max_val

        # If we reach here: res empty => fallback to nearest pixels
        # Compute centroid of polygon (in same CRS as data assumed to be lon/lat EPSG:4326)
        centroid = shape_geometry.centroid
        lon = centroid.x
        lat = centroid.y

        # data_array might have time dimension; we will gather values for up to 3 nearest pixels
        # and compute mean/min/max across the selected pixels and across time.
        # Work on a 2D spatial slice: if there's a time dimension, keep it; we gather all time values.
        da = data_array

        # If data_array has a time dimension and multiple time steps, we keep them all.
        # Build spatial-only 2D grid sampling indices
        indices, (yvals, xvals) = _find_nearest_pixel_indices(da, lon, lat, max_pixels=3)

        # Figure out spatial dims names to isel
        ydim, xdim, ycoord_name, xcoord_name = _detect_xy_coords(da)
        if ydim is None:
            # can't detect dims - return Nones
            return None, None, None

        # collect values
        pixel_values = []
        # If da is DataArray with dims (time, y, x) or (y,x) etc.
        for iy, ix in indices:
            try:
                # xarray .isel expects integer index positions for each dim name
                sel_dict = {ydim: iy, xdim: ix}
                sel_da = da.isel(**sel_dict)
                vals = np.asarray(sel_da.values).ravel()  # includes time if present
                pixel_values.append(vals)
            except Exception:
                # try alternative: if coords were 1D but dims named differently, attempt manual numpy indexing
                try:
                    arr = np.asarray(da.values)
                    # handle shape permutations: try to find last two axes as spatial
                    if arr.ndim >= 2:
                        iy_eff = iy
                        ix_eff = ix
                        # assume last two dims are y,x
                        vals = arr[..., iy_eff, ix_eff].ravel() if arr.ndim > 2 else arr[iy_eff, ix_eff].ravel()
                        pixel_values.append(vals)
                    else:
                        # cannot index; skip
                        pass
                except Exception:
                    pass

        if len(pixel_values) == 0:
            # no data found in fallback
            return None, None, None

        # concatenate all pixel arrays (this includes time dimension flattened)
        concat = np.concatenate([p[np.isfinite(p)] for p in pixel_values]) if any(p.size for p in pixel_values) else np.array([])
        if concat.size == 0:
            return None, None, None

        mean_val = float(np.nanmean(concat))
        min_val = float(np.nanmin(concat))
        max_val = float(np.nanmax(concat))

        return mean_val, min_val, max_val

    except Exception as e:
        # For debugging, you might want to log the exception
        print(f"compute_stats ERROR (fallback attempted): {e}")
        return None, None, None


def open_nc_na(shape_geometry, date, netcdf_path, variable="CHL"):
    """
    Compute NCDF statistics for a given geometry and date using a netCDF file.
    """
    results = {}

    try:
        if isinstance(date, str):
            date = datetime.strptime(date, "%Y-%m-%d")

        ds = xr.open_dataset(netcdf_path)
        # ensure CRS - caller used .rio; keep as-is, but don't require rioxarray for fallback
        try:
            ds = ds.rio.write_crs("EPSG:4326", inplace=True)
        except Exception:
            pass

        target_date = date - timedelta(days=1)
        time_steps = ["day", "week", "month", "year", "5years"]
        date_ranges = {label: get_dates(date, label) for label in time_steps}

        for label, (start_date, end_date) in date_ranges.items():
            ds_time_range = ds.sel(time=slice(start_date, end_date))

            if getattr(ds_time_range.time, "size", 0) == 0:
                results[label] = (None, None, None, 0)
                continue

            chl_data = ds_time_range[variable]
            valid_data = chl_data.dropna(dim="time", how="all") if "time" in chl_data.dims else chl_data

            if valid_data.size > 0:
                mean_val, min_val, max_val = compute_stats_na(valid_data, shape_geometry)
                results[label] = (mean_val, min_val, max_val)
            else:
                print("valid_data.size == 0")
                results[label] = (None, None, None)

        return results

    except Exception as e:
        print(f"Error processing shape with target date: {date}: {e}")
        return {}



def process_geojson_na(geojson, netcdf_path, variable="CHL"):
    """
    Process the GeoJSON file and compute statistics for each shape using a netCDF file.
    """
    shapes = geojson
    shapes = shapes.set_crs("EPSG:4326", allow_override=True)

    results = []

    for _, row in tqdm(shapes.iterrows(), total=shapes.shape[0], desc="Processing shapes"):
        shape_geometry = row.geometry
        date = row["date"]
        polygon_id = row.get("replicates", None)

        nc_stats = open_nc_na(shape_geometry, date, netcdf_path, variable)

        gc.collect()

        result_entry = {"replicates": polygon_id}
        for label, (mean, min_val, max_val) in nc_stats.items():
            result_entry[f"Cop_{variable}_{label}_mean"] = mean
            result_entry[f"Cop_{variable}_{label}_min"] = min_val
            result_entry[f"Cop_{variable}_{label}_max"] = max_val

        results.append(result_entry)

    results_df = pd.DataFrame(results)
    return results_df


In [43]:
# --- Detect spatial dims ---
def _detect_xy_coords(da):
    candidates = [
        ("y", "x", "y", "x"),
        ("lat", "lon", "lat", "lon"),
        ("latitude", "longitude", "latitude", "longitude"),
        ("latitude", "lon", "latitude", "lon"),
        ("y", "x", "lat", "lon")
    ]
    dims = da.dims
    coords = da.coords
    for ydim, xdim, ycoord, xcoord in candidates:
        if ydim in dims and xdim in dims:
            if ycoord in coords and xcoord in coords:
                return ydim, xdim, ycoord, xcoord
            return ydim, xdim, ydim, xdim
    # fallback: last two non-time dims
    non_time_dims = [d for d in dims if d.lower() != "time"]
    if len(non_time_dims) >= 2:
        ydim, xdim = non_time_dims[-2], non_time_dims[-1]
        ycoord = ydim if ydim in coords else None
        xcoord = xdim if xdim in coords else None
        if ycoord and xcoord:
            return ydim, xdim, ycoord, xcoord
    return None, None, None, None

# --- Flatten grid and compute distances ---
def _get_sorted_pixel_indices(da, lon, lat):
    ydim, xdim, ycoord_name, xcoord_name = _detect_xy_coords(da)
    if ydim is None:
        raise RuntimeError("Could not detect spatial dims")
    xvals = np.asarray(da.coords[xcoord_name])
    yvals = np.asarray(da.coords[ycoord_name])
    xv, yv = np.meshgrid(xvals, yvals)
    dist2 = (xv - lon)**2 + (yv - lat)**2
    flat_idx = np.argsort(dist2.ravel())
    indices = [np.unravel_index(fi, dist2.shape) for fi in flat_idx]
    return indices, ydim, xdim

# --- Compute stats on nearest non-NaN pixels ---
def compute_nearest_stats_non_na(da, shape_geometry, max_pixels=3, debug=False):
    centroid = shape_geometry.centroid
    lon, lat = centroid.x, centroid.y
    all_indices, ydim, xdim = _get_sorted_pixel_indices(da, lon, lat)

    collected_vals = []
    for i, (iy, ix) in enumerate(all_indices):
        vals = np.ravel(da.isel(**{ydim: iy, xdim: ix}).values)
        vals_finite = vals[np.isfinite(vals)]
        if debug:
            print(f"Pixel {i+1} at index ({iy},{ix}) values: {vals}")
            print(f"Pixel {i+1} finite values: {vals_finite}")
        collected_vals.extend(vals_finite)
        if len(collected_vals) >= max_pixels:
            break

    if len(collected_vals) == 0:
        if debug:
            print("No valid data found in any nearby pixels.")
        return None, None, None

    concat = np.array(collected_vals)
    mean_val, min_val, max_val = float(np.nanmean(concat)), float(np.nanmin(concat)), float(np.nanmax(concat))
    if debug:
        print(f"Aggregated mean: {mean_val}, min: {min_val}, max: {max_val}")
    return mean_val, min_val, max_val

# --- Compute NCDF stats for a polygon ---
def open_nc_nearest(shape_geometry, date, netcdf_path, variable="CHL", debug=False):
    ds = xr.open_dataset(netcdf_path)
    if "time" in ds.dims:
        ds = ds.sel(time=slice(date - timedelta(days=1), date - timedelta(days=1)))
    da = ds[variable]
    return {"nearest": compute_nearest_stats_non_na(da, shape_geometry, debug=debug)}

# --- Process GeoJSON ---
def process_geojson_nearest(geojson, netcdf_path, variable="CHL", debug=False):
    shapes = geojson.set_crs("EPSG:4326", allow_override=True)
    results = []
    for _, row in tqdm(shapes.iterrows(), total=shapes.shape[0]):
        shape_geometry = row.geometry
        polygon_id = row.get("replicates", None)
        nc_stats = open_nc_nearest(
            shape_geometry,
            datetime.strptime(row["date"], "%Y-%m-%d"),
            netcdf_path,
            variable,
            debug=debug
        )
        result_entry = {"replicates": polygon_id}
        for label, (mean_val, min_val, max_val) in nc_stats.items():
            result_entry[f"Cop_{variable}_{label}_mean"] = mean_val
            result_entry[f"Cop_{variable}_{label}_min"] = min_val
            result_entry[f"Cop_{variable}_{label}_max"] = max_val
        results.append(result_entry)
        gc.collect()
    return gpd.GeoDataFrame(results)


In [46]:
import gc
import numpy as np
import xarray as xr
import geopandas as gpd
from shapely.geometry import mapping
from datetime import datetime, timedelta
from tqdm import tqdm

# --- Time range helper ---
def get_dates(date, time_step):
    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")
    end_date = date - timedelta(days=1)
    time_deltas = {
        'day': 1,
        'week': 7,
        'month': 30,
        'year': 365,
        '5years': 5 * 365 + 1,
    }
    if time_step not in time_deltas:
        raise ValueError("Unsupported time step. Choose from 'day', 'week', 'month', 'year', or '5years'.")
    start_date = date - timedelta(days=time_deltas[time_step])
    return start_date, end_date

# --- Detect spatial dims ---
def _detect_xy_coords(da):
    candidates = [
        ("y", "x", "y", "x"),
        ("lat", "lon", "lat", "lon"),
        ("latitude", "longitude", "latitude", "longitude"),
        ("latitude", "lon", "latitude", "lon"),
        ("y", "x", "lat", "lon")
    ]
    dims = da.dims
    coords = da.coords
    for ydim, xdim, ycoord, xcoord in candidates:
        if ydim in dims and xdim in dims:
            if ycoord in coords and xcoord in coords:
                return ydim, xdim, ycoord, xcoord
            return ydim, xdim, ydim, xdim
    # fallback
    non_time_dims = [d for d in dims if d.lower() != "time"]
    if len(non_time_dims) >= 2:
        ydim, xdim = non_time_dims[-2], non_time_dims[-1]
        ycoord = ydim if ydim in coords else None
        xcoord = xdim if xdim in coords else None
        if ycoord and xcoord:
            return ydim, xdim, ydim, xdim
    return None, None, None, None

# --- Nearest pixels ---
def _get_sorted_pixel_indices(da, lon, lat):
    ydim, xdim, ycoord_name, xcoord_name = _detect_xy_coords(da)
    if ydim is None:
        raise RuntimeError("Could not detect spatial dims")
    xvals = np.asarray(da.coords[xcoord_name])
    yvals = np.asarray(da.coords[ycoord_name])
    xv, yv = np.meshgrid(xvals, yvals)
    dist2 = (xv - lon)**2 + (yv - lat)**2
    flat_idx = np.argsort(dist2.ravel())
    indices = [np.unravel_index(fi, dist2.shape) for fi in flat_idx]
    return indices, ydim, xdim

# --- Compute stats on nearest non-NaN pixels ---
def compute_nearest_stats_non_na(da, shape_geometry, max_values=3, debug=False):
    centroid = shape_geometry.centroid
    lon, lat = centroid.x, centroid.y
    all_indices, ydim, xdim = _get_sorted_pixel_indices(da, lon, lat)

    collected_vals = []
    for i, (iy, ix) in enumerate(all_indices):
        vals = np.ravel(da.isel(**{ydim: iy, xdim: ix}).values)
        vals_finite = vals[np.isfinite(vals)]
        if debug:
            print(f"Pixel {i+1} at index ({iy},{ix}) values: {vals}")
            print(f"Pixel {i+1} finite values: {vals_finite}")
        collected_vals.extend(vals_finite)
        if len(collected_vals) >= max_values:
            break

    if len(collected_vals) == 0:
        if debug:
            print("No valid data found in nearby pixels.")
        return None, None, None

    concat = np.array(collected_vals)
    mean_val, min_val, max_val = float(np.nanmean(concat)), float(np.nanmin(concat)), float(np.nanmax(concat))
    if debug:
        print(f"Aggregated mean: {mean_val}, min: {min_val}, max: {max_val}")
    return mean_val, min_val, max_val

# --- Open netCDF and compute stats for all time_steps ---
def open_nc_nearest_timesteps(shape_geometry, date, netcdf_path, variable="CHL", debug=False):
    ds = xr.open_dataset(netcdf_path)
    ds = ds.rio.write_crs("EPSG:4326", inplace=True)
    time_steps = ["day", "week", "month", "year", "5years"]
    date_ranges = {label: get_dates(date, label) for label in time_steps}
    results = {}

    for label, (start_date, end_date) in date_ranges.items():
        ds_time_range = ds.sel(time=slice(start_date, end_date)) if "time" in ds.dims else ds
        if getattr(ds_time_range.time, "size", 1) == 0:
            results[label] = (None, None, None)
            continue

        da = ds_time_range[variable]
        if da.size == 0:
            results[label] = (None, None, None)
            continue

        mean_val, min_val, max_val = compute_nearest_stats_non_na(da, shape_geometry, debug=debug)
        results[label] = (mean_val, min_val, max_val)

    return results

# --- Process GeoJSON ---
def process_geojson_nearest_timesteps(geojson, netcdf_path, variable="CHL", debug=False):
    shapes = geojson.set_crs("EPSG:4326", allow_override=True)
    results = []

    for _, row in tqdm(shapes.iterrows(), total=shapes.shape[0], desc="Processing shapes"):
        shape_geometry = row.geometry
        date = row["date"]
        polygon_id = row.get("replicates", None)

        nc_stats = open_nc_nearest_timesteps(shape_geometry, date, netcdf_path, variable, debug=debug)
        gc.collect()

        result_entry = {"replicates": polygon_id}
        for label, (mean_val, min_val, max_val) in nc_stats.items():
            result_entry[f"Cop_{variable}_{label}_mean"] = mean_val
            result_entry[f"Cop_{variable}_{label}_min"] = min_val
            result_entry[f"Cop_{variable}_{label}_max"] = max_val

        results.append(result_entry)

    return gpd.GeoDataFrame(results)


## Run extraction

In [None]:
# Data prep : Convert mtdt_5.gpkg to .geojson 
# Load the file with buffer for extraction
gdf = gpd.read_file("./data/processed_data/eDNA/mtdt_5.gpkg")

# Save as GeoJSON
geojson_path = "./data/processed_data/eDNA/mtdt_5.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")

print(f"GeoJSON file saved to {geojson_path}")


### Chlorophyll

In [None]:
# Extract CHL from cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D  

geojson_path="./data/processed_data/eDNA/mtdt_5.geojson"
netcdf_path="./data/raw_data/predictors/Chl/cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D_20130101-20250101.nc"
output_path="./data/processed_data/predictors/mtdt_5_CHL_exactextract.csv"


process_geojson(
    geojson_path=geojson_path,
    netcdf_path=netcdf_path,
    output_path=output_path,
    variable="CHL"  
)


#### Extract NA

In [18]:
# Data prep

# Read data
chl = pd.read_csv("./data/processed_data/predictors/mtdt_5_CHL_exactextract.csv")
chl_spat = gpd.read_file("./data/processed_data/eDNA/mtdt_5.geojson")

# Columns of interest
chl_cols = [
    'Cop_CHL_day_mean', 'Cop_CHL_day_min', 'Cop_CHL_day_max',
    'Cop_CHL_week_mean', 'Cop_CHL_week_min', 'Cop_CHL_week_max',
    'Cop_CHL_month_mean', 'Cop_CHL_month_min', 'Cop_CHL_month_max',
    'Cop_CHL_year_mean', 'Cop_CHL_year_min', 'Cop_CHL_year_max',
    'Cop_CHL_5years_mean', 'Cop_CHL_5years_min', 'Cop_CHL_5years_max'
]

# Keep only rows with at least one NA in those columns
chl_na = chl[chl[chl_cols].isna().any(axis=1)]

# Filter spatial GeoDataFrame to match NA rows using 'replicates'
chl_spat_na = chl_spat[chl_spat['replicates'].isin(chl_na['replicates'])]
chl_spat_na.shape


(14, 28)

In [47]:
# Extraction 

netcdf_path="./data/raw_data/predictors/Chl/cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D_20130101-20250101.nc"


na_extract = process_geojson_nearest_timesteps(chl_spat_na[0:10], netcdf_path, variable="CHL", debug=True)

print(na_extract)


Processing shapes:   0%|                                 | 0/10 [00:00<?, ?it/s]

Pixel 1 at index (135,446) values: [nan]
Pixel 1 finite values: []
Pixel 2 at index (134,446) values: [nan]
Pixel 2 finite values: []
Pixel 3 at index (136,446) values: [0.03860969]
Pixel 3 finite values: [0.03860969]
Pixel 4 at index (135,445) values: [nan]
Pixel 4 finite values: []
Pixel 5 at index (135,447) values: [nan]
Pixel 5 finite values: []
Pixel 6 at index (134,445) values: [nan]
Pixel 6 finite values: []
Pixel 7 at index (136,445) values: [0.03860354]
Pixel 7 finite values: [0.03860354]
Pixel 8 at index (134,447) values: [nan]
Pixel 8 finite values: []
Pixel 9 at index (136,447) values: [0.03924014]
Pixel 9 finite values: [0.03924014]
Aggregated mean: 0.038817793130874634, min: 0.038603540509939194, max: 0.039240140467882156
Pixel 1 at index (135,446) values: [nan nan nan nan nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (134,446) values: [nan nan nan nan nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (136,446) values: [0.03954391 0.04239909 0.0372893  0.0

Processing shapes:  10%|██▌                      | 1/10 [00:00<00:06,  1.33it/s]

Pixel 1 at index (135,446) values: [nan nan nan ... nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (134,446) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (136,446) values: [0.041856   0.05039306 0.04703444 ... 0.03954259 0.04280984 0.03860969]
Pixel 3 finite values: [0.041856   0.05039306 0.04703444 ... 0.03954259 0.04280984 0.03860969]
Aggregated mean: 0.1290677785873413, min: 0.023475823923945427, max: 1.671581745147705
Pixel 1 at index (151,475) values: [nan]
Pixel 1 finite values: []
Pixel 2 at index (150,475) values: [nan]
Pixel 2 finite values: []
Pixel 3 at index (151,476) values: [nan]
Pixel 3 finite values: []
Pixel 4 at index (152,475) values: [0.06230608]
Pixel 4 finite values: [0.06230608]
Pixel 5 at index (150,476) values: [nan]
Pixel 5 finite values: []
Pixel 6 at index (151,474) values: [0.06137509]
Pixel 6 finite values: [0.06137509]
Pixel 7 at index (150,474) values: [nan]
Pixel 7 finite values: []
Pixel 8 at index (152,476)

Processing shapes:  20%|█████                    | 2/10 [00:01<00:06,  1.32it/s]

Pixel 1 at index (151,475) values: [nan nan nan ... nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (150,475) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (151,476) values: [nan nan nan ... nan nan nan]
Pixel 3 finite values: []
Pixel 4 at index (152,475) values: [0.06556961 0.08193054 0.0650951  ... 0.0608315  0.05747365 0.06230608]
Pixel 4 finite values: [0.06556961 0.08193054 0.0650951  ... 0.0608315  0.05747365 0.06230608]
Aggregated mean: 0.12836098670959473, min: 0.021540097892284393, max: 1.1409083604812622
Pixel 1 at index (144,470) values: [nan]
Pixel 1 finite values: []
Pixel 2 at index (143,470) values: [nan]
Pixel 2 finite values: []
Pixel 3 at index (144,471) values: [nan]
Pixel 3 finite values: []
Pixel 4 at index (143,471) values: [nan]
Pixel 4 finite values: []
Pixel 5 at index (145,470) values: [0.04750602]
Pixel 5 finite values: [0.04750602]
Pixel 6 at index (145,471) values: [nan]
Pixel 6 finite values: []
Pixel 7 at index 

Processing shapes:  30%|███████▌                 | 3/10 [00:02<00:05,  1.30it/s]

Pixel 1 at index (144,470) values: [nan nan nan ... nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (143,470) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (144,471) values: [nan nan nan ... nan nan nan]
Pixel 3 finite values: []
Pixel 4 at index (143,471) values: [nan nan nan ... nan nan nan]
Pixel 4 finite values: []
Pixel 5 at index (145,470) values: [0.05019778 0.04687704 0.04457241 ... 0.05056209 0.06153848 0.04750602]
Pixel 5 finite values: [0.05019778 0.04687704 0.04457241 ... 0.05056209 0.06153848 0.04750602]
Aggregated mean: 0.12863583862781525, min: 0.015254789963364601, max: 1.0258376598358154
Pixel 1 at index (145,489) values: [nan]
Pixel 1 finite values: []
Pixel 2 at index (146,489) values: [nan]
Pixel 2 finite values: []
Pixel 3 at index (145,488) values: [nan]
Pixel 3 finite values: []
Pixel 4 at index (146,488) values: [nan]
Pixel 4 finite values: []
Pixel 5 at index (144,489) values: [nan]
Pixel 5 finite values: []
Pixel 6 at

Processing shapes:  40%|██████████               | 4/10 [00:03<00:04,  1.26it/s]

Pixel 1 at index (145,489) values: [nan nan nan ... nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (146,489) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (145,488) values: [nan nan nan ... nan nan nan]
Pixel 3 finite values: []
Pixel 4 at index (146,488) values: [nan nan nan ... nan nan nan]
Pixel 4 finite values: []
Pixel 5 at index (144,489) values: [nan nan nan ... nan nan nan]
Pixel 5 finite values: []
Pixel 6 at index (144,488) values: [nan nan nan ... nan nan nan]
Pixel 6 finite values: []
Pixel 7 at index (147,489) values: [nan nan nan ... nan nan nan]
Pixel 7 finite values: []
Pixel 8 at index (145,490) values: [nan nan nan ... nan nan nan]
Pixel 8 finite values: []
Pixel 9 at index (147,488) values: [0.05025046 0.04548546 0.04233098 ... 0.06837323 0.06458901 0.06533112]
Pixel 9 finite values: [0.05025046 0.04548546 0.04233098 ... 0.06837323 0.06458901 0.06533112]
Aggregated mean: 0.15746603906154633, min: 0.021727316081523895, max: 

Processing shapes:  50%|████████████▌            | 5/10 [00:03<00:03,  1.28it/s]

Pixel 1 at index (144,470) values: [nan nan nan ... nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (143,470) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (144,471) values: [nan nan nan ... nan nan nan]
Pixel 3 finite values: []
Pixel 4 at index (145,470) values: [0.05019778 0.04687704 0.04457241 ... 0.05056209 0.06153848 0.04750602]
Pixel 4 finite values: [0.05019778 0.04687704 0.04457241 ... 0.05056209 0.06153848 0.04750602]
Aggregated mean: 0.12863583862781525, min: 0.015254789963364601, max: 1.0258376598358154
Pixel 1 at index (232,321) values: [nan]
Pixel 1 finite values: []
Pixel 2 at index (233,321) values: [nan]
Pixel 2 finite values: []
Pixel 3 at index (232,320) values: [nan]
Pixel 3 finite values: []
Pixel 4 at index (231,321) values: [0.09576152]
Pixel 4 finite values: [0.09576152]
Pixel 5 at index (233,320) values: [nan]
Pixel 5 finite values: []
Pixel 6 at index (232,322) values: [0.08598803]
Pixel 6 finite values: [0.08598803]


Processing shapes:  60%|███████████████          | 6/10 [00:04<00:03,  1.19it/s]

Pixel 2 at index (233,321) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (232,320) values: [nan nan nan ... nan nan nan]
Pixel 3 finite values: []
Pixel 4 at index (231,321) values: [0.07428484 0.0816424  0.06988659 ... 0.08865806 0.09803498 0.09576152]
Pixel 4 finite values: [0.07428484 0.0816424  0.06988659 ... 0.08865806 0.09803498 0.09576152]
Aggregated mean: 0.19679035246372223, min: 0.03131013736128807, max: 1.8656566143035889
Pixel 1 at index (135,445) values: [nan]
Pixel 1 finite values: []
Pixel 2 at index (135,446) values: [nan]
Pixel 2 finite values: []
Pixel 3 at index (134,445) values: [nan]
Pixel 3 finite values: []
Pixel 4 at index (134,446) values: [nan]
Pixel 4 finite values: []
Pixel 5 at index (136,445) values: [0.03814353]
Pixel 5 finite values: [0.03814353]
Pixel 6 at index (136,446) values: [0.03657886]
Pixel 6 finite values: [0.03657886]
Pixel 7 at index (135,444) values: [nan]
Pixel 7 finite values: []
Pixel 8 at index (133,445

Processing shapes:  70%|█████████████████▌       | 7/10 [00:05<00:02,  1.12it/s]

Pixel 1 at index (135,445) values: [nan nan nan ... nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (135,446) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (134,445) values: [nan nan nan ... nan nan nan]
Pixel 3 finite values: []
Pixel 4 at index (134,446) values: [nan nan nan ... nan nan nan]
Pixel 4 finite values: []
Pixel 5 at index (136,445) values: [0.0655285  0.04916571 0.04932015 ... 0.04286461 0.0480005  0.03814353]
Pixel 5 finite values: [0.0655285  0.04916571 0.04932015 ... 0.04286461 0.0480005  0.03814353]
Aggregated mean: 0.12077436596155167, min: 0.01634303294122219, max: 0.9540687799453735
Pixel 1 at index (134,445) values: [nan]
Pixel 1 finite values: []
Pixel 2 at index (135,445) values: [nan]
Pixel 2 finite values: []
Pixel 3 at index (134,446) values: [nan]
Pixel 3 finite values: []
Pixel 4 at index (133,445) values: [nan]
Pixel 4 finite values: []
Pixel 5 at index (135,446) values: [nan]
Pixel 5 finite values: []
Pixel 6 at 

Processing shapes:  80%|████████████████████     | 8/10 [00:06<00:01,  1.13it/s]

Pixel 1 at index (134,445) values: [nan nan nan ... nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (135,445) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (134,446) values: [nan nan nan ... nan nan nan]
Pixel 3 finite values: []
Pixel 4 at index (133,445) values: [nan nan nan ... nan nan nan]
Pixel 4 finite values: []
Pixel 5 at index (135,446) values: [nan nan nan ... nan nan nan]
Pixel 5 finite values: []
Pixel 6 at index (134,444) values: [0.06869644 0.04943627 0.04953559 ... 0.04368578 0.04842122 0.03925543]
Pixel 6 finite values: [0.06869644 0.04943627 0.04953559 ... 0.04368578 0.04842122 0.03925543]
Aggregated mean: 0.11793826520442963, min: 0.015106502920389175, max: 0.9981226325035095
Pixel 1 at index (135,446) values: [nan]
Pixel 1 finite values: []
Pixel 2 at index (134,446) values: [nan]
Pixel 2 finite values: []
Pixel 3 at index (136,446) values: [0.04356029]
Pixel 3 finite values: [0.04356029]
Pixel 4 at index (135,445) values: [

Processing shapes:  90%|██████████████████████▌  | 9/10 [00:07<00:00,  1.18it/s]

Pixel 1 at index (135,446) values: [nan nan nan ... nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (134,446) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (136,446) values: [0.05289697 0.0483956  0.04573946 ... 0.04436352 0.03603407 0.04356029]
Pixel 3 finite values: [0.05289697 0.0483956  0.04573946 ... 0.04436352 0.03603407 0.04356029]
Aggregated mean: 0.1223808005452156, min: 0.01573161780834198, max: 1.0286386013031006
Pixel 1 at index (178,498) values: [nan]
Pixel 1 finite values: []
Pixel 2 at index (178,499) values: [nan]
Pixel 2 finite values: []
Pixel 3 at index (179,498) values: [nan]
Pixel 3 finite values: []
Pixel 4 at index (179,499) values: [nan]
Pixel 4 finite values: []
Pixel 5 at index (177,498) values: [nan]
Pixel 5 finite values: []
Pixel 6 at index (177,499) values: [nan]
Pixel 6 finite values: []
Pixel 7 at index (180,498) values: [0.0561219]
Pixel 7 finite values: [0.0561219]
Pixel 8 at index (178,497) values: [nan]
Pixe

Processing shapes: 100%|████████████████████████| 10/10 [00:08<00:00,  1.17it/s]

Pixel 1 at index (178,498) values: [nan nan nan ... nan nan nan]
Pixel 1 finite values: []
Pixel 2 at index (178,499) values: [nan nan nan ... nan nan nan]
Pixel 2 finite values: []
Pixel 3 at index (179,498) values: [nan nan nan ... nan nan nan]
Pixel 3 finite values: []
Pixel 4 at index (179,499) values: [nan nan nan ... nan nan nan]
Pixel 4 finite values: []
Pixel 5 at index (177,498) values: [nan nan nan ... nan nan nan]
Pixel 5 finite values: []
Pixel 6 at index (177,499) values: [nan nan nan ... nan nan nan]
Pixel 6 finite values: []
Pixel 7 at index (180,498) values: [0.05127265 0.04997202 0.05367459 ... 0.04211581 0.04555116 0.0561219 ]
Pixel 7 finite values: [0.05127265 0.04997202 0.05367459 ... 0.04211581 0.04555116 0.0561219 ]
Aggregated mean: 0.11030025780200958, min: 0.029758170247077942, max: 0.7398016452789307
            replicates  Cop_CHL_day_mean  Cop_CHL_day_min  Cop_CHL_day_max  \
0  SPY201950/SPY201951          0.038818         0.038604         0.039240   
1  SPY2




In [48]:
na_extract

Unnamed: 0,replicates,Cop_CHL_day_mean,Cop_CHL_day_min,Cop_CHL_day_max,Cop_CHL_week_mean,Cop_CHL_week_min,Cop_CHL_week_max,Cop_CHL_month_mean,Cop_CHL_month_min,Cop_CHL_month_max,Cop_CHL_year_mean,Cop_CHL_year_min,Cop_CHL_year_max,Cop_CHL_5years_mean,Cop_CHL_5years_min,Cop_CHL_5years_max
0,SPY201950/SPY201951,0.038818,0.038604,0.03924,0.039229,0.034407,0.04281,0.04593,0.034407,0.062159,0.129546,0.03236,0.603989,0.129068,0.023476,1.671582
1,SPY210648/SPY210653,0.061823,0.061375,0.062306,0.066806,0.057474,0.075945,0.075324,0.057474,0.111796,0.119745,0.023486,1.140908,0.128361,0.02154,1.140908
2,SPY211266/SPY211280,0.04695,0.046571,0.047506,0.056126,0.047506,0.062833,0.061687,0.047506,0.07584,0.119801,0.015255,1.024679,0.128636,0.015255,1.025838
3,SPY211274/SPY211278,0.060922,0.058592,0.065331,0.066611,0.0625,0.073127,0.065726,0.051038,0.079717,0.143761,0.021727,1.259294,0.157466,0.021727,1.495065
4,SPY211285,0.04695,0.046571,0.047506,0.056126,0.047506,0.062833,0.061687,0.047506,0.07584,0.119801,0.015255,1.024679,0.128636,0.015255,1.025838
5,SPY220882/SPY220887,0.094287,0.085988,0.101111,0.109219,0.088658,0.145164,0.10293,0.060097,0.197165,0.159592,0.039496,0.988589,0.19679,0.03131,1.865657
6,SPY231865,0.037993,0.036579,0.039255,0.043871,0.038144,0.051028,0.051059,0.031021,0.075682,0.097352,0.031021,0.388456,0.120774,0.016343,0.954069
7,SPY231873/SPY231874,0.037993,0.036579,0.039255,0.044518,0.039255,0.05078,0.050815,0.031963,0.070217,0.094171,0.028609,0.413553,0.117938,0.015107,0.998123
8,SPY231895/SPY231896,0.044285,0.043449,0.045845,0.042956,0.036034,0.045479,0.045978,0.030348,0.074847,0.095769,0.030348,0.387568,0.122381,0.015732,1.028639
9,SPY233348/SPY233355,0.048984,0.044302,0.056122,0.049092,0.04206,0.056122,0.050773,0.04206,0.064902,0.09823,0.03694,0.348255,0.1103,0.029758,0.739802


### SST

In [None]:
#  Extract SST from SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2_SST_20130101-20250101.nc 

geojson_path="./data/processed_data/eDNA/mtdt_5.geojson"
netcdf_path="./data/raw_data/predictors/SST/SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2_SST_20130101-20250101.nc"
output_path="./data/processed_data/predictors/mtdt_5_SST.csv"


process_geojson(
    geojson_path=geojson_path,
    netcdf_path=netcdf_path,
    output_path=output_path,
    variable="SST"  
)