# Set up

In [11]:
import calendar
import copernicusmarine
import dask
from datetime import datetime, timedelta
import exactextract as ee
from exactextract import exact_extract
import geopandas as gpd
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from datetime import datetime, timedelta
import rasterio
from rasterio.mask import mask
from rasterio.features import geometry_mask
import rioxarray as rxr
from shapely.geometry import mapping, shape
from shapely.geometry import mapping, Point
from scipy.spatial import cKDTree
import time 
from tqdm import tqdm
import xarray as xr



os.chdir("/media/marieke/Shared/Chap-1/Model/Scripts/Chap_1_2018-2024")

# Get Copernicus data

### Chlorophyll 1km

In [41]:
# Set parameters
data_request = {
   "dataset_id" : "cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["CHL"]
}

# Load xarray dataset
chl = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
chl.to_netcdf("./data/raw_data/predictors/Chl/cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D_20130101-20250101.nc")


INFO - 2025-10-17T15:11:26Z - Selected dataset version: "202311"
INFO - 2025-10-17T15:11:26Z - Selected dataset part: "default"
INFO - 2025-10-17T15:11:26Z - Downloading Copernicus Marine data requires a Copernicus Marine username and password, sign up for free at: https://data.marine.copernicus.eu/register


Copernicus Marine username:

  mschultz2


Copernicus Marine password:

  ········


### pH, oxygen 4.2 km

#### pH

In [5]:
# Set parameters
data_request = {
   "dataset_id" : "med-ogs-bio-rean-d",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["02"]
}

# Load xarray dataset
ox = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
ph.to_netcdf("./data/raw_data/predictors/oxygen/med-ogs-bio-rean-d _20130101-20250101.nc")


INFO - 2025-10-20T15:09:34Z - Selected dataset version: "202105"
INFO - 2025-10-20T15:09:34Z - Selected dataset part: "default"
INFO - 2025-10-20T15:09:34Z - Downloading Copernicus Marine data requires a Copernicus Marine username and password, sign up for free at: https://data.marine.copernicus.eu/register


Copernicus Marine username:

  mschultz2


Copernicus Marine password:

  ········




VariableDoesNotExistInTheDataset: The variable '02' is neither a variable or a standard name in the dataset.

#### oxygen

In [None]:
# Set parameters
data_request = {
   "dataset_id" : "cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["CHL"]
}

# Load xarray dataset
chl = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
chl.to_netcdf("./data/raw_data/predictors/Chl/cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D_20130101-20250101.nc")


### SST 1km (2008-2025)

In [6]:
# Set parameters
data_request = {
   "dataset_id" : "SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["analysed_sst"]
}

# Load xarray dataset
sst = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
sst.to_netcdf("./data/raw_data/predictors/SST/SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2_SST_20130101-20250101.nc")


INFO - 2025-10-20T15:15:14Z - Selected dataset version: "202311"
INFO - 2025-10-20T15:15:14Z - Selected dataset part: "default"
INFO - 2025-10-20T15:15:14Z - Downloading Copernicus Marine data requires a Copernicus Marine username and password, sign up for free at: https://data.marine.copernicus.eu/register


Copernicus Marine username:

  mschultz2


Copernicus Marine password:

  ········


  nc4_var = self.ds.createVariable(**default_args)


### Ocean mixed layer thickness 4.2km (1987-2025)

In [None]:
# Set parameters
data_request = {
   "dataset_id" : "SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2",
   "longitude" : [3, 9.65], 
   "latitude" : [41.2, 44],
   "time" : ["2013-01-01", "2025-01-01"],
   "variables" : ["analysed_sst"]
}

# Load xarray dataset
sst = copernicusmarine.open_dataset(
    dataset_id = data_request["dataset_id"],
    minimum_longitude = data_request["longitude"][0],
    maximum_longitude = data_request["longitude"][1],
    minimum_latitude = data_request["latitude"][0],
    maximum_latitude = data_request["latitude"][1],
    start_datetime = data_request["time"][0],
    end_datetime = data_request["time"][1],
    variables = data_request["variables"]
)

# Export to NCDF 
sst.to_netcdf("./data/raw_data/predictors/SST/SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2_SST_20130101-20250101.nc")


# Extraction

## Functions - test for weighted mean

In [9]:
!pip install exactextract

Collecting exactextract
  Downloading exactextract-0.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (15 kB)
Downloading exactextract-0.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m0m eta [36m0:00:01[0m36m0:00:01[0mm
[?25hInstalling collected packages: exactextract
Successfully installed exactextract-0.2.2


In [3]:
def get_dates(date, time_step):
    """
    Calculate the range of dates for a given time step relative to the provided date.
    """
    from datetime import datetime, timedelta

    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")

    end_date = date - timedelta(days=1)

    time_deltas = {
        'day': 1,
        'week': 7,
        'month': 30,
        'year': 365,
        '5years': 5 * 365 + 1,
    }

    if time_step not in time_deltas:
        raise ValueError("Unsupported time step. Choose from 'day', 'week', 'month', 'year', or '5years'.")

    start_date = date - timedelta(days=time_deltas[time_step])
    return start_date, end_date


def compute_stats_exactextract(data_array, shape_geometry, max_buffer_distance=0, step=0.01,
                               nodata=None, tmp_dir=None):
    """
    Using exactextract, extract pixel values & coverage fractions for shape_geometry and compute:
      - coverage-weighted mean
      - min (over pixels with coverage > 0)
      - max (over pixels with coverage > 0)

    If the polygon returns no valid pixels, will expand the geometry by successive buffers up to max_buffer_distance.

    Parameters
    ----------
    data_array : xarray.DataArray
        single-band raster (can be multi-time -> but pass the aggregated slice already e.g. time-mean or stack)
        must have georeference (rioxarray metadata) and CRS EPSG:4326 in your workflow.
    shape_geometry : shapely geometry
    max_buffer_distance : float
        maximum buffer to try (in degrees if CRS is EPSG:4326)
    step : float
        buffer increment
    nodata : numeric or None
        nodata value to set in the temporary tiff if desired. If None, attempt to detect from data_array.
    tmp_dir : str or None
        directory for temporary files. If None, uses system temp.

    Returns
    -------
    mean_val, min_val, max_val, used_buffer_distance
    """
    # Attempt to infer nodata
    if nodata is None:
        try:
            nodata = data_array.rio.nodata
        except Exception:
            nodata = None

    current_buffer_distance = 0.0

    # Ensure data_array has spatial metadata (rioxarray)
    if not hasattr(data_array, "rio"):
        raise ValueError("data_array must be a rioxarray-enabled xarray.DataArray with CRS and transform.")

    # reduce to single band if multiple bands remain (assumes 2D or 3D with single band)
    # The calling code should pass a DataArray containing the values of interest (e.g. averaged over time)
    while current_buffer_distance <= max_buffer_distance + 1e-12:
        if current_buffer_distance > 0:
            search_geometry = shape_geometry.buffer(current_buffer_distance)
        else:
            search_geometry = shape_geometry

        try:
            # Write the data_array to a temporary GeoTIFF file (exactextract expects a raster file)
            with tempfile.NamedTemporaryFile(suffix=".tif", dir=tmp_dir, delete=False) as tmp:
                tmp_path = tmp.name

            # rioxarray's to_raster / rio.to_raster
            # For safety, ensure data_array is 2D (if it is a time stack, caller should collapse to single 2D)
            # If there's an extra dim (e.g., time), the first non-time dim will be written; adjust if needed.
            # We will call .rio.to_raster which writes GeoTIFF.
            da_to_write = data_array

            # If data_array has a 'time' dimension, it must be collapsed before writing. The caller should send
            # a single time-slice or aggregated array. For safety, if time present and size==1, select it.
            if "time" in da_to_write.dims and da_to_write.sizes["time"] == 1:
                da_to_write = da_to_write.isel(time=0)

            # Ensure 2D
            if len(da_to_write.dims) > 2:
                # try to reduce by selecting first non-spatial dim except time
                nonspatial = [d for d in da_to_write.dims if d not in ("x", "y", "lon", "lat")]
                if nonspatial:
                    da_to_write = da_to_write.isel({nonspatial[0]: 0})
                else:
                    # fallback: take first 2D slice
                    dims = da_to_write.dims
                    da_to_write = da_to_write.isel({dims[0]: 0})

            # Write GeoTIFF
            # Ensure CRS is set
            try:
                if da_to_write.rio.crs is None:
                    da_to_write = da_to_write.rio.write_crs("EPSG:4326", inplace=False)
            except Exception:
                # ignore and proceed
                pass

            # write
            da_to_write.rio.to_raster(tmp_path)

            # Now call exactextract
            # exactextract.extract expects either a path or rasterio dataset and a GeoJSON-like geometry.
            geojson_geom = mapping(search_geometry)

            # 'mode' : can be 'accurate' to compute fractional coverage. Use that explicitly.
            # The exactextract Python API returns a list of records (one per geometry). Each record
            # contains arrays 'values' and 'coverage_fraction' (naming mirrors R version).
            # We'll be defensive when parsing the returned structure.
            ee_result = ee.extract(tmp_path, [geojson_geom], mode="accurate", bands=1)

            # ee.extract returns a list with one element per input geometry
            if not ee_result or len(ee_result) == 0:
                # no data found; try next buffer
                os.remove(tmp_path)
                current_buffer_distance += step
                continue
                
            print("-----------------------------------------------------------------------------------------------")
            print(ee_result)
            
            rec = ee_result[0]
            

            # Expected fields: 'values' and 'coverage_fraction' OR 'values' and 'coverage_area' depending on version.
            values = None
            cov = None

            # Try common keys
            if isinstance(rec, dict):
                # Many examples return keys like 'values' and 'coverage_fraction'
                if "values" in rec and "coverage_fraction" in rec:
                    values = rec["values"]
                    cov = rec["coverage_fraction"]
                elif "values" in rec and "coverage_area" in rec:
                    values = rec["values"]
                    cov = rec["coverage_area"]
                elif "value" in rec and "coverage_fraction" in rec:
                    values = rec["value"]
                    cov = rec["coverage_fraction"]
                else:
                    # try to inspect keys and pick arrays
                    for k in rec:
                        if isinstance(rec[k], (list, tuple)) and values is None:
                            values = rec[k]
                        elif isinstance(rec[k], (list, tuple)) and cov is None and rec[k] is not values:
                            cov = rec[k]

            # If not dict, maybe it's a pandas-like DataFrame. Try to coerce.
            if values is None or cov is None:
                # try treating rec as a table-like sequence of rows: search for columns named:
                try:
                    import pandas as _pd
                    df = _pd.DataFrame(rec)
                    if "values" in df.columns and "coverage_fraction" in df.columns:
                        values = df["values"].tolist()
                        cov = df["coverage_fraction"].tolist()
                    elif "value" in df.columns and "coverage_fraction" in df.columns:
                        values = df["value"].tolist()
                        cov = df["coverage_fraction"].tolist()
                    elif "values" in df.columns and "coverage_area" in df.columns:
                        values = df["values"].tolist()
                        cov = df["coverage_area"].tolist()
                except Exception:
                    pass

            # cleanup tmp file
            try:
                os.remove(tmp_path)
            except Exception:
                pass

            if values is None or cov is None:
                # no usable extraction; try next buffer distance
                current_buffer_distance += step
                continue

            # Convert to numpy arrays and mask nodata or NaNs
            import numpy as np
            values = np.array(values, dtype=float)
            cov = np.array(cov, dtype=float)

            # Mask invalid values (NaN, or nodata)
            mask_valid = ~np.isnan(values)
            if nodata is not None:
                mask_valid = mask_valid & (values != nodata)

            # Also require coverage > 0
            mask_valid = mask_valid & (cov > 0)

            if mask_valid.sum() == 0:
                # no valid pixels; expand buffer
                current_buffer_distance += step
                continue

            values_sel = values[mask_valid]
            cov_sel = cov[mask_valid]

            # weighted mean
            weighted_sum = (values_sel * cov_sel).sum()
            total_cov = cov_sel.sum()

            if total_cov == 0:
                mean_val = None
            else:
                mean_val = (weighted_sum / total_cov).item()

            # For min / max we'll use the min and max among pixels intersected (coverage>0).
            min_val = float(values_sel.min())
            max_val = float(values_sel.max())

            return mean_val, min_val, max_val, current_buffer_distance

        except Exception as e:
            # ignore and try next buffer distance (but log if you want)
            # print(f"exactextract error at buffer {current_buffer_distance}: {e}")
            try:
                os.remove(tmp_path)
            except Exception:
                pass

            current_buffer_distance += step
            continue

    # nothing found up to max_buffer_distance
    return None, None, None, max_buffer_distance


def open_nc(shape_geometry, date, netcdf_path, variable="CHL"):
    """
    Compute CHL statistics for a given geometry and date using a netCDF file.

    Now uses exactextract-based weighted means (coverage-weighted).
    """
    results = {}

    try:
        if isinstance(date, str):
            date = datetime.strptime(date, "%Y-%m-%d")

        ds = xr.open_dataset(netcdf_path)
        ds = ds.rio.write_crs("EPSG:4326", inplace=True)

        target_date = date - timedelta(days=1)
        time_steps = ["day", "week", "month", "year", "5years"]
        date_ranges = {label: get_dates(date, label) for label in time_steps}

        for label, (start_date, end_date) in date_ranges.items():
            ds_time_range = ds.sel(time=slice(start_date, end_date))

            if ds_time_range.time.size == 0:
                results[label] = (None, None, None, 0)
                continue

            chl_data = ds_time_range[variable]

            # count how many days are fully empty (all NaN) in the time slice
            empty_days = sum(chl_data.sel(time=t).isnull().all().item() for t in chl_data.time)

            # Drop fully-empty time steps; we will aggregate across time as the user's original code did,
            # but the original code passed the whole valid_data stack to compute_stats which used rioxarray.clip.
            # Here we will compute per-pixel mean over time for the valid pixels (so exactextract receives a single 2D raster)
            valid_data = chl_data.dropna(dim="time", how="all")
            if valid_data.size > 0:
                # We will compute the per-pixel mean across time where values exist (keeping NaNs where all times are NaN)
                # result is 2D DataArray with same spatial coords.
                # If instead you want to compute zonal statistics per day and then aggregate across days, we can change approach.
                per_pixel_mean = valid_data.mean(dim="time", skipna=True)

                mean_val, min_val, max_val, used_buffer = compute_stats_exactextract(
                    per_pixel_mean, shape_geometry, max_buffer_distance=0.1, step=0.01
                )

                results[label] = (mean_val, min_val, max_val, empty_days)
            else:
                results[label] = (None, None, None, empty_days)

        return results

    except Exception as e:
        print(f"Error processing shape with target date: {date}: {e}")
        return {}


def process_geojson(geojson_path, netcdf_path, output_path, variable="CHL"):
    """
    Process the GeoJSON file and compute statistics for each shape using a netCDF file.
    """
    shapes = gpd.read_file(geojson_path)
    shapes = shapes.set_crs("EPSG:4326", allow_override=True)

    shapes = shapes[0:3]

    results = []

    for _, row in tqdm(shapes.iterrows(), total=shapes.shape[0], desc="Processing shapes"):
        shape_geometry = row.geometry
        date = row["date"]
        polygon_id = row.get("replicates", None)

        chl_stats = open_nc(shape_geometry, date, netcdf_path, variable)

        result_entry = {"replicates": polygon_id}
        for label, (mean, min_val, max_val, empty_days) in chl_stats.items():
            result_entry[f"Cop_CHL_{label}_mean"] = mean
            result_entry[f"Cop_CHL_{label}_min"] = min_val
            result_entry[f"Cop_CHL_{label}_max"] = max_val
            result_entry[f"Cop_CHL_{label}_empty_days"] = empty_days

        results.append(result_entry)

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_path, index=False)


## Functions

In [None]:
# Pipeline with exactextract

def get_dates(date, time_step):
    """
    Calculate the range of dates for a given time step relative to the provided date.
    """
    from datetime import datetime, timedelta

    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")

    end_date = date - timedelta(days=1)

    time_deltas = {
        'day': 1,
        'week': 7,
        'month': 30,
        'year': 365,
        '5years': 5 * 365 + 1,
    }

    if time_step not in time_deltas:
        raise ValueError("Unsupported time step. Choose from 'day', 'week', 'month', 'year', or '5years'.")

    start_date = date - timedelta(days=time_deltas[time_step])
    return start_date, end_date






def compute_stats(data_array, shape_geometry):
    """
    Extract values and compute weighted mean - automatically with exactextract- , min and max.
    """
    
    try:
        feature = {"type": "Feature", "geometry": mapping(shape_geometry), "properties": {}}
        res = exact_extract(data_array, [feature], ["mean", "min", "max"])


        if not res or len(res) == 0:
            return None, None, None

        props = res[0]["properties"]

        # Multi-band keys: band_1_mean, band_2_mean, etc.
        mean_vals = [v for k, v in props.items() if k.endswith("_mean")]
        min_vals  = [v for k, v in props.items() if k.endswith("_min")]
        max_vals  = [v for k, v in props.items() if k.endswith("_max")]

         # Single-band keys: mean, min, max
        if not mean_vals:
            if "mean" in props:
                mean_vals = [props["mean"]]
        if not min_vals:
            if "min" in props:
                min_vals = [props["min"]]
        if not max_vals:
            if "max" in props:
                max_vals = [props["max"]]
   

        if not mean_vals or not min_vals or not max_vals:
            return None, None, None

        mean_val = float(np.nanmean(mean_vals))
        min_val  = float(np.nanmin(min_vals))
        max_val  = float(np.nanmax(max_vals))

        return mean_val, min_val, max_val

    except Exception as e:
        print(f"compute_stats ERROR: {e}")
        return None, None, None







def open_nc(shape_geometry, date, netcdf_path, variable="CHL"):
    """
    Compute NCDF statistics for a given geometry and date using a netCDF file.
    """
    results = {}

    try:
        if isinstance(date, str):
            date = datetime.strptime(date, "%Y-%m-%d")

        ds = xr.open_dataset(netcdf_path)
        ds = ds.rio.write_crs("EPSG:4326", inplace=True)


        target_date = date - timedelta(days=1)
        time_steps = ["day", "week", "month", "year", "5years"]
        date_ranges = {label: get_dates(date, label) for label in time_steps}


        for label, (start_date, end_date) in date_ranges.items():
            ds_time_range = ds.sel(time=slice(start_date, end_date))

            if ds_time_range.time.size == 0:
                results[label] = (None, None, None, 0)
                continue

            chl_data = ds_time_range[variable]
            valid_data = chl_data.dropna(dim="time", how="all")


            if valid_data.size > 0:                 
                mean_val, min_val, max_val = compute_stats(valid_data, shape_geometry)                
                results[label] = (mean_val, min_val, max_val)
            else:
                print("valid_data.size == 0")
                results[label] = (None, None, None)

        return results

    except Exception as e:
        print(f"Error processing shape with target date: {date}: {e}")
        return {}





def process_geojson(geojson_path, netcdf_path, output_path, variable="CHL"):
    """
    Process the GeoJSON file and compute statistics for each shape using a netCDF file.
    """
    shapes = gpd.read_file(geojson_path)
    shapes = shapes.set_crs("EPSG:4326", allow_override=True)
    shapes = shapes[0:20]

    results = []

    for _, row in tqdm(shapes.iterrows(), total=shapes.shape[0], desc="Processing shapes"):
        shape_geometry = row.geometry
        date = row["date"]
        polygon_id = row.get("replicates", None)

        nc_stats = open_nc(shape_geometry, date, netcdf_path, variable)

        print("------------replicates--------------")
        print(polygon_id)
        print("------------nc_stats--------------")
        print(nc_stats)


        result_entry = {"replicates": polygon_id}
        for label, (mean, min_val, max_val) in nc_stats.items():
            result_entry[f"Cop_CHL_{label}_mean"] = mean
            result_entry[f"Cop_CHL_{label}_min"] = min_val
            result_entry[f"Cop_CHL_{label}_max"] = max_val

        results.append(result_entry)

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_path, index=False)

In [46]:
def get_dates(date, time_step):
    """
    Calculate the range of dates for a given time step relative to the provided date.
    """
    from datetime import datetime, timedelta

    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")

    end_date = date - timedelta(days=1)

    time_deltas = {
        'day': 1,
        'week': 7,
        'month': 30,
        'year': 365,
        '5years': 5 * 365 + 1,
    }

    if time_step not in time_deltas:
        raise ValueError("Unsupported time step. Choose from 'day', 'week', 'month', 'year', or '5years'.")

    start_date = date - timedelta(days=time_deltas[time_step])
    return start_date, end_date




def compute_stats(data_array, shape_geometry, max_buffer_distance=0, step=0.01):
    """
    Compute the mean, min, and max of valid points (non-NaN) within the given geometry using `.clip`.
    Expand the search radius if no valid points exist.
    """
    current_buffer_distance = 0

    while current_buffer_distance <= max_buffer_distance:
        search_geometry = shape_geometry.buffer(current_buffer_distance) if current_buffer_distance > 0 else shape_geometry

        try:
            
            # Extraction is done here :
            clipped_data = data_array.rio.clip([mapping(search_geometry)], crs="EPSG:4326", drop=True)
            # Doc : https://corteva.github.io/rioxarray/html/rioxarray.html#rioxarray.raster_array.RasterArray.clip
            # drop = True :  drop the data outside of the extent of the mask geometries Otherwise, it will return the same raster with the data masked. 
            # all_touched = False (default) : only pixels whose center is within the polygon or that are selected by Bresenham’s line algorithm will be burned in.
            
            if clipped_data.count().item() > 0:
                return (
                    clipped_data.mean().item(),
                    clipped_data.min().item(),
                    clipped_data.max().item(),
                    current_buffer_distance
                )
        except Exception as e:
            pass

        current_buffer_distance += step

    return None, None, None, max_buffer_distance





def open_nc(shape_geometry, date, netcdf_path, variable="CHL"):
    """
    Compute CHL statistics for a given geometry and date using a netCDF file.
    """
    results = {}

    try:
        if isinstance(date, str):
            date = datetime.strptime(date, "%Y-%m-%d")

        ds = xr.open_dataset(netcdf_path)
        ds = ds.rio.write_crs("EPSG:4326", inplace=True)

        target_date = date - timedelta(days=1)
        time_steps = ["day", "week", "month", "year", "5years"]
        date_ranges = {label: get_dates(date, label) for label in time_steps}

        for label, (start_date, end_date) in date_ranges.items():
            ds_time_range = ds.sel(time=slice(start_date, end_date))

            if ds_time_range.time.size == 0:
                results[label] = (None, None, None, 0)
                continue

            chl_data = ds_time_range[variable]
            empty_days = sum(chl_data.sel(time=t).isnull().all().item() for t in chl_data.time)
            valid_data = chl_data.dropna(dim="time", how="all")

            if valid_data.size > 0:
                mean_val, min_val, max_val, max_search_dist = compute_stats(
                    valid_data, shape_geometry, max_buffer_distance=0, step=0.01
                )
                results[label] = (mean_val, min_val, max_val, empty_days)
            else:
                results[label] = (None, None, None, empty_days)

        return results

    except Exception as e:
        print(f"Error processing shape with target date: {date}: {e}")
        return {}





def process_geojson(geojson_path, netcdf_path, output_path, variable="CHL"):
    """
    Process the GeoJSON file and compute statistics for each shape using a netCDF file.
    """
    shapes = gpd.read_file(geojson_path)
    shapes = shapes.set_crs("EPSG:4326", allow_override=True)
    shapes = shapes[0:3]

    results = []

    for _, row in tqdm(shapes.iterrows(), total=shapes.shape[0], desc="Processing shapes"):
        shape_geometry = row.geometry
        date = row["date"]
        polygon_id = row.get("replicates", None)

        chl_stats = open_nc(shape_geometry, date, netcdf_path, variable)

        result_entry = {"replicates": polygon_id}
        for label, (mean, min_val, max_val, empty_days) in chl_stats.items():
            result_entry[f"Cop_CHL_{label}_mean"] = mean
            result_entry[f"Cop_CHL_{label}_min"] = min_val
            result_entry[f"Cop_CHL_{label}_max"] = max_val
            result_entry[f"Cop_CHL_{label}_empty_days"] = empty_days

        results.append(result_entry)

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_path, index=False)

## Run extraction

### TEST

In [26]:
def get_dates(date, time_step):
    """
    Calculate the range of dates for a given time step relative to the provided date.
    """
    from datetime import datetime, timedelta

    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")

    end_date = date - timedelta(days=1)

    time_deltas = {
        'day': 1,
        'week': 7,
        'month': 30,
        'year': 365,
        '5years': 5 * 365 + 1,
    }

    if time_step not in time_deltas:
        raise ValueError("Unsupported time step. Choose from 'day', 'week', 'month', 'year', or '5years'.")

    start_date = date - timedelta(days=time_deltas[time_step])
    return start_date, end_date






def compute_stats(data_array, shape_geometry):
    """
    Extract values and compute weighted mean - automatically with exactextract- , min and max.
    """
    
    try:
        feature = {"type": "Feature", "geometry": mapping(shape_geometry), "properties": {}}
        res = exact_extract(data_array, [feature], ["mean", "min", "max"])


        if not res or len(res) == 0:
            return None, None, None

        props = res[0]["properties"]

        # Multi-band keys: band_1_mean, band_2_mean, etc.
        mean_vals = [v for k, v in props.items() if k.endswith("_mean")]
        min_vals  = [v for k, v in props.items() if k.endswith("_min")]
        max_vals  = [v for k, v in props.items() if k.endswith("_max")]

         # Single-band keys: mean, min, max
        if not mean_vals:
            if "mean" in props:
                mean_vals = [props["mean"]]
        if not min_vals:
            if "min" in props:
                min_vals = [props["min"]]
        if not max_vals:
            if "max" in props:
                max_vals = [props["max"]]
   

        if not mean_vals or not min_vals or not max_vals:
            return None, None, None

        mean_val = float(np.nanmean(mean_vals))
        min_val  = float(np.nanmin(min_vals))
        max_val  = float(np.nanmax(max_vals))

        return mean_val, min_val, max_val

    except Exception as e:
        print(f"compute_stats ERROR: {e}")
        return None, None, None







def open_nc(shape_geometry, date, netcdf_path, variable="CHL"):
    """
    Compute NCDF statistics for a given geometry and date using a netCDF file.
    """
    results = {}

    try:
        if isinstance(date, str):
            date = datetime.strptime(date, "%Y-%m-%d")

        ds = xr.open_dataset(netcdf_path)
        ds = ds.rio.write_crs("EPSG:4326", inplace=True)


        target_date = date - timedelta(days=1)
        time_steps = ["day", "week", "month", "year", "5years"]
        date_ranges = {label: get_dates(date, label) for label in time_steps}


        for label, (start_date, end_date) in date_ranges.items():
            ds_time_range = ds.sel(time=slice(start_date, end_date))

            if ds_time_range.time.size == 0:
                results[label] = (None, None, None, 0)
                continue

            chl_data = ds_time_range[variable]
            valid_data = chl_data.dropna(dim="time", how="all")


            if valid_data.size > 0:                 
                mean_val, min_val, max_val = compute_stats(valid_data, shape_geometry)                
                results[label] = (mean_val, min_val, max_val)
            else:
                print("valid_data.size == 0")
                results[label] = (None, None, None)

        return results

    except Exception as e:
        print(f"Error processing shape with target date: {date}: {e}")
        return {}





def process_geojson(geojson_path, netcdf_path, output_path, variable="CHL"):
    """
    Process the GeoJSON file and compute statistics for each shape using a netCDF file.
    """
    shapes = gpd.read_file(geojson_path)
    shapes = shapes.set_crs("EPSG:4326", allow_override=True)
    shapes = shapes[0:20]

    results = []

    for _, row in tqdm(shapes.iterrows(), total=shapes.shape[0], desc="Processing shapes"):
        shape_geometry = row.geometry
        date = row["date"]
        polygon_id = row.get("replicates", None)

        nc_stats = open_nc(shape_geometry, date, netcdf_path, variable)

        print("------------replicates--------------")
        print(polygon_id)
        print("------------nc_stats--------------")
        print(nc_stats)


        result_entry = {"replicates": polygon_id}
        for label, (mean, min_val, max_val) in nc_stats.items():
            result_entry[f"Cop_CHL_{label}_mean"] = mean
            result_entry[f"Cop_CHL_{label}_min"] = min_val
            result_entry[f"Cop_CHL_{label}_max"] = max_val

        results.append(result_entry)

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_path, index=False)

In [24]:

geojson_path="./data/processed_data/eDNA/mtdt_5.geojson"
netcdf_path="./data/raw_data/predictors/Chl/cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D_20130101-20250101.nc"
output_path="./data/processed_data/predictors/mtdt_5_CHL_test.csv"


process_geojson(
    geojson_path=geojson_path,
    netcdf_path=netcdf_path,
    output_path=output_path,
    variable="CHL"  
)


Processing shapes:   0%|                                 | 0/20 [00:00<?, ?it/s]

valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:   5%|█▎                       | 1/20 [00:06<02:06,  6.66s/it]

------------replicates--------------
SPY180622/SPY201155/SPY201156/SPY201189
------------replicates--------------
{'day': (0.06718862086574383, 0.06525104492902756, 0.0686667412519455), 'week': (0.07321769710675555, 0.06525104492902756, 0.07775455713272095), 'month': (0.08463646815102051, 0.06468724459409714, 0.1160086989402771), 'year': (0.5199282805972645, 0.04498501121997833, 5.6283979415893555), '5years': (0.4490315943948328, 0.04498501121997833, 5.6283979415893555)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  10%|██▌                      | 2/20 [00:12<01:55,  6.40s/it]

------------replicates--------------
SPY180624/SPY181146/SPY181151/SPY181155
------------replicates--------------
{'day': (0.0942835757467272, 0.09415426850318909, 0.09460443258285522), 'week': (0.07609841884238212, 0.06538700312376022, 0.09460443258285522), 'month': (0.11665163654579479, 0.06316225230693817, 0.2834864854812622), 'year': (0.14774635743532225, 0.023832421749830246, 1.1077903509140015), '5years': (0.13461176351858611, 0.023832421749830246, 1.7574970722198486)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  15%|███▊                     | 3/20 [00:18<01:45,  6.19s/it]

------------replicates--------------
SPY180625/SPY181700/SPY181701/SPY181708
------------replicates--------------
{'day': (0.143948073727683, 0.10495904833078384, 0.1684579700231552), 'week': (0.1643321235981451, 0.08790973573923111, 0.3173648715019226), 'month': (0.27238026017171485, 0.08790973573923111, 0.8021978735923767), 'year': (0.3406483257225056, 0.0512312687933445, 2.160412549972534), '5years': (0.28326905702523314, 0.020442044362425804, 2.160412549972534)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  20%|█████                    | 4/20 [00:24<01:35,  5.96s/it]

------------replicates--------------
SPY180629/SPY181145/SPY190597/SPY192300
------------replicates--------------
{'day': (0.14650312278335986, 0.10953126102685928, 0.1781977117061615), 'week': (0.1793566922719146, 0.1024145558476448, 0.33735331892967224), 'month': (0.3218301585534278, 0.1024145558476448, 0.8908647894859314), 'year': (0.40965885748467584, 0.0449051670730114, 2.7410783767700195), '5years': (0.33464995672031883, 0.01433512195944786, 2.7410783767700195)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  25%|██████▎                  | 5/20 [00:30<01:28,  5.93s/it]

------------replicates--------------
SPY180630/SPY180635/SPY181822/SPY181823
------------replicates--------------
{'day': (0.08129365927809644, 0.06514974683523178, 0.166176438331604), 'week': (0.10513171552727922, 0.06514974683523178, 0.25679343938827515), 'month': (0.11732555607480322, 0.06514974683523178, 0.25679343938827515), 'year': (0.2003313279650582, 0.03359855338931084, 1.6378918886184692), '5years': (0.20769580570962545, 0.02783365175127983, 3.4752748012542725)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  30%|███████▌                 | 6/20 [00:36<01:22,  5.87s/it]

------------replicates--------------
SPY180634/SPY180934/SPY181811/SPY181820
------------replicates--------------
{'day': (0.0637442987041596, 0.0637071430683136, 0.06379544734954834), 'week': (0.0824925073919399, 0.0637071430683136, 0.11400075256824493), 'month': (0.11440941628330445, 0.0637071430683136, 0.22827759385108948), 'year': (0.1827514305084315, 0.029737479984760284, 1.472585678100586), '5years': (0.18312658748674362, 0.029737479984760284, 2.083415985107422)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  35%|████████▊                | 7/20 [00:41<01:15,  5.78s/it]

------------replicates--------------
SPY180765/SPY180785/SPY181511/SPY181884/SPY181890/SPY181893/SPY181894/SPY181910
------------replicates--------------
{'day': (0.4409860842753508, 0.37382540106773376, 0.45488327741622925), 'week': (0.3649728611178591, 0.2412862926721573, 0.45488327741622925), 'month': (0.27626915202455954, 0.15723074972629547, 0.45488327741622925), 'year': (0.1800567877952407, 0.02767486497759819, 1.1560429334640503), '5years': (0.17878812886524686, 0.02767486497759819, 3.670457601547241)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  40%|██████████               | 8/20 [00:47<01:09,  5.76s/it]

------------replicates--------------
SPY180927/SPY180930
------------replicates--------------
{'day': (0.6820290160393356, 0.5909357070922852, 0.8469605445861816), 'week': (0.6973667539991262, 0.2944985628128052, 1.2790412902832031), 'month': (0.6784111361675828, 0.12874218821525574, 1.3767783641815186), 'year': (0.3723649590354914, 0.0509054996073246, 2.871854782104492), '5years': (0.47053450279197606, 0.04640711843967438, 5.414854526519775)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  45%|███████████▎             | 9/20 [00:52<01:02,  5.71s/it]

------------replicates--------------
SPY180932/SPY181152/SPY181154/SPY181158
------------replicates--------------
{'day': (0.08752318610198435, 0.07966101169586182, 0.08896530419588089), 'week': (0.0875666589741707, 0.065118208527565, 0.10748524963855743), 'month': (0.10732313319777316, 0.06251183152198792, 0.22221216559410095), 'year': (0.15135017938734652, 0.02396441251039505, 1.1154208183288574), '5years': (0.13836418905339423, 0.02396441251039505, 1.6913037300109863)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  50%|████████████            | 10/20 [00:58<00:56,  5.69s/it]

------------replicates--------------
SPY180933/SPY180935
------------replicates--------------
{'day': (1.0251717015837203, 0.7933692336082458, 1.1605799198150635), 'week': (1.024429585040404, 0.5186532735824585, 1.9022114276885986), 'month': (0.8640490246425284, 0.18601861596107483, 1.9022114276885986), 'year': (0.48128413508457124, 0.057316455990076065, 3.5908260345458984), '5years': (0.6114463350661249, 0.057316455990076065, 12.805350303649902)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  55%|█████████████▏          | 11/20 [01:04<00:51,  5.70s/it]

------------replicates--------------
SPY180936/SPY180937
------------replicates--------------
{'day': (1.6242952039706178, 1.5390106439590454, 1.651769757270813), 'week': (1.5960856908233878, 0.8311907052993774, 2.4715404510498047), 'month': (1.0824335142530155, 0.3959610164165497, 2.4715404510498047), 'year': (0.5887896991644038, 0.05140399560332298, 3.400531768798828), '5years': (0.7459895947200631, 0.05140399560332298, 8.777917861938477)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  60%|██████████████▍         | 12/20 [01:10<00:45,  5.73s/it]

------------replicates--------------
SPY181147/SPY181149/SPY181150/SPY181160
------------replicates--------------
{'day': (0.0737353573376694, 0.0717928409576416, 0.07753580063581467), 'week': (0.06715847906363429, 0.0553617998957634, 0.08070025593042374), 'month': (0.10611847769951675, 0.05301511287689209, 0.3100566267967224), 'year': (0.14033859327194517, 0.01997200772166252, 1.1281532049179077), '5years': (0.13209645569428738, 0.01997200772166252, 6.392641067504883)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  65%|███████████████▌        | 13/20 [01:15<00:39,  5.69s/it]

------------replicates--------------
SPY181148/SPY181156/SPY181157/SPY181159
------------replicates--------------
{'day': (0.131694957613945, 0.131694957613945, 0.131694957613945), 'week': (0.079471974500588, 0.0620441734790802, 0.131694957613945), 'month': (0.12728502849737802, 0.0620441734790802, 0.34316882491111755), 'year': (0.14732732126769954, 0.02322176657617092, 1.1109230518341064), '5years': (0.13270898503919518, 0.02322176657617092, 1.77898108959198)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  70%|████████████████▊       | 14/20 [01:21<00:34,  5.72s/it]

------------replicates--------------
SPY181153/SPY181812/SPY181818/SPY181819
------------replicates--------------
{'day': (0.10548677359690538, 0.1014319583773613, 0.1510164439678192), 'week': (0.09183098963713585, 0.0842970460653305, 0.1510164439678192), 'month': (0.10055327329396463, 0.06580230593681335, 0.15657129883766174), 'year': (0.16946335979998295, 0.03212473914027214, 1.3873748779296875), '5years': (0.15489835693438392, 0.03212473914027214, 1.9302968978881836)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  75%|██████████████████      | 15/20 [01:27<00:28,  5.79s/it]

------------replicates--------------
SPY181699/SPY181824/SPY181828/SPY192310
------------replicates--------------
{'day': (0.09457855629503577, 0.08332124352455139, 0.10706867277622223), 'week': (0.11016209978058826, 0.07392069697380066, 0.22254154086112976), 'month': (0.15565169525385658, 0.07392069697380066, 0.3342788815498352), 'year': (0.21601303599789426, 0.026480820029973984, 1.9134045839309692), '5years': (0.18716930121821684, 0.026480820029973984, 1.9134045839309692)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  80%|███████████████████▏    | 16/20 [01:33<00:23,  5.79s/it]

------------replicates--------------
SPY181702/SPY181705/SPY181707/SPY181713
------------replicates--------------
{'day': (0.19643147280754839, 0.19151578843593597, 0.20126061141490936), 'week': (0.2090251772661045, 0.1532139927148819, 0.3439825177192688), 'month': (0.20017020572891894, 0.10836302489042282, 0.3439825177192688), 'year': (0.33000640671989523, 0.005742328707128763, 2.4003312587738037), '5years': (0.337400201630093, 0.005742328707128763, 11.53050708770752)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  85%|████████████████████▍   | 17/20 [01:39<00:17,  5.79s/it]

------------replicates--------------
SPY181703/SPY181704/SPY181706/SPY181710
------------replicates--------------
{'day': (0.11675674140826812, 0.11663961410522461, 0.1169876828789711), 'week': (0.1331359526342161, 0.10470551997423172, 0.20119823515415192), 'month': (0.18971540942983225, 0.1015477180480957, 0.34692639112472534), 'year': (0.32826664629033286, 0.007849838584661484, 2.0642359256744385), '5years': (0.31761524591060486, 0.007849838584661484, 4.724327087402344)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  90%|█████████████████████▌  | 18/20 [01:45<00:11,  5.93s/it]

------------replicates--------------
SPY181709/SPY181711/SPY181712/SPY181714
------------replicates--------------
{'day': (0.1439992332309456, 0.12584328651428223, 0.19430223107337952), 'week': (0.18101171908432181, 0.11814986914396286, 0.3777979910373688), 'month': (0.18693118402115777, 0.10224533081054688, 0.3777979910373688), 'year': (0.3155863822259259, 0.006594486068934202, 2.265831708908081), '5years': (0.30878524600318813, 0.006594486068934202, 7.125894069671631)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes:  95%|██████████████████████▊ | 19/20 [01:51<00:05,  5.99s/it]

------------replicates--------------
SPY181808/SPY181813/SPY181816/SPY181826
------------replicates--------------
{'day': (0.12801810956785772, 0.10635823756456375, 0.15980233252048492), 'week': (0.10056857914424719, 0.08761268854141235, 0.15980233252048492), 'month': (0.10588328421672685, 0.0696224793791771, 0.1609726846218109), 'year': (0.1852359207190488, 0.033809613436460495, 1.8106924295425415), '5years': (0.16959241372515169, 0.033809613436460495, 1.8106924295425415)}
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0
valid_data.size > 0


Processing shapes: 100%|████████████████████████| 20/20 [01:57<00:00,  5.87s/it]

------------replicates--------------
SPY181809/SPY181815/SPY181821/SPY181825
------------replicates--------------
{'day': (0.1706240475177765, 0.1706240475177765, 0.1706240475177765), 'week': (0.11138479518038887, 0.08246105909347534, 0.1706240475177765), 'month': (0.11460076570510865, 0.07447385042905807, 0.1706240475177765), 'year': (0.2097957569544446, 0.025837991386651993, 1.8801963329315186), '5years': (0.19308581539028838, 0.025837991386651993, 1.8801963329315186)}





### Chlorophyll

In [35]:
# 17/10/2025 : Extract CHL from cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D 

# 1. Convert .shp to .geojson 
# Load the file with buffer for extraction
gdf = gpd.read_file("./data/processed_data/eDNA/mtdt_5.gpkg")

# Save as GeoJSON
geojson_path = "./data/processed_data/eDNA/mtdt_5.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")

print(f"GeoJSON file saved to {geojson_path}")


GeoJSON file saved to ./data/processed_data/eDNA/mtdt_5.geojson


In [36]:
# 2. Make extraction (using Fct 4 and max buffer size = 0)

geojson_path="./data/processed_data/eDNA/mtdt_5.geojson"
netcdf_path="./data/raw_data/predictors/Chl/cmems_obs-oc_med_bgc-plankton_my_l4-gapfree-multi-1km_P1D_20130101-20250101.nc"
output_path="./data/processed_data/predictors/mtdt_5_CHL.csv"


process_geojson(
    geojson_path=geojson_path,
    netcdf_path=netcdf_path,
    output_path=output_path,
    variable="CHL"  
)


Processing shapes: 100%|██████████████████████| 788/788 [39:34<00:00,  3.01s/it]


### SST

In [8]:
#  Extract SST from SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2_SST_20130101-20250101.nc 

# 2. Make extraction (using Fct 4 and max buffer size = 0)

geojson_path="./data/processed_data/eDNA/mtdt_5.geojson"
netcdf_path="./data/raw_data/predictors/SST/SST_MED_SST_L4_NRT_OBSERVATIONS_010_004_c_V2_SST_20130101-20250101.nc"
output_path="./data/processed_data/predictors/mtdt_5_SST.csv"


process_geojson(
    geojson_path=geojson_path,
    netcdf_path=netcdf_path,
    output_path=output_path,
    variable="SST"  
)

NameError: name 'process_geojson' is not defined