## Extraction des variables température et salinité en surface et en profondeur à partir des données MARS 3D

#### 1. Extraction 24h en amont

a. En profondeur 

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point, box
import glob
import os
import re
from datetime import datetime, timedelta

""" Trouve tous les fichiers NetCDF dans la fenêtre temporelle donnée """
def get_netcdf_paths_for_period(dt, base_folder, hours=24):
    """Return the 3-hourly NetCDF files whose timestamp lies in [dt - hours, dt].

    Files are looked up in ``<base_folder>/<year>/`` for every year touched by
    the window, and their timestamp is parsed from the file name.

    Args:
        dt: End of the time window (datetime-like).
        base_folder: Root folder containing one sub-folder per year.
        hours: Length of the window, in hours, counted backwards from ``dt``.

    Returns:
        File paths sorted by increasing timestamp.

    Raises:
        FileNotFoundError: If no file falls inside the window.
    """
    window_start = dt - timedelta(hours=hours)
    stamp_re = re.compile(r"_(\d{8}T\d{4})Z\.nc$")
    dated_paths = []

    for year in range(window_start.year, dt.year + 1):
        pattern = os.path.join(
            base_folder, str(year), "MARC_F2-MARS3D-MENOR1200_????????T????Z.nc"
        )
        for path in glob.glob(pattern):
            found = stamp_re.search(path)
            if found is None:
                continue
            stamp = datetime.strptime(found.group(1), "%Y%m%dT%H%M")
            if window_start <= stamp <= dt:
                dated_paths.append((stamp, path))

    if not dated_paths:
        raise FileNotFoundError(f"Aucun fichier trouvé entre {window_start} et {dt}")

    # Sort on the timestamp only, so ties keep their discovery order.
    dated_paths.sort(key=lambda pair: pair[0])
    return [path for _, path in dated_paths]


"""
    Extraction TEMP/SAL pour un polygone et un fichier NetCDF.
    - Calcul de la couche exacte selon bathymétrie locale
    - Fallback sur 3 pixels les plus proches si aucun pixel intersecté
    - Moyenne pondérée par fraction de surface du polygone intersectant le pixel
    - Valeurs min max et mean pondérée en sortie 
    """

def _pixel_window(transform, bounds, height, width):
    """Return (row_start, row_stop, col_start, col_stop) of pixels that may touch `bounds`.

    The window is the pixel-space bounding box of the geometry bounds, padded by
    one pixel on every side so that cells merely touching the geometry are
    still tested, then clamped to the grid.
    """
    if any(np.isnan(v) for v in bounds):
        return 0, 0, 0, 0  # empty geometry: nothing can intersect
    min_x, min_y, max_x, max_y = bounds
    inverse = ~transform
    corners = [
        inverse * xy
        for xy in ((min_x, min_y), (min_x, max_y), (max_x, min_y), (max_x, max_y))
    ]
    cols = [c for c, _ in corners]
    rows = [r for _, r in corners]
    col_start = max(0, int(np.floor(min(cols))) - 1)
    col_stop = min(width, int(np.floor(max(cols))) + 2)
    row_start = max(0, int(np.floor(min(rows))) - 1)
    row_stop = min(height, int(np.floor(max(rows))) + 2)
    return row_start, row_stop, col_start, col_stop


def _weighted_mean(values, weights):
    """Weighted mean of `values`; NaN when empty, plain mean when all weights are zero."""
    if len(values) == 0:
        return np.nan
    total = weights.sum()
    if total > 0:
        return (values * weights).sum() / total
    # Zero-area geometries (points, lines) give zero weights everywhere.
    return values.mean()


def get_temp_sal_for_poly(poly, depth_sampling_surface, ncdf_path):
    """Extract TEMP and SAL at the sampling depth for one polygon and one NetCDF file.

    Steps:
      1. Find the grid cells of ``H0`` (used here as the local bathymetry) that
         intersect the polygon and have a positive, non-NaN ``H0``; each cell is
         weighted by the fraction of its area covered by the polygon.
      2. If no cell qualifies, fall back to the 3 valid cells whose centre is
         closest to the polygon, with uniform weights.
      3. For each cell, pick the vertical layer from the ratio
         ``depth_sampling_surface / H0``: a depth of 0 maps to the last layer
         index, a depth at or beyond ``H0`` maps to index 0.
      4. Read TEMP / SAL at that layer (first time step when the variable is
         4-D) and drop NaN values together with their weights.

    Args:
        poly: Shapely geometry expressed in EPSG:4326.
        depth_sampling_surface: Sampling depth, in the same unit as ``H0``.
        ncdf_path: Path of the NetCDF file to read.

    Returns:
        ``(temp_values, sal_values, temp_weighted_mean, sal_weighted_mean)``.
        The first two are arrays of per-cell values; when no temperature value
        is available the result is ``([], [], nan, nan)``.
    """
    # The context manager releases the file handle even if extraction fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as raw_ds:
        ds = raw_ds if hasattr(raw_ds, "crs") else raw_ds.rio.write_crs("EPSG:4326")

        bathy = ds["H0"]
        grid_crs = bathy.rio.crs
        transform = bathy.rio.transform()
        # Load each array once instead of going through `.values` per pixel.
        bathy_arr = bathy.values
        temp, sal = ds["TEMP"], ds["SAL"]
        temp_arr = (temp[0] if temp.ndim == 4 else temp).values
        sal_arr = (sal[0] if sal.ndim == 4 else sal).values

    poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(grid_crs).iloc[0]
    height, width = bathy_arr.shape
    n_layers = temp_arr.shape[0]  # number of vertical layers in the file

    coords, bathy_vals, weights = [], [], []

    # --- Cells intersected by the polygon (only those near its bounding box) ---
    row_start, row_stop, col_start, col_stop = _pixel_window(
        transform, poly_proj.bounds, height, width
    )
    for j in range(row_start, row_stop):
        for i in range(col_start, col_stop):
            b_val = bathy_arr[j, i]
            if np.isnan(b_val) or b_val <= 0:
                continue
            x_min, y_max = transform * (i, j)
            x_max, y_min = transform * (i + 1, j + 1)
            pixel_poly = box(x_min, y_min, x_max, y_max)
            intersection = poly_proj.intersection(pixel_poly)
            if intersection.is_empty:
                continue
            coords.append((j, i))
            bathy_vals.append(b_val)
            weights.append(intersection.area / pixel_poly.area)

    # --- Fallback: the 3 closest valid cells when nothing intersects ---
    if not bathy_vals:
        candidates = []
        for j, i in np.argwhere(bathy_arr > 0):  # NaN compares False, so it is skipped
            x_c, y_c = transform * (i + 0.5, j + 0.5)
            dist = Point(x_c, y_c).distance(poly_proj)
            candidates.append((dist, int(j), int(i), bathy_arr[j, i]))
        candidates.sort(key=lambda item: item[0])
        nearest = candidates[:3]
        coords = [(j, i) for _, j, i, _ in nearest]
        bathy_vals = [b for _, _, _, b in nearest]
        weights = [1.0] * len(nearest)

    # --- Read TEMP / SAL at the layer matching the sampling depth ---
    temp_values, temp_weights = [], []
    sal_values, sal_weights = [], []
    for (j, i), b_val, w in zip(coords, bathy_vals, weights):
        layer_phys = n_layers - int(depth_sampling_surface / b_val * n_layers)
        layer = max(0, min(layer_phys - 1, n_layers - 1))
        t_val = temp_arr[layer, j, i]
        s_val = sal_arr[layer, j, i]
        # Keep each value with its own weight so the two stay aligned.
        if not np.isnan(t_val):
            temp_values.append(t_val)
            temp_weights.append(w)
        if not np.isnan(s_val):
            sal_values.append(s_val)
            sal_weights.append(w)

    if not temp_values:
        return [], [], np.nan, np.nan

    temp_values = np.array(temp_values)
    sal_values = np.array(sal_values)
    temp_weighted_mean = _weighted_mean(temp_values, np.array(temp_weights))
    sal_weighted_mean = _weighted_mean(sal_values, np.array(sal_weights))

    return temp_values, sal_values, temp_weighted_mean, sal_weighted_mean

""" Lacement de la fonction """

def main():
    """Add 24 h min / max / mean temperature and salinity at sampling depth to each polygon.

    For every row of the polygon layer, the 3-hourly files of the 24 hours
    preceding the row's datetime are processed with ``get_temp_sal_for_poly``.
    Min and max are taken over all per-cell values, the mean is the average of
    the per-file weighted means. Rows without any file get NaN in every column.

    Raises:
        ValueError: If the input paths below have not been filled in.
    """
    # --- User settings: fill these in before running ---
    # Folder of 3-hourly NetCDF files, e.g.
    # "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/SAL-TEMP_latlon/3H/"
    base_folder = ""
    # Polygon layer, e.g. "/home/paulinev/Bureau/Mars3D/sal_temp/adne_extract_med_ouest.geojson"
    polygons_path = ""
    # Output file name (change as needed)
    output_path = "adne_extract_med_ouest.geojson"

    if not base_folder or not polygons_path:
        raise ValueError(
            "Renseignez base_folder et polygons_path dans main() avant de lancer l'extraction."
        )

    gdf = gpd.read_file(polygons_path)

    columns = (
        'temp_min_24h', 'temp_max_24h', 'temp_mean_24h',
        'sal_min_24h', 'sal_max_24h', 'sal_mean_24h',
    )
    results = {column: [] for column in columns}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction Temp & Sal sur 24h"):
        dt = row['datetime']  # adapt to the datetime column of your dataset
        try:
            files = get_netcdf_paths_for_period(dt, base_folder, hours=24)
            temp_vals, sal_vals, temp_means, sal_means = [], [], [], []

            for f in files:
                # adapt 'depth_sampling_surface' to your sampling-depth column
                t_vals, s_vals, t_wmean, s_wmean = get_temp_sal_for_poly(
                    row['geometry'], row['depth_sampling_surface'], f
                )
                temp_vals.extend(t_vals)
                sal_vals.extend(s_vals)
                if not np.isnan(t_wmean):
                    temp_means.append(t_wmean)
                if not np.isnan(s_wmean):
                    sal_means.append(s_wmean)

            stats = (
                np.nanmin(temp_vals) if temp_vals else np.nan,
                np.nanmax(temp_vals) if temp_vals else np.nan,
                np.nanmean(temp_means) if temp_means else np.nan,
                np.nanmin(sal_vals) if sal_vals else np.nan,
                np.nanmax(sal_vals) if sal_vals else np.nan,
                np.nanmean(sal_means) if sal_means else np.nan,
            )
        except FileNotFoundError as e:
            print(f"⚠️ Fichier manquant pour {dt}: {e}")
            stats = (np.nan,) * len(columns)

        # Append exactly one value per column and per row, whatever happened above.
        for column, value in zip(columns, stats):
            results[column].append(value)

    # Write the collected statistics into the output columns.
    for column in columns:
        gdf[column] = results[column]

    gdf.to_file(output_path, driver="GeoJSON")


if __name__ == "__main__":
    main()


b. En surface 

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import box
import glob
import os
import re
from datetime import datetime, timedelta
from scipy.spatial import cKDTree

def get_netcdf_paths_for_period(dt, base_folder, hours=24):
    """List the 3-hourly NetCDF files stamped between ``dt - hours`` and ``dt``.

    Each year folder ``<base_folder>/<year>`` covered by the window is scanned;
    the timestamp is read from the file name and both bounds are inclusive.

    Args:
        dt: End of the time window (datetime-like).
        base_folder: Root folder containing one sub-folder per year.
        hours: Window length in hours, counted backwards from ``dt``.

    Returns:
        Matching file paths, ordered by timestamp.

    Raises:
        FileNotFoundError: If the window contains no file.
    """
    earliest = dt - timedelta(hours=hours)
    matches = []

    year = earliest.year
    while year <= dt.year:
        folder = os.path.join(base_folder, str(year))
        for candidate in glob.glob(
            os.path.join(folder, "MARC_F2-MARS3D-MENOR1200_????????T????Z.nc")
        ):
            hit = re.search(r"_(\d{8}T\d{4})Z\.nc$", candidate)
            when = datetime.strptime(hit.group(1), "%Y%m%dT%H%M") if hit else None
            if when is not None and earliest <= when <= dt:
                matches.append((candidate, when))
        year += 1

    ordered = sorted(matches, key=lambda pair: pair[1])
    if not ordered:
        raise FileNotFoundError(f"Aucun fichier trouvé entre {earliest} et {dt}")

    return [candidate for candidate, _ in ordered]


def _nearest_valid_values(layer, poly, k=3):
    """Return the values of the (up to) `k` non-NaN cells closest to the polygon centroid."""
    grid = layer.values
    valid = ~np.isnan(grid)
    n_valid = int(valid.sum())
    if n_valid == 0:
        return np.array([])

    transform = layer.rio.transform()
    height, width = grid.shape
    cols, rows = np.meshgrid(np.arange(width) + 0.5, np.arange(height) + 0.5)
    x_centres, y_centres = transform * (cols, rows)

    tree = cKDTree(np.column_stack([x_centres[valid], y_centres[valid]]))
    _, idx = tree.query([poly.centroid.x, poly.centroid.y], k=min(k, n_valid))
    # With k == 1 the query returns a scalar index; always hand back an array.
    return grid[valid][np.atleast_1d(idx)]


def _level_values_for_poly(da, poly, depth_index):
    """Return the non-NaN values of one horizontal slice of `da` inside `poly`.

    A 4-D variable is sliced at the first time step and at `depth_index`; a 3-D
    variable is sliced at its first index only. Falls back to the nearest valid
    cells when the polygon covers no valid cell.
    """
    if da.ndim == 4:
        layer = da[0, depth_index]
    elif da.ndim == 3:
        layer = da[0]
    else:
        layer = da
    if layer.rio.crs is None:
        layer = layer.rio.write_crs("EPSG:4326")

    # drop=False keeps the grid shape, so an empty overlap yields all-NaN, not an error.
    clipped = layer.rio.clip([poly], all_touched=True, drop=False).values
    inside = clipped[~np.isnan(clipped)]
    if inside.size:
        return inside
    return _nearest_valid_values(layer, poly)


def get_ws_vel_for_poly(poly, ncdf_path, depth_index=0):
    """Extract TEMP and SAL on a single vertical level for one polygon and one file.

    Both variables are read at the same level (``depth_index``) of the first
    time step, clipped to the polygon (every touched cell counts), and reduced
    to their non-NaN values. If the polygon covers no valid cell, the 3 valid
    cells nearest to the polygon centroid are used instead.

    NOTE(review): the default ``depth_index=0`` is kept for compatibility, but
    the other extraction cells of this notebook treat the *last* level as the
    surface (``level=-1``; depth 0 mapped to the last layer index). Confirm
    which index is the surface in these files and pass it explicitly if needed.

    Args:
        poly: Shapely geometry in the CRS of the grid (EPSG:4326 is assumed when
            the file carries no CRS).
        ncdf_path: Path of the NetCDF file to read.
        depth_index: Index of the vertical level to extract.

    Returns:
        ``(temp_values, sal_values, temp_mean, sal_mean)``: arrays of cell
        values and their plain means (NaN when no value is available).
    """
    # The context manager releases the file handle even if extraction fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as raw_ds:
        ds = raw_ds if hasattr(raw_ds, "crs") else raw_ds.rio.write_crs("EPSG:4326")
        temp_vals = _level_values_for_poly(ds["TEMP"], poly, depth_index)
        sal_vals = _level_values_for_poly(ds["SAL"], poly, depth_index)

    temp_mean = np.mean(temp_vals) if len(temp_vals) > 0 else np.nan
    sal_mean = np.mean(sal_vals) if len(sal_vals) > 0 else np.nan

    return temp_vals, sal_vals, temp_mean, sal_mean


def main():
    """Add 24 h min / max / mean surface temperature and salinity to each polygon.

    For every row of the polygon layer, the 3-hourly files of the 24 hours
    preceding the row's datetime are processed with ``get_ws_vel_for_poly``.
    Min and max are taken over all cell values, the mean is the average of the
    per-file means. Rows without any file get NaN in every column.

    Raises:
        ValueError: If the input paths below have not been filled in.
    """
    # --- User settings: fill these in before running ---
    # Folder of 3-hourly NetCDF files (one sub-folder per year). The original
    # example pointed at ".../Aggregated/CUR-WIND_latlon/3H"; the files must
    # contain the TEMP and SAL variables.
    base_folder = ""
    # Polygon layer to enrich
    polygons_path = ""
    # Output file name (change as needed)
    output_path = "adne_extract_corse.geojson"

    if not base_folder or not polygons_path:
        raise ValueError(
            "Renseignez base_folder et polygons_path dans main() avant de lancer l'extraction."
        )

    gdf = gpd.read_file(polygons_path)

    columns = (
        "temp_min_24h", "temp_max_24h", "temp_mean_24h",
        "sal_min_24h", "sal_max_24h", "sal_mean_24h",
    )
    results = {column: [] for column in columns}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction Temp & Sal en surface sur 24h"):
        dt = row["datetime"]  # adapt to the datetime column of your dataset
        try:
            files = get_netcdf_paths_for_period(dt, base_folder, hours=24)
            temp_vals, sal_vals, temp_means, sal_means = [], [], [], []

            for f in files:
                t_vals, s_vals, t_mean, s_mean = get_ws_vel_for_poly(row["geometry"], f)
                temp_vals.extend(t_vals)
                sal_vals.extend(s_vals)
                if not np.isnan(t_mean):
                    temp_means.append(t_mean)
                if not np.isnan(s_mean):
                    sal_means.append(s_mean)

            stats = (
                np.nanmin(temp_vals) if temp_vals else np.nan,
                np.nanmax(temp_vals) if temp_vals else np.nan,
                np.nanmean(temp_means) if temp_means else np.nan,
                np.nanmin(sal_vals) if sal_vals else np.nan,
                np.nanmax(sal_vals) if sal_vals else np.nan,
                np.nanmean(sal_means) if sal_means else np.nan,
            )
        except FileNotFoundError as e:
            print(f"⚠️ Fichier manquant pour {dt}: {e}")
            stats = (np.nan,) * len(columns)

        # Append exactly one value per column and per row, whatever happened above.
        for column, value in zip(columns, stats):
            results[column].append(value)

    for column in columns:
        gdf[column] = results[column]

    gdf.to_file(output_path, driver="GeoJSON")


if __name__ == "__main__":
    main()


#### 2. Extraction 7 jours en amont

a. En profondeur

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point, box
import os
from datetime import timedelta


def get_netcdf_paths_for_period(dt, base_folder, days, stat_type):
    """Return the existing daily files from ``dt - days`` up to ``dt`` (both included).

    Daily files are expected at
    ``<base_folder>/<year>/MARS3D_<YYYYMMDD>_<stat_type>.nc``; days whose file
    is missing are silently skipped.

    Args:
        dt: Last day of the window (datetime-like).
        base_folder: Root folder containing one sub-folder per year.
        days: Number of days to go back from ``dt``.
        stat_type: Statistic suffix used in the file name.

    Returns:
        Existing file paths in chronological order.

    Raises:
        FileNotFoundError: If none of the expected files exists.
    """
    first_day = dt - timedelta(days=days)
    n_days = (dt - first_day).days + 1
    found = []

    for offset in range(n_days):
        current = first_day + timedelta(offset)
        candidate = os.path.join(
            base_folder,
            str(current.year),
            f"MARS3D_{current.strftime('%Y%m%d')}_{stat_type}.nc",
        )
        if os.path.exists(candidate):
            found.append(candidate)

    if found:
        return found

    raise FileNotFoundError(
        f"Aucun fichier {stat_type} trouvé entre {first_day} et {dt}"
    )


def _pixel_window(transform, bounds, height, width):
    """Return (row_start, row_stop, col_start, col_stop) of pixels that may touch `bounds`.

    The window is the pixel-space bounding box of the geometry bounds, padded by
    one pixel on every side so that cells merely touching the geometry are
    still tested, then clamped to the grid.
    """
    if any(np.isnan(v) for v in bounds):
        return 0, 0, 0, 0  # empty geometry: nothing can intersect
    min_x, min_y, max_x, max_y = bounds
    inverse = ~transform
    corners = [
        inverse * xy
        for xy in ((min_x, min_y), (min_x, max_y), (max_x, min_y), (max_x, max_y))
    ]
    cols = [c for c, _ in corners]
    rows = [r for _, r in corners]
    col_start = max(0, int(np.floor(min(cols))) - 1)
    col_stop = min(width, int(np.floor(max(cols))) + 2)
    row_start = max(0, int(np.floor(min(rows))) - 1)
    row_stop = min(height, int(np.floor(max(rows))) + 2)
    return row_start, row_stop, col_start, col_stop


def _weighted_mean(values, weights):
    """Weighted mean of `values`; NaN when empty, plain mean when all weights are zero."""
    if len(values) == 0:
        return np.nan
    total = weights.sum()
    if total > 0:
        return (values * weights).sum() / total
    # Zero-area geometries (points, lines) give zero weights everywhere.
    return values.mean()


def get_temp_sal_for_poly(poly, depth_sampling_surface, ncdf_path):
    """Extract TEMP and SAL at the sampling depth for one polygon and one NetCDF file.

    Steps:
      1. Find the grid cells of ``H0`` (used here as the local bathymetry) that
         intersect the polygon and have a positive, non-NaN ``H0``; each cell is
         weighted by the fraction of its area covered by the polygon.
      2. If no cell qualifies, fall back to the 3 valid cells whose centre is
         closest to the polygon, with uniform weights.
      3. For each cell, pick the vertical layer from the ratio
         ``depth_sampling_surface / H0``: a depth of 0 maps to the last layer
         index, a depth at or beyond ``H0`` maps to index 0.
      4. Read TEMP / SAL at that layer (first time step when the variable is
         4-D, no time axis otherwise) and drop NaN values together with their
         weights, so values and weights always have the same length.

    Args:
        poly: Shapely geometry expressed in EPSG:4326.
        depth_sampling_surface: Sampling depth, in the same unit as ``H0``.
        ncdf_path: Path of the NetCDF file to read.

    Returns:
        ``(temp_values, sal_values, temp_weighted_mean, sal_weighted_mean)``.
        The first two are arrays of per-cell values; when no temperature value
        is available the result is ``([], [], nan, nan)``.
    """
    # The context manager releases the file handle even if extraction fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as raw_ds:
        ds = raw_ds if hasattr(raw_ds, "crs") else raw_ds.rio.write_crs("EPSG:4326")

        bathy = ds["H0"]
        grid_crs = bathy.rio.crs
        transform = bathy.rio.transform()
        # Load each array once instead of going through `.values` per pixel.
        bathy_arr = bathy.values
        temp, sal = ds["TEMP"], ds["SAL"]
        temp_arr = (temp[0] if temp.ndim == 4 else temp).values
        sal_arr = (sal[0] if sal.ndim == 4 else sal).values

    poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(grid_crs).iloc[0]
    height, width = bathy_arr.shape
    n_layers = temp_arr.shape[0]  # number of vertical layers in the file

    coords, bathy_vals, weights = [], [], []

    # --- Cells intersected by the polygon (only those near its bounding box) ---
    row_start, row_stop, col_start, col_stop = _pixel_window(
        transform, poly_proj.bounds, height, width
    )
    for j in range(row_start, row_stop):
        for i in range(col_start, col_stop):
            b_val = bathy_arr[j, i]
            if np.isnan(b_val) or b_val <= 0:
                continue
            x_min, y_max = transform * (i, j)
            x_max, y_min = transform * (i + 1, j + 1)
            pixel_poly = box(x_min, y_min, x_max, y_max)
            intersection = poly_proj.intersection(pixel_poly)
            if intersection.is_empty:
                continue
            coords.append((j, i))
            bathy_vals.append(b_val)
            weights.append(intersection.area / pixel_poly.area)

    # --- Fallback: the 3 closest valid cells when nothing intersects ---
    if not bathy_vals:
        candidates = []
        for j, i in np.argwhere(bathy_arr > 0):  # NaN compares False, so it is skipped
            x_c, y_c = transform * (i + 0.5, j + 0.5)
            dist = Point(x_c, y_c).distance(poly_proj)
            candidates.append((dist, int(j), int(i), bathy_arr[j, i]))
        candidates.sort(key=lambda item: item[0])
        nearest = candidates[:3]
        coords = [(j, i) for _, j, i, _ in nearest]
        bathy_vals = [b for _, _, _, b in nearest]
        weights = [1.0] * len(nearest)

    # --- Read TEMP / SAL at the layer matching the sampling depth ---
    temp_values, temp_weights = [], []
    sal_values, sal_weights = [], []
    for (j, i), b_val, w in zip(coords, bathy_vals, weights):
        layer_phys = n_layers - int(depth_sampling_surface / b_val * n_layers)
        layer = max(0, min(layer_phys - 1, n_layers - 1))
        t_val = temp_arr[layer, j, i]
        s_val = sal_arr[layer, j, i]
        # Keep each value with its own weight so the two stay aligned.
        if not np.isnan(t_val):
            temp_values.append(t_val)
            temp_weights.append(w)
        if not np.isnan(s_val):
            sal_values.append(s_val)
            sal_weights.append(w)

    if not temp_values:
        return [], [], np.nan, np.nan

    temp_values = np.array(temp_values)
    sal_values = np.array(sal_values)
    temp_weighted_mean = _weighted_mean(temp_values, np.array(temp_weights))
    sal_weighted_mean = _weighted_mean(sal_values, np.array(sal_weights))

    return temp_values, sal_values, temp_weighted_mean, sal_weighted_mean


def _extract_over_files(row, files):
    """Pool per-cell values and per-file weighted means over `files` for one row."""
    temp_vals, sal_vals, temp_means, sal_means = [], [], [], []
    for f in files:
        # adapt 'depth_sampling_surface' to your sampling-depth column if needed
        t_vals, s_vals, t_wmean, s_wmean = get_temp_sal_for_poly(
            row['geometry'], row['depth_sampling_surface'], f
        )
        temp_vals.extend(t_vals)
        sal_vals.extend(s_vals)
        if not np.isnan(t_wmean):
            temp_means.append(t_wmean)
        if not np.isnan(s_wmean):
            sal_means.append(s_wmean)
    return temp_vals, sal_vals, temp_means, sal_means


def main():
    """Add 7-day max / min / mean temperature and salinity at sampling depth to each polygon.

    For every row, three sets of daily files are read over the 7 days before
    the row's date: the max is the maximum over the "max" files, the min is the
    minimum over the "min" files, and the mean is the average of the per-file
    weighted means of the "mean" files. If any of the three sets is missing,
    the row gets NaN in all six columns.

    Raises:
        ValueError: If the input paths below have not been filled in.
    """
    # --- User settings: fill these in before running ---
    # Folder of daily NetCDF files, e.g.
    # "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/SAL-TEMP_latlon/Daily/Med-Ouest"
    base_folder = ""
    # Polygon layer to enrich
    polygons_path = ""
    # Output file name (change as needed)
    output_path = "grille_med_ouest.geojson"

    if not base_folder or not polygons_path:
        raise ValueError(
            "Renseignez base_folder et polygons_path dans main() avant de lancer l'extraction."
        )

    gdf = gpd.read_file(polygons_path)

    columns = (
        'temp_max_7j', 'temp_min_7j', 'temp_mean_7j',
        'sal_max_7j', 'sal_min_7j', 'sal_mean_7j',
    )
    results = {column: [] for column in columns}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 7 jours"):
        dt = row['date']  # adapt to the date column of your dataset

        try:
            # Max of the daily maxima
            files_max = get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type="max")
            temp_hi, sal_hi, _, _ = _extract_over_files(row, files_max)

            # Min of the daily minima
            files_min = get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type="min")
            temp_lo, sal_lo, _, _ = _extract_over_files(row, files_min)

            # Mean of the daily means
            files_mean = get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type="mean")
            _, _, temp_means, sal_means = _extract_over_files(row, files_mean)

            stats = (
                np.nanmax(temp_hi) if temp_hi else np.nan,
                np.nanmin(temp_lo) if temp_lo else np.nan,
                np.nanmean(temp_means) if temp_means else np.nan,
                np.nanmax(sal_hi) if sal_hi else np.nan,
                np.nanmin(sal_lo) if sal_lo else np.nan,
                np.nanmean(sal_means) if sal_means else np.nan,
            )
        except FileNotFoundError as e:
            print(f"Fichier manquant pour {dt}: {e}")
            stats = (np.nan,) * len(columns)

        # Results are appended only here, once per row, so every column list
        # keeps the same length as the layer even when a lookup fails midway.
        for column, value in zip(columns, stats):
            results[column].append(value)

    for column in columns:
        gdf[column] = results[column]

    gdf.to_file(output_path, driver="GeoJSON")


if __name__ == "__main__":
    main()


b. En surface 

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point
from datetime import timedelta
import os
from scipy.spatial import cKDTree

def get_netcdf_paths_for_period(dt, base_folder, days, stat_type):
    """List the existing daily ``stat_type`` files between ``dt - days`` and ``dt`` inclusive.

    Each day maps to ``<base_folder>/<year>/MARS3D_<YYYYMMDD>_<stat_type>.nc``;
    days without a file are skipped.

    Raises:
        FileNotFoundError: If no file exists for any day of the window.
    """
    window_start = dt - timedelta(days=days)
    day_count = (dt - window_start).days + 1
    window_days = [window_start + timedelta(step) for step in range(day_count)]

    expected = [
        os.path.join(
            os.path.join(base_folder, str(day.year)),
            f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc",
        )
        for day in window_days
    ]
    existing = [path for path in expected if os.path.exists(path)]

    if not existing:
        raise FileNotFoundError(f"Aucun fichier {stat_type} trouvé entre {window_start} et {dt}")
    return existing

def _nearest_valid_values(layer, poly, k=3):
    """Return the values of the (up to) `k` non-NaN cells closest to the polygon centroid."""
    grid = layer.values
    valid = ~np.isnan(grid)
    n_valid = int(valid.sum())
    if n_valid == 0:
        return np.array([])

    transform = layer.rio.transform()
    height, width = grid.shape
    cols, rows = np.meshgrid(np.arange(width) + 0.5, np.arange(height) + 0.5)
    x_centres, y_centres = transform * (cols, rows)

    tree = cKDTree(np.column_stack([x_centres[valid], y_centres[valid]]))
    _, idx = tree.query([poly.centroid.x, poly.centroid.y], k=min(k, n_valid))
    # With k == 1 the query returns a scalar index; always hand back an array.
    return grid[valid][np.atleast_1d(idx)]


def _surface_values_for_poly(da, poly):
    """Return the non-NaN values of the last-level slice of `da` inside `poly`.

    The first time step is selected only when a ``time`` dimension exists, so
    daily files stored without a time axis are handled as well. Adjust the
    dimension names below if your files name them differently.
    """
    indexers = {dim: idx for dim, idx in (("time", 0), ("level", -1)) if dim in da.dims}
    layer = da.isel(indexers)
    if layer.rio.crs is None:
        layer = layer.rio.write_crs("EPSG:4326")

    # drop=False keeps the grid shape, so an empty overlap yields all-NaN, not an error.
    clipped = layer.rio.clip([poly], all_touched=True, drop=False).values
    inside = clipped[~np.isnan(clipped)]
    if inside.size:
        return inside
    return _nearest_valid_values(layer, poly)


def get_ws_vel_for_poly(poly, ncdf_path):
    """Extract TEMP and SAL on the last vertical level for one polygon and one file.

    Each variable is sliced at ``level=-1`` (and ``time=0`` when present),
    clipped to the polygon (every touched cell counts), and reduced to its
    non-NaN values. If the polygon covers no valid cell, the 3 valid cells
    nearest to the polygon centroid are used instead.

    Args:
        poly: Shapely geometry in the CRS of the grid (EPSG:4326 is assumed when
            the file carries no CRS).
        ncdf_path: Path of the NetCDF file to read.

    Returns:
        ``(temp_values, sal_values, temp_mean, sal_mean)``: arrays of cell
        values and their plain means (NaN when no value is available).
    """
    # The context manager releases the file handle even if extraction fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as ds:
        temp_vals = _surface_values_for_poly(ds["TEMP"], poly)
        sal_vals = _surface_values_for_poly(ds["SAL"], poly)

    temp_mean = np.mean(temp_vals) if len(temp_vals) > 0 else np.nan
    sal_mean = np.mean(sal_vals) if len(sal_vals) > 0 else np.nan

    return temp_vals, sal_vals, temp_mean, sal_mean


def _extract_over_files(geometry, files):
    """Pool cell values and per-file means over `files` for one geometry."""
    temp_vals, sal_vals, temp_means, sal_means = [], [], [], []
    for f in files:
        t_vals, s_vals, t_mean, s_mean = get_ws_vel_for_poly(geometry, f)
        temp_vals.extend(t_vals)
        sal_vals.extend(s_vals)
        if not np.isnan(t_mean):
            temp_means.append(t_mean)
        if not np.isnan(s_mean):
            sal_means.append(s_mean)
    return temp_vals, sal_vals, temp_means, sal_means


def main():
    """Add 7-day max / min / mean surface temperature and salinity to each polygon.

    For every row, three sets of daily files are read over the 7 days before
    the row's date: the max is the maximum over the "max" files, the min is the
    minimum over the "min" files, and the mean is the average of the per-file
    means of the "mean" files. If any of the three sets is missing, the row
    gets NaN in all six columns.

    Raises:
        ValueError: If the input paths below have not been filled in.
    """
    # --- User settings: fill these in before running ---
    # Folder of daily NetCDF files (one sub-folder per year). The original
    # example pointed at ".../Aggregated/CUR-WIND_latlon/Daily/Corse2"; the
    # files must contain the TEMP and SAL variables.
    base_folder = ""
    # Polygon layer to enrich
    polygons_path = ""
    # Output file name (change as needed)
    output_path = "adne_extract_corse.geojson"

    if not base_folder or not polygons_path:
        raise ValueError(
            "Renseignez base_folder et polygons_path dans main() avant de lancer l'extraction."
        )

    gdf = gpd.read_file(polygons_path)

    columns = (
        "temp_max_7j", "temp_min_7j", "temp_mean_7j",
        "sal_max_7j", "sal_min_7j", "sal_mean_7j",
    )
    results = {column: [] for column in columns}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 7 jours Temp & Sal en surface"):
        dt = row["date"]  # adapt to the date column of your dataset
        try:
            # Max over 7 days
            files_max = get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type="max")
            temp_hi, sal_hi, _, _ = _extract_over_files(row["geometry"], files_max)

            # Min over 7 days
            files_min = get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type="min")
            temp_lo, sal_lo, _, _ = _extract_over_files(row["geometry"], files_min)

            # Mean over 7 days
            files_mean = get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type="mean")
            _, _, temp_means, sal_means = _extract_over_files(row["geometry"], files_mean)

            stats = (
                np.nanmax(temp_hi) if temp_hi else np.nan,
                np.nanmin(temp_lo) if temp_lo else np.nan,
                np.nanmean(temp_means) if temp_means else np.nan,
                np.nanmax(sal_hi) if sal_hi else np.nan,
                np.nanmin(sal_lo) if sal_lo else np.nan,
                np.nanmean(sal_means) if sal_means else np.nan,
            )
        except FileNotFoundError as e:
            print(f"⚠️ Fichiers manquants pour {dt}: {e}")
            stats = (np.nan,) * len(columns)

        # Results are appended only here, once per row, so every column list
        # keeps the same length as the layer even when a lookup fails midway.
        for column, value in zip(columns, stats):
            results[column].append(value)

    for column in columns:
        gdf[column] = results[column]

    gdf.to_file(output_path, driver="GeoJSON")


if __name__ == "__main__":
    main()


#### 3. Extraction 1 mois en amont (en utilisant les données daily)

a. En profondeur

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point, box
import os
from datetime import timedelta
from dateutil.relativedelta import relativedelta


def get_netcdf_paths_for_last_month(dt, base_folder, stat_type):
    """Return the existing daily files of the sliding month ending at ``dt``.

    The window runs from ``dt`` minus one calendar month to ``dt``, both
    included. Each day maps to
    ``<base_folder>/<year>/MARS3D_<YYYYMMDD>_<stat_type>.nc``; days without a
    file are skipped.

    Raises:
        FileNotFoundError: If no file exists for any day of the window.
    """
    window_start = dt - relativedelta(months=1)
    window_end = dt
    day_count = (window_end - window_start).days + 1

    existing = []
    for offset in range(day_count):
        day = window_start + timedelta(offset)
        candidate = os.path.join(
            base_folder,
            str(day.year),
            f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc",
        )
        if os.path.exists(candidate):
            existing.append(candidate)

    if existing:
        return existing

    raise FileNotFoundError(
        f"Aucun fichier {stat_type} trouvé entre {window_start} et {window_end}"
    )


def _pixel_window(transform, bounds, height, width):
    """Return (row_start, row_stop, col_start, col_stop) of pixels that may touch `bounds`.

    The window is the pixel-space bounding box of the geometry bounds, padded by
    one pixel on every side so that cells merely touching the geometry are
    still tested, then clamped to the grid.
    """
    if any(np.isnan(v) for v in bounds):
        return 0, 0, 0, 0  # empty geometry: nothing can intersect
    min_x, min_y, max_x, max_y = bounds
    inverse = ~transform
    corners = [
        inverse * xy
        for xy in ((min_x, min_y), (min_x, max_y), (max_x, min_y), (max_x, max_y))
    ]
    cols = [c for c, _ in corners]
    rows = [r for _, r in corners]
    col_start = max(0, int(np.floor(min(cols))) - 1)
    col_stop = min(width, int(np.floor(max(cols))) + 2)
    row_start = max(0, int(np.floor(min(rows))) - 1)
    row_stop = min(height, int(np.floor(max(rows))) + 2)
    return row_start, row_stop, col_start, col_stop


def _weighted_mean(values, weights):
    """Weighted mean of `values`; NaN when empty, plain mean when all weights are zero."""
    if len(values) == 0:
        return np.nan
    total = weights.sum()
    if total > 0:
        return (values * weights).sum() / total
    # Zero-area geometries (points, lines) give zero weights everywhere.
    return values.mean()


def get_temp_sal_for_poly(poly, depth_sampling_surface, ncdf_path):
    """Extract TEMP and SAL at the sampling depth for one polygon and one NetCDF file.

    Steps:
      1. Find the grid cells of ``H0`` (used here as the local bathymetry) that
         intersect the polygon and have a positive, non-NaN ``H0``; each cell is
         weighted by the fraction of its area covered by the polygon.
      2. If no cell qualifies, fall back to the 3 valid cells whose centre is
         closest to the polygon, with uniform weights.
      3. For each cell, pick the vertical layer from the ratio
         ``depth_sampling_surface / H0``: a depth of 0 maps to the last layer
         index, a depth at or beyond ``H0`` maps to index 0.
      4. Read TEMP / SAL at that layer (first time step when the variable is
         4-D, no time axis otherwise) and drop NaN values together with their
         weights, so values and weights always have the same length.

    Args:
        poly: Shapely geometry expressed in EPSG:4326.
        depth_sampling_surface: Sampling depth, in the same unit as ``H0``.
        ncdf_path: Path of the NetCDF file to read.

    Returns:
        ``(temp_values, sal_values, temp_weighted_mean, sal_weighted_mean)``.
        The first two are arrays of per-cell values; when no temperature value
        is available the result is ``([], [], nan, nan)``.
    """
    # The context manager releases the file handle even if extraction fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as raw_ds:
        ds = raw_ds if hasattr(raw_ds, 'crs') else raw_ds.rio.write_crs("EPSG:4326")

        bathy = ds['H0']
        grid_crs = bathy.rio.crs
        transform = bathy.rio.transform()
        # Load each array once instead of going through `.values` per pixel.
        bathy_arr = bathy.values
        temp, sal = ds['TEMP'], ds['SAL']
        temp_arr = (temp[0] if temp.ndim == 4 else temp).values
        sal_arr = (sal[0] if sal.ndim == 4 else sal).values

    poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(grid_crs).iloc[0]
    height, width = bathy_arr.shape
    n_layers = temp_arr.shape[0]  # number of vertical layers in the file

    coords, bathy_vals, weights = [], [], []

    # --- Cells intersected by the polygon (only those near its bounding box) ---
    row_start, row_stop, col_start, col_stop = _pixel_window(
        transform, poly_proj.bounds, height, width
    )
    for j in range(row_start, row_stop):
        for i in range(col_start, col_stop):
            b_val = bathy_arr[j, i]
            if np.isnan(b_val) or b_val <= 0:
                continue
            x_min, y_max = transform * (i, j)
            x_max, y_min = transform * (i + 1, j + 1)
            pixel_poly = box(x_min, y_min, x_max, y_max)
            intersection = poly_proj.intersection(pixel_poly)
            if intersection.is_empty:
                continue
            coords.append((j, i))
            bathy_vals.append(b_val)
            # fraction of the cell area covered by the polygon
            weights.append(intersection.area / pixel_poly.area)

    # --- Fallback: the 3 closest valid cells when nothing intersects ---
    if not bathy_vals:
        candidates = []
        for j, i in np.argwhere(bathy_arr > 0):  # NaN compares False, so it is skipped
            x_c, y_c = transform * (i + 0.5, j + 0.5)
            dist = Point(x_c, y_c).distance(poly_proj)
            candidates.append((dist, int(j), int(i), bathy_arr[j, i]))
        candidates.sort(key=lambda item: item[0])
        nearest = candidates[:3]
        coords = [(j, i) for _, j, i, _ in nearest]
        bathy_vals = [b for _, _, _, b in nearest]
        weights = [1.0] * len(nearest)

    # --- Read TEMP / SAL at the layer matching the sampling depth ---
    temp_values, temp_weights = [], []
    sal_values, sal_weights = [], []
    for (j, i), b_val, w in zip(coords, bathy_vals, weights):
        layer_phys = n_layers - int(depth_sampling_surface / b_val * n_layers)
        layer = max(0, min(layer_phys - 1, n_layers - 1))
        t_val = temp_arr[layer, j, i]
        s_val = sal_arr[layer, j, i]
        # Keep each value with its own weight so the two stay aligned.
        if not np.isnan(t_val):
            temp_values.append(t_val)
            temp_weights.append(w)
        if not np.isnan(s_val):
            sal_values.append(s_val)
            sal_weights.append(w)

    if not temp_values:
        return [], [], np.nan, np.nan

    temp_values = np.array(temp_values)
    sal_values = np.array(sal_values)
    temp_weighted_mean = _weighted_mean(temp_values, np.array(temp_weights))
    sal_weighted_mean = _weighted_mean(sal_values, np.array(sal_weights))

    return temp_values, sal_values, temp_weighted_mean, sal_weighted_mean


def main(base_folder=None, polygons_path=None, output_path="grille_med_ouest.geojson"):
    """Add 1-month min/max/mean TEMP and SAL at sampling depth to each polygon.

    For every row of the polygon file, the daily "max", "min" and "mean"
    NetCDF files of the month preceding ``row['date']`` are read with
    ``get_temp_sal_for_poly`` and reduced to six values:

    * ``*_max_1m``: maximum of all pixel values found in the "max" files,
    * ``*_min_1m``: minimum of all pixel values found in the "min" files,
    * ``*_mean_1m``: mean of the per-file weighted means of the "mean" files.

    Args:
        base_folder: Root folder of the daily NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/SAL-TEMP_latlon/Daily/Med-Ouest".
        polygons_path: Polygon file readable by ``geopandas.read_file``. It
            must provide the ``date`` and ``depth_sampling_surface`` columns
            (adapt the column names below if needed).
        output_path: GeoJSON file written at the end.

    Raises:
        ValueError: If ``base_folder`` or ``polygons_path`` is not provided.
    """
    if base_folder is None or polygons_path is None:
        raise ValueError(
            "main() requires base_folder (daily NetCDF root) and "
            "polygons_path (polygon file) to be set"
        )

    gdf = gpd.read_file(polygons_path)

    column_names = (
        'temp_max_1m', 'temp_min_1m', 'temp_mean_1m',
        'sal_max_1m', 'sal_min_1m', 'sal_mean_1m',
    )
    columns = {name: [] for name in column_names}
    reducers = {"max": np.nanmax, "min": np.nanmin}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 1 mois glissant"):
        dt = row['date']  # adapt if needed

        # Results are collected per row and committed once, so that a missing
        # file for one statistic can never leave the six columns with
        # different lengths.
        row_stats = {}
        try:
            for stat in ("max", "min", "mean"):
                files = get_netcdf_paths_for_last_month(dt, base_folder, stat_type=stat)
                all_temp, all_sal, temp_means, sal_means = [], [], [], []
                for f in files:
                    t_vals, s_vals, t_wmean, s_wmean = get_temp_sal_for_poly(
                        row['geometry'], row['depth_sampling_surface'], f  # adapt if needed
                    )
                    all_temp.extend(t_vals)
                    all_sal.extend(s_vals)
                    if not np.isnan(t_wmean):
                        temp_means.append(t_wmean)
                    if not np.isnan(s_wmean):
                        sal_means.append(s_wmean)

                if stat == "mean":
                    row_stats['temp_mean_1m'] = np.nanmean(temp_means) if temp_means else np.nan
                    row_stats['sal_mean_1m'] = np.nanmean(sal_means) if sal_means else np.nan
                else:
                    reduce_values = reducers[stat]
                    row_stats[f'temp_{stat}_1m'] = reduce_values(all_temp) if all_temp else np.nan
                    row_stats[f'sal_{stat}_1m'] = reduce_values(all_sal) if all_sal else np.nan

        except FileNotFoundError as e:
            print(f"Fichier manquant pour {dt}: {e}")
            row_stats = {}  # the whole row is reported as NaN

        for name in column_names:
            columns[name].append(row_stats.get(name, np.nan))

    for name in column_names:
        gdf[name] = columns[name]

    gdf.to_file(output_path, driver="GeoJSON")


if __name__ == "__main__":
    main()


b. En surface

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point
from datetime import timedelta
from dateutil.relativedelta import relativedelta
import os
from scipy.spatial import cKDTree

def get_netcdf_paths_for_last_month(dt, base_folder, stat_type):
    """List the existing daily NetCDF files of the month preceding ``dt``.

    One candidate path ``<base_folder>/<YYYY>/MARS3D_<YYYYMMDD>_<stat_type>.nc``
    is built for every day from ``dt`` minus one calendar month up to ``dt``
    (both included); only the paths that exist on disk are returned.

    Args:
        dt: End of the period.
        base_folder: Root folder containing one sub-folder per year.
        stat_type: Statistic suffix used in the file name.

    Returns:
        list[str]: Existing file paths in chronological order.

    Raises:
        FileNotFoundError: If none of the candidate files exists.
    """
    start_dt = dt - relativedelta(months=1)
    end_dt = dt
    n_days = (end_dt - start_dt).days + 1

    candidates = []
    for offset in range(n_days):
        day = start_dt + timedelta(offset)
        candidates.append(
            os.path.join(
                base_folder,
                str(day.year),
                f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc",
            )
        )

    files = [path for path in candidates if os.path.exists(path)]
    if files:
        return files
    raise FileNotFoundError(f"Aucun fichier {stat_type} trouvé entre {start_dt} et {end_dt}")

def get_ws_vel_for_poly(poly, ncdf_path):
    """Extract TEMP and SAL values under a polygon from one NetCDF file.

    Despite its name, this function reads the ``TEMP`` and ``SAL`` variables.
    Both are taken at ``time=0`` and at the last index of ``level`` (adapt the
    ``isel`` calls to the layout of the NetCDF file, e.g. drop ``time=0`` when
    the file has no time dimension).

    All grid cells touched by the polygon are kept. When a variable has no
    valid (non-NaN) cell under the polygon, the values of the (up to) 3 valid
    cells whose centres are nearest to the polygon centroid are used instead.

    Args:
        poly: Shapely geometry, interpreted as EPSG:4326.
        ncdf_path: Path to the NetCDF file.

    Returns:
        tuple: ``(temp_values, sal_values, temp_mean, sal_mean)`` where the
        first two are 1-D arrays of non-NaN values and the last two their
        plain means (NaN when the corresponding array is empty).
    """

    def _nearest_valid(grid, x_centres, y_centres, target, k=3):
        """Return the values of the (up to) k non-NaN cells nearest to target."""
        valid = ~np.isnan(grid)
        candidates = grid[valid]
        if candidates.size == 0:
            # Nothing to search: building/querying a tree would fail.
            return candidates
        tree = cKDTree(np.column_stack([x_centres[valid], y_centres[valid]]))
        _, nearest = tree.query([target.x, target.y], k=min(k, candidates.size))
        # query() returns a scalar index when k == 1; keep the result 1-D so
        # callers can use len() and list.extend() on it.
        return candidates[np.atleast_1d(nearest)]

    # Context manager: the file is closed even if the extraction fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as ds:
        temp = ds["TEMP"].isel(time=0, level=-1)
        sal = ds["SAL"].isel(time=0, level=-1)

        if temp.rio.crs is None:
            temp = temp.rio.write_crs("EPSG:4326")
        if sal.rio.crs is None:
            sal = sal.rio.write_crs("EPSG:4326")

        # Work in the raster CRS for both the clip and the fallback search
        # (the reprojected polygon used to be computed but never used).
        poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(temp.rio.crs).iloc[0]

        # drop=False keeps the grid shape, so an empty selection yields NaNs
        # rather than an error.
        temp_vals = temp.rio.clip([poly_proj], all_touched=True, drop=False).values.ravel()
        temp_vals = temp_vals[~np.isnan(temp_vals)]
        sal_vals = sal.rio.clip([poly_proj], all_touched=True, drop=False).values.ravel()
        sal_vals = sal_vals[~np.isnan(sal_vals)]

        # Fallback when no valid cell lies under the polygon.
        if temp_vals.size == 0 or sal_vals.size == 0:
            transform = temp.rio.transform()
            n_rows, n_cols = temp.shape
            col_grid, row_grid = np.meshgrid(np.arange(n_cols) + 0.5, np.arange(n_rows) + 0.5)
            x_centres, y_centres = transform * (col_grid, row_grid)
            centroid = poly_proj.centroid

            if temp_vals.size == 0:
                temp_vals = _nearest_valid(temp.values, x_centres, y_centres, centroid)
            if sal_vals.size == 0:
                sal_vals = _nearest_valid(sal.values, x_centres, y_centres, centroid)

    temp_mean = np.mean(temp_vals) if temp_vals.size > 0 else np.nan
    sal_mean = np.mean(sal_vals) if sal_vals.size > 0 else np.nan

    return temp_vals, sal_vals, temp_mean, sal_mean

def main(base_folder=None, polygons_path=None, output_path="grille_med_ouest.geojson"):
    """Add 1-month min/max/mean TEMP and SAL (``level=-1``) to each polygon.

    For every row of the polygon file, the daily "max", "min" and "mean"
    NetCDF files of the month preceding ``row["date"]`` are read with
    ``get_ws_vel_for_poly`` and reduced to six values:

    * ``*_max_1m``: maximum of all values found in the "max" files,
    * ``*_min_1m``: minimum of all values found in the "min" files,
    * ``*_mean_1m``: mean of the per-file means of the "mean" files.

    Args:
        base_folder: Root folder of the daily NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/SAL-TEMP_latlon/Daily/Med-Ouest".
        polygons_path: Polygon file readable by ``geopandas.read_file``; it
            must provide a ``date`` column (adapt the column name if needed).
        output_path: GeoJSON file written at the end.

    Raises:
        ValueError: If ``base_folder`` or ``polygons_path`` is not provided.
    """
    if base_folder is None or polygons_path is None:
        raise ValueError(
            "main() requires base_folder (daily NetCDF root) and "
            "polygons_path (polygon file) to be set"
        )

    gdf = gpd.read_file(polygons_path)

    column_names = (
        "temp_max_1m", "temp_min_1m", "temp_mean_1m",
        "sal_max_1m", "sal_min_1m", "sal_mean_1m",
    )
    columns = {name: [] for name in column_names}
    reducers = {"max": np.nanmax, "min": np.nanmin}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 1 mois WS/VEL pondérée"):
        dt = row["date"]  # adapt if needed

        # Collect per row and commit once: a file missing for a later
        # statistic must not leave the six columns with different lengths.
        row_stats = {}
        try:
            for stat in ("max", "min", "mean"):
                files = get_netcdf_paths_for_last_month(dt, base_folder, stat_type=stat)
                all_temp, all_sal, temp_means, sal_means = [], [], [], []
                for f in files:
                    temp_vals, sal_vals, temp_fmean, sal_fmean = get_ws_vel_for_poly(row["geometry"], f)
                    all_temp.extend(temp_vals)
                    all_sal.extend(sal_vals)
                    if not np.isnan(temp_fmean):
                        temp_means.append(temp_fmean)
                    if not np.isnan(sal_fmean):
                        sal_means.append(sal_fmean)

                if stat == "mean":
                    row_stats["temp_mean_1m"] = np.nanmean(temp_means) if temp_means else np.nan
                    row_stats["sal_mean_1m"] = np.nanmean(sal_means) if sal_means else np.nan
                else:
                    reduce_values = reducers[stat]
                    row_stats[f"temp_{stat}_1m"] = reduce_values(all_temp) if all_temp else np.nan
                    row_stats[f"sal_{stat}_1m"] = reduce_values(all_sal) if all_sal else np.nan

        except FileNotFoundError as e:
            print(f"⚠️ Fichier manquant pour {dt}: {e}")
            row_stats = {}  # the whole row is reported as NaN

        for name in column_names:
            columns[name].append(row_stats.get(name, np.nan))

    for name in column_names:
        gdf[name] = columns[name]

    gdf.to_file(output_path, driver="GeoJSON")

if __name__ == "__main__":
    main()


#### 4. Extraction 1 an en amont (en utilisant les données monthly et daily)

a. En profondeur

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point, box
import os
from datetime import timedelta
import pandas as pd

# -----------------------------
# Fonctions utilitaires
# -----------------------------

def get_monthly_paths(dt_start, dt_end, base_folder, stat_type):
    """Return the existing monthly files between dt_start and the month before dt_end.

    A month is selected when its first day is on or after the calendar day of
    ``dt_start`` and it lies strictly before the month containing ``dt_end``
    (callers cover that last month with daily files).

    Args:
        dt_start: Start of the period (datetime or pandas Timestamp).
        dt_end: End of the period; the month it belongs to is excluded.
        base_folder: Root folder containing one sub-folder per year.
        stat_type: Statistic suffix used in the file name (min/max/mean).

    Returns:
        list[str]: Paths ``<base_folder>/<YYYY>/MARS3D_<YYYYMM>_<stat_type>.nc``
        that exist on disk, in chronological order.
    """
    # Compare calendar days only. With raw timestamps, a time-of-day on
    # dt_start later than the one on dt_end made date_range stop one month
    # early, and the former blind "[:-1]" then dropped a month to keep.
    start_day = pd.Timestamp(dt_start).normalize()
    end_month_start = pd.Timestamp(dt_end).normalize().replace(day=1)

    files = []
    for month in pd.date_range(start=start_day, end=end_month_start, freq='MS'):  # Month Start
        if month >= end_month_start:
            continue  # month of dt_end: excluded explicitly
        year_folder = os.path.join(base_folder, str(month.year))
        fname = f"MARS3D_{month.strftime('%Y%m')}_{stat_type}.nc"
        fpath = os.path.join(year_folder, fname)
        if os.path.exists(fpath):
            files.append(fpath)
    return files

def get_daily_paths(dt_start, dt_end, base_folder, stat_type):
    """Return the existing daily files (min/max/mean) between dt_start and dt_end.

    Args:
        dt_start: First day of the period (datetime or pandas Timestamp).
        dt_end: Last day of the period, included.
        base_folder: Root folder containing one sub-folder per year.
        stat_type: Statistic suffix used in the file name (min/max/mean).

    Returns:
        list[str]: Paths ``<base_folder>/<YYYY>/MARS3D_<YYYYMMDD>_<stat_type>.nc``
        that exist on disk, in chronological order.
    """
    # Iterate over calendar days: "(dt_end - dt_start).days" floors the
    # difference, so the last day was lost whenever dt_end carried an earlier
    # time-of-day than dt_start.
    first_day = pd.Timestamp(dt_start).normalize()
    last_day = pd.Timestamp(dt_end).normalize()

    files = []
    for day in pd.date_range(start=first_day, end=last_day, freq='D'):
        year_folder = os.path.join(base_folder, str(day.year))
        fname = f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc"
        fpath = os.path.join(year_folder, fname)
        if os.path.exists(fpath):
            files.append(fpath)
    return files

def get_temp_sal_for_poly(poly, depth_sampling_surface, ncdf_path):
    """Extract TEMP and SAL under a polygon at a given sampling depth.

    For every cell of the ``H0`` grid that intersects the polygon and has a
    positive, non-NaN ``H0`` value, the vertical layer is derived from the
    ratio ``depth_sampling_surface / H0`` and TEMP/SAL are read at that layer.
    Each cell is weighted by the fraction of its area covered by the polygon.
    When no cell qualifies, the 3 valid cells whose centres are closest to the
    polygon are used instead, with equal weights.

    Args:
        poly: Shapely geometry, interpreted as EPSG:4326.
        depth_sampling_surface: Sampling depth, in the same unit as ``H0``.
        ncdf_path: Path to a NetCDF file holding ``H0``, ``TEMP`` and ``SAL``.

    Returns:
        tuple: ``(temp_values, sal_values, temp_weighted_mean,
        sal_weighted_mean)``. The value arrays contain the non-NaN cell values;
        the means are weighted by the cell weights. ``([], [], nan, nan)`` is
        returned when no valid temperature was found.
    """
    n_layers = 60  # number of vertical layers assumed by the layer formula

    def _weighted_mean(values, cell_weights, keep):
        """Weighted mean over the cells flagged in keep (NaN if weights sum to 0)."""
        total = cell_weights[keep].sum()
        if total == 0:
            return np.nan
        return np.sum(values[keep] * cell_weights[keep]) / total

    # Context manager on the dataset that owns the file handle, so the file is
    # closed even when the extraction fails or a CRS-tagged copy is used.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as opened:
        ds = opened if hasattr(opened, 'crs') else opened.rio.write_crs("EPSG:4326")

        bathy = ds['H0']
        poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(bathy.rio.crs).iloc[0]
        transform = bathy.rio.transform()
        height, width = bathy.shape

        # Read the arrays once instead of going through xarray for each cell.
        bathy_grid = bathy.values
        temp_grid = ds['TEMP'].values
        sal_grid = ds['SAL'].values

    if temp_grid.ndim == 4:
        # Leading axis is indexed at 0, as in the per-cell reads it replaces.
        temp_grid = temp_grid[0]
        sal_grid = sal_grid[0]

    coords = []
    bathy_vals = []
    weights = []

    # --- Cells intersected by the polygon ---
    # Only cells inside the polygon's bounding window (padded by one cell so
    # that cells merely touching the polygon are still tested) can intersect
    # it; this avoids one shapely intersection per cell of the whole grid.
    if not poly_proj.is_empty:
        to_pixel = ~transform
        minx, miny, maxx, maxy = poly_proj.bounds
        corners = [
            to_pixel * xy
            for xy in ((minx, miny), (minx, maxy), (maxx, miny), (maxx, maxy))
        ]
        corner_cols = [c for c, _ in corners]
        corner_rows = [r for _, r in corners]
        col_start = max(0, int(np.floor(min(corner_cols))) - 1)
        col_stop = min(width, int(np.ceil(max(corner_cols))) + 2)
        row_start = max(0, int(np.floor(min(corner_rows))) - 1)
        row_stop = min(height, int(np.ceil(max(corner_rows))) + 2)

        for j in range(row_start, row_stop):
            for i in range(col_start, col_stop):
                x_min, y_max = transform * (i, j)
                x_max, y_min = transform * (i + 1, j + 1)
                pixel_poly = box(x_min, y_min, x_max, y_max)
                intersection = poly_proj.intersection(pixel_poly)
                if intersection.is_empty:
                    continue
                b_val = bathy_grid[j, i]
                if not np.isnan(b_val) and b_val > 0:
                    coords.append((j, i))
                    bathy_vals.append(b_val)
                    # fraction of the cell area covered by the polygon
                    weights.append(intersection.area / pixel_poly.area)

    # --- Fallback: 3 nearest valid cells when none is intersected ---
    if not bathy_vals:
        pixel_distances = []
        for j in range(height):
            for i in range(width):
                b_val = bathy_grid[j, i]
                if np.isnan(b_val) or b_val <= 0:
                    continue
                x_c, y_c = transform * (i + 0.5, j + 0.5)
                dist = Point(x_c, y_c).distance(poly_proj)
                pixel_distances.append((dist, j, i, b_val))

        pixel_distances.sort(key=lambda x: x[0])
        closest = pixel_distances[:3]
        coords = [(j, i) for _, j, i, _ in closest]
        bathy_vals = [b for _, _, _, b in closest]
        weights = [1.0 for _ in closest]

    # --- Vertical layer of each cell ---
    layers_index = []
    for b in bathy_vals:
        layer = n_layers - int(depth_sampling_surface / b * n_layers)
        layers_index.append(max(0, min(layer - 1, n_layers - 1)))

    # --- TEMP/SAL per cell, kept aligned with the weights ---
    temp_cells = np.array([temp_grid[l, j, i] for (j, i), l in zip(coords, layers_index)])
    sal_cells = np.array([sal_grid[l, j, i] for (j, i), l in zip(coords, layers_index)])
    weights_arr = np.asarray(weights, dtype=float)

    temp_ok = ~np.isnan(temp_cells)
    sal_ok = ~np.isnan(sal_cells)

    if not temp_ok.any():
        return [], [], np.nan, np.nan

    # NaN cells are masked out of values and weights together; filtering the
    # values alone left them misaligned with (or longer/shorter than) the
    # weights.
    temp_weighted_mean = _weighted_mean(temp_cells, weights_arr, temp_ok)
    sal_weighted_mean = _weighted_mean(sal_cells, weights_arr, sal_ok)

    return temp_cells[temp_ok], sal_cells[sal_ok], temp_weighted_mean, sal_weighted_mean

# -----------------------------
# Main
# -----------------------------

def main(daily_base_folder=None, monthly_base_folder=None, polygons_path=None,
         output_path="adne_extract_med_ouest.geojson"):
    """Add 1-year min/max/mean TEMP and SAL at sampling depth to each polygon.

    For every row, the period runs from ``row['date']`` minus 365 days to
    ``row['date']``. Monthly files are used up to the month before the date,
    daily files from the first day of the date's month onwards. For each
    statistic ("max", "min", "mean") the matching files are read with
    ``get_temp_sal_for_poly`` and reduced:

    * ``*_max_1y``: maximum of all pixel values of the "max" files,
    * ``*_min_1y``: minimum of all pixel values of the "min" files,
    * ``*_mean_1y``: mean of the per-file weighted means of the "mean" files.

    Args:
        daily_base_folder: Root folder of the daily NetCDF files.
        monthly_base_folder: Root folder of the monthly NetCDF files.
        polygons_path: Polygon file readable by ``geopandas.read_file``. It
            must provide the ``date`` and ``depth_sampling_surface`` columns
            (adapt the column names below if needed).
        output_path: GeoJSON file written at the end.

    Raises:
        ValueError: If one of the folders or ``polygons_path`` is not provided.
    """
    if daily_base_folder is None or monthly_base_folder is None or polygons_path is None:
        raise ValueError(
            "main() requires daily_base_folder, monthly_base_folder and "
            "polygons_path to be set"
        )

    gdf = gpd.read_file(polygons_path)

    column_names = (
        'temp_max_1y', 'temp_min_1y', 'temp_mean_1y',
        'sal_max_1y', 'sal_min_1y', 'sal_mean_1y',
    )
    columns = {name: [] for name in column_names}
    reducers = {'max': np.nanmax, 'min': np.nanmin}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 1 an pondérée"):
        dt = row['date']
        dt_start = dt - timedelta(days=365)
        last_month_start = dt.replace(day=1)

        # Collect per row and commit once, so an error raised after the first
        # statistic cannot leave the six columns with different lengths.
        row_stats = {}
        try:
            for stat in ('max', 'min', 'mean'):
                # Monthly files, except for the last month
                files_monthly = get_monthly_paths(dt_start, last_month_start, monthly_base_folder, stat)
                # Daily files of the last month
                files_daily = get_daily_paths(last_month_start, dt, daily_base_folder, stat)

                all_temp, all_sal, temp_means, sal_means = [], [], [], []
                for f in files_monthly + files_daily:
                    t_vals, s_vals, t_wmean, s_wmean = get_temp_sal_for_poly(
                        row['geometry'], row['depth_sampling_surface'], f  # adapt if needed
                    )
                    all_temp.extend(t_vals)
                    all_sal.extend(s_vals)
                    if not np.isnan(t_wmean):
                        temp_means.append(t_wmean)
                    if not np.isnan(s_wmean):
                        sal_means.append(s_wmean)

                if stat == 'mean':
                    row_stats['temp_mean_1y'] = np.nanmean(temp_means) if temp_means else np.nan
                    row_stats['sal_mean_1y'] = np.nanmean(sal_means) if sal_means else np.nan
                else:
                    reduce_values = reducers[stat]
                    row_stats[f'temp_{stat}_1y'] = reduce_values(all_temp) if all_temp else np.nan
                    row_stats[f'sal_{stat}_1y'] = reduce_values(all_sal) if all_sal else np.nan

        except FileNotFoundError as e:
            print(f"Fichier manquant pour {dt}: {e}")
            row_stats = {}  # the whole row is reported as NaN

        for name in column_names:
            columns[name].append(row_stats.get(name, np.nan))

    for name in column_names:
        gdf[name] = columns[name]

    gdf.to_file(output_path, driver="GeoJSON")


if __name__ == "__main__":
    main()


b. En surface 

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point
from datetime import timedelta
import os
from scipy.spatial import cKDTree
import pandas as pd

# ===============================================================
# FONCTIONS UTILITAIRES
# ===============================================================

def get_monthly_paths(dt_start, dt_end, base_folder, stat_type):
    """Return the existing monthly files between dt_start and the month before dt_end.

    A month is selected when its first day is on or after the calendar day of
    ``dt_start`` and it lies strictly before the month containing ``dt_end``
    (callers cover that last month with daily files).

    Args:
        dt_start: Start of the period (datetime or pandas Timestamp).
        dt_end: End of the period; the month it belongs to is excluded.
        base_folder: Root folder containing one sub-folder per year.
        stat_type: Statistic suffix used in the file name (min/max/mean).

    Returns:
        list[str]: Paths ``<base_folder>/<YYYY>/MARS3D_<YYYYMM>_<stat_type>.nc``
        that exist on disk, in chronological order.
    """
    # Work on calendar days: with raw timestamps, differing times-of-day could
    # make date_range end one month early, after which the former "[:-1]"
    # removed a month that belongs to the period.
    start_day = pd.Timestamp(dt_start).normalize()
    end_month_start = pd.Timestamp(dt_end).normalize().replace(day=1)

    files = []
    for month in pd.date_range(start=start_day, end=end_month_start, freq='MS'):
        if month >= end_month_start:
            continue  # month of dt_end: excluded explicitly
        year_folder = os.path.join(base_folder, str(month.year))
        fname = f"MARS3D_{month.strftime('%Y%m')}_{stat_type}.nc"
        fpath = os.path.join(year_folder, fname)
        if os.path.exists(fpath):
            files.append(fpath)
    return files

def get_daily_paths(dt_start, dt_end, base_folder, stat_type):
    """Return the existing daily files (min/max/mean) between dt_start and dt_end.

    Args:
        dt_start: First day of the period (datetime or pandas Timestamp).
        dt_end: Last day of the period, included.
        base_folder: Root folder containing one sub-folder per year.
        stat_type: Statistic suffix used in the file name (min/max/mean).

    Returns:
        list[str]: Paths ``<base_folder>/<YYYY>/MARS3D_<YYYYMMDD>_<stat_type>.nc``
        that exist on disk, in chronological order.
    """
    # Calendar-day iteration: the former "(dt_end - dt_start).days + 1" count
    # floors the difference and skipped the last day when dt_end had an
    # earlier time-of-day than dt_start.
    first_day = pd.Timestamp(dt_start).normalize()
    last_day = pd.Timestamp(dt_end).normalize()

    files = []
    for day in pd.date_range(start=first_day, end=last_day, freq='D'):
        year_folder = os.path.join(base_folder, str(day.year))
        fname = f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc"
        fpath = os.path.join(year_folder, fname)
        if os.path.exists(fpath):
            files.append(fpath)
    return files

def get_ws_vel_for_poly(poly, ncdf_path):
    """Extract TEMP and SAL values under a polygon from one NetCDF file.

    Despite its name, this function reads the ``TEMP`` and ``SAL`` variables,
    both at ``time=0`` and at the last index of ``level`` (adapt the ``isel``
    calls to the layout of the NetCDF file).

    All grid cells touched by the polygon are kept. When a variable has no
    valid (non-NaN) cell under the polygon, the values of the (up to) 3 valid
    cells whose centres are nearest to the polygon centroid are used instead.

    Args:
        poly: Shapely geometry, interpreted as EPSG:4326.
        ncdf_path: Path to the NetCDF file.

    Returns:
        tuple: ``(temp_values, sal_values)``, two 1-D arrays of non-NaN values.
    """

    def _nearest_valid(grid, x_centres, y_centres, target, k=3):
        """Return the values of the (up to) k non-NaN cells nearest to target."""
        valid = ~np.isnan(grid)
        candidates = grid[valid]
        if candidates.size == 0:
            # Nothing to search: building/querying a tree would fail.
            return candidates
        tree = cKDTree(np.column_stack([x_centres[valid], y_centres[valid]]))
        _, nearest = tree.query([target.x, target.y], k=min(k, candidates.size))
        # query() returns a scalar index when k == 1; keep the result 1-D so
        # callers can pass it to list.extend().
        return candidates[np.atleast_1d(nearest)]

    # Context manager: the file is closed even if the extraction fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as ds:
        temp = ds['TEMP'].isel(time=0, level=-1)  # adapt if needed
        sal = ds['SAL'].isel(time=0, level=-1)  # adapt if needed

        if temp.rio.crs is None:
            temp = temp.rio.write_crs("EPSG:4326")
        if sal.rio.crs is None:
            sal = sal.rio.write_crs("EPSG:4326")

        # Work in the raster CRS for both the clip and the fallback search
        # (the reprojected polygon used to be computed but never used).
        poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(temp.rio.crs).iloc[0]

        # --- Vectorised clip; drop=False keeps the grid shape ---
        temp_vals = temp.rio.clip([poly_proj], all_touched=True, drop=False).values.ravel()
        temp_vals = temp_vals[~np.isnan(temp_vals)]
        sal_vals = sal.rio.clip([poly_proj], all_touched=True, drop=False).values.ravel()
        sal_vals = sal_vals[~np.isnan(sal_vals)]

        # --- Fallback: 3 nearest valid cells when nothing lies under the polygon ---
        if temp_vals.size == 0 or sal_vals.size == 0:
            transform = temp.rio.transform()
            n_rows, n_cols = temp.shape
            col_grid, row_grid = np.meshgrid(np.arange(n_cols) + 0.5, np.arange(n_rows) + 0.5)
            x_centres, y_centres = transform * (col_grid, row_grid)
            centroid = poly_proj.centroid

            if temp_vals.size == 0:
                temp_vals = _nearest_valid(temp.values, x_centres, y_centres, centroid)
            if sal_vals.size == 0:
                sal_vals = _nearest_valid(sal.values, x_centres, y_centres, centroid)

    return temp_vals, sal_vals

# ===============================================================
# SCRIPT PRINCIPAL
# ===============================================================

def main(daily_base_folder=None, monthly_base_folder=None, polygons_path=None,
         output_path="adne_extract_corse.geojson"):
    """Add 1-year min/max/mean TEMP and SAL (``level=-1``) to each polygon.

    For every row, the period runs from ``row['date']`` minus 365 days to
    ``row['date']``. Monthly files are used up to the month before the date,
    daily files from the first day of the date's month onwards. For each
    statistic ("max", "min", "mean") all values returned by
    ``get_ws_vel_for_poly`` over the matching files are pooled and reduced
    with nanmax, nanmin and nanmean respectively.

    Args:
        daily_base_folder: Root folder of the daily NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Daily/Corse2".
        monthly_base_folder: Root folder of the monthly NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Monthly/Corse2".
        polygons_path: Polygon file readable by ``geopandas.read_file``; it
            must provide a ``date`` column.
        output_path: GeoJSON file written at the end.

    Raises:
        ValueError: If one of the folders or ``polygons_path`` is not provided.
    """
    if daily_base_folder is None or monthly_base_folder is None or polygons_path is None:
        raise ValueError(
            "main() requires daily_base_folder, monthly_base_folder and "
            "polygons_path to be set"
        )

    gdf = gpd.read_file(polygons_path)

    column_names = (
        'temp_max_1y', 'temp_min_1y', 'temp_mean_1y',
        'sal_max_1y', 'sal_min_1y', 'sal_mean_1y',
    )
    columns = {name: [] for name in column_names}
    reducers = {'max': np.nanmax, 'min': np.nanmin, 'mean': np.nanmean}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 1 an WS/VEL pondérée"):
        dt = row['date']
        dt_start = dt - timedelta(days=365)
        last_month_start = dt.replace(day=1)

        # Collect per row and commit once, so an error raised after the first
        # statistic cannot leave the six columns with different lengths.
        row_stats = {}
        try:
            for stat in ('max', 'min', 'mean'):
                files_monthly = get_monthly_paths(dt_start, last_month_start, monthly_base_folder, stat)
                files_daily = get_daily_paths(last_month_start, dt, daily_base_folder, stat)

                all_temp, all_sal = [], []
                for f in files_monthly + files_daily:
                    temp_vals, sal_vals = get_ws_vel_for_poly(row['geometry'], f)
                    all_temp.extend(temp_vals)
                    all_sal.extend(sal_vals)

                reduce_values = reducers[stat]
                row_stats[f'temp_{stat}_1y'] = reduce_values(all_temp) if all_temp else np.nan
                row_stats[f'sal_{stat}_1y'] = reduce_values(all_sal) if all_sal else np.nan

        except FileNotFoundError as e:
            print(f"⚠️ Fichier manquant pour {dt} : {e}")
            row_stats = {}  # the whole row is reported as NaN

        for name in column_names:
            columns[name].append(row_stats.get(name, np.nan))

    for name in column_names:
        gdf[name] = columns[name]

    gdf.to_file(output_path, driver="GeoJSON")

# ===============================================================
if __name__ == "__main__":
    main()


#### 5. Extraction 1 mois en amont (en utilisant les données monthly)

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point, box
import os
from datetime import timedelta
from dateutil.relativedelta import relativedelta


# -------------------------------------------------------------
# 1. Récupération du fichier mensuel du mois précédent
# -------------------------------------------------------------
def get_monthly_netcdf_path(dt, base_folder, stat_type):
    """Return the monthly NetCDF file of the month preceding ``dt``.

    Example: ``dt = 2019-07-01`` selects the June 2019 file, i.e.
    ``<base_folder>/2019/MARS3D_201906_<stat_type>.nc``.

    Args:
        dt: Reference date.
        base_folder: Root folder containing one sub-folder per year.
        stat_type: Statistic suffix used in the file name.

    Returns:
        str: Path of the monthly file.

    Raises:
        FileNotFoundError: If that file does not exist.
    """
    previous = dt - relativedelta(months=1)
    yyyymm = previous.strftime("%Y%m")
    fpath = os.path.join(
        base_folder,
        str(previous.year),
        f"MARS3D_{yyyymm}_{stat_type}.nc",
    )

    if os.path.exists(fpath):
        return fpath
    raise FileNotFoundError(f"Fichier introuvable : {fpath}")


# -------------------------------------------------------------
# 2. Extraction TEMP/SAL pour un polygone et un NetCDF mensuel
# -------------------------------------------------------------
def get_temp_sal_for_poly(poly, depth_sampling_surface, ncdf_path):
    """Extract TEMP and SAL under a polygon from a monthly NetCDF file.

    For every cell of the ``H0`` grid that intersects the polygon and has a
    positive, non-NaN ``H0`` value, the vertical layer is derived from the
    ratio ``depth_sampling_surface / H0`` and TEMP/SAL are read at that layer.
    Each cell is weighted by the fraction of its area covered by the polygon.
    When no cell qualifies, the 3 valid cells whose centres are closest to the
    polygon are used instead, with equal weights.

    Args:
        poly: Shapely geometry, interpreted as EPSG:4326.
        depth_sampling_surface: Sampling depth, in the same unit as ``H0``.
        ncdf_path: Path to a NetCDF file holding ``H0``, ``TEMP`` and ``SAL``.

    Returns:
        tuple: ``(temp_values, sal_values, temp_mean, sal_mean)``. The value
        arrays contain the non-NaN cell values; the means are weighted by the
        cell weights. ``([], [], nan, nan)`` is returned when no valid
        temperature was found.
    """
    n_layers = 60  # number of vertical layers assumed by the layer formula

    def _weighted_mean(values, cell_weights, keep):
        """Weighted mean over the cells flagged in keep (NaN if weights sum to 0)."""
        total = cell_weights[keep].sum()
        if total == 0:
            return np.nan
        return np.sum(values[keep] * cell_weights[keep]) / total

    # Context manager on the dataset that owns the file handle, so the file is
    # closed even when the extraction fails or a CRS-tagged copy is used.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as opened:
        ds = opened if hasattr(opened, 'crs') else opened.rio.write_crs("EPSG:4326")

        bathy = ds['H0']
        poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(bathy.rio.crs).iloc[0]
        transform = bathy.rio.transform()
        height, width = bathy.shape

        # Read the arrays once instead of going through xarray for each cell.
        bathy_grid = bathy.values
        temp_grid = ds['TEMP'].values
        sal_grid = ds['SAL'].values

    if temp_grid.ndim == 4:
        # Leading axis is indexed at 0, as in the per-cell reads it replaces.
        temp_grid = temp_grid[0]
        sal_grid = sal_grid[0]

    coords = []
    bathy_vals = []
    weights = []

    # ---- Cells intersected by the polygon ----
    # Only cells inside the polygon's bounding window (padded by one cell so
    # that cells merely touching the polygon are still tested) can intersect
    # it; this avoids one shapely intersection per cell of the whole grid.
    if not poly_proj.is_empty:
        to_pixel = ~transform
        minx, miny, maxx, maxy = poly_proj.bounds
        corners = [
            to_pixel * xy
            for xy in ((minx, miny), (minx, maxy), (maxx, miny), (maxx, maxy))
        ]
        corner_cols = [c for c, _ in corners]
        corner_rows = [r for _, r in corners]
        col_start = max(0, int(np.floor(min(corner_cols))) - 1)
        col_stop = min(width, int(np.ceil(max(corner_cols))) + 2)
        row_start = max(0, int(np.floor(min(corner_rows))) - 1)
        row_stop = min(height, int(np.ceil(max(corner_rows))) + 2)

        for j in range(row_start, row_stop):
            for i in range(col_start, col_stop):
                x_min, y_max = transform * (i, j)
                x_max, y_min = transform * (i + 1, j + 1)
                pixel_poly = box(x_min, y_min, x_max, y_max)

                intersection = poly_proj.intersection(pixel_poly)
                if intersection.is_empty:
                    continue
                b_val = bathy_grid[j, i]
                if not np.isnan(b_val) and b_val > 0:
                    coords.append((j, i))
                    bathy_vals.append(b_val)
                    weights.append(intersection.area / pixel_poly.area)

    # ---- Fallback: take the 3 nearest valid cells when none is intersected ----
    if not bathy_vals:
        pixel_distances = []
        for j in range(height):
            for i in range(width):
                b_val = bathy_grid[j, i]
                if np.isnan(b_val) or b_val <= 0:
                    continue

                x_c, y_c = transform * (i + 0.5, j + 0.5)
                dist = Point(x_c, y_c).distance(poly_proj)
                pixel_distances.append((dist, j, i, b_val))

        pixel_distances.sort(key=lambda x: x[0])
        closest = pixel_distances[:3]
        coords = [(j, i) for _, j, i, _ in closest]
        bathy_vals = [b for _, _, _, b in closest]
        weights = [1.0] * len(coords)

    # ---- Vertical layer of each cell ----
    layers_index = []
    for b in bathy_vals:
        layer = n_layers - int(depth_sampling_surface / b * n_layers)
        layers_index.append(max(0, min(layer - 1, n_layers - 1)))

    # ---- TEMP/SAL per cell, kept aligned with the weights ----
    temp_cells = np.array([temp_grid[l, j, i] for (j, i), l in zip(coords, layers_index)])
    sal_cells = np.array([sal_grid[l, j, i] for (j, i), l in zip(coords, layers_index)])
    weights_arr = np.asarray(weights, dtype=float)

    temp_ok = ~np.isnan(temp_cells)
    sal_ok = ~np.isnan(sal_cells)

    if not temp_ok.any():
        return [], [], np.nan, np.nan

    # NaN cells are masked out of values and weights together; filtering the
    # values alone left them misaligned with (or longer/shorter than) the
    # weights.
    temp_mean = _weighted_mean(temp_cells, weights_arr, temp_ok)
    sal_mean = _weighted_mean(sal_cells, weights_arr, sal_ok)

    return temp_cells[temp_ok], sal_cells[sal_ok], temp_mean, sal_mean


# -------------------------------------------------------------
# 3. Programme principal adapté aux fichiers mensuels
# -------------------------------------------------------------
def main(base_folder=None, polygons_path=None, output_path="grille_med_ouest.geojson"):
    """Add previous-month min/max/mean TEMP and SAL to each polygon.

    For every row of the polygon file, the monthly "max", "min" and "mean"
    NetCDF files of the month preceding ``row['date']`` are read with
    ``get_temp_sal_for_poly``:

    * ``*_max``: maximum of the pixel values of the "max" file,
    * ``*_min``: minimum of the pixel values of the "min" file,
    * ``*_mean``: weighted mean returned for the "mean" file.

    Args:
        base_folder: Root folder of the monthly NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/SAL-TEMP_latlon/Monthly/Med-Ouest".
        polygons_path: Polygon file readable by ``geopandas.read_file``. It
            must provide the ``date`` and ``depth_sampling_surface`` columns
            (adapt the column names below if needed).
        output_path: GeoJSON file written at the end.

    Raises:
        ValueError: If ``base_folder`` or ``polygons_path`` is not provided.
    """
    if base_folder is None or polygons_path is None:
        raise ValueError(
            "main() requires base_folder (monthly NetCDF root) and "
            "polygons_path (polygon file) to be set"
        )

    gdf = gpd.read_file(polygons_path)

    column_names = ('temp_max', 'temp_min', 'temp_mean', 'sal_max', 'sal_min', 'sal_mean')
    columns = {name: [] for name in column_names}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction mensuelle"):
        dt = row['date']
        geometry = row['geometry']
        depth = row['depth_sampling_surface']  # adapt if needed

        # Collect per row and commit once: a file missing for "min" or "mean"
        # must not leave the six columns with different lengths.
        row_stats = {}
        try:
            # ---- MAX ----
            f_max = get_monthly_netcdf_path(dt, base_folder, "max")
            t_vals, s_vals, _, _ = get_temp_sal_for_poly(geometry, depth, f_max)
            row_stats['temp_max'] = np.nanmax(t_vals) if len(t_vals) else np.nan
            row_stats['sal_max'] = np.nanmax(s_vals) if len(s_vals) else np.nan

            # ---- MIN ----
            f_min = get_monthly_netcdf_path(dt, base_folder, "min")
            t_vals, s_vals, _, _ = get_temp_sal_for_poly(geometry, depth, f_min)
            row_stats['temp_min'] = np.nanmin(t_vals) if len(t_vals) else np.nan
            row_stats['sal_min'] = np.nanmin(s_vals) if len(s_vals) else np.nan

            # ---- MEAN ----
            f_mean = get_monthly_netcdf_path(dt, base_folder, "mean")
            _, _, t_mean, s_mean = get_temp_sal_for_poly(geometry, depth, f_mean)
            row_stats['temp_mean'] = t_mean
            row_stats['sal_mean'] = s_mean

        except FileNotFoundError as e:
            print(f"⚠️ Fichier manquant pour {dt}: {e}")
            row_stats = {}  # the whole row is reported as NaN

        for name in column_names:
            columns[name].append(row_stats.get(name, np.nan))

    # ---- Save ----
    for name in column_names:
        gdf[name] = columns[name]

    gdf.to_file(output_path, driver="GeoJSON")


if __name__ == "__main__":
    main()


## Extraction des variables tension du vent (WINDSTRESS) et vitesse du courant (VELOCITY) en surface à partir des données MARS 3D

#### 1. Extraction 24h en amont

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import box
import glob
import os
import re
from datetime import datetime, timedelta
from scipy.spatial import cKDTree

def get_netcdf_paths_for_period(dt, base_folder, hours=24):
    """Find the NetCDF files time-stamped within the ``hours`` preceding ``dt``.

    Files named ``MARC_F2-MARS3D-MENOR1200_<YYYYMMDD>T<HHMM>Z.nc`` are looked
    up in ``<base_folder>/<year>`` for every year touched by the window; a
    file is kept when its time stamp lies in ``[dt - hours, dt]``.

    Args:
        dt: End of the time window.
        base_folder: Root folder containing one sub-folder per year.
        hours: Length of the window, in hours.

    Returns:
        list[str]: Matching file paths sorted by time stamp.

    Raises:
        FileNotFoundError: If no file falls inside the window.
    """
    start_dt = dt - timedelta(hours=hours)
    stamp_regex = re.compile(r"_(\d{8}T\d{4})Z\.nc$")

    dated_paths = []
    year = start_dt.year
    while year <= dt.year:
        pattern = os.path.join(
            base_folder, str(year), "MARC_F2-MARS3D-MENOR1200_????????T????Z.nc"
        )
        for path in glob.glob(pattern):
            found = stamp_regex.search(path)
            if found is None:
                continue
            stamp = datetime.strptime(found.group(1), "%Y%m%dT%H%M")
            if start_dt <= stamp <= dt:
                dated_paths.append((stamp, path))
        year += 1

    if not dated_paths:
        raise FileNotFoundError(f"Aucun fichier trouvé entre {start_dt} et {dt}")

    dated_paths.sort(key=lambda pair: pair[0])
    return [path for _, path in dated_paths]


def get_ws_vel_for_poly(poly, ncdf_path, depth_index=0):
    """Extract WINDSTRESS and VELOCITY values under a polygon from one NetCDF file.

    ``WINDSTRESS`` is read at index 0 of its first axis and ``VELOCITY`` at
    index 0 of its first axis and ``depth_index`` of its second axis.

    All grid cells touched by the polygon are kept. When a variable has no
    valid (non-NaN) cell under the polygon, the values of the (up to) 3 valid
    cells whose centres are nearest to the polygon centroid are used instead.

    Args:
        poly: Shapely geometry, interpreted as EPSG:4326.
        ncdf_path: Path to the NetCDF file.
        depth_index: Index along the second axis of ``VELOCITY``.

    Returns:
        tuple: ``(ws_values, vel_values, ws_mean, vel_mean)`` where the first
        two are 1-D arrays of non-NaN values and the last two their plain
        means (NaN when the corresponding array is empty).
    """

    def _nearest_valid(grid, x_centres, y_centres, target, k=3):
        """Return the values of the (up to) k non-NaN cells nearest to target."""
        valid = ~np.isnan(grid)
        candidates = grid[valid]
        if candidates.size == 0:
            # Nothing to search: building/querying a tree would fail.
            return candidates
        tree = cKDTree(np.column_stack([x_centres[valid], y_centres[valid]]))
        _, nearest = tree.query([target.x, target.y], k=min(k, candidates.size))
        # query() returns a scalar index when k == 1; keep the result 1-D so
        # callers can use len() and list.extend() on it.
        return candidates[np.atleast_1d(nearest)]

    # Context manager on the dataset that owns the file handle, so the file is
    # closed even when the extraction fails or a CRS-tagged copy is used.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as opened:
        ds = opened if hasattr(opened, "crs") else opened.rio.write_crs("EPSG:4326")

        ws = ds["WINDSTRESS"]
        vel = ds["VELOCITY"]

        # Same per-variable fallback as the TEMP/SAL extractors: clipping
        # needs a CRS on the array itself.
        if ws.rio.crs is None:
            ws = ws.rio.write_crs("EPSG:4326")
        if vel.rio.crs is None:
            vel = vel.rio.write_crs("EPSG:4326")

        # Work in the raster CRS for both the clip and the fallback search.
        poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(ws.rio.crs).iloc[0]

        # Clip the raster with the polygon; drop=False keeps the grid shape.
        ws_vals = ws.rio.clip([poly_proj], all_touched=True, drop=False).values[0].ravel()
        ws_vals = ws_vals[~np.isnan(ws_vals)]

        vel_vals = vel.rio.clip([poly_proj], all_touched=True, drop=False).values[0, depth_index].ravel()
        vel_vals = vel_vals[~np.isnan(vel_vals)]

        # Fallback when no valid cell lies under the polygon.
        if ws_vals.size == 0 or vel_vals.size == 0:
            transform = ws.rio.transform()
            height, width = ws.shape[1:]
            col_grid, row_grid = np.meshgrid(np.arange(width) + 0.5, np.arange(height) + 0.5)
            x_coords, y_coords = transform * (col_grid, row_grid)
            centroid = poly_proj.centroid

            if ws_vals.size == 0:
                ws_vals = _nearest_valid(ws.values[0], x_coords, y_coords, centroid)
            if vel_vals.size == 0:
                vel_vals = _nearest_valid(vel.values[0, depth_index], x_coords, y_coords, centroid)

    ws_mean = np.mean(ws_vals) if ws_vals.size > 0 else np.nan
    vel_mean = np.mean(vel_vals) if vel_vals.size > 0 else np.nan

    return ws_vals, vel_vals, ws_mean, vel_mean


def main(base_folder=None, input_path=None, output_path="adne_extract_corse.geojson"):
    """Add 24 h WINDSTRESS / VELOCITY statistics to every feature and save them.

    For each row of the input layer, the NetCDF files returned by
    ``get_netcdf_paths_for_period`` for the 24 hours before ``row["datetime"]``
    are sampled with ``get_ws_vel_for_poly``. The minimum and maximum are taken
    over all sampled pixel values; the mean is the average of the per-file
    means. Rows for which no file is found get NaN in all six columns.

    Args:
        base_folder: Root folder of the 3-hourly NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/3H".
        input_path: Vector file to read with ``geopandas.read_file``.
        output_path: GeoJSON file written at the end.

    Raises:
        ValueError: If ``base_folder`` or ``input_path`` is not provided.
    """
    # The original cell left both values as "to be edited" placeholders, which
    # made the code unparsable; fail with a clear message instead.
    if base_folder is None or input_path is None:
        raise ValueError("main() requires base_folder and input_path to be set")

    gdf = gpd.read_file(input_path)

    ws_min, ws_max, ws_mean = [], [], []
    vel_min, vel_max, vel_mean = [], [], []

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction WINDSTRESS & VELOCITY sur 24h"):
        dt = row["datetime"]
        try:
            files = get_netcdf_paths_for_period(dt, base_folder, hours=24)
            all_ws, all_vel = [], []
            file_ws_means, file_vel_means = [], []

            for f in files:
                ws_vals, vel_vals, ws_wmean, vel_wmean = get_ws_vel_for_poly(row["geometry"], f)
                all_ws.extend(ws_vals)
                all_vel.extend(vel_vals)
                # Files that produced no value report a NaN mean; leave them out.
                if not np.isnan(ws_wmean):
                    file_ws_means.append(ws_wmean)
                if not np.isnan(vel_wmean):
                    file_vel_means.append(vel_wmean)

            row_stats = (
                np.nanmin(all_ws) if all_ws else np.nan,
                np.nanmax(all_ws) if all_ws else np.nan,
                np.nanmean(file_ws_means) if file_ws_means else np.nan,
                np.nanmin(all_vel) if all_vel else np.nan,
                np.nanmax(all_vel) if all_vel else np.nan,
                np.nanmean(file_vel_means) if file_vel_means else np.nan,
            )
        except FileNotFoundError as e:
            print(f"⚠️ Fichier manquant pour {dt}: {e}")
            row_stats = (np.nan,) * 6

        # Append exactly once per row so every column keeps len(gdf) entries.
        for column, value in zip((ws_min, ws_max, ws_mean, vel_min, vel_max, vel_mean), row_stats):
            column.append(value)

    gdf["wind_min_24h"] = ws_min
    gdf["wind_max_24h"] = ws_max
    gdf["wind_mean_24h"] = ws_mean
    gdf["vel_min_24h"] = vel_min
    gdf["vel_max_24h"] = vel_max
    gdf["vel_mean_24h"] = vel_mean

    gdf.to_file(output_path, driver="GeoJSON")


# Script entry point: run the 24 h extraction only when this code is executed directly.
if __name__ == "__main__":
    main()


#### 2. Extraction 7 jours en amont

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point
from datetime import timedelta
import os
from scipy.spatial import cKDTree

def get_netcdf_paths_for_period(dt, base_folder, days, stat_type):
    """List the existing daily ``stat_type`` files for the ``days`` days up to ``dt``.

    One candidate path ``<base_folder>/<year>/MARS3D_<YYYYMMDD>_<stat_type>.nc``
    is built for every day from ``dt - days`` to ``dt`` (both included). Only
    the candidates present on disk are returned, in chronological order.

    Raises:
        FileNotFoundError: If none of the candidate files exists.
    """
    start_dt = dt - timedelta(days=days)
    day_count = (dt - start_dt).days + 1

    candidates = []
    for offset in range(day_count):
        day = start_dt + timedelta(offset)
        file_name = f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc"
        candidates.append(os.path.join(base_folder, str(day.year), file_name))

    files = [path for path in candidates if os.path.exists(path)]
    if files:
        return files
    raise FileNotFoundError(f"Aucun fichier {stat_type} trouvé entre {start_dt} et {dt}")

def _nearest_valid_values(field, x_coords, y_coords, point, k=3):
    """Return the values of the (up to) ``k`` non-NaN pixels closest to ``point``."""
    valid = ~np.isnan(field)
    values = field[valid]
    if values.size == 0:
        # The whole layer is NaN: there is nothing to fall back on.
        return values
    tree = cKDTree(np.column_stack([x_coords[valid], y_coords[valid]]))
    _, idx = tree.query(point, k=min(k, values.size))
    # query() returns a scalar index when k == 1; keep the result 1-D so that
    # callers can safely use len() and list.extend().
    return values[np.atleast_1d(idx)]


def get_ws_vel_for_poly(poly, ncdf_path):
    """Sample WINDSTRESS and VELOCITY of one NetCDF file over a polygon.

    The first time step is used; VELOCITY is taken at the last index of its
    ``level`` dimension. Both fields are clipped with ``poly`` (every touched
    pixel is kept). When a clip yields no value, the three valid pixels
    nearest to the polygon centroid are used instead.

    Args:
        poly: Shapely geometry, assumed to be expressed in the raster CRS
            (EPSG:4326 is written when the file carries none).
        ncdf_path: Path of the NetCDF file to read.

    Returns:
        Tuple ``(ws_vals, vel_vals, ws_mean, vel_mean)``: two 1-D arrays of
        non-NaN pixel values and their means (NaN when an array is empty).
    """
    # The context manager closes the file even if clipping or reading fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as ds:
        ws = ds["WINDSTRESS"].isel(time=0)
        vel = ds["VELOCITY"].isel(time=0, level=-1)

        if ws.rio.crs is None:
            ws = ws.rio.write_crs("EPSG:4326")
        if vel.rio.crs is None:
            vel = vel.rio.write_crs("EPSG:4326")

        # Vectorised clip; drop=False keeps the grid shape and masks with NaN.
        ws_vals = ws.rio.clip([poly], all_touched=True, drop=False).values.flatten()
        ws_vals = ws_vals[~np.isnan(ws_vals)]
        vel_vals = vel.rio.clip([poly], all_touched=True, drop=False).values.flatten()
        vel_vals = vel_vals[~np.isnan(vel_vals)]

        # Fallback when the polygon intersects no valid pixel.
        if len(ws_vals) == 0 or len(vel_vals) == 0:
            transform = ws.rio.transform()
            h, w = ws.shape
            # Pixel-centre coordinates of the whole grid.
            xv, yv = transform * np.meshgrid(np.arange(w) + 0.5, np.arange(h) + 0.5)
            centroid = [poly.centroid.x, poly.centroid.y]

            if len(ws_vals) == 0:
                ws_vals = _nearest_valid_values(ws.values, xv, yv, centroid)
            if len(vel_vals) == 0:
                vel_vals = _nearest_valid_values(vel.values, xv, yv, centroid)

    ws_mean = np.mean(ws_vals) if len(ws_vals) > 0 else np.nan
    vel_mean = np.mean(vel_vals) if len(vel_vals) > 0 else np.nan

    return ws_vals, vel_vals, ws_mean, vel_mean


def main(base_folder=None, input_path=None, output_path="adne_extract_corse.geojson"):
    """Add 7-day WINDSTRESS / VELOCITY statistics to every feature and save them.

    For each row, the daily "max", "min" and "mean" files of the 7 days before
    ``row["date"]`` are sampled with ``get_ws_vel_for_poly``:

    * max: largest pixel value found in the "max" files,
    * min: smallest pixel value found in the "min" files,
    * mean: average of the per-file means of the "mean" files.

    If any of the three file sets is missing, the row gets NaN in all six
    columns.

    Args:
        base_folder: Root folder of the daily NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Daily/Corse2".
        input_path: Vector file to read with ``geopandas.read_file``.
        output_path: GeoJSON file written at the end.

    Raises:
        ValueError: If ``base_folder`` or ``input_path`` is not provided.
    """
    # The original cell left both values as "to be edited" placeholders, which
    # made the code unparsable; fail with a clear message instead.
    if base_folder is None or input_path is None:
        raise ValueError("main() requires base_folder and input_path to be set")

    gdf = gpd.read_file(input_path)

    ws_max7, ws_min7, ws_mean7 = [], [], []
    vel_max7, vel_min7, vel_mean7 = [], [], []

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 7 jours WS/VEL pondérée"):
        dt = row["date"]
        poly = row["geometry"]
        try:
            # MAX over 7 days
            all_ws, all_vel = [], []
            for f in get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type="max"):
                ws_vals, vel_vals, _, _ = get_ws_vel_for_poly(poly, f)
                all_ws.extend(ws_vals)
                all_vel.extend(vel_vals)
            row_ws_max = np.nanmax(all_ws) if all_ws else np.nan
            row_vel_max = np.nanmax(all_vel) if all_vel else np.nan

            # MIN over 7 days
            all_ws, all_vel = [], []
            for f in get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type="min"):
                ws_vals, vel_vals, _, _ = get_ws_vel_for_poly(poly, f)
                all_ws.extend(ws_vals)
                all_vel.extend(vel_vals)
            row_ws_min = np.nanmin(all_ws) if all_ws else np.nan
            row_vel_min = np.nanmin(all_vel) if all_vel else np.nan

            # MEAN over 7 days: average of the per-file means
            ws_wmean_all, vel_wmean_all = [], []
            for f in get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type="mean"):
                _, _, ws_wmean, vel_wmean = get_ws_vel_for_poly(poly, f)
                if not np.isnan(ws_wmean):
                    ws_wmean_all.append(ws_wmean)
                if not np.isnan(vel_wmean):
                    vel_wmean_all.append(vel_wmean)
            row_ws_mean = np.nanmean(ws_wmean_all) if ws_wmean_all else np.nan
            row_vel_mean = np.nanmean(vel_wmean_all) if vel_wmean_all else np.nan

        except FileNotFoundError as e:
            print(f"⚠️ Fichiers manquants pour {dt}: {e}")
            row_ws_max = row_ws_min = row_ws_mean = np.nan
            row_vel_max = row_vel_min = row_vel_mean = np.nan

        # Append once per row, outside the try block: appending inside it
        # duplicated entries when a later file lookup failed, which broke the
        # column assignment below.
        ws_max7.append(row_ws_max)
        ws_min7.append(row_ws_min)
        ws_mean7.append(row_ws_mean)
        vel_max7.append(row_vel_max)
        vel_min7.append(row_vel_min)
        vel_mean7.append(row_vel_mean)

    gdf["wind_max_7j"] = ws_max7
    gdf["wind_min_7j"] = ws_min7
    gdf["wind_mean_7j"] = ws_mean7
    gdf["vel_max_7j"] = vel_max7
    gdf["vel_min_7j"] = vel_min7
    gdf["vel_mean_7j"] = vel_mean7

    gdf.to_file(output_path, driver="GeoJSON")


# Script entry point: run the 7-day extraction only when this code is executed directly.
if __name__ == "__main__":
    main()


#### 3. Extraction 1 mois en amont

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point
from datetime import timedelta
from dateutil.relativedelta import relativedelta
import os
from scipy.spatial import cKDTree

def get_netcdf_paths_for_last_month(dt, base_folder, stat_type):
    """List the existing daily ``stat_type`` files for the month preceding ``dt``.

    One candidate path ``<base_folder>/<year>/MARS3D_<YYYYMMDD>_<stat_type>.nc``
    is built for every day from one calendar month before ``dt`` up to ``dt``
    (both included). Only the candidates present on disk are returned, in
    chronological order.

    Raises:
        FileNotFoundError: If none of the candidate files exists.
    """
    end_dt = dt
    start_dt = end_dt - relativedelta(months=1)
    day_count = (end_dt - start_dt).days + 1

    files = []
    for offset in range(day_count):
        day = start_dt + timedelta(offset)
        candidate = os.path.join(
            base_folder,
            str(day.year),
            f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc",
        )
        if os.path.exists(candidate):
            files.append(candidate)

    if files:
        return files
    raise FileNotFoundError(f"Aucun fichier {stat_type} trouvé entre {start_dt} et {end_dt}")

def _nearest_valid_values(field, x_coords, y_coords, point, k=3):
    """Return the values of the (up to) ``k`` non-NaN pixels closest to ``point``."""
    valid = ~np.isnan(field)
    values = field[valid]
    if values.size == 0:
        # The whole layer is NaN: there is nothing to fall back on.
        return values
    tree = cKDTree(np.column_stack([x_coords[valid], y_coords[valid]]))
    _, idx = tree.query(point, k=min(k, values.size))
    # query() returns a scalar index when k == 1; keep the result 1-D so that
    # callers can safely use len() and list.extend().
    return values[np.atleast_1d(idx)]


def get_ws_vel_for_poly(poly, ncdf_path):
    """Sample WINDSTRESS and VELOCITY of one NetCDF file over a polygon.

    The first time step is used; VELOCITY is taken at the last index of its
    ``level`` dimension. Both fields are clipped with the polygon (every
    touched pixel is kept). When a clip yields no value, the three valid
    pixels nearest to the polygon centroid are used instead.

    Args:
        poly: Shapely geometry, treated as EPSG:4326 and reprojected to the
            raster CRS before use.
        ncdf_path: Path of the NetCDF file to read.

    Returns:
        Tuple ``(ws_vals, vel_vals, ws_mean, vel_mean)``: two 1-D arrays of
        non-NaN pixel values and their means (NaN when an array is empty).
    """
    # The context manager closes the file even if clipping or reading fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as ds:
        ws = ds["WINDSTRESS"].isel(time=0)
        vel = ds["VELOCITY"].isel(time=0, level=-1)

        if ws.rio.crs is None:
            ws = ws.rio.write_crs("EPSG:4326")
        if vel.rio.crs is None:
            vel = vel.rio.write_crs("EPSG:4326")

        # Bring the polygon into the raster CRS. The reprojected geometry was
        # previously computed but never used, so the clip silently relied on
        # both CRS being identical.
        poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(ws.rio.crs).iloc[0]

        # Vectorised clip; drop=False keeps the grid shape and masks with NaN.
        ws_vals = ws.rio.clip([poly_proj], all_touched=True, drop=False).values.flatten()
        ws_vals = ws_vals[~np.isnan(ws_vals)]
        vel_vals = vel.rio.clip([poly_proj], all_touched=True, drop=False).values.flatten()
        vel_vals = vel_vals[~np.isnan(vel_vals)]

        # Fallback when the polygon intersects no valid pixel.
        if len(ws_vals) == 0 or len(vel_vals) == 0:
            transform = ws.rio.transform()
            h, w = ws.shape
            # Pixel-centre coordinates of the whole grid.
            xv, yv = transform * np.meshgrid(np.arange(w) + 0.5, np.arange(h) + 0.5)
            centroid = [poly_proj.centroid.x, poly_proj.centroid.y]

            if len(ws_vals) == 0:
                ws_vals = _nearest_valid_values(ws.values, xv, yv, centroid)
            if len(vel_vals) == 0:
                vel_vals = _nearest_valid_values(vel.values, xv, yv, centroid)

    ws_mean = np.mean(ws_vals) if len(ws_vals) > 0 else np.nan
    vel_mean = np.mean(vel_vals) if len(vel_vals) > 0 else np.nan

    return ws_vals, vel_vals, ws_mean, vel_mean

def main(base_folder=None, input_path=None, output_path="adne_extract_corse.geojson"):
    """Add 1-month WINDSTRESS / VELOCITY statistics to every feature and save them.

    For each row, the daily "max", "min" and "mean" files of the month before
    ``row["date"]`` are sampled with ``get_ws_vel_for_poly``:

    * max: largest pixel value found in the "max" files,
    * min: smallest pixel value found in the "min" files,
    * mean: average of the per-file means of the "mean" files.

    If any of the three file sets is missing, the row gets NaN in all six
    columns.

    Args:
        base_folder: Root folder of the daily NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Daily/Corse2".
        input_path: Vector file to read with ``geopandas.read_file``.
        output_path: GeoJSON file written at the end.

    Raises:
        ValueError: If ``base_folder`` or ``input_path`` is not provided.
    """
    # The original cell left both values as "to be edited" placeholders, which
    # made the code unparsable; fail with a clear message instead.
    if base_folder is None or input_path is None:
        raise ValueError("main() requires base_folder and input_path to be set")

    gdf = gpd.read_file(input_path)

    ws_max, ws_min, ws_mean = [], [], []
    vel_max, vel_min, vel_mean = [], [], []

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 1 mois WS/VEL pondérée"):
        dt = row["date"]
        poly = row["geometry"]
        try:
            # MAX
            all_ws, all_vel = [], []
            for f in get_netcdf_paths_for_last_month(dt, base_folder, stat_type="max"):
                ws_vals, vel_vals, _, _ = get_ws_vel_for_poly(poly, f)
                all_ws.extend(ws_vals)
                all_vel.extend(vel_vals)
            row_ws_max = np.nanmax(all_ws) if all_ws else np.nan
            row_vel_max = np.nanmax(all_vel) if all_vel else np.nan

            # MIN
            all_ws, all_vel = [], []
            for f in get_netcdf_paths_for_last_month(dt, base_folder, stat_type="min"):
                ws_vals, vel_vals, _, _ = get_ws_vel_for_poly(poly, f)
                all_ws.extend(ws_vals)
                all_vel.extend(vel_vals)
            row_ws_min = np.nanmin(all_ws) if all_ws else np.nan
            row_vel_min = np.nanmin(all_vel) if all_vel else np.nan

            # MEAN: average of the per-file means
            ws_wmean_all, vel_wmean_all = [], []
            for f in get_netcdf_paths_for_last_month(dt, base_folder, stat_type="mean"):
                _, _, ws_wmean, vel_wmean = get_ws_vel_for_poly(poly, f)
                if not np.isnan(ws_wmean):
                    ws_wmean_all.append(ws_wmean)
                if not np.isnan(vel_wmean):
                    vel_wmean_all.append(vel_wmean)
            row_ws_mean = np.nanmean(ws_wmean_all) if ws_wmean_all else np.nan
            row_vel_mean = np.nanmean(vel_wmean_all) if vel_wmean_all else np.nan

        except FileNotFoundError as e:
            print(f"⚠️ Fichier manquant pour {dt}: {e}")
            row_ws_max = row_ws_min = row_ws_mean = np.nan
            row_vel_max = row_vel_min = row_vel_mean = np.nan

        # Append once per row, outside the try block: appending inside it
        # duplicated entries when a later file lookup failed, which broke the
        # column assignment below.
        ws_max.append(row_ws_max)
        ws_min.append(row_ws_min)
        ws_mean.append(row_ws_mean)
        vel_max.append(row_vel_max)
        vel_min.append(row_vel_min)
        vel_mean.append(row_vel_mean)

    gdf["wind_max_1m"] = ws_max
    gdf["wind_min_1m"] = ws_min
    gdf["wind_mean_1m"] = ws_mean
    gdf["vel_max_1m"] = vel_max
    gdf["vel_min_1m"] = vel_min
    gdf["vel_mean_1m"] = vel_mean

    gdf.to_file(output_path, driver="GeoJSON")

# Script entry point: run the 1-month extraction only when this code is executed directly.
if __name__ == "__main__":
    main()


#### 4. Extraction 1 an en amont

In [None]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point
from datetime import timedelta
import os
from scipy.spatial import cKDTree
import pandas as pd

# ===============================================================
# FONCTIONS UTILITAIRES
# ===============================================================

def get_monthly_paths(dt_start, dt_end, base_folder, stat_type):
    """List the existing monthly ``stat_type`` files between two dates.

    Month starts are generated from ``dt_start`` to ``dt_end`` and the last
    one is discarded. For each remaining month the path
    ``<base_folder>/<year>/MARS3D_<YYYYMM>_<stat_type>.nc`` is kept when it
    exists on disk. An empty list is returned when nothing is found.
    """
    month_starts = pd.date_range(start=dt_start, end=dt_end, freq='MS')[:-1]
    candidates = (
        os.path.join(
            base_folder,
            str(month.year),
            f"MARS3D_{month.strftime('%Y%m')}_{stat_type}.nc",
        )
        for month in month_starts
    )
    return [path for path in candidates if os.path.exists(path)]

def get_daily_paths(dt_start, dt_end, base_folder, stat_type):
    """List the existing daily ``stat_type`` files between two dates (inclusive).

    For every day from ``dt_start`` to ``dt_end`` the path
    ``<base_folder>/<year>/MARS3D_<YYYYMMDD>_<stat_type>.nc`` is kept when it
    exists on disk. An empty list is returned when nothing is found.
    """
    day_count = (dt_end - dt_start).days + 1
    found = []
    for offset in range(day_count):
        day = dt_start + timedelta(offset)
        candidate = os.path.join(
            base_folder,
            str(day.year),
            f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc",
        )
        if os.path.exists(candidate):
            found.append(candidate)
    return found

def _nearest_valid_values(field, x_coords, y_coords, point, k=3):
    """Return the values of the (up to) ``k`` non-NaN pixels closest to ``point``."""
    valid = ~np.isnan(field)
    values = field[valid]
    if values.size == 0:
        # The whole layer is NaN: there is nothing to fall back on.
        return values
    tree = cKDTree(np.column_stack([x_coords[valid], y_coords[valid]]))
    _, idx = tree.query(point, k=min(k, values.size))
    # query() returns a scalar index when k == 1; keep the result 1-D so that
    # callers can safely use len() and list.extend().
    return values[np.atleast_1d(idx)]


def get_ws_vel_for_poly(poly, ncdf_path):
    """Sample WINDSTRESS and VELOCITY of one NetCDF file over a polygon.

    The first time step is used; VELOCITY is taken at the last index of its
    ``level`` dimension. Both fields are clipped with the polygon (every
    touched pixel is kept). When a clip yields no value, the three valid
    pixels nearest to the polygon centroid are used instead.

    Args:
        poly: Shapely geometry, treated as EPSG:4326 and reprojected to the
            raster CRS before use.
        ncdf_path: Path of the NetCDF file to read.

    Returns:
        Tuple ``(ws_vals, vel_vals)`` of 1-D arrays of non-NaN pixel values.
    """
    # The context manager closes the file even if clipping or reading fails.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as ds:
        ws = ds['WINDSTRESS'].isel(time=0)
        vel = ds['VELOCITY'].isel(time=0, level=-1)

        if ws.rio.crs is None:
            ws = ws.rio.write_crs("EPSG:4326")
        if vel.rio.crs is None:
            vel = vel.rio.write_crs("EPSG:4326")

        # Bring the polygon into the raster CRS. The reprojected geometry was
        # previously computed but never used, so the clip silently relied on
        # both CRS being identical.
        poly_proj = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(ws.rio.crs).iloc[0]

        # Vectorised clip; drop=False keeps the grid shape and masks with NaN.
        ws_vals = ws.rio.clip([poly_proj], all_touched=True, drop=False).values.flatten()
        ws_vals = ws_vals[~np.isnan(ws_vals)]
        vel_vals = vel.rio.clip([poly_proj], all_touched=True, drop=False).values.flatten()
        vel_vals = vel_vals[~np.isnan(vel_vals)]

        # Fallback on the 3 nearest valid pixels when the clip is empty.
        if len(ws_vals) == 0 or len(vel_vals) == 0:
            transform = ws.rio.transform()
            h, w = ws.shape
            # Pixel-centre coordinates of the whole grid.
            xv, yv = transform * np.meshgrid(np.arange(w) + 0.5, np.arange(h) + 0.5)
            centroid = [poly_proj.centroid.x, poly_proj.centroid.y]

            if len(ws_vals) == 0:
                ws_vals = _nearest_valid_values(ws.values, xv, yv, centroid)
            if len(vel_vals) == 0:
                vel_vals = _nearest_valid_values(vel.values, xv, yv, centroid)

    return ws_vals, vel_vals

# ===============================================================
# SCRIPT PRINCIPAL
# ===============================================================

def main(daily_base_folder=None, monthly_base_folder=None, input_path=None,
         output_path="adne_extract_corse.geojson"):
    """Add 1-year WINDSTRESS / VELOCITY statistics to every feature and save them.

    For each row and each statistic ("max", "min", "mean"), the matching
    files are sampled with ``get_ws_vel_for_poly``: monthly files from 365 days
    before ``row['date']`` up to the start of its month, then daily files from
    the start of that month up to the date. All sampled pixel values are then
    reduced with ``np.nanmax`` / ``np.nanmin`` / ``np.nanmean``. A statistic
    with no value is NaN; a ``FileNotFoundError`` sets the whole row to NaN.

    Args:
        daily_base_folder: Root folder of the daily NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Daily/Corse2".
        monthly_base_folder: Root folder of the monthly NetCDF files, e.g.
            "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Monthly/Corse2".
        input_path: Vector file to read with ``geopandas.read_file``.
        output_path: GeoJSON file written at the end.

    Raises:
        ValueError: If a folder or ``input_path`` is not provided.
    """
    # The original cell left these values as "to be edited" placeholders, which
    # made the code unparsable; fail with a clear message instead.
    if daily_base_folder is None or monthly_base_folder is None or input_path is None:
        raise ValueError(
            "main() requires daily_base_folder, monthly_base_folder and input_path to be set"
        )

    gdf = gpd.read_file(input_path)

    # Statistic name -> NaN-aware reducer applied to the pooled pixel values.
    reducers = {'max': np.nanmax, 'min': np.nanmin, 'mean': np.nanmean}
    # One output list per (variable, statistic), in the output column order.
    results = {(var, stat): [] for var in ('ws', 'vel') for stat in reducers}

    for _, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 1 an WS/VEL pondérée"):
        dt = row['date']
        dt_start = dt - timedelta(days=365)
        last_month_start = dt.replace(day=1)

        row_stats = {}
        try:
            for stat, aggregate in reducers.items():
                files_monthly = get_monthly_paths(dt_start, last_month_start, monthly_base_folder, stat)
                files_daily = get_daily_paths(last_month_start, dt, daily_base_folder, stat)

                all_ws, all_vel = [], []
                for f in files_monthly + files_daily:
                    ws_vals, vel_vals = get_ws_vel_for_poly(row['geometry'], f)
                    all_ws.extend(ws_vals)
                    all_vel.extend(vel_vals)

                row_stats['ws', stat] = aggregate(all_ws) if all_ws else np.nan
                row_stats['vel', stat] = aggregate(all_vel) if all_vel else np.nan

        except FileNotFoundError as e:
            print(f"⚠️ Fichier manquant pour {dt} : {e}")
            # Discard partial results so the whole row is reported as missing.
            row_stats = {}

        # Append exactly once per row so every column keeps len(gdf) entries.
        for key, column in results.items():
            column.append(row_stats.get(key, np.nan))

    for (var, stat), column in results.items():
        gdf[f"{var}_{stat}_1y"] = column

    gdf.to_file(output_path, driver="GeoJSON")

# ===============================================================
# Script entry point: run the 1-year extraction only when this code is executed directly.
if __name__ == "__main__":
    main()
