## Extraction des variables force du vent et courant sur la zone Med Est

In [1]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point, box
import glob
import os
import re
from datetime import datetime, timedelta


def get_netcdf_paths_for_period(dt, base_folder, hours=24):
    """Return the 3-hourly NetCDF files time-stamped within ``hours`` before ``dt``.

    Files are searched in ``<base_folder>/<year>/`` for every year touched by
    the window, and their timestamp is parsed from the file name.

    Args:
        dt: End of the window (inclusive).
        base_folder: Root folder containing one sub-folder per year.
        hours: Length of the window in hours; its start is inclusive too.

    Returns:
        List of file paths sorted by the timestamp found in their name.

    Raises:
        FileNotFoundError: If no file falls inside the window.
    """
    start_dt = dt - timedelta(hours=hours)
    stamp_re = re.compile(r"_(\d{8}T\d{4})Z\.nc$")

    stamped = []
    for year in range(start_dt.year, dt.year + 1):
        pattern = os.path.join(
            base_folder, str(year), "MARC_F2-MARS3D-MENOR1200_????????T????Z.nc"
        )
        for path in glob.glob(pattern):
            found = stamp_re.search(path)
            if found is None:
                # The glob wildcards accept any character; skip non-numeric stamps.
                continue
            stamp = datetime.strptime(found.group(1), "%Y%m%dT%H%M")
            if start_dt <= stamp <= dt:
                stamped.append((stamp, path))

    if not stamped:
        raise FileNotFoundError(f"Aucun fichier trouvé entre {start_dt} et {dt}")

    # Sort on the timestamp only, so equal stamps keep their discovery order.
    stamped.sort(key=lambda pair: pair[0])
    return [path for _, path in stamped]


def get_ws_vel_for_poly(poly, ncdf_path):
    """Extract WINDSTRESS and VELOCITY values for the grid cells under a polygon.

    Every pixel of the WINDSTRESS grid is turned into a rectangle with the
    raster affine transform, and the pixels whose rectangle intersects
    ``poly`` are kept. When no pixel intersects, the three pixels whose
    centres are closest to the polygon are used instead. NaN values are
    dropped from the result.

    Args:
        poly: Shapely geometry expressed in EPSG:4326.
        ncdf_path: Path of a NetCDF file holding ``WINDSTRESS`` (indexed as
            time, y, x) and ``VELOCITY`` (indexed as time, level, y, x).

    Returns:
        Tuple ``(ws_values, vel_values)``: lists of the non-NaN values read at
        time index 0 (and level index 59 for VELOCITY) on the selected pixels.

    Raises:
        KeyError: If ``WINDSTRESS`` or ``VELOCITY`` is absent from the file.
    """
    # The context manager closes the dataset opened here even when the
    # extraction raises, which a trailing ds.close() could not guarantee.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as opened:
        ds = opened if hasattr(opened, 'crs') else opened.rio.write_crs("EPSG:4326")

        ws = ds['WINDSTRESS']
        vel = ds['VELOCITY']

        poly_gs = gpd.GeoSeries([poly], crs="EPSG:4326")
        poly_proj = poly_gs.to_crs(ws.rio.crs).iloc[0]

        transform = ws.rio.transform()
        height, width = ws.shape[-2], ws.shape[-1]

        # Pixels whose footprint intersects the polygon, as (row, col) pairs.
        coords = []
        for j in range(height):
            for i in range(width):
                x_min, y_max = transform * (i, j)
                x_max, y_min = transform * (i + 1, j + 1)
                if poly_proj.intersects(box(x_min, y_min, x_max, y_max)):
                    coords.append((j, i))

        if not coords:
            # Fallback: keep the three pixels whose centre is nearest to the
            # polygon. The sort is stable, so ties keep row-major order.
            distances = []
            for j in range(height):
                for i in range(width):
                    x_c, y_c = transform * (i + 0.5, j + 0.5)
                    distances.append((Point(x_c, y_c).distance(poly_proj), j, i))
            distances.sort(key=lambda item: item[0])
            coords = [(j, i) for _, j, i in distances[:3]]

        # Read each 2-D slice once instead of re-evaluating `.values` per pixel.
        # NOTE(review): level index 59 is hard-coded here while the daily and
        # monthly extractions use -1 (last level) - confirm the 3-hourly files
        # have exactly 60 levels.
        ws_grid = ws[0].values
        vel_grid = vel[0, 59].values

        ws_values = [ws_grid[j, i] for (j, i) in coords if not np.isnan(ws_grid[j, i])]
        vel_values = [vel_grid[j, i] for (j, i) in coords if not np.isnan(vel_grid[j, i])]

    return ws_values, vel_values


def main():
    """Add 24-hour WINDSTRESS / VELOCITY min, max and mean columns to the GeoJSON.

    For each feature of ``adne_extract_med_est.geojson`` the 3-hourly files of
    the 24 hours preceding the feature's ``datetime`` are read, the values under
    the feature geometry are pooled, and their min / max / mean are stored.
    Features without any file in the window get NaN. The GeoJSON is rewritten
    in place with the six new columns.
    """
    base_folder = "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/3H"
    gdf = gpd.read_file("adne_extract_med_est.geojson")

    no_data = (np.nan, np.nan, np.nan)

    def summarize(values):
        # (min, max, mean) of the pooled values, or NaNs when nothing was read.
        if not values:
            return no_data
        return np.min(values), np.max(values), np.mean(values)

    # One (min, max, mean) triplet per feature and per variable.
    ws_stats, vel_stats = [], []

    for idx, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction WINDSTRESS & VELOCITY sur 24h"):
        dt = row['datetime']
        try:
            pooled_ws, pooled_vel = [], []
            for path in get_netcdf_paths_for_period(dt, base_folder, hours=24):
                ws_vals, vel_vals = get_ws_vel_for_poly(row['geometry'], path)
                pooled_ws += ws_vals
                pooled_vel += vel_vals
        except FileNotFoundError as e:
            print(f"Fichier manquant pour {dt}: {e}")
            ws_stats.append(no_data)
            vel_stats.append(no_data)
        else:
            ws_stats.append(summarize(pooled_ws))
            vel_stats.append(summarize(pooled_vel))

    gdf['wind_min_24h'] = [stats[0] for stats in ws_stats]
    gdf['wind_max_24h'] = [stats[1] for stats in ws_stats]
    gdf['wind_mean_24h'] = [stats[2] for stats in ws_stats]
    gdf['vel_min_24h'] = [stats[0] for stats in vel_stats]
    gdf['vel_max_24h'] = [stats[1] for stats in vel_stats]
    gdf['vel_mean_24h'] = [stats[2] for stats in vel_stats]

    gdf.to_file("adne_extract_med_est.geojson", driver="GeoJSON")


# Entry point: run the 24-hour extraction when this code is executed directly.
if __name__ == "__main__":
    main()


Extraction WINDSTRESS & VELOCITY sur 24h: 100%|██████████| 397/397 [9:16:42<00:00, 84.14s/it]   


#### Extraction 7 jours en amont

In [1]:
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import box
import os
from datetime import timedelta

def get_netcdf_paths_for_period(dt, base_folder, days, stat_type):
    """Return the existing daily files from ``days`` days before ``dt`` up to ``dt``.

    Args:
        dt: Last day of the window (inclusive).
        base_folder: Root folder containing one sub-folder per year.
        days: Number of days to go back; the start day is inclusive as well.
        stat_type: Statistic suffix of the file name (e.g. "max", "min", "mean").

    Returns:
        Chronologically ordered list of the daily file paths that exist on disk.

    Raises:
        FileNotFoundError: If none of the expected daily files exists.
    """
    start_dt = dt - timedelta(days=days)
    day_count = (dt - start_dt).days + 1

    expected = (
        os.path.join(
            base_folder,
            str(day.year),
            f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc",
        )
        for day in (start_dt + timedelta(offset) for offset in range(day_count))
    )
    files = [path for path in expected if os.path.exists(path)]

    if files:
        return files
    raise FileNotFoundError(
        f"Aucun fichier {stat_type} trouvé entre {start_dt} et {dt}"
    )


def _values_intersecting(layer, polygon):
    """Return the non-NaN values of 2-D ``layer`` whose pixel footprint intersects ``polygon``."""
    transform = layer.rio.transform()
    height, width = layer.shape
    grid = layer.values  # read the slice once instead of once per pixel

    values = []
    for j in range(height):
        for i in range(width):
            val = grid[j, i]
            if np.isnan(val):
                # Nothing to keep here, so skip the geometry test altogether.
                continue
            x_min, y_max = transform * (i, j)
            x_max, y_min = transform * (i + 1, j + 1)
            if polygon.intersects(box(x_min, y_min, x_max, y_max)):
                values.append(val)
    return values


def get_ws_vel_for_poly(poly, ncdf_path):
    """Extract WINDSTRESS and VELOCITY values for the grid cells under a polygon.

    WINDSTRESS is read at time index 0; VELOCITY is read at time index 0 and
    at the last index of its ``level`` dimension. A pixel is kept when its
    rectangle (built from the raster affine transform) intersects ``poly`` and
    its value is not NaN.

    Args:
        poly: Shapely geometry expressed in EPSG:4326.
        ncdf_path: Path of a NetCDF file holding ``WINDSTRESS`` and ``VELOCITY``.

    Returns:
        Tuple ``(ws_values, vel_values)`` of lists of the retained values.

    Raises:
        ValueError: If ``VELOCITY`` lacks a ``time`` or a ``level`` dimension.
        KeyError: If ``WINDSTRESS`` or ``VELOCITY`` is absent from the file.
    """
    # The context manager closes the file on every exit path, including the
    # ValueError below, which previously left the dataset open.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as ds:
        ws = ds['WINDSTRESS'].isel(time=0)

        velocity = ds['VELOCITY']
        if 'time' in velocity.dims and 'level' in velocity.dims:
            vel = velocity.isel(time=0, level=-1)
        else:
            raise ValueError("VELOCITY n'a pas les dimensions attendues (time, level, y, x)")

        # Assign a CRS when the file does not carry one.
        if ws.rio.crs is None:
            ws = ws.rio.write_crs("EPSG:4326")
        if vel.rio.crs is None:
            vel = vel.rio.write_crs("EPSG:4326")

        poly_gs = gpd.GeoSeries([poly], crs="EPSG:4326")
        poly_proj_ws = poly_gs.to_crs(ws.rio.crs).iloc[0]
        poly_proj_vel = poly_gs.to_crs(vel.rio.crs).iloc[0]

        ws_values = _values_intersecting(ws, poly_proj_ws)
        vel_values = _values_intersecting(vel, poly_proj_vel)

    return ws_values, vel_values


def main():
    """Add 7-day WINDSTRESS / VELOCITY max, min and mean columns to the GeoJSON.

    For each feature of ``adne_extract_med_est.geojson`` the daily "max", "min"
    and "mean" files of the 7 days preceding the feature's ``date`` are read,
    and the values under the feature geometry are reduced to the max of the
    maxima, the min of the minima and the mean of the means. If the files of
    any statistic are missing, all six values of that feature are set to NaN.
    The GeoJSON is rewritten in place with the six new columns.
    """
    base_folder = "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Daily/Med-Est"
    gdf = gpd.read_file("adne_extract_med_est.geojson")

    # Statistic file suffix -> reduction applied to the pooled pixel values.
    reducers = {"max": np.nanmax, "min": np.nanmin, "mean": np.nanmean}

    ws_max7, ws_min7, ws_mean7 = [], [], []
    vel_max7, vel_min7, vel_mean7 = [], [], []

    for idx, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 7 jours WS/VEL"):
        dt = row['date']

        # Results are collected per feature and appended only once below, so
        # a FileNotFoundError raised after the first statistic can no longer
        # leave the six output lists with different lengths.
        row_stats = {}
        try:
            for stat_type, reducer in reducers.items():
                files = get_netcdf_paths_for_period(dt, base_folder, days=7, stat_type=stat_type)
                all_ws, all_vel = [], []
                for f in files:
                    ws_vals, vel_vals = get_ws_vel_for_poly(row['geometry'], f)
                    all_ws.extend(ws_vals)
                    all_vel.extend(vel_vals)
                row_stats[stat_type] = (
                    reducer(all_ws) if all_ws else np.nan,
                    reducer(all_vel) if all_vel else np.nan,
                )
        except FileNotFoundError as e:
            print(f"Fichier manquant pour {dt}: {e}")
            row_stats = {stat_type: (np.nan, np.nan) for stat_type in reducers}

        ws_max7.append(row_stats["max"][0])
        ws_min7.append(row_stats["min"][0])
        ws_mean7.append(row_stats["mean"][0])
        vel_max7.append(row_stats["max"][1])
        vel_min7.append(row_stats["min"][1])
        vel_mean7.append(row_stats["mean"][1])

    gdf['wind_max_7j'] = ws_max7
    gdf['wind_min_7j'] = ws_min7
    gdf['wind_mean_7j'] = ws_mean7
    gdf['vel_max_7j'] = vel_max7
    gdf['vel_min_7j'] = vel_min7
    gdf['vel_mean_7j'] = vel_mean7

    gdf.to_file("adne_extract_med_est.geojson", driver="GeoJSON")


# Entry point: run the 7-day extraction when this code is executed directly.
if __name__ == "__main__":
    main()


Extraction 7 jours WS/VEL: 100%|██████████| 397/397 [5:21:45<00:00, 48.63s/it]  


#### Extraction 1 mois en amont

In [2]:
# 1 mois 

from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import box
import os
from datetime import timedelta
from dateutil.relativedelta import relativedelta


def get_netcdf_paths_for_last_month(dt, base_folder, stat_type):
    """Return the existing daily files of the rolling month ending at ``dt``.

    The window runs from ``dt`` minus one calendar month to ``dt``, both ends
    included.

    Args:
        dt: Last day of the window.
        base_folder: Root folder containing one sub-folder per year.
        stat_type: Statistic suffix of the file name (e.g. "max", "min", "mean").

    Returns:
        Chronologically ordered list of the daily file paths that exist on disk.

    Raises:
        FileNotFoundError: If none of the expected daily files exists.
    """
    end_dt = dt
    start_dt = dt - relativedelta(months=1)
    day_count = (end_dt - start_dt).days + 1

    files = []
    for offset in range(day_count):
        day = start_dt + timedelta(offset)
        candidate = os.path.join(
            base_folder,
            str(day.year),
            f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc",
        )
        if os.path.exists(candidate):
            files.append(candidate)

    if files:
        return files
    raise FileNotFoundError(
        f"Aucun fichier {stat_type} trouvé entre {start_dt} et {end_dt}"
    )


def get_ws_vel_for_poly(poly, ncdf_path):
    """Extract WINDSTRESS and VELOCITY values for the grid cells under a polygon.

    A pixel is selected when its rectangle (built from the WINDSTRESS raster
    affine transform) intersects ``poly``. For each selected pixel the
    WINDSTRESS value at time index 0 and the VELOCITY value at time index 0,
    last level index, are kept unless they are NaN.

    Args:
        poly: Shapely geometry expressed in EPSG:4326.
        ncdf_path: Path of a NetCDF file holding ``WINDSTRESS`` (time, y, x)
            and ``VELOCITY`` (time, depth, y, x).

    Returns:
        Tuple ``(ws_vals, vel_vals)`` of lists of the retained values.

    Raises:
        KeyError: If ``WINDSTRESS`` or ``VELOCITY`` is absent from the file.
    """
    # The context manager closes the dataset opened here even when the
    # extraction raises, which a trailing ds.close() could not guarantee.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as opened:
        ds = opened if hasattr(opened, "crs") else opened.rio.write_crs("EPSG:4326")

        ws = ds["WINDSTRESS"]  # (time, y, x)
        vel = ds["VELOCITY"]   # (time, depth, y, x)

        poly_gs = gpd.GeoSeries([poly], crs="EPSG:4326")
        poly_proj = poly_gs.to_crs(ws.rio.crs).iloc[0]

        transform = ws.rio.transform()
        _, height, width = ws.shape  # time, y, x

        # Read each 2-D slice once instead of re-evaluating `.values` (and
        # pulling the whole 4-D velocity array) for every selected pixel.
        ws_grid = ws[0].values       # single time step
        vel_grid = vel[0, -1].values  # single time step, last depth level

        ws_vals = []
        vel_vals = []

        for j in range(height):
            for i in range(width):
                x_min, y_max = transform * (i, j)
                x_max, y_min = transform * (i + 1, j + 1)
                if not poly_proj.intersects(box(x_min, y_min, x_max, y_max)):
                    continue
                w_val = ws_grid[j, i]
                v_val = vel_grid[j, i]
                if not np.isnan(w_val):
                    ws_vals.append(w_val)
                if not np.isnan(v_val):
                    vel_vals.append(v_val)

    return ws_vals, vel_vals


def main():
    """Add 1-month WINDSTRESS / VELOCITY max, min and mean columns to the GeoJSON.

    For each feature of ``adne_extract_med_est.geojson`` the daily "max", "min"
    and "mean" files of the rolling month preceding the feature's ``date`` are
    read, and the values under the feature geometry are reduced to the max of
    the maxima, the min of the minima and the mean of the means. If the files
    of any statistic are missing, all six values of that feature are set to
    NaN. The GeoJSON is rewritten in place with the six new columns.
    """
    base_folder = "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Daily/Med-Est"
    gdf = gpd.read_file("adne_extract_med_est.geojson")

    # Statistic file suffix -> reduction applied to the pooled pixel values.
    reducers = {"max": np.nanmax, "min": np.nanmin, "mean": np.nanmean}

    ws_max, ws_min, ws_mean = [], [], []
    vel_max, vel_min, vel_mean = [], [], []

    for idx, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction sur 1 mois glissant"):
        dt = row["date"]

        # Results are collected per feature and appended only once below, so
        # a FileNotFoundError raised after the first statistic can no longer
        # leave the six output lists with different lengths.
        row_stats = {}
        try:
            for stat_type, reducer in reducers.items():
                files = get_netcdf_paths_for_last_month(dt, base_folder, stat_type=stat_type)
                all_ws, all_vel = [], []
                for f in files:
                    ws_vals, vel_vals = get_ws_vel_for_poly(row["geometry"], f)
                    all_ws.extend(ws_vals)
                    all_vel.extend(vel_vals)
                row_stats[stat_type] = (
                    reducer(all_ws) if all_ws else np.nan,
                    reducer(all_vel) if all_vel else np.nan,
                )
        except FileNotFoundError as e:
            print(f"Fichier manquant pour {dt}: {e}")
            row_stats = {stat_type: (np.nan, np.nan) for stat_type in reducers}

        ws_max.append(row_stats["max"][0])
        ws_min.append(row_stats["min"][0])
        ws_mean.append(row_stats["mean"][0])
        vel_max.append(row_stats["max"][1])
        vel_min.append(row_stats["min"][1])
        vel_mean.append(row_stats["mean"][1])

    gdf["wind_max_1m"] = ws_max
    gdf["wind_min_1m"] = ws_min
    gdf["wind_mean_1m"] = ws_mean
    gdf["vel_max_1m"] = vel_max
    gdf["vel_min_1m"] = vel_min
    gdf["vel_mean_1m"] = vel_mean

    gdf.to_file("adne_extract_med_est.geojson", driver="GeoJSON")


# Entry point: run the 1-month extraction when this code is executed directly.
if __name__ == "__main__":
    main()


Extraction sur 1 mois glissant: 100%|██████████| 397/397 [8:41:44<00:00, 78.85s/it]  


#### Extraction 1 an en amont

In [3]:
# 1 an 
from tqdm import tqdm
import geopandas as gpd
import xarray as xr
import rioxarray
import numpy as np
from shapely.geometry import Point, box
import os
from datetime import timedelta
import pandas as pd

# -----------------------------
# Fonctions utilitaires
# -----------------------------

def get_monthly_paths(dt_start, dt_end, base_folder, stat_type):
    """Return the existing monthly files for the months between the two dates.

    Month starts are enumerated from ``dt_start`` to ``dt_end`` and the last
    one is dropped, so the month of ``dt_end`` itself is never included.

    Args:
        dt_start: Beginning of the period.
        dt_end: End of the period.
        base_folder: Root folder containing one sub-folder per year.
        stat_type: Statistic suffix of the file name (e.g. "max", "min", "mean").

    Returns:
        Chronologically ordered list of the monthly file paths that exist on
        disk (possibly empty).
    """
    month_starts = pd.date_range(start=dt_start, end=dt_end, freq='MS')  # Month Start
    expected = (
        os.path.join(
            base_folder,
            str(month.year),
            f"MARS3D_{month.strftime('%Y%m')}_{stat_type}.nc",
        )
        for month in month_starts[:-1]  # every month except the last one
    )
    return [path for path in expected if os.path.exists(path)]

def get_daily_paths(dt_start, dt_end, base_folder, stat_type):
    """Return the existing daily files from ``dt_start`` to ``dt_end``, both included.

    Args:
        dt_start: First day of the period.
        dt_end: Last day of the period.
        base_folder: Root folder containing one sub-folder per year.
        stat_type: Statistic suffix of the file name (e.g. "max", "min", "mean").

    Returns:
        Chronologically ordered list of the daily file paths that exist on
        disk (possibly empty).
    """
    day_count = (dt_end - dt_start).days + 1
    expected = (
        os.path.join(
            base_folder,
            str(day.year),
            f"MARS3D_{day.strftime('%Y%m%d')}_{stat_type}.nc",
        )
        for day in (dt_start + timedelta(offset) for offset in range(day_count))
    )
    return [path for path in expected if os.path.exists(path)]

def get_ws_vel_for_poly(poly, ncdf_path):
    """Extract WINDSTRESS and VELOCITY values for the grid cells under a polygon.

    A pixel is selected when its rectangle (built from the WINDSTRESS raster
    affine transform) intersects ``poly``. For each selected pixel the
    WINDSTRESS value at time index 0 and the VELOCITY value at time index 0,
    last level index, are kept unless they are NaN.

    Args:
        poly: Shapely geometry expressed in EPSG:4326.
        ncdf_path: Path of a NetCDF file holding ``WINDSTRESS`` (time, y, x)
            and ``VELOCITY`` (time, depth, y, x).

    Returns:
        Tuple ``(ws_vals, vel_vals)`` of lists of the retained values.

    Raises:
        KeyError: If ``WINDSTRESS`` or ``VELOCITY`` is absent from the file.
    """
    # The context manager closes the dataset opened here even when the
    # extraction raises, which a trailing ds.close() could not guarantee.
    with xr.open_dataset(ncdf_path, engine="netcdf4") as opened:
        ds = opened if hasattr(opened, 'crs') else opened.rio.write_crs("EPSG:4326")

        ws = ds['WINDSTRESS']  # (time, y, x)
        vel = ds['VELOCITY']   # (time, depth, y, x)

        poly_gs = gpd.GeoSeries([poly], crs="EPSG:4326")
        poly_proj = poly_gs.to_crs(ws.rio.crs).iloc[0]

        transform = ws.rio.transform()
        height, width = ws.shape[1:]  # time, y, x

        # Read each 2-D slice once instead of re-evaluating `.values` (and
        # pulling the whole 4-D velocity array) for every selected pixel.
        ws_grid = ws[0].values
        vel_grid = vel[0, -1].values  # last level

        ws_vals = []
        vel_vals = []

        for j in range(height):
            for i in range(width):
                x_min, y_max = transform * (i, j)
                x_max, y_min = transform * (i + 1, j + 1)
                if not poly_proj.intersects(box(x_min, y_min, x_max, y_max)):
                    continue
                ws_val = ws_grid[j, i]
                vel_val = vel_grid[j, i]
                if not np.isnan(ws_val):
                    ws_vals.append(ws_val)
                if not np.isnan(vel_val):
                    vel_vals.append(vel_val)

    return ws_vals, vel_vals

# -----------------------------
# Main
# -----------------------------

def main():
    """Add 1-year WINDSTRESS / VELOCITY max, min and mean columns to the GeoJSON.

    For each feature of ``adne_extract_med_est.geojson`` and each statistic
    ("max", "min", "mean"), the monthly files from 365 days before the
    feature's ``date`` up to the month preceding it are combined with the
    daily files of the current month (from its first day to ``date``). The
    values under the feature geometry are pooled and reduced with
    nanmax / nanmin / nanmean. Features for which no value is found get NaN.
    The GeoJSON is rewritten in place with the six new columns.
    """
    daily_base_folder = "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Daily/Med-Est"
    monthly_base_folder = "/home/paulinev/Bureau/Marbec_data/BiodivMed/MARS3D/Med_MENOR/Aggregated/CUR-WIND_latlon/Monthly/Med-Est"
    gdf = gpd.read_file("adne_extract_med_est.geojson")

    # Statistic file suffix -> reduction applied to the pooled pixel values.
    reducers = {'max': np.nanmax, 'min': np.nanmin, 'mean': np.nanmean}

    ws_max, ws_min, ws_mean = [], [], []
    vel_max, vel_min, vel_mean = [], [], []

    for idx, row in tqdm(gdf.iterrows(), total=len(gdf), desc="Extraction 1 an"):
        dt = row['date']

        dt_start = dt - timedelta(days=365)
        last_month_start = dt.replace(day=1)

        # Results are collected per feature and appended only once below, so
        # an error raised after the first statistic cannot leave the six
        # output lists with different lengths.
        row_stats = {}
        try:
            for stat, reducer in reducers.items():
                # Monthly files, excluding the current month.
                files_monthly = get_monthly_paths(dt_start, last_month_start, monthly_base_folder, stat)
                # Daily files of the current month.
                files_daily = get_daily_paths(last_month_start, dt, daily_base_folder, stat)
                files = files_monthly + files_daily

                all_ws, all_vel = [], []
                for f in files:
                    ws_vals, vel_vals = get_ws_vel_for_poly(row['geometry'], f)
                    all_ws.extend(ws_vals)
                    all_vel.extend(vel_vals)

                # NOTE(review): monthly and daily values are pooled with equal
                # weight, so for 'mean' each daily file counts as much as a
                # whole month - confirm this is the intended yearly mean.
                row_stats[stat] = (
                    reducer(all_ws) if all_ws else np.nan,
                    reducer(all_vel) if all_vel else np.nan,
                )

        except FileNotFoundError as e:
            print(f"Fichier manquant pour {dt}: {e}")
            row_stats = {stat: (np.nan, np.nan) for stat in reducers}

        ws_max.append(row_stats['max'][0])
        ws_min.append(row_stats['min'][0])
        ws_mean.append(row_stats['mean'][0])
        vel_max.append(row_stats['max'][1])
        vel_min.append(row_stats['min'][1])
        vel_mean.append(row_stats['mean'][1])

    gdf['ws_max_1y'] = ws_max
    gdf['ws_min_1y'] = ws_min
    gdf['ws_mean_1y'] = ws_mean
    gdf['vel_max_1y'] = vel_max
    gdf['vel_min_1y'] = vel_min
    gdf['vel_mean_1y'] = vel_mean

    gdf.to_file("adne_extract_med_est.geojson", driver="GeoJSON")

# Entry point: run the 1-year extraction when this code is executed directly.
if __name__ == "__main__":
    main()


Extraction 1 an: 100%|██████████| 397/397 [8:39:12<00:00, 78.47s/it]   
