In [5]:
import geopandas as gpd
import xarray as xr
import pandas as pd
import numpy as np

from scipy.spatial import cKDTree
from shapely.geometry import Point




In [6]:
# Load the species records, then express them in EPSG:4326 (lon/lat).
gdf_occ = gpd.read_file("../data/GeoDataFrame/gdf_species.gpkg")
gdf_occ = gdf_occ.to_crs(4326)


In [7]:
# Accessible area "M": buffer every record by 200 000 projected units and
# dissolve the buffers into one geometry, then return to EPSG:4326.
# NOTE(review): EPSG:3857 (Web Mercator) units stretch away from the equator,
# so the buffer is narrower than 200 km on the ground at high latitudes —
# confirm this is acceptable for the study area.
gdf_m = gdf_occ.to_crs(3857)
# `union_all()` is the supported replacement for the deprecated
# `unary_union` property (which emits a DeprecationWarning).
area_M = gdf_m.buffer(200_000).union_all()  # 200 km
area_M = gpd.GeoSeries([area_M], crs=3857).to_crs(4326)


  area_M = gdf_m.buffer(200_000).unary_union  # 200 km


In [8]:
# Regular 0.25-degree lon/lat lattice covering the whole globe.
lon = np.arange(-180, 180, 0.25)
lat = np.arange(-90, 90, 0.25)

# indexing="ij" makes longitude the slowest-varying axis, which reproduces the
# row order of a nested `for x in lon: for y in lat` loop, so the integer
# index of every node stays the same.
lon_mesh, lat_mesh = np.meshgrid(lon, lat, indexing="ij")

# Vectorised point construction: avoids creating ~1M shapely objects one by
# one in a Python-level loop.
grid_points = gpd.points_from_xy(lon_mesh.ravel(), lat_mesh.ravel())

grid = gpd.GeoDataFrame(
    geometry=grid_points,
    crs=4326
)


In [9]:
# Keep only the lattice nodes that intersect the buffered area M. The join
# adds an "index_right" bookkeeping column, which is removed straight away.
grid_M = gpd.sjoin(
    grid, area_M.to_frame("geometry"), how="inner", predicate="intersects"
).drop(columns=["index_right"])


In [10]:
# Number of lattice nodes retained inside area M.
grid_M.shape[0]


232368

In [11]:
# (n_nodes, 2) array of the retained nodes, column 0 = x (lon), column 1 = y (lat).
# It is read as a notebook-level global by `sample_nc_to_points`.
coords = np.column_stack(
    (grid_M.geometry.x.values, grid_M.geometry.y.values)
)


In [12]:
def sample_nc_to_points(nc_path, var_name, depth_slice=None, points=None):
    """Sample a NetCDF variable at point locations using the nearest grid node.

    The variable is optionally averaged over a depth range, then averaged over
    every remaining non-spatial dimension, and finally looked up at each query
    point through a KD-tree built on the file's lon/lat nodes. Distances are
    planar distances in the file's coordinate units, not great-circle
    distances.

    Parameters
    ----------
    nc_path : str or path-like
        NetCDF file to open with xarray.
    var_name : str
        Name of the variable inside the dataset.
    depth_slice : tuple, optional
        ``(start, stop)`` bounds used to slice the ``"depth"`` coordinate
        before averaging over it. Ignored when the variable has no
        ``"depth"`` dimension.
    points : array-like of shape (n, 2), optional
        Query coordinates as ``(lon, lat)`` pairs. When omitted, the
        notebook-level ``coords`` array is used (original behaviour).

    Returns
    -------
    numpy.ndarray
        One value per query point, taken as stored at the nearest node (a
        point whose nearest node holds NaN receives NaN).

    Raises
    ------
    ValueError
        If the variable has no recognisable latitude/longitude dimension, or
        if ``points`` is not an ``(n, 2)`` array.
    """
    if points is None:
        # Backward-compatible default: fall back to the notebook global.
        points = coords
    points = np.asarray(points, dtype=float)
    if points.ndim != 2 or points.shape[1] != 2:
        raise ValueError(
            f"points must have shape (n, 2), got {points.shape}"
        )

    # The context manager releases the file handle even if sampling fails;
    # everything that touches the data must therefore happen inside it.
    with xr.open_dataset(nc_path, decode_times=False) as ds:
        da = ds[var_name]

        # Depth: average over the requested depth range.
        if depth_slice and "depth" in da.dims:
            da = da.sel(depth=slice(*depth_slice)).mean("depth")

        # Average away every extra (non-spatial) dimension.
        extra_dims = [
            d for d in da.dims
            if d.lower() not in ["lat", "latitude", "lon", "longitude"]
        ]
        if extra_dims:
            da = da.mean(extra_dims)

        # Detect the spatial dimensions.
        lat_name = lon_name = None
        for d in da.dims:
            if d.lower() in ["lat", "latitude"]:
                lat_name = d
            if d.lower() in ["lon", "longitude"]:
                lon_name = d

        if lat_name is None or lon_name is None:
            raise ValueError(f"No lat/lon en {nc_path}")

        # The meshgrid below is laid out as (lat, lon); force the data into
        # the same order so flattened values line up with flattened nodes
        # even when the file stores the variable as (lon, lat).
        da = da.transpose(lat_name, lon_name)

        lats = da[lat_name].values
        lons = da[lon_name].values
        values = da.values.ravel()

    lon_grid, lat_grid = np.meshgrid(lons, lats)

    # Files on a 0-360 longitude axis would otherwise map every negative
    # query longitude onto the edge of the grid; wrap queries to match.
    query = points
    if lons.size and np.nanmax(lons) > 180:
        query = points.copy()
        query[:, 0] = np.mod(query[:, 0], 360)

    tree = cKDTree(
        np.column_stack([lon_grid.ravel(), lat_grid.ravel()])
    )

    _, idx = tree.query(query, k=1)
    return values[idx]


In [13]:
# One entry per environmental layer:
# (target column, NetCDF file, variable name, depth slice).
# A depth slice of None samples the layer without depth averaging.
ocean_layers = [
    ("temp", "../data/ocean/temperature_celsius.nc", "t_an", (0, 200)),
    ("salinity", "../data/ocean/salinity.nc", "s_an", (0, 200)),
    ("oxygen", "../data/ocean/oxygen_dissolved.nc", "o_an", (0, 200)),
    ("oxygen_util", "../data/ocean/oxygen_utilization.nc", "A_an", (0, 200)),
    ("nitrate", "../data/ocean/nitrate.nc", "n_an", (0, 200)),
    ("phosphate", "../data/ocean/phosphate.nc", "p_an", (0, 200)),
    ("chl_trend", "../data/ocean/chlorophyll.nc", "trend", None),
]

# Columns are added in list order, one nearest-node sample per grid point.
for column, layer_path, layer_var, depth_range in ocean_layers:
    grid_M[column] = sample_nc_to_points(
        layer_path, layer_var, depth_slice=depth_range
    )


In [14]:
# Attach to every grid node the measurement of its nearest microplastics
# record; the distance to that record is stored in "dist_micro".
# NOTE(review): both layers are in EPSG:4326, so "nearest" and "dist_micro"
# are computed in planar degrees — confirm this is acceptable, especially
# for nodes close to the antimeridian.
micro = gpd.read_file("../data/GeoDataFrame/gdf_microplastics.gpkg")
micro = micro.to_crs(4326)

grid_M = gpd.sjoin_nearest(
    grid_M, micro[["geometry", "microplastics_measurement"]],
    how="left", distance_col="dist_micro",
)





In [17]:
# Remove the "index_*" bookkeeping columns left behind by spatial joins.
cols_to_drop = [
    name
    for name in grid_M.columns
    if name.startswith("index_")
]
grid_M = grid_M.drop(columns=cols_to_drop)


In [18]:
# Attach to every grid node the name and Red List category of its nearest
# species record; the distance to that record is stored in "dist_species".
# NOTE(review): the join runs in EPSG:4326, so distances are planar degrees —
# confirm this is acceptable.
species = gpd.read_file("../data/GeoDataFrame/gdf_species.gpkg")
species = species.to_crs(4326)

grid_M = gpd.sjoin_nearest(
    grid_M, species[["geometry", "sci_name", "redlistCategory"]],
    how="left", distance_col="dist_species",
)




In [19]:
# Columns that must be non-missing for a grid node to be kept.
features = [
    "temp",
    "salinity",
    "oxygen",
    "nitrate",
    "phosphate",
    "chl_trend",
    "microplastics_measurement",
    "sci_name",
    "redlistCategory",
]

# Complete-case table: drop every row with a missing value in any of them.
grid_ml = grid_M.dropna(subset=features)

# Number of rows that survive the filter.
len(grid_ml)


155755

In [20]:
# Display the complete-case table (rows of grid_M with no missing value in `features`).
grid_ml

Unnamed: 0,geometry,temp,salinity,oxygen,oxygen_util,nitrate,phosphate,chl_trend,microplastics_measurement,dist_micro,index_right,sci_name,redlistCategory,dist_species
882,POINT (-179.75 -49.5),8.285769,34.254517,285.344635,4.633673,13.224897,0.980094,1.746829,0.000000,25.025257,65841,Paragorgia arborea,NT,5.055860
887,POINT (-179.75 -48.25),8.720351,34.290123,281.873505,5.563910,12.121664,0.920304,1.809104,0.000000,26.046435,65841,Paragorgia arborea,NT,3.807236
894,POINT (-179.75 -46.5),10.074621,34.428631,272.665710,7.119972,9.487034,0.752345,2.863594,0.003141,27.218880,65841,Paragorgia arborea,NT,2.061966
908,POINT (-179.75 -43),12.744889,34.855434,254.801270,8.345373,5.953496,0.479302,1.250139,0.003141,23.724646,67862,Paragorgia arborea,NT,1.443087
909,POINT (-179.75 -42.75),13.654454,35.019848,249.214890,8.433300,5.189509,0.422229,1.124156,0.003141,23.475124,67862,Paragorgia arborea,NT,1.686713
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1036278,POINT (179.75 -40.5),15.218589,35.282078,239.940720,8.810746,4.693318,0.361250,0.510360,89.000000,1.405297,66814,Paragorgia arborea,NT,1.560236
1036284,POINT (179.75 -39),15.816326,35.369164,238.424316,8.015304,4.656509,0.355681,0.491933,89.000000,1.273601,66814,Paragorgia arborea,NT,1.592745
1036359,POINT (179.75 -20.25),24.510958,35.524223,202.673264,7.069147,0.559547,0.163544,0.859720,0.052411,1.773571,3285,Melanesobasis maculosa,NT,2.532011
1036360,POINT (179.75 -20),24.510958,35.524223,202.673264,7.069147,0.559547,0.163544,1.123910,0.052411,1.524075,3285,Melanesobasis maculosa,NT,2.321493


In [21]:
# Print the column dtypes and non-null counts of the complete-case table.
grid_ml.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 155755 entries, 882 to 1036361
Data columns (total 14 columns):
 #   Column                     Non-Null Count   Dtype   
---  ------                     --------------   -----   
 0   geometry                   155755 non-null  geometry
 1   temp                       155755 non-null  float32 
 2   salinity                   155755 non-null  float32 
 3   oxygen                     155755 non-null  float32 
 4   oxygen_util                155755 non-null  float32 
 5   nitrate                    155755 non-null  float32 
 6   phosphate                  155755 non-null  float32 
 7   chl_trend                  155755 non-null  float32 
 8   microplastics_measurement  155755 non-null  float64 
 9   dist_micro                 155755 non-null  float64 
 10  index_right                155755 non-null  int64   
 11  sci_name                   155755 non-null  object  
 12  redlistCategory            155755 non-null  object  
 13  dist_spe