In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, box


In [3]:
# Usar el mismo "idioma espacial"
CRS = "EPSG:4326"
# Elegir el tamaño de la grilla
GRID_SIZE = 0.5
# Dónde construir la grilla
noaa = gpd.read_file("../data/GeoDataFrame/gdf_microplastics.gpkg").to_crs(CRS)


In [4]:
minx, miny, maxx, maxy = noaa.total_bounds


In [5]:
buffer = 1.0
minx -= buffer
miny -= buffer
maxx += buffer
maxy += buffer


In [6]:
xs = np.arange(minx, maxx, GRID_SIZE)
ys = np.arange(miny, maxy, GRID_SIZE)


In [7]:
polygons = []
grid_ids = []

for i, x in enumerate(xs):
    for j, y in enumerate(ys):
        polygons.append(box(x, y, x + GRID_SIZE, y + GRID_SIZE))
        grid_ids.append(f"cell_{i}_{j}")


In [8]:
grid = gpd.GeoDataFrame(
    {"grid_id": grid_ids},
    geometry=polygons,
    crs=CRS
)


In [9]:
grid["lon_center"] = grid.geometry.centroid.x
grid["lat_center"] = grid.geometry.centroid.y



  grid["lon_center"] = grid.geometry.centroid.x

  grid["lat_center"] = grid.geometry.centroid.y


In [10]:
noaa_joined = gpd.sjoin(
    noaa,
    grid,
    how="left",
    predicate="within"
)


In [None]:
noaa_features = (
    noaa_joined
    .groupby("grid_id")
    .agg(
        mp_mean_concentration=("microplastics_measurement", "mean"),
        mp_max_concentration=("microplastics_measurement", "max"),
        mp_count=("microplastics_measurement", "count")
        #mp_types_count=("plastic_type", "nunique")
    )
    .reset_index()
)


In [13]:
grid = grid.merge(noaa_features, on="grid_id", how="left")


In [29]:
iucn = gpd.read_file("../data/GeoDataFrame/gdf_species.gpkg").to_crs(CRS)


In [30]:
iucn_joined = gpd.overlay(iucn, grid, how="intersection")


In [31]:
iucn_joined.columns


Index(['sci_name', 'presence', 'origin', 'seasonal', 'lon', 'lat',
       'redlistCategory', 'grid_id', 'lon_center', 'lat_center',
       'mp_mean_concentration', 'mp_max_concentration', 'mp_count',
       'geometry'],
      dtype='object')

In [32]:
status_map = {"CR":4, "EN":3, "VU":2, "NT":1}
iucn_joined["vuln_score"] = iucn_joined["redlistCategory"].map(status_map)


In [34]:
iucn_features = (
    iucn_joined
    .groupby("grid_id")
    .agg(
        iucn_species_count=("sci_name", "nunique"),
        iucn_vulnerability_index=("vuln_score", "mean")
    )
    .reset_index()
)


In [35]:
grid = grid.merge(iucn_features, on="grid_id", how="left")


In [38]:
ecotaxa = pd.read_csv("../data/metadata_img.csv")
ecotaxa["geometry"] = gpd.points_from_xy(ecotaxa.object_lon, ecotaxa.object_lat)
ecotaxa = gpd.GeoDataFrame(ecotaxa, crs=CRS)


  ecotaxa = pd.read_csv("../data/metadata_img.csv")


In [39]:
ecotaxa_joined = gpd.sjoin(ecotaxa, grid, predicate="within")


In [41]:
ecotaxa_features = (
    ecotaxa_joined
    .groupby("grid_id")
    .agg(
        mean_particle_size=("object_area","mean"),
        small_particle_ratio=("object_area", lambda x: (x<100).mean())
    )
    .reset_index()
)


In [42]:
grid = grid.merge(ecotaxa_features, on="grid_id", how="left")


In [43]:
grid

Unnamed: 0,grid_id,geometry,lon_center,lat_center,mp_mean_concentration,mp_max_concentration,mp_count,iucn_species_count,iucn_vulnerability_index,mean_particle_size,small_particle_ratio
0,cell_0_0,"POLYGON ((-180.4942 -72.69904, -180.4942 -72.1...",-180.7442,-72.44904,,,,,,,
1,cell_0_1,"POLYGON ((-180.4942 -72.19904, -180.4942 -71.6...",-180.7442,-71.94904,,,,,,,
2,cell_0_2,"POLYGON ((-180.4942 -71.69904, -180.4942 -71.1...",-180.7442,-71.44904,,,,,,,
3,cell_0_3,"POLYGON ((-180.4942 -71.19904, -180.4942 -70.6...",-180.7442,-70.94904,,,,,,,
4,cell_0_4,"POLYGON ((-180.4942 -70.69904, -180.4942 -70.1...",-180.7442,-70.44904,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
236743,cell_723_322,"POLYGON ((181.0058 88.30096, 181.0058 88.80096...",180.7558,88.55096,,,,,,,
236744,cell_723_323,"POLYGON ((181.0058 88.80096, 181.0058 89.30096...",180.7558,89.05096,,,,,,,
236745,cell_723_324,"POLYGON ((181.0058 89.30096, 181.0058 89.80096...",180.7558,89.55096,,,,,,,
236746,cell_723_325,"POLYGON ((181.0058 89.80096, 181.0058 90.30096...",180.7558,90.05096,,,,,,,


In [44]:
grid.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 236748 entries, 0 to 236747
Data columns (total 11 columns):
 #   Column                    Non-Null Count   Dtype   
---  ------                    --------------   -----   
 0   grid_id                   236748 non-null  object  
 1   geometry                  236748 non-null  geometry
 2   lon_center                236748 non-null  float64 
 3   lat_center                236748 non-null  float64 
 4   mp_mean_concentration     5454 non-null    float64 
 5   mp_max_concentration      5454 non-null    float64 
 6   mp_count                  5652 non-null    float64 
 7   iucn_species_count        5755 non-null    float64 
 8   iucn_vulnerability_index  5755 non-null    float64 
 9   mean_particle_size        74 non-null      float64 
 10  small_particle_ratio      74 non-null      float64 
dtypes: float64(9), geometry(1), object(1)
memory usage: 19.9+ MB
