In [16]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, box


In [17]:
# Usar el mismo "idioma espacial"
CRS = "EPSG:4326"
# Elegir el tamaño de la grilla
GRID_SIZE = 0.5
# Dónde construir la grilla
noaa = gpd.read_file("../data/GeoDataFrame/gdf_microplastics.gpkg").to_crs(CRS)


In [18]:
minx, miny, maxx, maxy = noaa.total_bounds


In [19]:
buffer = 1.0
minx -= buffer
miny -= buffer
maxx += buffer
maxy += buffer


In [20]:
xs = np.arange(minx, maxx, GRID_SIZE)
ys = np.arange(miny, maxy, GRID_SIZE)


In [21]:
polygons = []
grid_ids = []

for i, x in enumerate(xs):
    for j, y in enumerate(ys):
        polygons.append(box(x, y, x + GRID_SIZE, y + GRID_SIZE))
        grid_ids.append(f"cell_{i}_{j}")


In [22]:
grid = gpd.GeoDataFrame(
    {"grid_id": grid_ids},
    geometry=polygons,
    crs=CRS
)


In [23]:
grid["lon_center"] = grid.geometry.centroid.x
grid["lat_center"] = grid.geometry.centroid.y



  grid["lon_center"] = grid.geometry.centroid.x

  grid["lat_center"] = grid.geometry.centroid.y


In [24]:
noaa_joined = gpd.sjoin(
    noaa,
    grid,
    how="left",
    predicate="within"
)


In [25]:
noaa_features = (
    noaa_joined
    .groupby("grid_id")
    .agg(
        mp_mean_concentration=("microplastics_measurement", "mean"),
        mp_max_concentration=("microplastics_measurement", "max"),
        mp_count=("microplastics_measurement", "count")
        #mp_types_count=("plastic_type", "nunique")
    )
    .reset_index()
)


In [26]:
grid = grid.merge(noaa_features, on="grid_id", how="left")


In [27]:
iucn = gpd.read_file("../data/GeoDataFrame/gdf_species.gpkg").to_crs(CRS)


In [28]:
iucn_joined = gpd.overlay(iucn, grid, how="intersection")


In [29]:
iucn_joined.columns


Index(['sci_name', 'presence', 'origin', 'seasonal', 'lon', 'lat',
       'redlistCategory', 'grid_id', 'lon_center', 'lat_center',
       'mp_mean_concentration', 'mp_max_concentration', 'mp_count',
       'geometry'],
      dtype='object')

In [30]:
status_map = {"CR":4, "EN":3, "VU":2, "NT":1}
iucn_joined["vuln_score"] = iucn_joined["redlistCategory"].map(status_map)


In [31]:
iucn_features = (
    iucn_joined
    .groupby("grid_id")
    .agg(
        iucn_species_count=("sci_name", "nunique"),
        iucn_vulnerability_index=("vuln_score", "mean")
    )
    .reset_index()
)


In [32]:
grid = grid.merge(iucn_features, on="grid_id", how="left")


In [33]:
ecotaxa = pd.read_csv("../data/metadata_img.csv")
ecotaxa["geometry"] = gpd.points_from_xy(ecotaxa.object_lon, ecotaxa.object_lat)
ecotaxa = gpd.GeoDataFrame(ecotaxa, crs=CRS)


  ecotaxa = pd.read_csv("../data/metadata_img.csv")


In [34]:
ecotaxa_joined = gpd.sjoin(ecotaxa, grid, predicate="within")


In [35]:
ecotaxa_features = (
    ecotaxa_joined
    .groupby("grid_id")
    .agg(
        mean_particle_size=("object_area","mean"),
        small_particle_ratio=("object_area", lambda x: (x<100).mean())
    )
    .reset_index()
)


In [36]:
grid = grid.merge(ecotaxa_features, on="grid_id", how="left")


In [37]:
grid

Unnamed: 0,grid_id,geometry,lon_center,lat_center,mp_mean_concentration,mp_max_concentration,mp_count,iucn_species_count,iucn_vulnerability_index,mean_particle_size,small_particle_ratio
0,cell_0_0,"POLYGON ((-180.4942 -72.69904, -180.4942 -72.1...",-180.7442,-72.44904,,,,,,,
1,cell_0_1,"POLYGON ((-180.4942 -72.19904, -180.4942 -71.6...",-180.7442,-71.94904,,,,,,,
2,cell_0_2,"POLYGON ((-180.4942 -71.69904, -180.4942 -71.1...",-180.7442,-71.44904,,,,,,,
3,cell_0_3,"POLYGON ((-180.4942 -71.19904, -180.4942 -70.6...",-180.7442,-70.94904,,,,,,,
4,cell_0_4,"POLYGON ((-180.4942 -70.69904, -180.4942 -70.1...",-180.7442,-70.44904,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
236743,cell_723_322,"POLYGON ((181.0058 88.30096, 181.0058 88.80096...",180.7558,88.55096,,,,,,,
236744,cell_723_323,"POLYGON ((181.0058 88.80096, 181.0058 89.30096...",180.7558,89.05096,,,,,,,
236745,cell_723_324,"POLYGON ((181.0058 89.30096, 181.0058 89.80096...",180.7558,89.55096,,,,,,,
236746,cell_723_325,"POLYGON ((181.0058 89.80096, 181.0058 90.30096...",180.7558,90.05096,,,,,,,


In [38]:
grid.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 236748 entries, 0 to 236747
Data columns (total 11 columns):
 #   Column                    Non-Null Count   Dtype   
---  ------                    --------------   -----   
 0   grid_id                   236748 non-null  object  
 1   geometry                  236748 non-null  geometry
 2   lon_center                236748 non-null  float64 
 3   lat_center                236748 non-null  float64 
 4   mp_mean_concentration     5454 non-null    float64 
 5   mp_max_concentration      5454 non-null    float64 
 6   mp_count                  5652 non-null    float64 
 7   iucn_species_count        5755 non-null    float64 
 8   iucn_vulnerability_index  5755 non-null    float64 
 9   mean_particle_size        74 non-null      float64 
 10  small_particle_ratio      74 non-null      float64 
dtypes: float64(9), geometry(1), object(1)
memory usage: 19.9+ MB


# Intento 2

In [39]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import box


In [40]:
# pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.options.display.float_format = '{:.2f}'.format

In [41]:
# EXPOSICIÓN A MICROPLÁSTICOS

noaa = gpd.read_file("../data/GeoDataFrame/gdf_microplastics.gpkg")
noaa = noaa.to_crs("EPSG:4326")
noaa

Unnamed: 0,microplastics_measurement,unit,concentration_class_range,mesh_size_mm,lat,lon,geometry
0,0.00,pieces/m3,0-0.0005,0.34,45.28,-60.29,POINT (-60.29 45.28)
1,0.00,pieces/m3,0.0005-0.005,0.34,40.93,-70.65,POINT (-70.65 40.93)
2,0.00,pieces/m3,0.0005-0.005,0.34,40.93,-70.65,POINT (-70.65 40.93)
3,0.00,pieces/m3,0-0.0005,0.34,40.30,-69.77,POINT (-69.77 40.3)
4,0.00,pieces/m3,0-0.0005,0.34,39.88,-67.15,POINT (-67.15 39.88)
...,...,...,...,...,...,...,...
19317,188.30,pieces kg-1 d.w.,150-200,0.00,-5.93,39.36,POINT (39.36 -5.93333)
19318,155.27,pieces kg-1 d.w.,150-200,0.00,-5.50,39.12,POINT (39.12 -5.5)
19319,58.07,pieces kg-1 d.w.,20-150,0.00,-6.45,39.47,POINT (39.46667 -6.45)
19320,210.00,pieces kg-1 d.w.,>200,0.00,-6.32,39.21,POINT (39.21 -6.32)


In [42]:
noaa['unit'].value_counts()

unit
pieces/m3           13726
pieces/10 mins       5125
pieces kg-1 d.w.      471
Name: count, dtype: int64

In [43]:
noaa_water = noaa[noaa['unit'] == 'pieces/m3']
noaa_sediment = noaa[noaa['unit'] == 'pieces kg-1 d.w.']
noaa_time = noaa[noaa['unit'] == 'pieces/10 mins']


In [44]:
# CREAR GRILLA (La grilla se define por el dataset que mide la EXPOSICIÓN.)

GRID_SIZE = 0.5  # grados
minx, miny, maxx, maxy = noaa.total_bounds

xs = np.arange(minx, maxx, GRID_SIZE)
ys = np.arange(miny, maxy, GRID_SIZE)

cells = []
ids = []

for i, x in enumerate(xs):
    for j, y in enumerate(ys):
        cells.append(box(x, y, x+GRID_SIZE, y+GRID_SIZE))
        ids.append(f"cell_{i}_{j}")

grid = gpd.GeoDataFrame(
    {"grid_id": ids},
    geometry=cells,
    crs="EPSG:4326"
)
grid


Unnamed: 0,grid_id,geometry
0,cell_0_0,"POLYGON ((-179.4942 -71.69904, -179.4942 -71.1..."
1,cell_0_1,"POLYGON ((-179.4942 -71.19904, -179.4942 -70.6..."
2,cell_0_2,"POLYGON ((-179.4942 -70.69904, -179.4942 -70.1..."
3,cell_0_3,"POLYGON ((-179.4942 -70.19904, -179.4942 -69.6..."
4,cell_0_4,"POLYGON ((-179.4942 -69.69904, -179.4942 -69.1..."
...,...,...
232555,cell_719_318,"POLYGON ((180.0058 87.30096, 180.0058 87.80096..."
232556,cell_719_319,"POLYGON ((180.0058 87.80096, 180.0058 88.30096..."
232557,cell_719_320,"POLYGON ((180.0058 88.30096, 180.0058 88.80096..."
232558,cell_719_321,"POLYGON ((180.0058 88.80096, 180.0058 89.30096..."


In [45]:
grid

Unnamed: 0,grid_id,geometry
0,cell_0_0,"POLYGON ((-179.4942 -71.69904, -179.4942 -71.1..."
1,cell_0_1,"POLYGON ((-179.4942 -71.19904, -179.4942 -70.6..."
2,cell_0_2,"POLYGON ((-179.4942 -70.69904, -179.4942 -70.1..."
3,cell_0_3,"POLYGON ((-179.4942 -70.19904, -179.4942 -69.6..."
4,cell_0_4,"POLYGON ((-179.4942 -69.69904, -179.4942 -69.1..."
...,...,...
232555,cell_719_318,"POLYGON ((180.0058 87.30096, 180.0058 87.80096..."
232556,cell_719_319,"POLYGON ((180.0058 87.80096, 180.0058 88.30096..."
232557,cell_719_320,"POLYGON ((180.0058 88.30096, 180.0058 88.80096..."
232558,cell_719_321,"POLYGON ((180.0058 88.80096, 180.0058 89.30096..."


In [46]:
# ASIGNAR PUNTOS DE NOAA A CELDAS DE GRID

noaa_in_grid = gpd.sjoin(noaa_water, grid, predicate="within")
noaa_in_grid

Unnamed: 0,microplastics_measurement,unit,concentration_class_range,mesh_size_mm,lat,lon,geometry,index_right,grid_id
0,0.00,pieces/m3,0-0.0005,0.34,45.28,-60.29,POINT (-60.29 45.28),77430,cell_239_233
1,0.00,pieces/m3,0.0005-0.005,0.34,40.93,-70.65,POINT (-70.65 40.93),70639,cell_218_225
2,0.00,pieces/m3,0.0005-0.005,0.34,40.93,-70.65,POINT (-70.65 40.93),70639,cell_218_225
3,0.00,pieces/m3,0-0.0005,0.34,40.30,-69.77,POINT (-69.77 40.3),71283,cell_220_223
4,0.00,pieces/m3,0-0.0005,0.34,39.88,-67.15,POINT (-67.15 39.88),72898,cell_225_223
...,...,...,...,...,...,...,...,...,...
19300,0.01,pieces/m3,0.005-1,0.00,-18.26,178.07,POINT (178.0695 -18.2603),231374,cell_716_106
19301,0.01,pieces/m3,0.005-1,0.00,-17.80,178.72,POINT (178.7219 -17.8049),231698,cell_717_107
19302,0.00,pieces/m3,0-0.0005,0.00,-17.80,178.72,POINT (178.721 -17.8049),231698,cell_717_107
19303,0.00,pieces/m3,0.0005-0.005,0.00,-17.80,178.73,POINT (178.729 -17.8049),231698,cell_717_107


In [47]:
# AGREGAR POR CELDA

noaa_features = (
    noaa_in_grid
    .groupby("grid_id")
    .agg(
        mp_pieces_m3=("microplastics_measurement", "max")
    )
    .reset_index()
)

noaa_features


Unnamed: 0,grid_id,mp_pieces_m3
0,cell_0_184,0.44
1,cell_0_211,1.52
2,cell_0_212,0.15
3,cell_0_213,0.91
4,cell_0_214,0.65
...,...,...
5402,cell_99_222,0.25
5403,cell_99_223,0.94
5404,cell_99_224,0.24
5405,cell_99_227,0.58


In [48]:
grid = grid.merge(noaa_features, on="grid_id", how="left")
grid

Unnamed: 0,grid_id,geometry,mp_pieces_m3
0,cell_0_0,"POLYGON ((-179.4942 -71.69904, -179.4942 -71.1...",
1,cell_0_1,"POLYGON ((-179.4942 -71.19904, -179.4942 -70.6...",
2,cell_0_2,"POLYGON ((-179.4942 -70.69904, -179.4942 -70.1...",
3,cell_0_3,"POLYGON ((-179.4942 -70.19904, -179.4942 -69.6...",
4,cell_0_4,"POLYGON ((-179.4942 -69.69904, -179.4942 -69.1...",
...,...,...,...
232555,cell_719_318,"POLYGON ((180.0058 87.30096, 180.0058 87.80096...",
232556,cell_719_319,"POLYGON ((180.0058 87.80096, 180.0058 88.30096...",
232557,cell_719_320,"POLYGON ((180.0058 88.30096, 180.0058 88.80096...",
232558,cell_719_321,"POLYGON ((180.0058 88.80096, 180.0058 89.30096...",


In [49]:

tomex = pd.read_csv("../data/Raw/ToMEx_sp_ml.csv")
tomex

Unnamed: 0,DOI,Authors,Year,Species,Organism Group,Environment,Life Stage,In vitro/in vivo,Sex,Estimated Body Length (cm),Estimated Maximum Ingestible Size (mm),Experiment Type,Exposure Route,Particle Mix?,Negative Control,Reference Particle,Exposure Media,Solvent,Detergent,pH,Salinity (ppt),Temperature (Avg),Temperature (Min),Temperature (Max),Exposure Duration (days),Recovery (Days),Acute/Chronic,Number of Doses,Replicates,Sample Size,Dosing Frequency,Chemicals Added,Added Chemical Dose (nominal),Added Chemical Dose (measured),Plotted Dose Values,Unaligned Dose Values,Dose Metric,Alignment,Effect,Direction,Broad Endpoint Category,Specific Endpoint Category,Endpoint,Level of Biological Organization,Target Cell or Tissue,Effect Metric,Polymer,Shape,Density (g/cm^3),"Density, reported or estimated",Charge,Zeta Potential (mV),Zeta Potential Media,Functional Group,Particle Length (μm),Particle Width (μm),Size Category,Particle Surface Area (μm^2),Particle Volume (μm^3),Particle Mass (mg),Weathered or Biofouled?,Size Validated?,Polymer Validated?,Shape Validated,Particle Source,Sodium Azide Present?,Screened for Chemical Contamination?,Particle Cleaning?,Solvent Rinse,Background Contamination Monitored?,Concentration Validated?,Particle Behavior,Uptake Validated?,Uptake Validation Method,Tissue Distribution,Organisms Fed?
0,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,adult,In Vivo,Not Reported,0.50,0.34,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.00,,,21.00,,Chronic,2,8.00,1,2.00,Not Reported,0,,316.04,316.04,Particles/mL,Unaligned,Yes,increase,Fitness,Mortality,Mortality,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.00,2.00,1µm < 100µm,12.57,4.19,0.00,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
1,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,adult,In Vivo,Not Reported,0.50,0.34,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.00,,,21.00,,Chronic,2,8.00,1,2.00,Not Reported,0,,2523.74,2523.74,Particles/mL,Unaligned,Yes,increase,Fitness,Mortality,Mortality,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.00,2.00,1µm < 100µm,12.57,4.19,0.00,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
2,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,adult,In Vivo,Not Reported,0.50,0.34,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.00,,,21.00,,Chronic,2,8.00,1,2.00,Not Reported,0,,316.04,316.04,Particles/mL,Unaligned,No,Not Reported,Fitness,Reproduction,Number of Offspring,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.00,2.00,1µm < 100µm,12.57,4.19,0.00,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
3,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,adult,In Vivo,Not Reported,0.50,0.34,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.00,,,21.00,,Chronic,2,8.00,1,2.00,Not Reported,0,,2523.74,2523.74,Particles/mL,Unaligned,No,Not Reported,Fitness,Reproduction,Number of Offspring,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.00,2.00,1µm < 100µm,12.57,4.19,0.00,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
4,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,early,In Vivo,Not Reported,0.08,0.06,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.00,,,21.00,,Chronic,2,8.00,1,2.00,Not Reported,0,,316.04,316.04,Particles/mL,Unaligned,Yes,increase,Fitness,Mortality,Mortality,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.00,2.00,1µm < 100µm,12.57,4.19,0.00,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3898,10.1016/j.fsi.2021.04.014,Zwollo,2021,Oncorhynchus mykiss,Fish,Freshwater,Not Reported,In Vitro,Not Reported,,,Particle Only,water,No,Yes,No,artificial_medium,DMSO,Not Reported,,,18.00,,,3.00,,Acute,5,1.00,10^7 cells/mL,0.00,Not Reported,0,,4.05,4.05,Particles/mL,Unaligned,Yes,decrease,Immune,Immune Cells,B cell abundance (immature/mature),Cell,B cell,LOEC,Polystyrene,Sphere,1.05,reported,,,,,16.50,16.50,1µm < 100µm,855.30,2352.07,0.00,No,Yes,No,No,commercial,Yes,No,not_cleaned,none,No,No,none,Yes,microscopy,anterior kidney,No
3899,10.1016/j.fsi.2021.04.014,Zwollo,2021,Oncorhynchus mykiss,Fish,Freshwater,Not Reported,In Vitro,Not Reported,,,Particle Only,water,No,Yes,No,artificial_medium,DMSO,Not Reported,,,18.00,,,3.00,,Acute,5,1.00,10^7 cells/mL,0.00,Not Reported,0,,40.49,40.49,Particles/mL,Unaligned,Yes,decrease,Immune,Immune Cells,B cell abundance (immature/mature),Cell,B cell,,Polystyrene,Sphere,1.05,reported,,,,,16.50,16.50,1µm < 100µm,855.30,2352.07,0.00,No,Yes,No,No,commercial,Yes,No,not_cleaned,none,No,No,none,Yes,microscopy,anterior kidney,No
3900,10.1016/j.fsi.2021.04.014,Zwollo,2021,Oncorhynchus mykiss,Fish,Freshwater,Not Reported,In Vitro,Not Reported,,,Particle Only,water,No,Yes,No,artificial_medium,DMSO,Not Reported,,,18.00,,,3.00,,Acute,5,1.00,10^7 cells/mL,0.00,Not Reported,0,,404.91,404.91,Particles/mL,Unaligned,Yes,decrease,Immune,Immune Cells,B cell abundance (immature/mature),Cell,B cell,,Polystyrene,Sphere,1.05,reported,,,,,16.50,16.50,1µm < 100µm,855.30,2352.07,0.00,No,Yes,No,No,commercial,Yes,No,not_cleaned,none,No,No,none,Yes,microscopy,anterior kidney,No
3901,10.1016/j.fsi.2021.04.014,Zwollo,2021,Oncorhynchus mykiss,Fish,Freshwater,Not Reported,In Vitro,Not Reported,,,Particle Only,water,No,Yes,No,artificial_medium,DMSO,Not Reported,,,18.00,,,3.00,,Acute,5,1.00,10^7 cells/mL,0.00,Not Reported,0,,4049.12,4049.12,Particles/mL,Unaligned,Yes,decrease,Immune,Immune Cells,B cell abundance (immature/mature),Cell,B cell,,Polystyrene,Sphere,1.05,reported,,,,,16.50,16.50,1µm < 100µm,855.30,2352.07,0.00,No,Yes,No,No,commercial,Yes,No,not_cleaned,none,No,No,none,Yes,microscopy,anterior kidney,No


In [50]:
eco_effect = tomex[
    tomex["Effect"] == "Yes"
]


In [51]:
eco_tox_threshold = eco_effect["Unaligned Dose Values"].median()
eco_tox_threshold

np.float64(62.5)

In [52]:
# Conversion unidades de NOAA
grid["mp_particles_ml"] = grid["mp_pieces_m3"] / 1_000_000


In [53]:
log_eco_tox_threshold = np.log10(eco_tox_threshold)
log_eco_tox_threshold

np.float64(1.7958800173440752)

In [54]:
import numpy as np

grid["log_mp_particles_ml"] = np.log10(
    grid["mp_particles_ml"] + 1e-12
)


In [55]:
grid["log_toxic_pressure"] = (
    grid["log_mp_particles_ml"] - log_eco_tox_threshold
)


In [56]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

grid["toxic_pressure_scaled"] = scaler.fit_transform(
    grid[["log_toxic_pressure"]]
)


In [57]:
def pressure_class(x):
    if x > -3:
        return "high"
    elif x > -6:
        return "medium"
    else:
        return "low"

grid["toxic_pressure_class"] = grid["log_toxic_pressure"].apply(pressure_class)
grid.head()

Unnamed: 0,grid_id,geometry,mp_pieces_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure_scaled,toxic_pressure_class
0,cell_0_0,"POLYGON ((-179.4942 -71.69904, -179.4942 -71.1...",,,,,,low
1,cell_0_1,"POLYGON ((-179.4942 -71.19904, -179.4942 -70.6...",,,,,,low
2,cell_0_2,"POLYGON ((-179.4942 -70.69904, -179.4942 -70.1...",,,,,,low
3,cell_0_3,"POLYGON ((-179.4942 -70.19904, -179.4942 -69.6...",,,,,,low
4,cell_0_4,"POLYGON ((-179.4942 -69.69904, -179.4942 -69.1...",,,,,,low


In [58]:
grid["eco_toxic_pressure"] = (
    grid["mp_particles_ml"] / eco_tox_threshold
)

grid_with_noaa = grid[grid["mp_pieces_m3"].notna()]
grid_with_noaa



Unnamed: 0,grid_id,geometry,mp_pieces_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure_scaled,toxic_pressure_class,eco_toxic_pressure
184,cell_0_184,"POLYGON ((-179.4942 20.30096, -179.4942 20.800...",0.44,0.00,-6.36,-8.15,0.47,low,0.00
211,cell_0_211,"POLYGON ((-179.4942 33.80096, -179.4942 34.300...",1.52,0.00,-5.82,-7.61,0.52,low,0.00
212,cell_0_212,"POLYGON ((-179.4942 34.30096, -179.4942 34.800...",0.15,0.00,-6.82,-8.62,0.44,low,0.00
213,cell_0_213,"POLYGON ((-179.4942 34.80096, -179.4942 35.300...",0.91,0.00,-6.04,-7.84,0.50,low,0.00
214,cell_0_214,"POLYGON ((-179.4942 35.30096, -179.4942 35.800...",0.65,0.00,-6.19,-7.98,0.49,low,0.00
...,...,...,...,...,...,...,...,...,...
231375,cell_716_107,"POLYGON ((178.5058 -18.19904, 178.5058 -17.699...",0.01,0.00,-8.15,-9.94,0.32,low,0.00
231655,cell_717_64,"POLYGON ((179.0058 -39.69904, 179.0058 -39.199...",89.00,0.00,-4.05,-5.85,0.67,medium,0.00
231698,cell_717_107,"POLYGON ((179.0058 -18.19904, 179.0058 -17.699...",0.01,0.00,-8.12,-9.92,0.33,low,0.00
231853,cell_717_262,"POLYGON ((179.0058 59.30096, 179.0058 59.80096...",0.26,0.00,-6.59,-8.38,0.45,low,0.00


In [59]:
grid_with_noaa["toxic_pressure_class"].value_counts()

toxic_pressure_class
low       5284
medium     114
high         9
Name: count, dtype: int64

In [60]:
iucn = gpd.read_file("../data/GeoDataFrame/gdf_species.gpkg").to_crs("EPSG:4326")
iucn

Unnamed: 0,sci_name,presence,origin,seasonal,lon,lat,redlistCategory,geometry
0,Hubbsina turneri,1,1,1,-101.48,19.87,CR,POINT (-101.4795 19.8745)
1,Hubbsina turneri,1,1,1,-101.78,19.83,CR,POINT (-101.7773 19.8256)
2,Hubbsina turneri,1,1,1,-101.79,19.83,CR,POINT (-101.7876 19.8273)
3,Ictalurus mexicanus,1,1,1,-99.35,21.98,VU,POINT (-99.35417 21.98083)
4,Ictalurus mexicanus,1,1,1,-99.30,22.00,VU,POINT (-99.3 22)
...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,1,1,1,-3.00,5.14,VU,POINT (-3.00185 5.14407)
70269,Macrobrachium thysi,1,1,1,-3.52,5.50,VU,POINT (-3.52102 5.49955)
70270,Macrobrachium thysi,1,1,1,-3.51,5.49,VU,POINT (-3.51293 5.49003)
70271,Macrobrachium thysi,1,1,1,-4.12,5.40,VU,POINT (-4.12282 5.40212)


In [61]:
iucn_grid = gpd.overlay(iucn, grid, how="intersection")
iucn_grid

Unnamed: 0,sci_name,presence,origin,seasonal,lon,lat,redlistCategory,grid_id,mp_pieces_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure_scaled,toxic_pressure_class,eco_toxic_pressure,geometry
0,Hubbsina turneri,1,1,1,-101.48,19.87,CR,cell_157_183,,,,,,low,,POINT (-101.4795 19.8745)
1,Hubbsina turneri,1,1,1,-101.78,19.83,CR,cell_156_183,,,,,,low,,POINT (-101.7773 19.8256)
2,Hubbsina turneri,1,1,1,-101.79,19.83,CR,cell_156_183,,,,,,low,,POINT (-101.7876 19.8273)
3,Ictalurus mexicanus,1,1,1,-99.35,21.98,VU,cell_161_187,,,,,,low,,POINT (-99.35417 21.98083)
4,Ictalurus mexicanus,1,1,1,-99.30,22.00,VU,cell_161_187,,,,,,low,,POINT (-99.3 22)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70271,Macrobrachium thysi,1,1,1,-3.00,5.14,VU,cell_353_153,,,,,,low,,POINT (-3.00185 5.14407)
70272,Macrobrachium thysi,1,1,1,-3.52,5.50,VU,cell_352_154,,,,,,low,,POINT (-3.52102 5.49955)
70273,Macrobrachium thysi,1,1,1,-3.51,5.49,VU,cell_352_154,,,,,,low,,POINT (-3.51293 5.49003)
70274,Macrobrachium thysi,1,1,1,-4.12,5.40,VU,cell_351_154,,,,,,low,,POINT (-4.12282 5.40212)


In [62]:
status_map = {"CR":4, "EN":3, "VU":2, "NT":1}

iucn_grid["vuln"] = (
    iucn_grid["redlistCategory"]
    .map(status_map)
    .fillna(0)
)
iucn_grid

Unnamed: 0,sci_name,presence,origin,seasonal,lon,lat,redlistCategory,grid_id,mp_pieces_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure_scaled,toxic_pressure_class,eco_toxic_pressure,geometry,vuln
0,Hubbsina turneri,1,1,1,-101.48,19.87,CR,cell_157_183,,,,,,low,,POINT (-101.4795 19.8745),4
1,Hubbsina turneri,1,1,1,-101.78,19.83,CR,cell_156_183,,,,,,low,,POINT (-101.7773 19.8256),4
2,Hubbsina turneri,1,1,1,-101.79,19.83,CR,cell_156_183,,,,,,low,,POINT (-101.7876 19.8273),4
3,Ictalurus mexicanus,1,1,1,-99.35,21.98,VU,cell_161_187,,,,,,low,,POINT (-99.35417 21.98083),2
4,Ictalurus mexicanus,1,1,1,-99.30,22.00,VU,cell_161_187,,,,,,low,,POINT (-99.3 22),2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70271,Macrobrachium thysi,1,1,1,-3.00,5.14,VU,cell_353_153,,,,,,low,,POINT (-3.00185 5.14407),2
70272,Macrobrachium thysi,1,1,1,-3.52,5.50,VU,cell_352_154,,,,,,low,,POINT (-3.52102 5.49955),2
70273,Macrobrachium thysi,1,1,1,-3.51,5.49,VU,cell_352_154,,,,,,low,,POINT (-3.51293 5.49003),2
70274,Macrobrachium thysi,1,1,1,-4.12,5.40,VU,cell_351_154,,,,,,low,,POINT (-4.12282 5.40212),2


In [63]:
iucn_features = (
    iucn_grid
    .groupby("grid_id")
    .agg(
        vuln_index=("vuln", "mean")
    )
    .reset_index()
)
iucn_features

Unnamed: 0,grid_id,vuln_index
0,cell_0_108,4.00
1,cell_0_201,1.00
2,cell_0_54,1.00
3,cell_107_253,3.00
4,cell_10_103,3.00
...,...,...
5750,cell_77_107,2.00
5751,cell_90_97,2.00
5752,cell_9_100,2.00
5753,cell_9_101,3.00


In [64]:
grid = grid.merge(iucn_features, on="grid_id", how="left")
grid

Unnamed: 0,grid_id,geometry,mp_pieces_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure_scaled,toxic_pressure_class,eco_toxic_pressure,vuln_index
0,cell_0_0,"POLYGON ((-179.4942 -71.69904, -179.4942 -71.1...",,,,,,low,,
1,cell_0_1,"POLYGON ((-179.4942 -71.19904, -179.4942 -70.6...",,,,,,low,,
2,cell_0_2,"POLYGON ((-179.4942 -70.69904, -179.4942 -70.1...",,,,,,low,,
3,cell_0_3,"POLYGON ((-179.4942 -70.19904, -179.4942 -69.6...",,,,,,low,,
4,cell_0_4,"POLYGON ((-179.4942 -69.69904, -179.4942 -69.1...",,,,,,low,,
...,...,...,...,...,...,...,...,...,...,...
232555,cell_719_318,"POLYGON ((180.0058 87.30096, 180.0058 87.80096...",,,,,,low,,
232556,cell_719_319,"POLYGON ((180.0058 87.80096, 180.0058 88.30096...",,,,,,low,,
232557,cell_719_320,"POLYGON ((180.0058 88.30096, 180.0058 88.80096...",,,,,,low,,
232558,cell_719_321,"POLYGON ((180.0058 88.80096, 180.0058 89.30096...",,,,,,low,,


In [65]:
grid["eco_risk_score"] = (
    grid["toxic_pressure_scaled"] *
    grid["vuln_index"]
)
grid

Unnamed: 0,grid_id,geometry,mp_pieces_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure_scaled,toxic_pressure_class,eco_toxic_pressure,vuln_index,eco_risk_score
0,cell_0_0,"POLYGON ((-179.4942 -71.69904, -179.4942 -71.1...",,,,,,low,,,
1,cell_0_1,"POLYGON ((-179.4942 -71.19904, -179.4942 -70.6...",,,,,,low,,,
2,cell_0_2,"POLYGON ((-179.4942 -70.69904, -179.4942 -70.1...",,,,,,low,,,
3,cell_0_3,"POLYGON ((-179.4942 -70.19904, -179.4942 -69.6...",,,,,,low,,,
4,cell_0_4,"POLYGON ((-179.4942 -69.69904, -179.4942 -69.1...",,,,,,low,,,
...,...,...,...,...,...,...,...,...,...,...,...
232555,cell_719_318,"POLYGON ((180.0058 87.30096, 180.0058 87.80096...",,,,,,low,,,
232556,cell_719_319,"POLYGON ((180.0058 87.80096, 180.0058 88.30096...",,,,,,low,,,
232557,cell_719_320,"POLYGON ((180.0058 88.30096, 180.0058 88.80096...",,,,,,low,,,
232558,cell_719_321,"POLYGON ((180.0058 88.80096, 180.0058 89.30096...",,,,,,low,,,


In [66]:
grid.describe()

Unnamed: 0,mp_pieces_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure_scaled,eco_toxic_pressure,vuln_index,eco_risk_score
count,5407.0,5407.0,5407.0,5407.0,5407.0,5407.0,5755.0,468.0
mean,410.55,0.0,-7.74,-9.54,0.36,0.0,1.75,0.7
std,12457.77,0.01,2.08,2.08,0.18,0.0,0.78,0.52
min,0.0,0.0,-12.0,-13.8,0.0,0.0,1.0,0.0
25%,0.0,0.0,-8.33,-10.13,0.31,0.0,1.0,0.38
50%,0.03,0.0,-7.49,-9.29,0.38,0.0,1.92,0.61
75%,0.33,0.0,-6.48,-8.27,0.46,0.0,2.0,0.88
max,800000.0,0.8,-0.1,-1.89,1.0,0.01,4.0,3.65


In [67]:
grid.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 232560 entries, 0 to 232559
Data columns (total 11 columns):
 #   Column                 Non-Null Count   Dtype   
---  ------                 --------------   -----   
 0   grid_id                232560 non-null  object  
 1   geometry               232560 non-null  geometry
 2   mp_pieces_m3           5407 non-null    float64 
 3   mp_particles_ml        5407 non-null    float64 
 4   log_mp_particles_ml    5407 non-null    float64 
 5   log_toxic_pressure     5407 non-null    float64 
 6   toxic_pressure_scaled  5407 non-null    float64 
 7   toxic_pressure_class   232560 non-null  object  
 8   eco_toxic_pressure     5407 non-null    float64 
 9   vuln_index             5755 non-null    float64 
 10  eco_risk_score         468 non-null     float64 
dtypes: float64(8), geometry(1), object(2)
memory usage: 19.5+ MB


# Intento 3. IUCN como base

In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np


In [3]:
iucn = gpd.read_file("../data/GeoDataFrame/gdf_species.gpkg").to_crs("EPSG:4326")


In [4]:
iucn = iucn[[
    "sci_name",
    "redlistCategory",
    "geometry"
]].copy()



In [5]:
iucn["iucn_id"] = iucn.index


In [6]:
status_map = {"CR":4, "EN":3, "VU":2, "NT":1}

iucn["vuln"] = (
    iucn["redlistCategory"]
    .map(status_map)
    .fillna(0)
)
iucn

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln
0,Hubbsina turneri,CR,POINT (-101.4795 19.8745),0,4
1,Hubbsina turneri,CR,POINT (-101.7773 19.8256),1,4
2,Hubbsina turneri,CR,POINT (-101.7876 19.8273),2,4
3,Ictalurus mexicanus,VU,POINT (-99.35417 21.98083),3,2
4,Ictalurus mexicanus,VU,POINT (-99.3 22),4,2
...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-3.00185 5.14407),70268,2
70269,Macrobrachium thysi,VU,POINT (-3.52102 5.49955),70269,2
70270,Macrobrachium thysi,VU,POINT (-3.51293 5.49003),70270,2
70271,Macrobrachium thysi,VU,POINT (-4.12282 5.40212),70271,2


In [7]:
noaa = gpd.read_file("../data/GeoDataFrame/gdf_microplastics.gpkg").to_crs("EPSG:4326")


In [8]:
noaa_valid = noaa[noaa["microplastics_measurement"].notna()].copy()
noaa_nan   = noaa[noaa["microplastics_measurement"].isna()].copy()
noaa_nan

Unnamed: 0,microplastics_measurement,unit,concentration_class_range,mesh_size_mm,lat,lon,geometry
13219,,pieces/10 mins,2-40,,27.2049,-97.3645,POINT (-97.3645 27.2049)
13220,,pieces/10 mins,40-200,,27.4147,-97.3016,POINT (-97.3016 27.4147)
13221,,pieces/10 mins,40-200,,27.6057,-97.2077,POINT (-97.2077 27.6057)
13222,,pieces/10 mins,2-40,,26.0983,-97.1623,POINT (-97.1623 26.0983)
13223,,pieces/10 mins,1-2,,27.8322,-97.3784,POINT (-97.3784 27.8322)
...,...,...,...,...,...,...,...
18339,,pieces/10 mins,2-40,,29.3657,-94.8122,POINT (-94.8122 29.3657)
18340,,pieces/10 mins,2-40,,29.3351,-94.7287,POINT (-94.7287 29.3351)
18341,,pieces/10 mins,40-200,,29.3360,-94.7352,POINT (-94.7352 29.336)
18342,,pieces/10 mins,2-40,,32.7114,-96.9773,POINT (-96.9773 32.7114)


In [9]:
noaa = noaa_valid[[
    "microplastics_measurement",
    "geometry"
]].copy()

noaa = noaa.rename(columns={
    "microplastics_measurement": "mp_pieces_m3"
})


In [10]:
# Asignar microplásticos por cercanía
# Pasar a metros
iucn_m = iucn.to_crs("EPSG:3857")
noaa_m = noaa.to_crs("EPSG:3857")


index_right  → índice del punto NOAA

mp_pieces_m3 → medición NOAA

dist_m       → distancia (igual para todos los empates)


In [11]:
iucn_noaa = gpd.sjoin_nearest(
    iucn_m,
    noaa_m,
    how="left",
    distance_col="dist_m"
)
iucn_noaa

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13102,705.218618,244627.454273
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13101,0.000000,244627.454273
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13103,1410.437236,244627.454273
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13104,2115.655853,244627.454273
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13102,705.218618,237016.489842
...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829


In [12]:
iucn_noaa_clean = (
    iucn_noaa
    .drop_duplicates(subset="iucn_id", keep="first")
)


In [13]:
iucn_noaa_clean["iucn_id"].value_counts().max()


np.int64(1)

In [14]:
iucn_noaa = iucn_noaa_clean
iucn_noaa

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13102,705.218618,244627.454273
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13102,705.218618,237016.489842
2,Hubbsina turneri,CR,POINT (-11330943.801 2252583.424),2,4,13102,705.218618,237235.709534
3,Ictalurus mexicanus,VU,POINT (-11060055.613 2509223.803),3,2,13033,2115.655853,176956.192726
4,Ictalurus mexicanus,VU,POINT (-11054025.436 2511525.235),4,2,13033,2115.655853,170585.675721
...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829


In [15]:
# Ajustar exposición por distancia
iucn_noaa["distance_km"] = iucn_noaa["dist_m"] / 1000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [16]:
# evitar asumir que un muestreo lejano afecta igual.
DECAY_KM = 50  

iucn_noaa["mp_effective_m3"] = (
    iucn_noaa["mp_pieces_m3"] *
    np.exp(- iucn_noaa["distance_km"] / DECAY_KM)
)
iucn_noaa

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m,distance_km,mp_effective_m3
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13102,705.218618,244627.454273,244.627454,5.290743e+00
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13102,705.218618,237016.489842,237.016490,6.160624e+00
2,Hubbsina turneri,CR,POINT (-11330943.801 2252583.424),2,4,13102,705.218618,237235.709534,237.235710,6.133672e+00
3,Ictalurus mexicanus,VU,POINT (-11060055.613 2509223.803),3,2,13033,2115.655853,176956.192726,176.956193,6.143602e+01
4,Ictalurus mexicanus,VU,POINT (-11054025.436 2511525.235),4,2,13033,2115.655853,170585.675721,170.585676,6.978413e+01
...,...,...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033,959.001335,1.809228e-10
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690,929.073411,3.291880e-10
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379,929.269937,3.278966e-10
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829,866.968072,1.139946e-09


In [17]:
tomex = pd.read_csv("../data/Raw/ToMEx_sp_ml.csv")


In [18]:
tomex_effect = tomex[tomex["Effect"] == "Yes"]


In [19]:
eco_tox_threshold = tomex_effect["Unaligned Dose Values"].median()
log_eco_tox_threshold = np.log10(eco_tox_threshold)


In [20]:
iucn_noaa["mp_particles_ml"] = (
    iucn_noaa["mp_effective_m3"] / 1_000_000
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [21]:
iucn_noaa["log_mp_particles_ml"] = np.log10(
    iucn_noaa["mp_particles_ml"] + 1e-12
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [22]:
iucn_noaa["log_toxic_pressure"] = (
    iucn_noaa["log_mp_particles_ml"] - log_eco_tox_threshold
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [23]:
def pressure_class(x):
    if x > -3:
        return "high"
    elif x > -6:
        return "medium"
    else:
        return "low"

iucn_noaa["toxic_pressure_class"] = iucn_noaa["log_toxic_pressure"].apply(pressure_class)
iucn_noaa.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m,distance_km,mp_effective_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure_class
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13102,705.218618,244627.454273,244.627454,5.290743,5e-06,-5.276483,-7.072363,low
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13102,705.218618,237016.489842,237.01649,6.160624,6e-06,-5.210375,-7.006255,low
2,Hubbsina turneri,CR,POINT (-11330943.801 2252583.424),2,4,13102,705.218618,237235.709534,237.23571,6.133672,6e-06,-5.212279,-7.008159,low
3,Ictalurus mexicanus,VU,POINT (-11060055.613 2509223.803),3,2,13033,2115.655853,176956.192726,176.956193,61.436019,6.1e-05,-4.211577,-6.007457,low
4,Ictalurus mexicanus,VU,POINT (-11054025.436 2511525.235),4,2,13033,2115.655853,170585.675721,170.585676,69.78413,7e-05,-4.156243,-5.952123,medium


In [24]:
iucn_noaa["eco_risk_score"] = (
    iucn_noaa["log_toxic_pressure"] *
    iucn_noaa["vuln"]
)

def risk_class(x):
    if x <= -27.6:
        return "low"
    elif x <= -10.9:
        return "medium"
    else:
        return "high"

iucn_noaa["eco_risk_class"] = iucn_noaa["eco_risk_score"].apply(risk_class)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [25]:
iucn_noaa

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m,distance_km,mp_effective_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure_class,eco_risk_score,eco_risk_class
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13102,705.218618,244627.454273,244.627454,5.290743e+00,5.290743e-06,-5.276483,-7.072363,low,-28.289453,low
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13102,705.218618,237016.489842,237.016490,6.160624e+00,6.160624e-06,-5.210375,-7.006255,low,-28.025021,low
2,Hubbsina turneri,CR,POINT (-11330943.801 2252583.424),2,4,13102,705.218618,237235.709534,237.235710,6.133672e+00,6.133672e-06,-5.212279,-7.008159,low,-28.032637,low
3,Ictalurus mexicanus,VU,POINT (-11060055.613 2509223.803),3,2,13033,2115.655853,176956.192726,176.956193,6.143602e+01,6.143602e-05,-4.211577,-6.007457,low,-12.014914,medium
4,Ictalurus mexicanus,VU,POINT (-11054025.436 2511525.235),4,2,13033,2115.655853,170585.675721,170.585676,6.978413e+01,6.978413e-05,-4.156243,-5.952123,medium,-11.904247,medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033,959.001335,1.809228e-10,1.809228e-16,-11.999921,-13.795801,low,-27.591603,medium
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690,929.073411,3.291880e-10,3.291880e-16,-11.999857,-13.795737,low,-27.591474,medium
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379,929.269937,3.278966e-10,3.278966e-16,-11.999858,-13.795738,low,-27.591475,medium
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829,866.968072,1.139946e-09,1.139946e-15,-11.999505,-13.795385,low,-27.590770,medium


In [26]:
iucn_noaa["eco_risk_score"].describe()

count    70273.000000
mean       -18.532688
std         10.087369
min        -55.183520
25%        -27.591760
50%        -13.795880
75%        -10.943509
max         -2.628144
Name: eco_risk_score, dtype: float64

In [27]:
iucn_noaa["eco_risk_class"].value_counts()

eco_risk_class
medium    45723
high      16961
low        7589
Name: count, dtype: int64

In [28]:
iucn_noaa.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 70273 entries, 0 to 70272
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype   
---  ------                --------------  -----   
 0   sci_name              70273 non-null  object  
 1   redlistCategory       70273 non-null  object  
 2   geometry              70273 non-null  geometry
 3   iucn_id               70273 non-null  int64   
 4   vuln                  70273 non-null  int64   
 5   index_right           70273 non-null  int64   
 6   mp_pieces_m3          70273 non-null  float64 
 7   dist_m                70273 non-null  float64 
 8   distance_km           70273 non-null  float64 
 9   mp_effective_m3       70273 non-null  float64 
 10  mp_particles_ml       70273 non-null  float64 
 11  log_mp_particles_ml   70273 non-null  float64 
 12  log_toxic_pressure    70273 non-null  float64 
 13  toxic_pressure_class  70273 non-null  object  
 14  eco_risk_score        70273 non-null  float64 
 15 

In [29]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 100))

iucn_noaa["eco_risk_index"] = scaler.fit_transform(
    iucn_noaa[["eco_risk_score"]]
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [30]:

def risk_class(x):
    if x <= 20:
        return "very low"
    elif x <= 50:
        return "low"
    elif x <= 80:
        return "medium"
    else:
        return "high"

iucn_noaa["eco_risk_class"] = iucn_noaa["eco_risk_index"].apply(risk_class)
iucn_noaa["eco_risk_class"].value_counts()

eco_risk_class
medium      33813
high        29159
low          6321
very low      980
Name: count, dtype: int64

In [31]:
iucn_noaa.columns

Index(['sci_name', 'redlistCategory', 'geometry', 'iucn_id', 'vuln',
       'index_right', 'mp_pieces_m3', 'dist_m', 'distance_km',
       'mp_effective_m3', 'mp_particles_ml', 'log_mp_particles_ml',
       'log_toxic_pressure', 'toxic_pressure_class', 'eco_risk_score',
       'eco_risk_class', 'eco_risk_index'],
      dtype='object')

In [32]:
new_order = [
    "iucn_id",
    "sci_name",
    "vuln",
    "log_mp_particles_ml",
    "distance_km",
    "log_toxic_pressure",
    "eco_risk_score",
    "eco_risk_index",
    "eco_risk_class",
    "geometry"
]

dataset = iucn_noaa[new_order]



In [33]:
dataset

Unnamed: 0,iucn_id,sci_name,vuln,log_mp_particles_ml,distance_km,log_toxic_pressure,eco_risk_score,eco_risk_index,eco_risk_class,geometry
0,0,Hubbsina turneri,4,-5.276483,244.627454,-7.072363,-28.289453,51.172818,medium,POINT (-11296646.266 2258169.64)
1,1,Hubbsina turneri,4,-5.210375,237.016490,-7.006255,-28.025021,51.675967,medium,POINT (-11329797.21 2252382.257)
2,2,Hubbsina turneri,4,-5.212279,237.235710,-7.008159,-28.032637,51.661475,medium,POINT (-11330943.801 2252583.424)
3,3,Ictalurus mexicanus,2,-4.211577,176.956193,-6.007457,-12.014914,82.139277,high,POINT (-11060055.613 2509223.803)
4,4,Ictalurus mexicanus,2,-4.156243,170.585676,-5.952123,-11.904247,82.349850,high,POINT (-11054025.436 2511525.235)
...,...,...,...,...,...,...,...,...,...,...
70268,70268,Macrobrachium thysi,2,-11.999921,959.001335,-13.795801,-27.591603,52.500656,medium,POINT (-334164.413 573406.106)
70269,70269,Macrobrachium thysi,2,-11.999857,929.073411,-13.795737,-27.591474,52.500901,medium,POINT (-391958.153 613149.338)
70270,70270,Macrobrachium thysi,2,-11.999858,929.269937,-13.795738,-27.591475,52.500899,medium,POINT (-391057.579 612084.684)
70271,70271,Macrobrachium thysi,2,-11.999505,866.968072,-13.795385,-27.590770,52.502240,medium,POINT (-458950.223 602254.212)


In [34]:
# Guardar

dataset.to_file("../data/dataset_unificado.gpkg", layer="ecol_risk", driver="GPKG")
dataset.to_parquet("../data/dataset_unificado.parquet")


In [35]:
dataset_csv = dataset.copy()
dataset_csv["geometry"] = dataset_csv.geometry.to_wkt()
dataset_csv.to_csv("../data/dataset_unificado_csv.csv", index=False)


  dataset_csv["geometry"] = dataset_csv.geometry.to_wkt()
