In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler

# Paso 1: Dataset Base: IUCN.

In [3]:
# Leemos el dataset, que viene en puntos
# El CRS EPSG:4326 significa latitud/longitud
iucn = gpd.read_file("../data/GeoDataFrame/gdf_species.gpkg").to_crs("EPSG:4326") 

In [4]:
# Seleccionamos las columnas que nos interesan
iucn = iucn[[
    "sci_name",
    "redlistCategory",
    "geometry"
]].copy()

In [5]:
# Creamos una columna con el ID
iucn["iucn_id"] = iucn.index

In [6]:
# Creamos columna con el índice de vulneravilidad:

status_map = {"CR":4, "EN":3, "VU":2, "NT":1}

iucn["vuln"] = (
    iucn["redlistCategory"]
    .map(status_map)
    .fillna(0)
)
iucn

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln
0,Hubbsina turneri,CR,POINT (-101.4795 19.8745),0,4
1,Hubbsina turneri,CR,POINT (-101.7773 19.8256),1,4
2,Hubbsina turneri,CR,POINT (-101.7876 19.8273),2,4
3,Ictalurus mexicanus,VU,POINT (-99.35417 21.98083),3,2
4,Ictalurus mexicanus,VU,POINT (-99.3 22),4,2
...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-3.00185 5.14407),70268,2
70269,Macrobrachium thysi,VU,POINT (-3.52102 5.49955),70269,2
70270,Macrobrachium thysi,VU,POINT (-3.51293 5.49003),70270,2
70271,Macrobrachium thysi,VU,POINT (-4.12282 5.40212),70271,2


Salida:

iucn = dataset base, ≥60k filas

# Paso 2. NOAA (microplásticos)

In [7]:
# NOAA también son puntos
noaa = gpd.read_file("../data/GeoDataFrame/gdf_microplastics.gpkg").to_crs("EPSG:4326")
noaa

Unnamed: 0,microplastics_measurement,unit,concentration_class_range,mesh_size_mm,lat,lon,geometry
0,0.000000,pieces/m3,0-0.0005,0.3350,45.280000,-60.290000,POINT (-60.29 45.28)
1,0.002276,pieces/m3,0.0005-0.005,0.3350,40.930000,-70.650000,POINT (-70.65 40.93)
2,0.004320,pieces/m3,0.0005-0.005,0.3350,40.930000,-70.650000,POINT (-70.65 40.93)
3,0.000000,pieces/m3,0-0.0005,0.3350,40.300000,-69.770000,POINT (-69.77 40.3)
4,0.000000,pieces/m3,0-0.0005,0.3350,39.880000,-67.150000,POINT (-67.15 39.88)
...,...,...,...,...,...,...,...
19317,188.300000,pieces kg-1 d.w.,150-200,0.0007,-5.933333,39.360000,POINT (39.36 -5.93333)
19318,155.270000,pieces kg-1 d.w.,150-200,0.0007,-5.500000,39.120000,POINT (39.12 -5.5)
19319,58.070000,pieces kg-1 d.w.,20-150,0.0007,-6.450000,39.466667,POINT (39.46667 -6.45)
19320,210.000000,pieces kg-1 d.w.,>200,0.0007,-6.320000,39.210000,POINT (39.21 -6.32)


Hay filas en las que "microplastics measurement" es NaN y no nos interesan. Nos quedamos solo con las filas con datos medidos

In [8]:
noaa_valid = noaa[noaa["microplastics_measurement"].notna()].copy()
noaa_nan = noaa[noaa["microplastics_measurement"].isna()].copy()
noaa_nan

Unnamed: 0,microplastics_measurement,unit,concentration_class_range,mesh_size_mm,lat,lon,geometry
13219,,pieces/10 mins,2-40,,27.2049,-97.3645,POINT (-97.3645 27.2049)
13220,,pieces/10 mins,40-200,,27.4147,-97.3016,POINT (-97.3016 27.4147)
13221,,pieces/10 mins,40-200,,27.6057,-97.2077,POINT (-97.2077 27.6057)
13222,,pieces/10 mins,2-40,,26.0983,-97.1623,POINT (-97.1623 26.0983)
13223,,pieces/10 mins,1-2,,27.8322,-97.3784,POINT (-97.3784 27.8322)
...,...,...,...,...,...,...,...
18339,,pieces/10 mins,2-40,,29.3657,-94.8122,POINT (-94.8122 29.3657)
18340,,pieces/10 mins,2-40,,29.3351,-94.7287,POINT (-94.7287 29.3351)
18341,,pieces/10 mins,40-200,,29.3360,-94.7352,POINT (-94.7352 29.336)
18342,,pieces/10 mins,2-40,,32.7114,-96.9773,POINT (-96.9773 32.7114)


Convertimos "noaa_valid" en el nuevo dataset de "noaa"

In [9]:
noaa = noaa_valid[[
    "microplastics_measurement",
    "geometry"
]].copy()

# Renombramos la columna para tener a la vista las unidades de medida
noaa = noaa.rename(columns={
    "microplastics_measurement": "mp_pieces_m3"
})


#### Asignar microplásticos por cercanía

In [10]:
# Pasar a metros
# El CRS EPSG:3857 signiifica proyección métrica
iucn_m = iucn.to_crs("EPSG:3857")
noaa_m = noaa.to_crs("EPSG:3857")

In [11]:
# Nearest spatial join
iucn_noaa = gpd.sjoin_nearest(
    iucn_m,
    noaa_m,
    how="left",
    distance_col="dist_m"
)
iucn_noaa

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13102,705.218618,244627.454273
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13101,0.000000,244627.454273
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13103,1410.437236,244627.454273
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13104,2115.655853,244627.454273
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13102,705.218618,237016.489842
...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829


Ahora cada especie tiene:

mp_pieces_m3 del NOAA más cercano

dist_m (metros)

In [12]:
# Resolvemos duplicados por microplasticos que estan a la misma distancia, nos quedamos con
iucn_noaa_clean = (
    iucn_noaa
    .sort_values("mp_pieces_m3", ascending=False)
    .drop_duplicates(subset="iucn_id", keep="first")
)
iucn_noaa_clean["iucn_id"].value_counts().max() # Comprobación de que no hay duplicados (debe salir 1)

np.int64(1)

Cuando una especie (iucn_id) tiene varias filas (duplicados), nos quedamos con aquella cuyo valor de mp_pieces_m3 es el más alto.

In [13]:
iucn_noaa_clean = iucn_noaa_clean.sort_values("iucn_id")


In [14]:
iucn_noaa = iucn_noaa_clean
iucn_noaa

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13104,2115.655853,244627.454273
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13104,2115.655853,237016.489842
2,Hubbsina turneri,CR,POINT (-11330943.801 2252583.424),2,4,13104,2115.655853,237235.709534
3,Ictalurus mexicanus,VU,POINT (-11060055.613 2509223.803),3,2,13032,2820.874471,176956.192726
4,Ictalurus mexicanus,VU,POINT (-11054025.436 2511525.235),4,2,13032,2820.874471,170585.675721
...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829


In [None]:
# Guardado del datset unificado
iucn_noaa.to_csv("../data/processed/iucn_noaa_juntos.csv", index=False)
iucn_noaa.to_file("../data/processed/iucn_noaa.gpkg", driver="GPKG")


# Paso 4. Ajustar exposición por distancia

In [16]:
# Pasamos distancia a kilometros
iucn_noaa["distance_km"] = iucn_noaa["dist_m"] / 1000

### mp_effective = mp_pieces_m3 x e ^ −(distance_km/DECAY_KM)
	​


mp_pieces_m3 es el valor máximo de impacto posible, cuando la distancia es 0 km

distance_km cuanto mayor es esta distancia, menos representativa es esa medición para la especie

DECAY_KM A una distancia igual a km, el efecto se reduce a 1/e ≈ 37%.

e (Número de Euler ~ 2.718) Aparece en procesos donde hay decaimiento continuo y suave

e ^ −(distance_km/DECAY_KM) función de decaimiento exponencial. Asume que el efecto se pierde progresivamente.

In [17]:
''' Explicación gráfica de lo que hace matemáticamente el término exponencial de la fórmula.

impacto
1.0 |\
    | \
0.5 |  \
    |    \
0.1 |      \
    |         \____
    +----------------
        distancia (km)'''


  ''' Explicación gráfica de lo que hace matemáticamente el término exponencial de la fórmula.


' Explicación gráfica de lo que hace matemáticamente el término exponencial de la fórmula.\n\nimpacto\n1.0 |    | 0.5 |      |    0.1 |          |         \\____\n    +----------------\n        distancia (km)'

In [18]:
# Decaimiento exponencial. si el microplástico está a mayor distancia, el impacto va a ser menor
# Evitar asumir que un muestreo lejano afecta igual.

DECAY_KM = 50 

iucn_noaa["mp_effective_m3"] = (
    iucn_noaa["mp_pieces_m3"] *
    np.exp(- iucn_noaa["distance_km"] / DECAY_KM)
)
iucn_noaa

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m,distance_km,mp_effective_m3
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13104,2115.655853,244627.454273,244.627454,1.587223e+01
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13104,2115.655853,237016.489842,237.016490,1.848187e+01
2,Hubbsina turneri,CR,POINT (-11330943.801 2252583.424),2,4,13104,2115.655853,237235.709534,237.235710,1.840102e+01
3,Ictalurus mexicanus,VU,POINT (-11060055.613 2509223.803),3,2,13032,2820.874471,176956.192726,176.956193,8.191469e+01
4,Ictalurus mexicanus,VU,POINT (-11054025.436 2511525.235),4,2,13032,2820.874471,170585.675721,170.585676,9.304551e+01
...,...,...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033,959.001335,1.809228e-10
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690,929.073411,3.291880e-10
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379,929.269937,3.278966e-10
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829,866.968072,1.139946e-09


# Paso 5. TOMEx: de concentracion (dosis) a toxicidad

In [19]:
pd.set_option("display.max_columns", None)


In [20]:
# Cargar TOMEx species
tomex = pd.read_csv("../data/Raw/ToMEx_sp_ml.csv")
tomex

Unnamed: 0,DOI,Authors,Year,Species,Organism Group,Environment,Life Stage,In vitro/in vivo,Sex,Estimated Body Length (cm),Estimated Maximum Ingestible Size (mm),Experiment Type,Exposure Route,Particle Mix?,Negative Control,Reference Particle,Exposure Media,Solvent,Detergent,pH,Salinity (ppt),Temperature (Avg),Temperature (Min),Temperature (Max),Exposure Duration (days),Recovery (Days),Acute/Chronic,Number of Doses,Replicates,Sample Size,Dosing Frequency,Chemicals Added,Added Chemical Dose (nominal),Added Chemical Dose (measured),Plotted Dose Values,Unaligned Dose Values,Dose Metric,Alignment,Effect,Direction,Broad Endpoint Category,Specific Endpoint Category,Endpoint,Level of Biological Organization,Target Cell or Tissue,Effect Metric,Polymer,Shape,Density (g/cm^3),"Density, reported or estimated",Charge,Zeta Potential (mV),Zeta Potential Media,Functional Group,Particle Length (μm),Particle Width (μm),Size Category,Particle Surface Area (μm^2),Particle Volume (μm^3),Particle Mass (mg),Weathered or Biofouled?,Size Validated?,Polymer Validated?,Shape Validated,Particle Source,Sodium Azide Present?,Screened for Chemical Contamination?,Particle Cleaning?,Solvent Rinse,Background Contamination Monitored?,Concentration Validated?,Particle Behavior,Uptake Validated?,Uptake Validation Method,Tissue Distribution,Organisms Fed?
0,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,adult,In Vivo,Not Reported,0.50,0.341121,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.0,,,21.0,,Chronic,2,8.0,1,2.0,Not Reported,0,,316.036244,316.036244,Particles/mL,Unaligned,Yes,increase,Fitness,Mortality,Mortality,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.0,2.0,1µm < 100µm,12.566371,4.18879,4.398230e-09,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
1,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,adult,In Vivo,Not Reported,0.50,0.341121,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.0,,,21.0,,Chronic,2,8.0,1,2.0,Not Reported,0,,2523.742669,2523.742669,Particles/mL,Unaligned,Yes,increase,Fitness,Mortality,Mortality,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.0,2.0,1µm < 100µm,12.566371,4.18879,4.398230e-09,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
2,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,adult,In Vivo,Not Reported,0.50,0.341121,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.0,,,21.0,,Chronic,2,8.0,1,2.0,Not Reported,0,,316.036244,316.036244,Particles/mL,Unaligned,No,Not Reported,Fitness,Reproduction,Number of Offspring,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.0,2.0,1µm < 100µm,12.566371,4.18879,4.398230e-09,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
3,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,adult,In Vivo,Not Reported,0.50,0.341121,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.0,,,21.0,,Chronic,2,8.0,1,2.0,Not Reported,0,,2523.742669,2523.742669,Particles/mL,Unaligned,No,Not Reported,Fitness,Reproduction,Number of Offspring,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.0,2.0,1µm < 100µm,12.566371,4.18879,4.398230e-09,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
4,10.7717/peerj.4601,Aljaibachi & Callaghan,2018,Daphnia magna,Crustacea,Freshwater,early,In Vivo,Not Reported,0.08,0.061585,Particle Only,water,No,Y,N,artificial_medium,Not Reported,Not Reported,,,20.0,,,21.0,,Chronic,2,8.0,1,2.0,Not Reported,0,,316.036244,316.036244,Particles/mL,Unaligned,Yes,increase,Fitness,Mortality,Mortality,Organism,Not Reported,,Polystyrene,Sphere,1.05,reported,,,,COOH,2.0,2.0,1µm < 100µm,12.566371,4.18879,4.398230e-09,No,N,N,N,commercial,No,N,not_cleaned,none,N,N,Not Evaluated,Y,microscopy flourescent particles,gut,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3898,10.1016/j.fsi.2021.04.014,Zwollo,2021,Oncorhynchus mykiss,Fish,Freshwater,Not Reported,In Vitro,Not Reported,,,Particle Only,water,No,Yes,No,artificial_medium,DMSO,Not Reported,,,18.0,,,3.0,,Acute,5,1.0,10^7 cells/mL,0.0,Not Reported,0,,4.049116,4.049116,Particles/mL,Unaligned,Yes,decrease,Immune,Immune Cells,B cell abundance (immature/mature),Cell,B cell,LOEC,Polystyrene,Sphere,1.05,reported,,,,,16.5,16.5,1µm < 100µm,855.298600,2352.07115,2.469675e-06,No,Yes,No,No,commercial,Yes,No,not_cleaned,none,No,No,none,Yes,microscopy,anterior kidney,No
3899,10.1016/j.fsi.2021.04.014,Zwollo,2021,Oncorhynchus mykiss,Fish,Freshwater,Not Reported,In Vitro,Not Reported,,,Particle Only,water,No,Yes,No,artificial_medium,DMSO,Not Reported,,,18.0,,,3.0,,Acute,5,1.0,10^7 cells/mL,0.0,Not Reported,0,,40.491163,40.491163,Particles/mL,Unaligned,Yes,decrease,Immune,Immune Cells,B cell abundance (immature/mature),Cell,B cell,,Polystyrene,Sphere,1.05,reported,,,,,16.5,16.5,1µm < 100µm,855.298600,2352.07115,2.469675e-06,No,Yes,No,No,commercial,Yes,No,not_cleaned,none,No,No,none,Yes,microscopy,anterior kidney,No
3900,10.1016/j.fsi.2021.04.014,Zwollo,2021,Oncorhynchus mykiss,Fish,Freshwater,Not Reported,In Vitro,Not Reported,,,Particle Only,water,No,Yes,No,artificial_medium,DMSO,Not Reported,,,18.0,,,3.0,,Acute,5,1.0,10^7 cells/mL,0.0,Not Reported,0,,404.911625,404.911625,Particles/mL,Unaligned,Yes,decrease,Immune,Immune Cells,B cell abundance (immature/mature),Cell,B cell,,Polystyrene,Sphere,1.05,reported,,,,,16.5,16.5,1µm < 100µm,855.298600,2352.07115,2.469675e-06,No,Yes,No,No,commercial,Yes,No,not_cleaned,none,No,No,none,Yes,microscopy,anterior kidney,No
3901,10.1016/j.fsi.2021.04.014,Zwollo,2021,Oncorhynchus mykiss,Fish,Freshwater,Not Reported,In Vitro,Not Reported,,,Particle Only,water,No,Yes,No,artificial_medium,DMSO,Not Reported,,,18.0,,,3.0,,Acute,5,1.0,10^7 cells/mL,0.0,Not Reported,0,,4049.116254,4049.116254,Particles/mL,Unaligned,Yes,decrease,Immune,Immune Cells,B cell abundance (immature/mature),Cell,B cell,,Polystyrene,Sphere,1.05,reported,,,,,16.5,16.5,1µm < 100µm,855.298600,2352.07115,2.469675e-06,No,Yes,No,No,commercial,Yes,No,not_cleaned,none,No,No,none,Yes,microscopy,anterior kidney,No


In [21]:
(tomex["Plotted Dose Values"] == tomex["Unaligned Dose Values"]).all()


np.True_

In [22]:
tomex["Sample Size"].value_counts()

Sample Size
1                1054
10                502
20                483
5                 260
6                 150
30                149
4                 141
16                115
3                 110
8                 104
15                 96
50                 74
1000               49
24                 47
300                47
200                39
100                38
2                  35
10^7 cells/mL      35
14                 24
70                 24
9                  17
60                 14
40                 12
12.5               12
17.5               12
65                  8
370                 8
17                  7
18                  5
11                  4
1500                4
12                  4
23                  4
25                  4
68                  2
172                 2
58                  2
19                  2
90                  2
144                 2
28                  2
900                 2
21                  1
Name: count, dtype: 

In [23]:
# Función para convertir a float; si no se puede, devuelve NaN
def parse_sample_size(x):
    try:
        return float(x)
    except:
        return np.nan

# Crear columna numérica
tomex["sample_size_num"] = tomex["Sample Size"].apply(parse_sample_size)

# Filtrar solo filas con números positivos
tomex_clean = tomex[tomex["sample_size_num"].notna() & (tomex["sample_size_num"] > 0)].copy()

tomex_clean["dosis_per_individual"] = tomex_clean["Unaligned Dose Values"] / tomex_clean["sample_size_num"]




In [24]:
# Nos quedeamos con los experimentos en los que se ha observado efecto

tomex_effect = tomex_clean[tomex_clean["Effect"] == "Yes"]

In [25]:
tomex_effect["dosis_per_individual"].describe()


count    1.231000e+03
mean     1.328518e+08
std      2.670751e+09
min      8.461820e-05
25%      5.000000e-01
50%      2.600000e+01
75%      7.500000e+02
max      8.120000e+10
Name: dosis_per_individual, dtype: float64

In [26]:
## Cálculo de umbral ecotoxicologico. Cuando no existe criterio externo el percentil 50 es el default metodológico

eco_tox_threshold = tomex_effect["dosis_per_individual"].median() # Calcula mediana de valores de dosis tóxicas
log_eco_tox_threshold = np.log10(eco_tox_threshold)
log_eco_tox_threshold

np.float64(1.414973347970818)

Hemos calculado un valor central de dosis tóxica
(el 50 % de los valores están por debajo y el 50 % por encima)
Se utilizará como valor de referencia estadístico para comparar patrones relativos de riesgo

Tomamos la dosis tóxica mediana observada en Tomex como un umbral de referencia y la transformamos a escala logarítmica para su análisis

En ecotoxicología esto se usa para:

clasificar (alto vs bajo riesgo relativo)

comparar especies

analizar patrones

In [27]:
# Convertimos unidades de NOAA (particles/m3 -> particles/ml)
iucn_noaa["mp_particles_ml"] = (
    iucn_noaa["mp_effective_m3"] / 1_000_000
)

Para evitar ver ceros, normal al dividir entre un millón, convertimos los valores a escala logarítmica

In [28]:
iucn_noaa["log_mp_particles_ml"] = np.log10(
    iucn_noaa["mp_particles_ml"] + 1e-12
)

In [29]:
# Presión tóxica relativa
iucn_noaa["log_toxic_pressure"] = (
    iucn_noaa["log_mp_particles_ml"] - log_eco_tox_threshold
)

La presión tóxica relativa se calculó como la diferencia entre la concentración logarítmica de microplásticos y el umbral ecotoxicológico de referencia, representando el número de órdenes de magnitud por los que la exposición se sitúa por encima o por debajo del valor central de toxicidad

Las concentraciones ambientales de microplásticos se sitúan, en general, por debajo de las dosis experimentales reportadas en estudios ecotoxicológicos. Por tanto, los valores de presión tóxica relativa resultan negativos y se interpretan como una medida de proximidad al umbral, no como evidencia de toxicidad directa

Los límites se establecieron de forma exploratoria para diferenciar especies más o menos próximas al umbral ecotoxicológico, dada la escala logarítmica de las concentraciones

In [30]:
iucn_noaa

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m,distance_km,mp_effective_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13104,2115.655853,244627.454273,244.627454,1.587223e+01,1.587223e-05,-4.799362,-6.214335
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13104,2115.655853,237016.489842,237.016490,1.848187e+01,1.848187e-05,-4.733254,-6.148227
2,Hubbsina turneri,CR,POINT (-11330943.801 2252583.424),2,4,13104,2115.655853,237235.709534,237.235710,1.840102e+01,1.840102e-05,-4.735158,-6.150131
3,Ictalurus mexicanus,VU,POINT (-11060055.613 2509223.803),3,2,13032,2820.874471,176956.192726,176.956193,8.191469e+01,8.191469e-05,-4.086638,-5.501612
4,Ictalurus mexicanus,VU,POINT (-11054025.436 2511525.235),4,2,13032,2820.874471,170585.675721,170.585676,9.304551e+01,9.304551e-05,-4.031305,-5.446278
...,...,...,...,...,...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033,959.001335,1.809228e-10,1.809228e-16,-11.999921,-13.414895
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690,929.073411,3.291880e-10,3.291880e-16,-11.999857,-13.414830
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379,929.269937,3.278966e-10,3.278966e-16,-11.999858,-13.414831
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829,866.968072,1.139946e-09,1.139946e-15,-11.999505,-13.414479


# Paso 6. Cálculo del riesgo ecológico

In [31]:
iucn_noaa["toxic_pressure"] = 10 ** iucn_noaa["log_toxic_pressure"]

In [32]:
iucn_noaa["eco_risk_score"] = (
    iucn_noaa["toxic_pressure"] *
    iucn_noaa["vuln"]
)

def risk_class(x):
    if x <= -27.6:
        return "low"
    elif x <= -10.9:
        return "medium"
    else:
        return "high"

iucn_noaa["eco_risk_class"] = iucn_noaa["eco_risk_score"].apply(risk_class)
iucn_noaa


Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m,distance_km,mp_effective_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure,eco_risk_score,eco_risk_class
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13104,2115.655853,244627.454273,244.627454,1.587223e+01,1.587223e-05,-4.799362,-6.214335,6.104704e-07,2.441882e-06,high
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13104,2115.655853,237016.489842,237.016490,1.848187e+01,1.848187e-05,-4.733254,-6.148227,7.108413e-07,2.843365e-06,high
2,Hubbsina turneri,CR,POINT (-11330943.801 2252583.424),2,4,13104,2115.655853,237235.709534,237.235710,1.840102e+01,1.840102e-05,-4.735158,-6.150131,7.077315e-07,2.830926e-06,high
3,Ictalurus mexicanus,VU,POINT (-11060055.613 2509223.803),3,2,13032,2820.874471,176956.192726,176.956193,8.191469e+01,8.191469e-05,-4.086638,-5.501612,3.150565e-06,6.301130e-06,high
4,Ictalurus mexicanus,VU,POINT (-11054025.436 2511525.235),4,2,13032,2820.874471,170585.675721,170.585676,9.304551e+01,9.304551e-05,-4.031305,-5.446278,3.578673e-06,7.157347e-06,high
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033,959.001335,1.809228e-10,1.809228e-16,-11.999921,-13.414895,3.846850e-14,7.693699e-14,high
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690,929.073411,3.291880e-10,3.291880e-16,-11.999857,-13.414830,3.847420e-14,7.694840e-14,high
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379,929.269937,3.278966e-10,3.278966e-16,-11.999858,-13.414831,3.847415e-14,7.694830e-14,high
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829,866.968072,1.139946e-09,1.139946e-15,-11.999505,-13.414479,3.850538e-14,7.701077e-14,high


In [33]:
iucn_noaa["eco_risk_score"].describe()

count    7.027300e+04
mean     3.512512e-06
std      1.170971e-04
min      3.846154e-14
25%      7.692308e-14
50%      7.808434e-12
75%      2.062438e-10
max      1.451458e-02
Name: eco_risk_score, dtype: float64

In [34]:
iucn_noaa["eco_risk_class"].value_counts()

eco_risk_class
high    70273
Name: count, dtype: int64

# Paso 7. Crear Dataset

In [35]:
iucn_noaa.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 70273 entries, 0 to 70272
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   sci_name             70273 non-null  object  
 1   redlistCategory      70273 non-null  object  
 2   geometry             70273 non-null  geometry
 3   iucn_id              70273 non-null  int64   
 4   vuln                 70273 non-null  int64   
 5   index_right          70273 non-null  int64   
 6   mp_pieces_m3         70273 non-null  float64 
 7   dist_m               70273 non-null  float64 
 8   distance_km          70273 non-null  float64 
 9   mp_effective_m3      70273 non-null  float64 
 10  mp_particles_ml      70273 non-null  float64 
 11  log_mp_particles_ml  70273 non-null  float64 
 12  log_toxic_pressure   70273 non-null  float64 
 13  toxic_pressure       70273 non-null  float64 
 14  eco_risk_score       70273 non-null  float64 
 15  eco_risk_class  

In [36]:
# Escalamos los valores para que sean más intuitivos de riesgo ecológico
scaler = MinMaxScaler(feature_range=(0, 100))



iucn_noaa["eco_risk_index"] = scaler.fit_transform(
    iucn_noaa[["eco_risk_score"]]
)

In [37]:
iucn_noaa

Unnamed: 0,sci_name,redlistCategory,geometry,iucn_id,vuln,index_right,mp_pieces_m3,dist_m,distance_km,mp_effective_m3,mp_particles_ml,log_mp_particles_ml,log_toxic_pressure,toxic_pressure,eco_risk_score,eco_risk_class,eco_risk_index
0,Hubbsina turneri,CR,POINT (-11296646.266 2258169.64),0,4,13104,2115.655853,244627.454273,244.627454,1.587223e+01,1.587223e-05,-4.799362,-6.214335,6.104704e-07,2.441882e-06,high,1.682364e-02
1,Hubbsina turneri,CR,POINT (-11329797.21 2252382.257),1,4,13104,2115.655853,237016.489842,237.016490,1.848187e+01,1.848187e-05,-4.733254,-6.148227,7.108413e-07,2.843365e-06,high,1.958971e-02
2,Hubbsina turneri,CR,POINT (-11330943.801 2252583.424),2,4,13104,2115.655853,237235.709534,237.235710,1.840102e+01,1.840102e-05,-4.735158,-6.150131,7.077315e-07,2.830926e-06,high,1.950401e-02
3,Ictalurus mexicanus,VU,POINT (-11060055.613 2509223.803),3,2,13032,2820.874471,176956.192726,176.956193,8.191469e+01,8.191469e-05,-4.086638,-5.501612,3.150565e-06,6.301130e-06,high,4.341240e-02
4,Ictalurus mexicanus,VU,POINT (-11054025.436 2511525.235),4,2,13032,2820.874471,170585.675721,170.585676,9.304551e+01,9.304551e-05,-4.031305,-5.446278,3.578673e-06,7.157347e-06,high,4.931141e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70268,Macrobrachium thysi,VU,POINT (-334164.413 573406.106),70268,2,11468,0.038661,959001.335033,959.001335,1.809228e-10,1.809228e-16,-11.999921,-13.414895,3.846850e-14,7.693699e-14,high,2.650813e-10
70269,Macrobrachium thysi,VU,POINT (-391958.153 613149.338),70269,2,11468,0.038661,929073.410690,929.073411,3.291880e-10,3.291880e-16,-11.999857,-13.414830,3.847420e-14,7.694840e-14,high,2.651599e-10
70270,Macrobrachium thysi,VU,POINT (-391057.579 612084.684),70270,2,11468,0.038661,929269.937379,929.269937,3.278966e-10,3.278966e-16,-11.999858,-13.414831,3.847415e-14,7.694830e-14,high,2.651592e-10
70271,Macrobrachium thysi,VU,POINT (-458950.223 602254.212),70271,2,11468,0.038661,866968.071829,866.968072,1.139946e-09,1.139946e-15,-11.999505,-13.414479,3.850538e-14,7.701077e-14,high,2.655896e-10


In [38]:
iucn_noaa["eco_risk_index"].describe()

count    7.027300e+04
mean     2.419988e-02
std      8.067545e-01
min      0.000000e+00
25%      2.649855e-10
50%      5.353217e-08
75%      1.420677e-06
max      1.000000e+02
Name: eco_risk_index, dtype: float64

In [39]:
q20, q50, q80 = iucn_noaa["eco_risk_index"].quantile([0.2, 0.5, 0.8])
def risk_class(x):
    if x <= q20:
        return "very low"
    elif x <= q50:
        return "low"
    elif x <= q80:
        return "medium"
    else:
        return "high"

iucn_noaa["eco_risk_class"] = iucn_noaa["eco_risk_index"].apply(risk_class)

iucn_noaa["eco_risk_class"].value_counts()

eco_risk_class
very low    21797
medium      21081
high        14055
low         13340
Name: count, dtype: int64

In [40]:
iucn_noaa.columns

Index(['sci_name', 'redlistCategory', 'geometry', 'iucn_id', 'vuln',
       'index_right', 'mp_pieces_m3', 'dist_m', 'distance_km',
       'mp_effective_m3', 'mp_particles_ml', 'log_mp_particles_ml',
       'log_toxic_pressure', 'toxic_pressure', 'eco_risk_score',
       'eco_risk_class', 'eco_risk_index'],
      dtype='object')

In [41]:
new_order = [
    "iucn_id",
    "sci_name",
    "vuln",
    "mp_particles_ml",
    "log_mp_particles_ml",
    "distance_km",
    "log_toxic_pressure",
    "eco_risk_score",
    "eco_risk_index",
    "eco_risk_class",
    "geometry"
]

dataset = iucn_noaa[new_order]


In [42]:
dataset

Unnamed: 0,iucn_id,sci_name,vuln,mp_particles_ml,log_mp_particles_ml,distance_km,log_toxic_pressure,eco_risk_score,eco_risk_index,eco_risk_class,geometry
0,0,Hubbsina turneri,4,1.587223e-05,-4.799362,244.627454,-6.214335,2.441882e-06,1.682364e-02,high,POINT (-11296646.266 2258169.64)
1,1,Hubbsina turneri,4,1.848187e-05,-4.733254,237.016490,-6.148227,2.843365e-06,1.958971e-02,high,POINT (-11329797.21 2252382.257)
2,2,Hubbsina turneri,4,1.840102e-05,-4.735158,237.235710,-6.150131,2.830926e-06,1.950401e-02,high,POINT (-11330943.801 2252583.424)
3,3,Ictalurus mexicanus,2,8.191469e-05,-4.086638,176.956193,-5.501612,6.301130e-06,4.341240e-02,high,POINT (-11060055.613 2509223.803)
4,4,Ictalurus mexicanus,2,9.304551e-05,-4.031305,170.585676,-5.446278,7.157347e-06,4.931141e-02,high,POINT (-11054025.436 2511525.235)
...,...,...,...,...,...,...,...,...,...,...,...
70268,70268,Macrobrachium thysi,2,1.809228e-16,-11.999921,959.001335,-13.414895,7.693699e-14,2.650813e-10,low,POINT (-334164.413 573406.106)
70269,70269,Macrobrachium thysi,2,3.291880e-16,-11.999857,929.073411,-13.414830,7.694840e-14,2.651599e-10,low,POINT (-391958.153 613149.338)
70270,70270,Macrobrachium thysi,2,3.278966e-16,-11.999858,929.269937,-13.414831,7.694830e-14,2.651592e-10,low,POINT (-391057.579 612084.684)
70271,70271,Macrobrachium thysi,2,1.139946e-15,-11.999505,866.968072,-13.414479,7.701077e-14,2.655896e-10,low,POINT (-458950.223 602254.212)


In [43]:
# Guardar

dataset.to_file("../data/dataset.gpkg", layer="ecol_risk", driver="GPKG")
dataset.to_parquet("../data/dataset.parquet")

dataset_csv = dataset.copy()
dataset_csv["geometry"] = dataset_csv.geometry.to_wkt()
dataset_csv.to_csv("../data/dataset_csv.csv", index=False)



KeyboardInterrupt

