In [255]:
import pandas as pd
import numpy as np



In [256]:
noaa = pd.read_csv('data/GeoDataFrame/dataframe/df_mar.csv')
iucn = pd.read_csv('data/GeoDataFrame/dataframe/df_sp.csv')
ecotaxa = pd.read_csv('data/metadata_img.csv')
tomexhuman = pd.read_csv('data/processed/human_summary_processed.csv')
tomexspecies = pd.read_csv('data/processed/species_summary_processed.csv')

  ecotaxa = pd.read_csv('data/metadata_img.csv')


In [257]:
print("LISTA COMPLETA DE VARIABLES")
for i, columna in enumerate(ecotaxa.columns):
    print(f"{i+1:2d}. {columna}")

LISTA COMPLETA DE VARIABLES
 1. object_id
 2. object_lat
 3. object_lon
 4. object_date
 5. object_time
 6. object_link
 7. object_depth_min
 8. object_depth_max
 9. object_annotation_status
10. object_annotation_person_name
11. object_annotation_person_email
12. object_annotation_date
13. object_annotation_time
14. object_annotation_category
15. object_annotation_hierarchy
16. complement_info
17. object_lat_end
18. object_lon_end
19. object_area
20. object_mean
21. object_stddev
22. object_mode
23. object_min
24. object_max
25. object_x
26. object_y
27. object_xm
28. object_ym
29. object_perim.
30. object_bx
31. object_by
32. object_width
33. object_height
34. object_major
35. object_minor
36. object_angle
37. object_circ.
38. object_feret
39. object_intden
40. object_median
41. object_skew
42. object_kurt
43. object_%area
44. object_xstart
45. object_ystart
46. object_area_exc
47. object_fractal
48. object_skelarea
49. object_slope
50. object_histcum1
51. object_histcum2
52. object_h

In [258]:
GRID_SIZE = 0.5  # grados
LAT_MIN, LAT_MAX = -90, 90
LON_MIN, LON_MAX = -180, 180

In [259]:
lat_bins = np.arange(LAT_MIN, LAT_MAX, GRID_SIZE)
lon_bins = np.arange(LON_MIN, LON_MAX, GRID_SIZE)

grid = pd.DataFrame([
    {
        "grid_id": f"{lat}_{lon}",
        "lat_bin": lat,
        "lon_bin": lon,
        "lat_center": lat + GRID_SIZE / 2,
        "lon_center": lon + GRID_SIZE / 2,
    }
    for lat in lat_bins
    for lon in lon_bins
])


In [260]:
# Asignar puntos NOAA a la grilla
def assign_bin(series, grid_size):
    return np.floor(series / grid_size) * grid_size

noaa["lat_bin"] = assign_bin(noaa["lat"], GRID_SIZE)
noaa["lon_bin"] = assign_bin(noaa["lon"], GRID_SIZE)

noaa["grid_id"] = (
    noaa["lat_bin"].astype(str) + "_" + noaa["lon_bin"].astype(str)
)

In [261]:
noaa['mesh_size_mm']

0        0.3350
1        0.3350
2        0.3350
3        0.3350
4        0.3350
          ...  
19317    0.0007
19318    0.0007
19319    0.0007
19320    0.0007
19321    0.0007
Name: mesh_size_mm, Length: 19322, dtype: float64

In [262]:
# Agregar features por celda
noaa_features = (
    noaa
    .groupby("grid_id")
    .agg(
        mp_mean_concentration=("microplastics_measurement", "mean"),
        mp_max_concentration=("microplastics_measurement", "max"),
        mp_count=("microplastics_measurement", "count"),
        mp_type_count=("mesh_size_mm", "nunique")
    )
    .reset_index()
)


In [263]:
# IUCN – VULNERABILIDAD (PUNTOS)
iucn["lat_bin"] = assign_bin(iucn["lat"], GRID_SIZE)
iucn["lon_bin"] = assign_bin(iucn["lon"], GRID_SIZE)

iucn["grid_id"] = (
    iucn["lat_bin"].astype(str) + "_" + iucn["lon_bin"].astype(str)
)


In [264]:
# Categorías → pesos numéricos
risk_weights = {
    "CR": 3,
    "EN": 2,
    "VU": 1,
    "NT": 0
    }

iucn["risk_weight"] = iucn["redlistCategory"].map(risk_weights)

In [265]:
# Agregar por celda
iucn_features = (
    iucn
    .groupby("grid_id")
    .agg(
        iucn_species_count=("sci_name", "nunique"),
        iucn_mean_risk=("risk_weight", "mean"),
        iucn_max_risk=("risk_weight", "max")
    )
    .reset_index()
)


In [266]:
# ECOTAXA – MORFOLOGÍA
ecotaxa["lat_bin"] = assign_bin(ecotaxa["object_lat"], GRID_SIZE)
ecotaxa["lon_bin"] = assign_bin(ecotaxa["object_lon"], GRID_SIZE)

ecotaxa["grid_id"] = (
    ecotaxa["lat_bin"].astype(str) + "_" + ecotaxa["lon_bin"].astype(str)
)


In [267]:
def normalize_shape_ecotaxa(s):
    if pd.isna(s):
        return np.nan
    s = s.lower()
    if "fiber" in s or "fibre" in s:
        return "Fiber"
    if "fragment" in s:
        return "Fragment"
    if "sphere" in s or "bead" in s:
        return "Sphere"
    return "Other"

ecotaxa["shape_norm"] = ecotaxa["object_annotation_category"].apply(
    normalize_shape_ecotaxa
)

In [268]:
eco_shape_counts = (
    ecotaxa
    .groupby(["grid_id", "shape_norm"])
    .size()
    .reset_index(name="count")
)


In [269]:
eco_total_counts = (
    eco_shape_counts
    .groupby("grid_id")["count"]
    .sum()
    .reset_index(name="total_count")
)


In [270]:
eco_shape_ratios = (
    eco_shape_counts
    .merge(eco_total_counts, on="grid_id", how="left")
)

eco_shape_ratios["shape_ratio"] = (
    eco_shape_ratios["count"] / eco_shape_ratios["total_count"]
)


In [271]:
eco_shape_wide = (
    eco_shape_ratios
    .pivot(
        index="grid_id",
        columns="shape_norm",
        values="shape_ratio"
    )
    .fillna(0)
    .reset_index()
)


In [272]:
eco_features = (
    ecotaxa
    .groupby("grid_id")
    .agg(
        eco_count=("object_area", "count"),
        eco_mean_size=("object_area", "mean"),
        eco_small_ratio=("object_area", lambda x: (x < 10000).mean()),
        eco_fiber_ratio=("object_annotation_category", lambda x: (x == "fiber").mean())
    )
    .reset_index()
)

eco_features["ecotaxa_present"] = 1



In [273]:
tomexhuman['Effect'].unique()

array(['No', 'Yes'], dtype=object)

In [274]:
def effect_binary(s):
    if pd.isna(s):
        return np.nan
    return 1 if s == "Yes" else 0


In [275]:

tomexspecies["effect_bin"] = tomexspecies["Effect"].apply(effect_binary)
tomexhuman["effect_bin"] = tomexhuman["Effect"].apply(effect_binary)


In [276]:
# Paso 3 — Riesgo por FORMA (Shape)
# Esto nos permitirá asignar peso alto a fibers, por ejemplo.
shape_risk = (
    tomexspecies
    .groupby("Shape")
    .agg(
        shape_effect_rate=("effect_bin", "mean"),
        n_studies=("effect_bin", "count")
    )
    .reset_index()
)

In [277]:
# Construir PESOS DE RIESGO operativos
SHAPE_RISK_WEIGHTS = (
    shape_risk
    .set_index("Shape")["shape_effect_rate"]
    .to_dict()
)


In [278]:
# Paso 5 — Riesgo continuo por TAMAÑO REAL (μm), esto conecta con eco_mean_size
# Esto demuestra: A menor tamaño → mayor tasa de efecto (si ocurre).
size_risk = (
    tomexspecies
    .groupby("Size Category")
    .agg(
        size_effect_rate=("effect_bin", "mean"),
        n_studies=("effect_bin", "count")
    )
    .reset_index()
)

SIZE_RISK_WEIGHTS = (
    size_risk
    .set_index("Size Category")["size_effect_rate"]
    .to_dict()
)



In [279]:
# Riesgo por POLÍMERO
# No se proyecta directamente a la grilla aún, pero: sirve para análisis y sirve para justificar pesos futuros
polymer_risk = (
    tomexspecies
    .dropna(subset=["Polymer", "effect_bin"])
    .groupby("Polymer")
    .agg(
        polymer_effect_rate=("effect_bin", "mean"),
        n_studies=("effect_bin", "count")
    )
    .reset_index()
    .sort_values("polymer_effect_rate", ascending=False)
)


In [280]:
eco_shape_wide.columns


Index(['grid_id', 'Fiber', 'Fragment', 'Other'], dtype='object', name='shape_norm')

In [281]:
for shape in ["Fiber", "Fragment", "Sphere"]:
    if shape not in eco_shape_wide.columns:
        eco_shape_wide[shape] = 0.0


In [282]:
eco_shape_wide["hazard_morphology"] = (
    eco_shape_wide["Fiber"]    * SHAPE_RISK_WEIGHTS.get("Fiber", 0)
  + eco_shape_wide["Fragment"] * SHAPE_RISK_WEIGHTS.get("Fragment", 0)
  + eco_shape_wide["Sphere"]   * SHAPE_RISK_WEIGHTS.get("Sphere", 0)
)


In [283]:
grid_features = (
    grid
    .merge(noaa_features, on="grid_id", how="left")
    .merge(iucn_features, on="grid_id", how="left")
    .merge(eco_features, on="grid_id", how="left")
)



“La integración de la información experimental se realizó a través de un índice de peligrosidad aplicado a variables espaciales agregadas, evitando la asignación directa de datos no georreferenciados.”

In [284]:
grid_features.columns


Index(['grid_id', 'lat_bin', 'lon_bin', 'lat_center', 'lon_center',
       'mp_mean_concentration', 'mp_max_concentration', 'mp_count',
       'mp_type_count', 'iucn_species_count', 'iucn_mean_risk',
       'iucn_max_risk', 'eco_count', 'eco_mean_size', 'eco_small_ratio',
       'eco_fiber_ratio', 'ecotaxa_present'],
      dtype='object')

In [285]:
grid_features


Unnamed: 0,grid_id,lat_bin,lon_bin,lat_center,lon_center,mp_mean_concentration,mp_max_concentration,mp_count,mp_type_count,iucn_species_count,iucn_mean_risk,iucn_max_risk,eco_count,eco_mean_size,eco_small_ratio,eco_fiber_ratio,ecotaxa_present
0,-90.0_-180.0,-90.0,-180.0,-89.75,-179.75,,,,,,,,,,,,
1,-90.0_-179.5,-90.0,-179.5,-89.75,-179.25,,,,,,,,,,,,
2,-90.0_-179.0,-90.0,-179.0,-89.75,-178.75,,,,,,,,,,,,
3,-90.0_-178.5,-90.0,-178.5,-89.75,-178.25,,,,,,,,,,,,
4,-90.0_-178.0,-90.0,-178.0,-89.75,-177.75,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259195,89.5_177.5,89.5,177.5,89.75,177.75,,,,,,,,,,,,
259196,89.5_178.0,89.5,178.0,89.75,178.25,,,,,,,,,,,,
259197,89.5_178.5,89.5,178.5,89.75,178.75,,,,,,,,,,,,
259198,89.5_179.0,89.5,179.0,89.75,179.25,,,,,,,,,,,,


In [286]:
grid_features["has_noaa"] = grid_features["mp_count"].notna().astype(int)
grid_features["has_iucn"] = grid_features["iucn_species_count"].notna().astype(int)
grid_features["has_ecotaxa"] = grid_features["eco_count"].notna().astype(int)


In [287]:
grid_features["has_any_data"] = (
    (grid_features["has_noaa"] == 1)
    | (grid_features["has_iucn"] == 1)
    | (grid_features["has_ecotaxa"] == 1)
).astype(int)


In [288]:
count_cols = [
    "eco_count",
    "mp_count",
    "mp_type_count",
    "iucn_species_count"
]

grid_features[count_cols] = grid_features[count_cols].fillna(0)


In [289]:
grid_features = grid_features.merge(
    eco_shape_wide[["grid_id", "hazard_morphology"]],
    on="grid_id",
    how="left"
)

grid_features["hazard_morphology"] = (
    grid_features["hazard_morphology"].fillna(0)
)

grid_features["hazard_pressure"] = (
    grid_features["mp_mean_concentration"].fillna(0)
)

grid_features["hazard_index"] = (
    grid_features["hazard_pressure"]
    + grid_features["hazard_morphology"]
)



In [290]:
analysis_df = grid_features[grid_features["has_any_data"] == 1].copy()


In [291]:
analysis_df[["hazard_pressure", "hazard_morphology"]].describe()
analysis_df[["hazard_pressure", "hazard_morphology"]].corr()

Unnamed: 0,hazard_pressure,hazard_morphology
hazard_pressure,1.0,-0.002359
hazard_morphology,-0.002359,1.0


In [292]:
np.allclose(
    analysis_df["hazard_index"].values,
    grid_features.loc[analysis_df.index, "hazard_index"].values
)

True

### Camino A, Análisis por capas