# Setting up

In [1]:
import geopandas as gpd
import os
import pandas as pd
from geopy.distance import geodesic
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Project root: every relative "./data/..." path used in this notebook resolves against it.
# The CHAP1_PROJECT_DIR environment variable overrides the machine-specific default below,
# so the notebook can be run on another computer without editing this cell.
PROJECT_DIR = os.environ.get(
    "CHAP1_PROJECT_DIR",
    "/media/marieke/Shared/Chap-1/Model/Scripts/Chap_1_2018-2024",
)
os.chdir(PROJECT_DIR)

# Load data

In [2]:
# LOAD INPUT FILES

# Replicate-group buffers (GeoPackage)
buffers_path = "./data/processed_data/eDNA/mtdt_5.gpkg"
buff = gpd.read_file(buffers_path)

# Coast layer (GeoJSON)
shore_path = "./data/raw_data/predictors/Dist_shore/med_regions.geojson"
shore_gdf = gpd.read_file(shore_path)

# Compute buffer centroids

In [3]:
# Reproject the buffer polygons to EPSG:2154
buff = buff.to_crs(epsg=2154)

# Keep a copy of the polygon geometries so they can be put back before saving
original_buffers = buff.geometry.copy()

# Build the centroid layer: same attribute columns as the buffers,
# with each polygon swapped for its centroid
buffer_centroids = buff.geometry.centroid
points_gdf = buff.copy()
points_gdf['geometry'] = buffer_centroids



# Compute distance to shore

In [7]:
# REPROJECT TO EPSG:2154
# Rebuild the centroid geometry from the saved buffers first. The end of this cell
# writes the buffer polygons back into points_gdf, so without this reset a second
# run of the cell would measure distances from the polygons instead of the centroids.
# Assignment aligns on the index, so rows filtered out by a previous run stay out.
points_gdf = points_gdf.copy()
points_gdf['geometry'] = original_buffers.centroid
points_gdf = points_gdf.to_crs(epsg=2154)
shore_gdf = shore_gdf.to_crs(epsg=2154)

# MERGE THE REGION POLYGONS INTO ONE GEOMETRY
# union_all() supersedes the deprecated unary_union (GeoPandas >= 1.0);
# fall back to unary_union on older installations.
shore = shore_gdf.union_all() if hasattr(shore_gdf, "union_all") else shore_gdf.unary_union

# Validate geometries: keep only rows whose centroid is valid and non-empty.
# .copy() makes the filtered frame independent so later column assignments
# do not operate on a slice of the unfiltered frame.
has_usable_geometry = points_gdf.geometry.is_valid & ~points_gdf.geometry.is_empty
points_gdf = points_gdf[has_usable_geometry].copy()
if shore.is_empty:
    raise ValueError("The shoreline geometry is empty after merging. Check the input GeoJSON file.")







# DISTANCE COMPUTATION USING SHAPELY'S distance()
# Unusable geometries yield NaN instead of raising
def calculate_distance(point, shore):
    """Return the distance from ``point`` to ``shore``, or NaN if ``point`` is unusable.

    Parameters
    ----------
    point : geometry or None
        Object exposing ``is_empty``, ``is_valid`` and ``distance`` (e.g. a shapely Point).
    shore : geometry
        Target geometry handed to ``point.distance``.

    Returns
    -------
    float
        ``point.distance(shore)``, expressed in the units of the geometries'
        coordinates; NaN when ``point`` is None, empty or invalid.
    """
    # A missing geometry has no is_empty / is_valid attributes: treat it like an invalid one
    if point is None or point.is_empty or not point.is_valid:
        return float('nan')
    return point.distance(shore)

# Distance from every centroid to the merged shore geometry
centroid_to_shore = points_gdf.geometry.apply(lambda geom: calculate_distance(geom, shore))

# A distance of exactly 0 is stored as 1 instead (see the note below)
points_gdf["dist_shore_m"] = centroid_to_shore.replace(0, 1)


# ---------------------------------------------------------------------------------------------
# Methodological note
# - Distance to shore is measured from the centroid of each replicate group's buffer.
# - Some centroids fall on land (~ 67/792 centroids). Nothing was actually sampled on land:
#   those centroids end up there only because transects were simplified into straight lines
#   between their start and end points. Their distance to shore is therefore set to 1 meter.
# ---------------------------------------------------------------------------------------------


# SHOW RESULTS
print(points_gdf["dist_shore_m"])






# SAVE

# Swap the centroids back for the original buffer polygons before writing
points_gdf['geometry'] = original_buffers

# Write the result as a GeoPackage
output_path = "./data/processed_data/predictors/mtdt_5_dist-shore.gpkg"
points_gdf.to_file(output_path, driver="GPKG")



0       43.899096
1        1.000000
2        4.632290
3      109.018054
4        1.000000
          ...    
787     20.480453
788      1.000000
789    300.710796
790    101.061584
791      1.000000
Name: dist_shore_m, Length: 792, dtype: float64


In [8]:
# Check that every 0 was replaced by 1.
# The value_counts() printout is truncated for long results, so a leftover 0 could be
# hidden in the elided rows; count the two values of interest explicitly as well.

counts = points_gdf["dist_shore_m"].value_counts()
print(counts)

n_zero = int((points_gdf["dist_shore_m"] == 0).sum())
n_one = int((points_gdf["dist_shore_m"] == 1).sum())
print(f"distance == 0: {n_zero} | distance == 1: {n_one}")

# Fail loudly if the replacement did not take effect
assert n_zero == 0, "dist_shore_m still contains zeros; the 0 -> 1 replacement was not applied"


dist_shore_m
1.000000       66
59.256330      11
297.272206     10
57.604770       8
1026.162217     6
               ..
885.984862      1
685.946551      1
850.939588      1
395.758102      1
101.061584      1
Name: count, Length: 646, dtype: int64
