In [123]:
import folium
import geopandas as gpd
from tqdm import tqdm
import pandas as pd
from shapely.geometry import Point
from geopandas.tools import sjoin_nearest
import geojson
import numpy as np

In [None]:
# Read the soil-site shapefile into a GeoDataFrame
shapefile_path = "../../data/raw/parametres_ponctuels/teneur_rmqs_pedo.shp"
soil_sites = gpd.read_file(filename=shapefile_path)


In [None]:
# Base map with a France-wide view
map_france = folium.Map(location=[46.2276, 2.2137], zoom_start=6)

# Each soil site is drawn as a small violet circle with a black outline
soil_marker = folium.Circle(
    radius=8, fill_color="violet", fill_opacity=0.8, color="black", weight=4
)
soil_layer = folium.GeoJson(
    soil_sites, name="geojson", popup=True, marker=soil_marker
)
soil_layer.add_to(map_france)

# Layer toggle widget
folium.LayerControl().add_to(map_france)

# To export the map to an HTML file instead of rendering it inline:
# map_france.save('map_of_france_with_shapefile.html')
map_france

In [None]:
def find_closest_geometry(coords, shapefile):
    """Match every location in ``coords`` to its nearest geometry in ``shapefile``.

    Parameters
    ----------
    coords : geopandas.GeoDataFrame
        Locations to look up, one row per location.
    shapefile : geopandas.GeoDataFrame
        Candidate geometries to search.

    Returns
    -------
    geopandas.GeoDataFrame
        The rows of ``coords`` joined with the attributes of the nearest row of
        ``shapefile`` and a ``distance`` column expressed in the units of the
        CRS of ``coords``. A location that is equidistant to several
        geometries appears once per tie.
    """
    # The nearest join compares raw coordinates, so both layers have to share
    # a CRS. Reproject the search layer when both CRS are known and differ.
    if (
        coords.crs is not None
        and shapefile.crs is not None
        and coords.crs != shapefile.crs
    ):
        shapefile = shapefile.to_crs(coords.crs)

    # A single vectorised join replaces the former per-row loop, which indexed
    # `coords[i]` (a column lookup on a frame, not row i), grew a frame with
    # repeated concat calls and finally returned the undefined name `df`.
    return sjoin_nearest(coords, shapefile, how="left", distance_col="distance")

In [None]:
# NOTE(review): `df_gpdf` is only created in the "Load Coordinates" section further
# down, so this cell has to be run after that section.
# The original second argument `gdf` is not defined anywhere in the notebook; the
# soil-site layer read from the shapefile is used as the search layer instead.
find_closest_geometry(df_gpdf, soil_sites)

## Load Coordinates

In [None]:
# Get site coordinates
site_coordinates = pd.read_csv("../00_process_nfi_data/nfi_final_sites_with_idp.csv")
site_coordinates[:3]

In [None]:
import geopandas

# Turn the x_fr / y_fr columns into point geometries (declared as EPSG:2154)
# and keep only the site id next to the geometry
df_gpdf = geopandas.GeoDataFrame(
    data=site_coordinates,
    geometry=geopandas.points_from_xy(
        x=site_coordinates["x_fr"], y=site_coordinates["y_fr"]
    ),
    crs="EPSG:2154",
)[["idp", "geometry"]]

df_gpdf

***

In [None]:
# Sites as a GeoDataFrame: points built from x_fr / y_fr (EPSG:2154)
df_sites = geopandas.GeoDataFrame(
    site_coordinates,
    geometry=geopandas.points_from_xy(
        site_coordinates["x_fr"], site_coordinates["y_fr"]
    ),
    crs="EPSG:2154",
)
# Only the site id and the geometry are needed from here on
df_sites = df_sites[["idp", "geometry"]]

df_sites.head(3)

In [None]:
# Preview the first three soil sites
soil_sites.head(3)

In [None]:
# For every site, find the nearest soil site and keep its attributes.

# Reproject both layers ONCE to the same metre-based CRS. `to_crs()` returns a new
# frame, so the result must be kept (the former `.copy().to_crs(inplace=True)`
# reprojected a throwaway copy and left the data unchanged).
# NOTE(review): EPSG:3857 (Web Mercator) distances are stretched away from the
# equator; use a local projected CRS if true ground distances are required.
soils_metric = soil_sites.to_crs(epsg=3857).reset_index(drop=True)
sites_metric = df_sites.to_crs(epsg=3857).reset_index(drop=True)

# Attribute table of the soil sites without the (reprojected) geometry
soil_attributes = pd.DataFrame(soils_metric.drop(columns=["geometry"]))

nearest_rows = []
for i in tqdm(range(len(sites_metric))):
    # Single-row slice holding the i-th site (`head(i)` returned the first i rows,
    # i.e. nothing at all for i == 0)
    i_site = sites_metric.iloc[[i]]

    # Distance from this site to every soil site, and the label of the closest one
    dist_to_site = soils_metric.distance(i_site.geometry.iloc[0])
    nearest_label = dist_to_site.idxmin()

    # One output row: site id, soil attributes, distance to that soil site
    tmp_soils = soil_attributes.loc[[nearest_label]].reset_index(drop=True)
    tmp_soils["rmqs_dist"] = dist_to_site.loc[nearest_label]
    tmp_soils.insert(0, "idp", i_site["idp"].iloc[0])
    nearest_rows.append(tmp_soils)

# Concatenate once at the end instead of growing a frame inside the loop
df_all = pd.concat(nearest_rows) if nearest_rows else pd.DataFrame()

In [None]:
# Show the per-site result accumulated by the loop above
df_all

In [None]:
# NOTE(review): scratch cell repeating the distance step of the loop above on the
# variables left over from its last iteration. After that loop `tmp_soils` has had
# its "geometry" column dropped, so this presumably fails — confirm and delete.
tmp_soils["rmqs_dist"] = tmp_soils.geometry.apply(lambda x: i_site.distance(x).min())

In [None]:
# Inspect the site id(s) held by the leftover loop variable `i_site`
i_site[["idp"]]

***

In [569]:
# Load soil data shapefile
shapefile_path = "../../data/raw/parametres_ponctuels/teneur_rmqs_pedo.shp"
soil_sites = gpd.read_file(shapefile_path)
# soil_sites.to_crs(epsg=3857, inplace=True)

In [570]:
# Get site coordinates
site_coordinates = pd.read_csv("../00_process_nfi_data/nfi_final_sites_with_idp.csv")
df_sites = site_coordinates.copy()[["idp", "x_fr", "y_fr"]]
df_sites = geopandas.GeoDataFrame(
    df_sites,
    geometry=geopandas.points_from_xy(df_sites.x_fr, df_sites.y_fr),
    crs="EPSG:2154",
)

In [220]:
# # Load site coordinates
# df_sites = geopandas.GeoDataFrame(
#     site_coordinates,
#     geometry=geopandas.points_from_xy(site_coordinates.x_fr, site_coordinates.y_fr),
#     crs="EPSG:2154",
# )[["idp", "geometry", "x_fr", "y_fr"]]
# df_sites.copy().to_crs(epsg=3857, inplace=True)
# df_sites.head(1)

In [240]:
# Attach grouping variable to df_sites repeating 1 to 10
df_sites["group"] = np.arange(len(df_sites)) % 10 + 1

# # Put df_sites into groups of ten and then create a list of the groups
groups = df_sites.groupby("group")
groups = [group for _, group in groups]
groups[0]

Unnamed: 0,idp,x_fr,y_fr,group
0,632691,267415.027897,6.820144e+06,1
10,713058,958311.094824,6.396701e+06,1
20,734911,477297.253701,6.822424e+06,1
30,741957,740940.094784,7.036101e+06,1
40,769165,638765.886586,6.533296e+06,1
...,...,...,...,...
39980,1131012,919329.773635,6.278157e+06,1
39990,1131165,593044.875913,6.203004e+06,1
40000,1131248,572069.624116,6.442935e+06,1
40010,1131342,834709.550002,6.780446e+06,1


In [450]:
# Display the soil-site table (attributes plus point geometry)
soil_sites

Unnamed: 0,id_site,date_compl,code_dept,no_campagn,argile_0,argile_1,argile_2,limon_fin_,limon_fin0,limon_fin1,...,na_ech_0,na_ech_1,na_ech_2,na_tot_0,na_tot_1,na_tot_2,p_ass_0,p_ass_1,p_ass_2,geometry
0,745,2000-06-27,45,1,,391.0,493.0,,111.0,95.0,...,,0.0380,0.0460,,0.090,0.090,,0.078,0.0060,POINT (282468.827 6104710.143)
1,743,2000-07-29,45,1,,81.0,280.0,,59.0,62.0,...,,0.0180,0.0320,,0.220,0.200,,0.084,0.0250,POINT (234750.599 6104702.172)
2,741,2000-08-01,45,1,,307.0,387.0,,319.0,282.0,...,,0.0780,0.1040,,0.470,0.440,,0.053,0.0080,POINT (187033.875 6104432.885)
3,742,2000-09-07,45,1,,154.0,183.0,,77.0,71.0,...,,0.0190,0.0360,,0.200,0.190,,0.183,0.0360,POINT (210891.863 6104600.169)
4,800,2000-09-20,45,1,,54.0,63.0,,77.0,97.0,...,,0.0600,0.0120,,0.290,0.340,,0.108,0.0680,POINT (282408.009 6080811.431)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2143,2343,2009-06-12,2B,1,,202.0,259.0,,285.0,348.0,...,,0.2030,0.7060,,2.150,2.100,,0.006,0.0025,POINT (1052556.698 5159827.114)
2144,2350,2009-06-12,2A,1,,110.0,,,94.0,,...,,0.3410,,,2.350,,,0.080,,POINT (984957.595 5122644.476)
2145,2334,2009-06-15,2A,1,,167.0,,,141.0,,...,,0.1310,,,1.540,,,0.021,,POINT (968872.028 5188669.220)
2146,1397,2009-06-18,73,1,,186.0,214.0,,136.0,126.0,...,,0.0308,0.0313,,0.892,0.847,,0.013,0.0060,POINT (649100.346 5767225.265)


In [592]:
def extract_closest_soil_polygon_parallel(group_in, soil_sites, metric_epsg=3857):
    """Attach the attributes of the nearest soil site to every site in ``group_in``.

    Despite its name the function runs sequentially over the rows of
    ``group_in``.

    Parameters
    ----------
    group_in : pandas.DataFrame
        Sites to process. Must provide the columns ``idp``, ``x_fr`` and
        ``y_fr``; the coordinates are interpreted as EPSG:2154.
    soil_sites : geopandas.GeoDataFrame
        Soil layer with a CRS set and a ``geometry`` column.
    metric_epsg : int, default 3857
        EPSG code of the CRS in which distances are measured.
        NOTE(review): the default EPSG:3857 (Web Mercator) stretches distances
        away from the equator; pass a local projected CRS (e.g. 2154) when
        true ground distances matter.

    Returns
    -------
    pandas.DataFrame
        One row per input site (each with index 0, as before): ``idp``,
        ``rmqs_distance`` and all non-geometry columns of the nearest soil
        site. An empty frame is returned for an empty ``group_in``.
    """
    if len(group_in) == 0:
        return pd.DataFrame()

    # Reproject both layers once, outside the loop; the soil layer in particular
    # is identical for every site.
    soils_metric = soil_sites.reset_index(drop=True).to_crs(epsg=metric_epsg)
    site_points = geopandas.GeoSeries(
        geopandas.points_from_xy(group_in["x_fr"], group_in["y_fr"]),
        crs="EPSG:2154",
    ).to_crs(epsg=metric_epsg)

    # Soil attributes without geometry (the geometry is in the working CRS and
    # would be misleading in the output).
    soil_attributes = pd.DataFrame(soils_metric.drop(columns=["geometry"]))

    closest_rows = []
    # Iterate over the rows of `group_in` itself. The previous version read
    # `groups[0]` from the notebook namespace and therefore ignored its argument.
    for idp, site_point in tqdm(
        zip(group_in["idp"], site_points), total=len(group_in)
    ):
        # Distance from this site to every soil site, then the closest one
        distances = soils_metric.distance(site_point, align=False)
        nearest_label = distances.idxmin()

        closest_row = soil_attributes.loc[[nearest_label]].reset_index(drop=True)
        closest_row.insert(0, "rmqs_distance", distances.loc[nearest_label])
        closest_row.insert(0, "idp", idp)
        closest_rows.append(closest_row)

    # Single concatenation instead of growing a frame inside the loop
    return pd.concat(closest_rows, axis=0)

In [593]:
# Test for one group
df_tmp = extract_closest_soil_polygon_parallel(groups[0][:10], soil_sites)
# df_tmp.insert(3, "rmqs_dist", df_tmp.pop("rmqs_dist"))
df_tmp

100%|██████████| 10/10 [00:00<00:00, 156.70it/s]


Unnamed: 0,idp,rmqs_distance,id_site,date_compl,code_dept,no_campagn,argile_0,argile_1,argile_2,limon_fin_,...,n_tot_2,na_ech_0,na_ech_1,na_ech_2,na_tot_0,na_tot_1,na_tot_2,p_ass_0,p_ass_1,p_ass_2
0,632691.0,5710.529608,546,2006-10-19,22,1,,187.0,177.0,,...,0.78,,0.102,0.057,,0.53,0.53,,0.181,0.022
0,713058.0,11162.362385,1773,2005-08-31,5,1,,235.0,,,...,,,0.031,,,0.96,,,0.021,
0,734911.0,12432.576757,559,2006-06-29,61,1,,288.0,150.0,,...,0.58,,0.106,0.059,,0.61,0.61,,0.022,0.003
0,741957.0,10649.880518,43,2003-02-20,59,1,,166.0,204.0,,...,0.678,,0.067,0.083,,0.73,0.68,,0.107,0.051
0,769165.0,15033.195627,1420,2006-11-07,23,1,,139.0,145.0,,...,1.18,,0.034,0.024,,0.96,0.94,,0.012,0.005
0,801068.0,2268.90351,1767,2007-03-15,26,1,,98.0,110.0,,...,0.37,,0.021,0.03,,0.12,0.12,,0.005,0.005
0,818182.0,12877.548312,1137,2003-05-16,58,1,,73.0,55.0,,...,0.39,,0.02,0.02,,1.2,1.15,,0.029,0.021
0,828947.0,14682.153076,1387,2006-09-20,42,1,,174.0,134.0,,...,0.7,,0.028,0.022,,0.97,1.09,,0.225,0.038
0,843230.0,7412.26533,1550,2007-06-20,43,1,,137.0,,,...,,,0.037,,,1.22,,,0.016,
0,857883.0,11142.663382,114,2008-06-17,8,1,,199.0,228.0,,...,0.92,,0.049,0.039,,0.47,0.46,,0.02,0.012


In [594]:
# Replace the repeated 0 index of the lookup result with a clean 0..n-1 index
df_tmp2 = pd.DataFrame(df_tmp).reset_index(drop=True)

# Right join on the site id: keep exactly the looked-up sites and bring back
# their coordinates from df_sites
df_final = pd.merge(left=df_sites, right=df_tmp2, how="right", on="idp")

# Rebuild point geometries from the x_fr / y_fr columns (EPSG:2154)
df_final_geometry = geopandas.GeoDataFrame(
    df_final,
    geometry=geopandas.points_from_xy(df_final["x_fr"], df_final["y_fr"]),
    crs="EPSG:2154",
)

df_final_geometry

Unnamed: 0,idp,x_fr,y_fr,rmqs_distance,id_site,date_compl,code_dept,no_campagn,argile_0,argile_1,...,na_ech_0,na_ech_1,na_ech_2,na_tot_0,na_tot_1,na_tot_2,p_ass_0,p_ass_1,p_ass_2,geometry
0,632691,267415.027897,6820144.0,5710.529608,546,2006-10-19,22,1,,187.0,...,,0.102,0.057,,0.53,0.53,,0.181,0.022,POINT (267415.028 6820144.094)
1,713058,958311.094824,6396701.0,11162.362385,1773,2005-08-31,5,1,,235.0,...,,0.031,,,0.96,,,0.021,,POINT (958311.095 6396700.692)
2,734911,477297.253701,6822424.0,12432.576757,559,2006-06-29,61,1,,288.0,...,,0.106,0.059,,0.61,0.61,,0.022,0.003,POINT (477297.254 6822424.140)
3,741957,740940.094784,7036101.0,10649.880518,43,2003-02-20,59,1,,166.0,...,,0.067,0.083,,0.73,0.68,,0.107,0.051,POINT (740940.095 7036100.997)
4,769165,638765.886586,6533296.0,15033.195627,1420,2006-11-07,23,1,,139.0,...,,0.034,0.024,,0.96,0.94,,0.012,0.005,POINT (638765.887 6533296.104)
5,801068,870402.944033,6397456.0,2268.90351,1767,2007-03-15,26,1,,98.0,...,,0.021,0.03,,0.12,0.12,,0.005,0.005,POINT (870402.944 6397455.775)
6,818182,721618.229602,6642508.0,12877.548312,1137,2003-05-16,58,1,,73.0,...,,0.02,0.02,,1.2,1.15,,0.029,0.021,POINT (721618.230 6642508.032)
7,828947,752795.452206,6548321.0,14682.153076,1387,2006-09-20,42,1,,174.0,...,,0.028,0.022,,0.97,1.09,,0.225,0.038,POINT (752795.452 6548321.094)
8,843230,748189.699584,6476423.0,7412.26533,1550,2007-06-20,43,1,,137.0,...,,0.037,,,1.22,,,0.016,,POINT (748189.700 6476423.351)
9,857883,834342.954757,6971335.0,11142.663382,114,2008-06-17,8,1,,199.0,...,,0.049,0.039,,0.47,0.46,,0.02,0.012,POINT (834342.955 6971335.345)


In [596]:
from folium.plugins import MousePosition

# Base map with a France-wide view
map_france = folium.Map(location=[46.2276, 2.2137], zoom_start=6)

# Layer 1: every soil site (black).
# `soil_sites_copy` and its "geom" column were not created by any cell of the
# notebook (leftover kernel state), so they are built here from `soil_sites`.
# NOTE(review): "geom" is presumably a text version of the geometry for the
# popup — confirm that WKT is the intended content.
soil_sites_copy = soil_sites[["id_site", "geometry"]].copy()
soil_sites_copy["geom"] = soil_sites_copy.geometry.apply(
    lambda geom: geom.wkt if geom is not None else ""
)

folium.GeoJson(
    soil_sites_copy,
    # Distinct layer names so the two entries can be told apart in LayerControl
    name="soil sites",
    popup=folium.GeoJsonPopup(fields=["id_site", "geom"]),
    marker=folium.Circle(
        radius=8, fill_color="black", fill_opacity=0.8, color="black", weight=4
    ),
).add_to(map_france)

# Layer 2: the looked-up sites with their matched soil site (red).
# The popup fields are cast to strings before being handed to folium.
df_final_geometry["id_site"] = df_final_geometry["id_site"].astype(str)
df_final_geometry["rmqs_distance"] = df_final_geometry["rmqs_distance"].astype(str)
df_final_geometry["idp"] = df_final_geometry["idp"].astype(str)

folium.GeoJson(
    df_final_geometry,
    name="matched sites",
    popup=folium.GeoJsonPopup(
        fields=["id_site", "rmqs_distance", "idp", "x_fr", "y_fr"]
    ),
    marker=folium.Circle(
        radius=8, fill_color="red", fill_opacity=0.8, color="red", weight=4
    ),
).add_to(map_france)

# Layer toggle widget
folium.LayerControl().add_to(map_france)

# Show the coordinates under the mouse cursor
MousePosition().add_to(map_france)

# To export the map to an HTML file instead of rendering it inline:
# map_france.save('map_of_france_with_shapefile.html')
map_france