In [56]:
import geopandas as gpd
import pandas as pd
import numpy as np

from tqdm import tqdm

# Root of the training material; the GIS and data folders are both derived
# from it by plain string concatenation (hence the trailing slashes).
training_folder = r'../'
gis_folder, data_folder = (
    training_folder + subfolder for subfolder in (r'gis/corse/', r'data/corse/')
)

# Chargement des données

In [57]:
# Read the three input layers (nodes, stops, communes) from shapefiles
# located under gis_folder.
nodes, stops, communes = (
    gpd.read_file(gis_folder + shapefile)
    for shapefile in (
        r'osmdrive/nodes/nodes.shp',
        r'stops_corse.shp',
        r'communes_corse.shp',
    )
)

In [58]:
# Assign EPSG:4326 as the CRS of every layer, then reproject all of them to
# EPSG:2154 (presumably so that distances are expressed in projected units
# rather than degrees — TODO confirm).
# NOTE(review): the {'init': ...} dict form is deprecated in recent pyproj
# releases — check the installed version before modernising it.
for layer in (nodes, stops, communes):
    layer.crs = {'init' :'epsg:4326'}

nodes, stops, communes = (
    layer.to_crs({'init': 'epsg:2154'}) for layer in (nodes, stops, communes)
)

In [59]:
# Preview the first row of the reprojected nodes layer.
nodes.head(1)

Unnamed: 0,highway,osmid,geometry
0,,14468319,POINT (1226992.573164942 6070912.43982139)


In [60]:
# Preview the first row of the reprojected stops layer.
stops.head(1)

Unnamed: 0,name,stop,geometry
0,"Gare de Campo dell'Oro (Ajaccio), corse",0,POINT (1179810.002044853 6109555.658381497)


In [61]:
# Preview the first row of the reprojected communes layer.
communes.head(1)

Unnamed: 0,insee,geometry
0,2B047,"POLYGON ((1194340.897468092 6154934.151709693,..."


# Matrices des distances 
## stop -> node et commune -> node

In [62]:
# Flatten each layer into a plain Python list of [identifier, geometry]
# pairs so the distance loops can iterate without going through pandas.
nodetuples, stoptuples, communetuples = (
    layer[[id_column, 'geometry']].values.tolist()
    for layer, id_column in (
        (nodes, 'osmid'),
        (stops, 'stop'),
        (communes, 'insee'),
    )
)

## stop -> node

In [63]:
# Distance between every stop and every node, one
# (node id, stop id, distance) record per pair. tqdm tracks the outer
# iteration over stops.
stop_node_distances = [
    (node_id, stop_id, node_geometry.distance(stop_geometry))
    for stop_id, stop_geometry in tqdm(stoptuples)
    for node_id, node_geometry in nodetuples
]

# Turn the list of tuples into a DataFrame, ordered by increasing distance.
stop_node_distance_dataframe = pd.DataFrame(
    stop_node_distances,
    columns=['stop_node', 'stop', 'distance'],
).sort_values(by='distance')

100%|██████████████████████████████████████████████████████████████████████████████████| 44/44 [00:05<00:00,  8.73it/s]


In [64]:
# Show the closest stop/node pairs (the frame is sorted by distance).
stop_node_distance_dataframe.head()

Unnamed: 0,stop_node,stop,distance
504391,1039032311,35,5.995474
22723,276984551,1,14.60591
613994,2223467332,43,16.357759
506676,6484401341,35,23.935246
598853,1117866543,41,25.052728


## commune -> node
On fait la même chose avec les communes : on calcule la distance de chaque nœud à chaque commune.

In [65]:
# Distance between every commune geometry and every node, one
# (node id, commune id, distance) record per pair. tqdm tracks the outer
# iteration over communes.
commune_node_distances = [
    (node_id, commune_id, node_geometry.distance(commune_geometry))
    for commune_id, commune_geometry in tqdm(communetuples)
    for node_id, node_geometry in nodetuples
]

# Build the DataFrame and order it by increasing distance.
commune_node_distance_dataframe = pd.DataFrame(
    commune_node_distances,
    columns=['commune_node', 'commune', 'distance'],
).sort_values(by='distance')

# When the node lies inside the commune, the distance is zero.
commune_node_distance_dataframe.head()

100%|████████████████████████████████████████████████████████████████████████████████| 360/360 [01:06<00:00,  4.21it/s]


Unnamed: 0,commune_node,commune,distance
6763946,1779713597,2A163,0.0
1530168,300445272,2A247,0.0
1530169,300445275,2A247,0.0
1530170,300445298,2A247,0.0
1530171,300445313,2A247,0.0


In [66]:
# The table is fairly large, so it will be stored in HDF format, which is
# faster and lighter than CSV. Display its number of rows.
commune_node_distance_dataframe.shape[0]

906880

In [69]:
def top(df, n=5, column='data'):
    """Return the first ``n`` rows of ``df`` once sorted ascending by ``column``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to rank; it is not modified.
    n : int, default 5
        Number of rows to keep.
    column : str, default 'data'
        Name of the column to sort by.

    Returns
    -------
    pandas.DataFrame
        The ``n`` rows with the smallest values in ``column``, keeping their
        original index labels.
    """
    ranked = df.sort_values(by=column)
    return ranked[:n]
# Keep only the 20 closest nodes of each commune.
# Sorting by (commune, distance) and taking the per-group head leaves
# 'commune' as an ordinary column on a flat index. A groupby(...).apply(...)
# result would also carry 'commune' as an index level, which makes pandas
# raise an "is both an index level and a column label" ambiguity error as
# soon as this cell is re-run or the frame is grouped/merged on 'commune'.
commune_node_distance_dataframe = (
    commune_node_distance_dataframe
    .sort_values(by=['commune', 'distance'])
    .groupby('commune')
    .head(20)
)

In [70]:
# Persist both distance tables in a single HDF5 file: the first call opens
# the file with mode='w', the second with mode='a' so that the 'stops' key
# is added next to 'communes'.
# The HDF key is passed by keyword: recent pandas versions deprecate passing
# anything but the path positionally to to_hdf.
commune_node_distance_dataframe.to_hdf(data_folder + 'distance_to_node.hdf', key='communes', index=False, mode='w')
stop_node_distance_dataframe.to_hdf(data_folder + 'distance_to_node.hdf', key='stops', index=False, mode='a')