In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

from tqdm import tqdm

# Root folder; the two folders below are derived from it.
training_folder = '../'
# Folder the GIS layers (shapefiles) are read from.
gis_folder = f'{training_folder}gis/corse/'
# Folder the computed tables are written to.
data_folder = f'{training_folder}data/corse/'

# Chargement des données

In [2]:
# Read the three layers (nodes, stops, communes) from the GIS folder,
# in that order.
nodes, stops, communes = (
    gpd.read_file(gis_folder + shapefile)
    for shapefile in (
        r'osmdrive/nodes/nodes.shp',
        r'stops_corse.shp',
        r'communes_corse.shp',
    )
)

In [3]:
# Display the first row only: enough to check the columns of the node layer.
nodes.head(n=1)

Unnamed: 0,highway,osmid,geometry
0,,14468319,POINT (9.3061536 41.5501734)


In [4]:
# Display the first row only: enough to check the columns of the stop layer.
stops.head(n=1)

Unnamed: 0,name,stop,geometry
0,"Gare de Campo dell'Oro (Ajaccio), corse",0,POINT (8.777424999999999 41.928633)


In [5]:
# Display the first row only: enough to check the columns of the commune layer.
communes.head(n=1)

Unnamed: 0,insee,geometry
0,2B047,"POLYGON ((8.993128003604662 42.32553598698841,..."


# Matrices des distances 
## stop -> node et commune -> node

In [6]:
def _id_geometry_pairs(layer, id_column):
    """Return one ``[identifier, geometry]`` list per row of ``layer``."""
    return layer[[id_column, 'geometry']].values.tolist()


# Plain Python lists of [identifier, geometry], one per layer.
nodetuples = _id_geometry_pairs(nodes, 'osmid')
stoptuples = _id_geometry_pairs(stops, 'stop')
communetuples = _id_geometry_pairs(communes, 'insee')

## stop node

In [7]:
# Distance between every stop and every node, stored as one
# (node id, stop id, distance) tuple per pair.
# NOTE(review): the layer previews show lon/lat-looking coordinates, so these
# distances are presumably expressed in degrees rather than metres -- confirm
# before comparing them with real-world thresholds.
stop_node_distances = []

# Split the node pairs once, so that each stop costs a single vectorised
# GeoSeries.distance call instead of one Python-level call per node.
node_ids = [node_id for node_id, _ in nodetuples]
node_geometries = gpd.GeoSeries(
    [node_geometry for _, node_geometry in nodetuples]
)

for stop_id, stop_geometry in tqdm(stoptuples):
    distances_to_stop = node_geometries.distance(stop_geometry).tolist()
    # Same pair order as a nested loop: stops outermost, nodes innermost.
    stop_node_distances.extend(
        (node_id, stop_id, distance)
        for node_id, distance in zip(node_ids, distances_to_stop)
    )

# Turn the list of tuples into a DataFrame, closest pairs first.
# sort_values returns a new frame, so no in-place mutation is needed.
stop_node_distance_dataframe = pd.DataFrame(
    stop_node_distances,
    columns=['stop_node', 'stop', 'distance']
).sort_values(by='distance')

100%|██████████████████████████████████████████████████████████████████████████████████| 44/44 [00:04<00:00, 10.93it/s]


In [8]:
# The frame is sorted by distance, so these are the five closest stop/node pairs.
stop_node_distance_dataframe.head(5)

Unnamed: 0,stop_node,stop,distance
381020,1039032311,35,9.1e-05
15987,276984551,1,0.000149
470229,2223467332,43,0.000197
453143,1117866543,41,0.00025
369439,2219924837,33,0.000303


## commune node
On fait la même chose avec les communes : on calcule la distance entre chaque nœud et chaque commune.

In [9]:
# Distance between every commune and every node, stored as one
# (node id, commune id, distance) tuple per pair.
# NOTE(review): the layer previews show lon/lat-looking coordinates, so these
# distances are presumably expressed in degrees rather than metres -- confirm
# before comparing them with real-world thresholds.
commune_node_distances = []

# Split the node pairs once, so that each commune costs a single vectorised
# GeoSeries.distance call instead of one Python-level call per node.
node_ids = [node_id for node_id, _ in nodetuples]
node_geometries = gpd.GeoSeries(
    [node_geometry for _, node_geometry in nodetuples]
)

for commune_id, commune_geometry in tqdm(communetuples):
    distances_to_commune = node_geometries.distance(commune_geometry).tolist()
    # Same pair order as a nested loop: communes outermost, nodes innermost.
    commune_node_distances.extend(
        (node_id, commune_id, distance)
        for node_id, distance in zip(node_ids, distances_to_commune)
    )

# Turn the list of tuples into a DataFrame, closest pairs first.
# sort_values returns a new frame, so no in-place mutation is needed.
commune_node_distance_dataframe = pd.DataFrame(
    commune_node_distances,
    columns=['commune_node', 'commune', 'distance']
).sort_values(by='distance')

commune_node_distance_dataframe.head()  # a node lying inside a commune has a distance of zero

100%|████████████████████████████████████████████████████████████████████████████████| 360/360 [00:52<00:00,  5.49it/s]


Unnamed: 0,commune_node,commune,distance
4592264,276560530,2A004,0.0
4499746,2232176836,2B121,0.0
2000721,60359368,2B159,0.0
2000722,60359376,2B159,0.0
3277220,2504344476,2A038,0.0


In [16]:
# Row count of the commune/node table: it is fairly large, so it gets stored
# as HDF, which is faster and lighter than CSV.
commune_node_distance_dataframe.shape[0]

5180760

In [14]:
# Both tables are written to the same HDF file, each under its own key.
# `key` is passed by keyword: pandas 2.1 deprecated passing it positionally
# and later releases make every argument after the path keyword-only.
distance_store = data_folder + 'distance_to_node.hdf'
commune_node_distance_dataframe.to_hdf(distance_store, key='communes')
stop_node_distance_dataframe.to_hdf(distance_store, key='stops')