In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import networkx as nx
import osmnx
from datetime import datetime,timedelta
from shapely.geometry import Point, LineString
import os
from fastprogress.fastprogress import master_bar, progress_bar
pd.set_option('mode.chained_assignment','raise')



In [2]:
# Model parameters are read from a CSV with one row per parameter:
# the 'variable' column holds the name and the 'value' column its value.
parameters = pd.read_csv('parameter.csv').set_index('variable')

# Walking speed in m/s (e.g. 1.25).
walk_speed = parameters.loc['walk_speed', 'value']
# Upper bound on walking time, read from the 'max_travel_t' row
# (presumably in seconds, since it is multiplied by walk_speed to get metres -- TODO confirm).
max_walk_t = parameters.loc['max_travel_t', 'value']

In [3]:
def load_street_network(dir_street, city):
    '''
    Loads the street network of a city from '<dir_street><city>_streets.gpkg'
    (layers 'nodes' and 'edges') and builds an undirected graph projected
    to EPSG:28992.

           Parameters:
                    dir_street (str): directory where to find the street network
                                      (concatenated as-is with the file name).
                    city (str): city for which the street network is requested.

            Returns:
                    net (nx.MultiGraph): street network, projected to EPSG:28992.
                    gdf_nodes_proj (gpd.GeoDataFrame): Nodes of the street network (EPSG:28992),
                                                       with 'osmid' kept as a regular column.
                    gdf_edges_proj (gpd.GeoDataFrame): Edges of the street network (EPSG:28992),
                                                       with 'u', 'v', 'key' kept as regular columns.
    '''
    gpkg_path = dir_street + city + '_streets.gpkg'
    nodes_raw = gpd.read_file(gpkg_path, layer = 'nodes')
    edges_raw = gpd.read_file(gpkg_path, layer = 'edges')

    # The projected frames are derived before indexing, so that the identifiers
    # stay available as columns for the callers.
    gdf_nodes_proj = nodes_raw.to_crs('EPSG:28992')
    gdf_edges_proj = edges_raw.to_crs('EPSG:28992')

    # osmnx expects nodes indexed by osmid and edges indexed by (u, v, key).
    nodes_indexed = nodes_raw.set_index('osmid')
    edges_indexed = edges_raw.set_index(['u', 'v', 'key'])
    assert all(frame.index.is_unique for frame in (nodes_indexed, edges_indexed))

    # The graph is declared in EPSG:4326 (assumes the gpkg is stored in that CRS),
    # then projected.
    net = osmnx.graph_from_gdfs(nodes_indexed,
                                edges_indexed,
                                graph_attrs = {'crs': 'EPSG:4326', 'simplified': False})
    net = osmnx.project_graph(net, to_crs = 'EPSG:28992')

    # Edges have no direction for pedestrians.
    if net.is_directed():
        net = osmnx.utils_graph.get_undirected(net)

    return net,gdf_nodes_proj,gdf_edges_proj

In [4]:
def find_nearest_edge(centres,gdf_nodes_proj,gdf_edges_proj,max_walk_t_direct,walk_speed):
    '''
    This function finds the nearest edge for each point in a GeoDataFrame, 
        measures the straight-line distance to the two nodes of the edge, 
        and stores that information in the returned GeoDataFrame.
    The input frames are left unmodified.
    
           Parameters:
                    centres (gpd.GeoDataFrame): centres of the spatial units
                                                (point geometries, with a column 'id_unit').
                    gdf_nodes_proj (gpd.GeoDataFrame): Nodes of the street network
                                                       (point geometries, with a column 'osmid').
                    gdf_edges_proj (gpd.GeoDataFrame): Edges of the street network
                                                       (with columns 'u' and 'v').
                    max_walk_t_direct (float): Maximum walking time between a centre and an edge (s).
                    walk_speed (float): Walking speed (m/s).

            Returns:
                    centres (gpd.GeoDataFrame): one row per 'id_unit', with columns
                        'node_1'/'node_2' (end nodes of the closest edge),
                        'x_centroid'/'y_centroid', 'x_node_k'/'y_node_k',
                        'distance_1'/'distance_2' and 'highway' (= 'centroid').
                        Centres without any edge within max_walk_t_direct*walk_speed
                        keep missing values in the node and distance columns.
    '''

    centres = centres.copy()
    # Left join: centres with no edge within reach are kept, with missing 'u'/'v'.
    centres = centres.sjoin_nearest(gdf_edges_proj,
                                    max_distance = max_walk_t_direct*walk_speed,
                                    how = 'left')

    centres = centres[['id_unit','geometry','u','v']]

    centres = centres.rename(columns = {'u':'node_1',
                                        'v':'node_2'})

    centres['x_centroid'] = centres.geometry.x
    centres['y_centroid'] = centres.geometry.y

    # Coordinates of the nodes, built as a local lookup table so that
    # the caller's gdf_nodes_proj does not get extra columns as a side effect.
    node_xy = gdf_nodes_proj[['osmid']].assign(x_node = gdf_nodes_proj.geometry.x,
                                               y_node = gdf_nodes_proj.geometry.y)

    # Attaching the coordinates of both end nodes of the closest edge.
    for k in ('1', '2'):
        centres = centres.merge(node_xy,
                                left_on = 'node_' + k,
                                right_on = 'osmid',
                                how = 'left').drop(columns = 'osmid')
        centres = centres.rename(columns = {'x_node': 'x_node_' + k,
                                            'y_node': 'y_node_' + k})

    # Euclidean distance from the centre to each end node.
    for k in ('1', '2'):
        centres['distance_' + k] = np.sqrt((centres['x_centroid'] - centres['x_node_' + k])**2 + 
                                           (centres['y_centroid'] - centres['y_node_' + k])**2)
    
    centres['highway'] = 'centroid'
    
    # The sjoin_nearest method can provide several edges (if equidistant).
    # Keeping a single row per unit: the one with the smallest distance_1
    # (ties broken by distance_2).
    centres = centres.sort_values(by = ['distance_1','distance_2']).drop_duplicates(subset = 'id_unit')
    
    return centres

In [5]:
def connecting_pois_to_streets(poi, net, poi_id, city):
    '''
    This function adds the pois as nodes of the street network and links each of them
    to the two end nodes of its closest edge. The network is modified in place.
    
           Parameters:
                    poi (gpd.GeoDataFrame): Points-of-interest, with columns poi_id, 
                                            'x_centroid', 'y_centroid', 'node_1', 'node_2',
                                            'distance_1', 'distance_2' and 'highway'.
                                            Rows with a missing 'distance_1' are ignored.
                    net (nx.MultiGraph): street network.
                    poi_id (str): name of the column holding the poi ids, 
                                  used as node ids in the network.
                    city (str): Name of the city considered (only used in the error message).

            Returns:
                    net (nx.MultiGraph): network with pois connected to the streets.

            Raises:
                    Exception: if some poi ids are already used as node ids in the network.
    '''
    # Only the pois matched to a street edge can be connected.
    poi = poi.loc[poi['distance_1'].notna()].copy()
    
    list_nodes = list(net.nodes)
    
    if poi[poi_id].isin(list_nodes).any():
        # If the poi ids are also node ids of some nodes in the street network,
        # we cannot use the poi ids to identify pois in the street network (overlapping).
        raise Exception('Some {0} are equal to some node_id in the street network in the city of {1}'.format(poi_id, city))
    
    poi_ids = poi[poi_id].to_list()
    
    # Nodes are given in the format needed by the networkx function "add_nodes_from":
    # a list of (node_id, attributes) tuples.
    # The lists are built positionally, so the result does not depend on the index of poi
    # (which is not contiguous after filtering).
    node_attrs = poi[['x_centroid',
                      'y_centroid',
                      'highway']].rename(columns = {'x_centroid': 'x',
                                                    'y_centroid': 'y'}).to_dict('records')
    net.add_nodes_from(list(zip(poi_ids, node_attrs)))
    
    # Edges linking the pois to both end nodes of their closest edge,
    # in the format needed by the networkx function "add_edges_from":
    # a list of (u, v, key, attributes) tuples, with key 0.
    for node_col, distance_col in (('node_1', 'distance_1'),
                                   ('node_2', 'distance_2')):
        edge_attrs = poi[[distance_col,
                          'highway']].rename(columns = {distance_col: 'length'}).to_dict('records')
        net.add_edges_from(list(zip(poi_ids,
                                    poi[node_col].to_list(),
                                    [0] * len(poi),
                                    edge_attrs)))
    
    # The network must be undirected, as the edges have no direction for pedestrians.
    if net.is_directed():
        net = osmnx.utils_graph.get_undirected(net)
        
    return net

In [6]:
def find_walk_t(net, poi_o, poi_d, max_walk_t, walk_speed,mb):  
    '''
    This function finds the walking paths possible between pois, 
    looking at the street network, and determines the walking times.
    Nothing is written to disk: the result is returned to the caller.
    
           Parameters:
                    net (nx.MultiGraph): street network, containing all pois, 
                                         with edge attribute 'length'.
                    poi_o (pd.Series): ids of the origin pois.
                    poi_d (pd.Series): ids of the destination pois.
                    max_walk_t (float): maximum walking time allowed.
                    walk_speed (float): walk speed used to compute walk times.
                    mb: parent progress bar (fastprogress master bar).
            Returns:
                    walk_t (pd.DataFrame): walking time from poi_o to poi_d, with columns
                                           'from_<poi_o.name>', 'to_<poi_d.name>' and 'walk_t'.
                                           Only pairs within max_walk_t are listed.
    '''
    col_from = 'from_{}'.format(poi_o.name)
    col_to = 'to_{}'.format(poi_d.name)
    
    # If we compute the walking times from a set of points to the same set of points,
    # we initialize the result by setting the walking time from a point to itself to 0.
    # This is important if a point is not connected to the street network.
    # In such case, the walking time from that point to itself would not be defined.
    if poi_o.equals(poi_d):
        walk_t = pd.DataFrame({col_from: poi_o,
                               col_to: poi_d,
                               'walk_t': [0]*len(poi_o)})
    
    else: 
        walk_t = pd.DataFrame({col_from: pd.Series([], dtype='int'),
                               col_to: pd.Series([], dtype='int'),
                               'walk_t': pd.Series([], dtype='float')})

    # Loop invariants: search radius (distance) and destination lookup.
    max_walk_dist = max_walk_t * walk_speed
    destinations = set(poi_d)
    
    # (origin, destination, walking time) tuples, collected for all origins and
    # turned into a DataFrame once (concatenating inside the loop is quadratic).
    records = []

    for k in progress_bar(range(len(poi_o)), parent=mb):
        origin = poi_o.iloc[k]
        
        # Pois that could not be connected to the street network are skipped.
        if not net.has_node(origin):
            continue
        
        # Finds the shortest path length from the poi to all nodes within reach.
        nodes_reachable = nx.single_source_dijkstra_path_length(net,
                                                                source = origin, 
                                                                cutoff = max_walk_dist,
                                                                weight = 'length')

        # Spots the destination pois among the nodes reachable.
        records.extend((origin, node_id, distance / walk_speed)
                       for node_id, distance in nodes_reachable.items()
                       if node_id in destinations)
    
    if records:
        walk_t = pd.concat([walk_t,
                            pd.DataFrame(records, columns = [col_from, col_to, 'walk_t'])],
                           ignore_index = True)
    
    # The initial 0-time rows are found again by the search (origin to itself).
    walk_t = walk_t.drop_duplicates()

    return walk_t

In [7]:
dir_centroid = '../data/processed_data/zones_delineation/'
dir_street = '../data/raw_data/street_data/'
dir_shortest_path = '../data/processed_data/shortest_path/'

# Entries of dir_centroid that are not zone files.
# Filtering (instead of list.remove) does not fail when one of them is absent.
not_zone_files = {'edited', 'README_zones_delineation.mkd'}
list_files = [f for f in os.listdir(dir_centroid) if f not in not_zone_files]

# Making sure the results can be written before starting the long computations.
os.makedirs(dir_shortest_path, exist_ok = True)

# Initialization of the progress bar.
mb = master_bar(range(len(list_files)))

for i in mb:
    file = list_files[i]
    # The city name is the file name without its first 3 and last 5 characters
    # (presumably a 3-character prefix and the '.gpkg' extension -- TODO confirm).
    city_name = file[3:-5]
    mb.main_bar.comment = city_name
    output_file = dir_shortest_path + city_name + '_walk_t_unit_to_unit.csv'
    
    # Cities already processed are skipped.
    if os.path.isfile(output_file):
        continue
    
    net,gdf_nodes_proj,gdf_edges_proj = load_street_network(dir_street, city_name)
    centres = gpd.read_file(dir_centroid + file, layer = 'centroid')
    # find_nearest_edge also sets the column 'highway' to 'centroid'.
    centres = find_nearest_edge(centres,gdf_nodes_proj,gdf_edges_proj,max_walk_t,walk_speed)

    net = connecting_pois_to_streets(centres,net, 'id_unit', city_name)

    walk_t = find_walk_t(net, centres['id_unit'].copy(), 
                         centres['id_unit'].copy(),
                         max_walk_t, walk_speed,mb)
    
    walk_t.to_csv(output_file, index = False)
    # Reported only once the result is actually on disk.
    mb.write(f'Finished for {city_name}.')