# libraries and general settings

In [None]:
# general libraries
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from collections import defaultdict

# analysis libraries
import osmnx as ox
import networkx as nx

In [None]:
# directories
# Root folder of the input datasets, relative to the current working directory.
# The trailing slash matters: the paths below are built by plain string
# concatenation (datasets_dir + "...").
datasets_dir = "../../datasets/"

In [None]:
# Output layout: one root folder for the distance matrices plus one
# sub-folder per polygon variant (original, 100 m buffer, 200 m buffer).
results_dir = f"{datasets_dir}matrix_computation/"
subfolders = ["original", "buffer_100", "buffer_200"]

# create the root first, then every variant folder; existing folders are left untouched
os.makedirs(results_dir, exist_ok=True)
for variant_dir in [os.path.join(results_dir, name) for name in subfolders]:
    os.makedirs(variant_dir, exist_ok=True)


In [None]:
# OSM data points; `polygon_id` is cast to int so it can be compared with the polygon ids
data = gpd.read_file(datasets_dir + "geopoints_data.geojson")
data["polygon_id"] = data["polygon_id"].astype(int)

# POLYGONS data, kept in two versions:
# - polygons_unproj: as stored in the file, used when querying osmnx
#   (osmnx requires unprojected coordinates)
# - polygons_proj: reprojected to EPSG:32632 so that buffers can be expressed in metres
polygons_unproj = gpd.read_file(datasets_dir + "bologna_polygons.geojson")
polygons_proj = polygons_unproj.to_crs('EPSG:32632')  # to_crs already tags the result with the new CRS

# check reference coordinates system
print(f"OSM data coordinates: {data.crs}")
print(f"POLYGONS data coordinates projected: {polygons_proj.crs}")
print(f"POLYGONS data coordinates unprojected: {polygons_unproj.crs}")

# functions settings: matrix computation
The process is divided into three steps:
1. build the correspondence between OSM nodes and OSMnx graph nodes: the street distance matrix is computed on nodes belonging to a graph, but we actually want to use our OSM nodes, so each OSM node is matched to its nearest graph node (shortest distance)
2. compute the street distance between every pair of nodes inside the graph
3. build the matrix using the OSM nodes as reference

In [None]:
def matrix_computation(polygon, graph, subset, default_distance_km=10.0):
    """Build the street-distance matrix (in km) between the points of ``subset``.

    Every point is snapped to its nearest node of ``graph``. The distance
    between two points is the snap distance of the first point, plus the
    shortest street path between the two snapped nodes, plus the snap distance
    of the second point.

    Parameters
    ----------
    polygon
        Identifier of the polygon being processed. It is not used by the
        computation and is kept only so that existing callers keep working.
    graph
        Street network as returned by ``ox.graph_from_polygon``. Edge
        ``length`` values are treated as metres.
    subset
        GeoDataFrame of point geometries with a ``node_id`` column. The graph
        is projected to ``subset.crs`` before the points are snapped.
    default_distance_km
        Value left in the cells of point pairs that have no connecting street
        path. The default (10.0) is meant to be larger than any real distance,
        so unreachable pairs are easy to spot.

    Returns
    -------
    pandas.DataFrame
        Square matrix whose index and columns are ``subset['node_id']``, with
        distances in km rounded to 3 decimals and a zero diagonal.
    """
    osm_ids = subset['node_id'].to_list()
    n_points = len(osm_ids)

    # the matrix is filled as a plain numpy array and wrapped in a DataFrame at
    # the end: this avoids writing through `DataFrame.values`, which is not
    # guaranteed to be a writable view of the frame
    distances_km = np.full((n_points, n_points), float(default_distance_km))
    if not osm_ids:
        # nothing to snap: return an empty matrix instead of querying the graph
        return pd.DataFrame(distances_km, index=osm_ids, columns=osm_ids)

    # project the graph to the same coordinates of the data points
    current_graph = ox.project_graph(graph, to_crs=subset.crs)

    # for each point of subset, find the closest graph node and the distance to it
    nearest_nodes, snap_m = ox.distance.nearest_nodes(
        current_graph,
        subset.geometry.x.values,
        subset.geometry.y.values,
        return_dist=True)
    nearest_nodes = np.asarray(nearest_nodes).tolist()
    snap_km = np.asarray(snap_m, dtype=float) / 1000

    # correspondence graph node -> rows of the matrix (several points may snap
    # to the same graph node)
    rows_by_node = defaultdict(list)
    for row, graph_node in enumerate(nearest_nodes):
        rows_by_node[graph_node].append(row)
    rows_by_node = {node: np.asarray(rows) for node, rows in rows_by_node.items()}

    street_graph = _shortest_edge_graph(current_graph)

    # one Dijkstra run per snapped node gives the distance to every other node
    # at once; nodes missing from the result are unreachable and keep the default
    for source_node, source_rows in rows_by_node.items():
        reachable_m = nx.single_source_dijkstra_path_length(
            street_graph, source_node, weight='length')
        source_snap_km = snap_km[source_rows][:, None]

        for target_node, target_rows in rows_by_node.items():
            path_m = reachable_m.get(target_node)
            if path_m is None:
                continue

            # street distance plus the two snap distances (point -> graph node)
            total_km = (source_snap_km + path_m / 1000) + snap_km[target_rows][None, :]
            distances_km[np.ix_(source_rows, target_rows)] = np.round(total_km, 3)

    np.fill_diagonal(distances_km, 0.0)  # a point is at distance zero from itself

    return pd.DataFrame(distances_km, index=osm_ids, columns=osm_ids)


def _shortest_edge_graph(graph):
    """Collapse ``graph`` into an undirected simple graph keeping the shortest edge per node pair.

    ``nx.Graph(graph)`` would keep the attributes of whichever parallel edge is
    visited last, which can overestimate street distances. Edges without a
    ``length`` count as 1, mirroring networkx's default edge weight.
    """
    simple = nx.Graph()
    simple.add_nodes_from(graph.nodes)
    for u, v, length in graph.edges(data='length', default=1):
        if not simple.has_edge(u, v) or length < simple[u][v]['length']:
            simple.add_edge(u, v, length=length)
    return simple

# STREET DISTANCE MATRIX COMPUTATION ORIGINAL POLYGONS

In [None]:
# Compute and store the street-distance matrix of every original (unbuffered) polygon.
for el in polygons_proj.id.unique():

    # select the polygon by its `id` value rather than by index label: the two
    # only coincide when the ids happen to equal the default 0..n-1 index
    geom = polygons_unproj[polygons_unproj.id == el].geometry.iloc[0]
    subset_data = data[data['polygon_id'] == el]

    # GRAPH EXTRACTION (osmnx requires unprojected coordinates)
    current_graph = ox.graph_from_polygon(polygon=geom, network_type='all', simplify=True)

    # MATRIX CONSTRUCTION
    current_matrix = matrix_computation(el, current_graph, subset_data)
    print(f"distance matrix for polygon {el} computed with {len(current_matrix.columns)} elements")

    # SAVE THE RESULTS
    # EXAMPLE PATH: ../../datasets/matrix_computation/original/polygon4.csv
    current_name = f"{results_dir}original/polygon{el}.csv"
    current_matrix.to_csv(current_name, index=True)  # keep the index so the matrix reference ids are stored too
    print(f"distance matrix for polygon {el} stored at: {current_name}")
    

# STREET DISTANCE MATRIX COMPUTATION BUFFERED 100M

In [None]:
# Compute and store the street-distance matrix of every polygon enlarged by a 100 m buffer.
for el in polygons_proj.id.unique():

    # select the polygon by its `id` value rather than by index label: the two
    # only coincide when the ids happen to equal the default 0..n-1 index
    geom_proj = polygons_proj[polygons_proj.id == el].geometry.iloc[0].buffer(100)  # add a 100 meters buffer to the polygon
    buffered_polygon = gpd.GeoDataFrame(geometry=[geom_proj], crs=polygons_proj.crs)

    buffered_polygon_unproj = buffered_polygon.to_crs('EPSG:4326')  # osmnx requires unprojected coordinates

    # obtain the new subset: every data point falling inside the buffered polygon
    # NOTE(review): sjoin compares raw coordinates, so `data` is assumed to share
    # the CRS of the projected polygons; confirm with the CRS printed at load time
    subset_data_buffered = gpd.sjoin(data, buffered_polygon, predicate="within")

    # GRAPH EXTRACTION
    current_graph = ox.graph_from_polygon(polygon=buffered_polygon_unproj.geometry.iloc[0], network_type='all', simplify=True)

    # MATRIX CONSTRUCTION
    current_matrix = matrix_computation(el, current_graph, subset_data_buffered)
    print(f"distance matrix for polygon {el} computed with {len(current_matrix.columns)} elements")

    # SAVE THE RESULTS
    # EXAMPLE PATH: ../../datasets/matrix_computation/buffer_100/polygon4.csv
    current_name = f"{results_dir}buffer_100/polygon{el}.csv"
    current_matrix.to_csv(current_name, index=True)  # keep the index so the matrix reference ids are stored too
    print(f"distance matrix for polygon {el} stored at: {current_name}")
    print("")
    

# STREET DISTANCE MATRIX COMPUTATION BUFFERED 200M

In [None]:
# Compute and store the street-distance matrix of every polygon enlarged by a 200 m buffer.
for el in polygons_proj.id.unique():

    # select the polygon by its `id` value rather than by index label: the two
    # only coincide when the ids happen to equal the default 0..n-1 index
    geom_proj = polygons_proj[polygons_proj.id == el].geometry.iloc[0].buffer(200)  # add a 200 meters buffer to the polygon
    buffered_polygon = gpd.GeoDataFrame(geometry=[geom_proj], crs=polygons_proj.crs)

    buffered_polygon_unproj = buffered_polygon.to_crs('EPSG:4326')  # osmnx requires unprojected coordinates

    # obtain the new subset: every data point falling inside the buffered polygon
    # NOTE(review): sjoin compares raw coordinates, so `data` is assumed to share
    # the CRS of the projected polygons; confirm with the CRS printed at load time
    subset_data_buffered = gpd.sjoin(data, buffered_polygon, predicate="within")

    # GRAPH EXTRACTION
    current_graph = ox.graph_from_polygon(polygon=buffered_polygon_unproj.geometry.iloc[0], network_type='all', simplify=True)

    # MATRIX CONSTRUCTION
    current_matrix = matrix_computation(el, current_graph, subset_data_buffered)
    print(f"distance matrix for polygon {el} computed with {len(current_matrix.columns)} elements")

    # SAVE THE RESULTS
    # EXAMPLE PATH: ../../datasets/matrix_computation/buffer_200/polygon4.csv
    current_name = f"{results_dir}buffer_200/polygon{el}.csv"
    current_matrix.to_csv(current_name, index=True)  # keep the index so the matrix reference ids are stored too
    print(f"distance matrix for polygon {el} stored at: {current_name}")
    print("")