# Performing spatial operations on OpenStreetMap data

Import the packages needed to read the OpenStreetMap road data and the population GeoPackage, plus a helper to time each step.

In [1]:
from libadalina_analytics.graph_extraction.readers import OpenStreetMapReader
from libadalina_core.readers import geopackage_to_dataframe
import pathlib
import os
from libadalina_analytics.utils import Timing # to monitor time

In [2]:
# Root folder of the sample datasets, taken from the SAMPLES_DIR environment
# variable; with the empty default the paths below are relative to the
# current working directory.
base_path = pathlib.Path(os.getenv("SAMPLES_DIR", ""))

# Both reads share one Timing block, so a single elapsed time is reported
# for the two datasets together.
with Timing('Time loading datasets: {}'):
    roads_shapefile = base_path.joinpath('milano', 'gis_osm_roads_free_1.shp')
    osm_df = OpenStreetMapReader().read(str(roads_shapefile))

    census_geopackage = base_path.joinpath("population-north-italy", "nord-italia.gpkg")
    # Only the 'T' and 'geometry' columns of the "census2021" read are kept.
    # NOTE(review): 'T' is presumably the total-population count and
    # "census2021" the GeoPackage layer name -- confirm against the dataset.
    population = geopackage_to_dataframe(str(census_geopackage), "census2021")[['T', 'geometry']]

Time loading datasets: 3.348461866378784


In [3]:
 from libadalina_analytics.graph_extraction.builders import build_graph
 from libadalina_core.spatial_operators import AggregationFunction, AggregationType

 # Build the road graph from the OSM dataframe joined with the census data.
 # The node/edge summary is printed inside the Timing block, so it shows up
 # before the elapsed-time line.
 with Timing('Time building graph: {}'):
     # NOTE(review): presumably sums the census 'T' column into a 'population'
     # attribute, apportioned by the overlap with the right-hand geometry --
     # confirm against the libadalina_core documentation.
     aggregations = [
         AggregationFunction("T", AggregationType.SUM, 'population', proportional='geometry_right'),
     ]
     graph = build_graph(
         osm_df,
         name='milan_road',
         joined_df=population,
         buffer_radius_meters=50,
         aggregate_functions=aggregations,
     )
     print(f'Number of nodes: {len(graph.nodes)}, Number of edges: {len(graph.edges)}')

https://artifacts.unidata.ucar.edu/repository/unidata-all added as a remote repository with the name: repo-1
Ivy Default Cache set to: /home/marco/.ivy2/cache
The jars for the packages stored in: /home/marco/.ivy2/jars
org.apache.sedona#sedona-spark-3.3_2.12 added as a dependency
org.datasyslab#geotools-wrapper added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-cebcd20a-2bfc-4503-bc96-e3618cc38b5e;1.0
	confs: [default]
	found org.apache.sedona#sedona-spark-3.3_2.12;1.7.1 in central
	found org.apache.sedona#sedona-common;1.7.1 in central
	found org.apache.commons#commons-math3;3.6.1 in central
	found org.locationtech.jts#jts-core;1.20.0 in central
	found org.wololo#jts2geojson;0.16.1 in central
	found org.locationtech.spatial4j#spatial4j;0.8 in central


:: loading settings :: url = jar:file:/home/marco/Workspace/miniconda/v3/envs/adalina-analytics/lib/python3.10/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


	found com.google.geometry#s2-geometry;2.0.0 in central
	found com.google.guava#guava;25.1-jre in central
	found com.google.code.findbugs#jsr305;3.0.2 in central
	found org.checkerframework#checker-qual;2.0.0 in central
	found com.google.errorprone#error_prone_annotations;2.1.3 in central
	found com.google.j2objc#j2objc-annotations;1.1 in central
	found org.codehaus.mojo#animal-sniffer-annotations;1.14 in central
	found com.uber#h3;4.1.1 in central
	found net.sf.geographiclib#GeographicLib-Java;1.52 in central
	found com.github.ben-manes.caffeine#caffeine;2.9.2 in central
	found org.checkerframework#checker-qual;3.10.0 in central
	found com.google.errorprone#error_prone_annotations;2.5.1 in central
	found org.apache.sedona#sedona-spark-common-3.3_2.12;1.7.1 in central
	found org.apache.sedona#shade-proto;1.7.1 in central
	found org.xerial#sqlite-jdbc;3.41.2.2 in central
	found commons-lang#commons-lang;2.6 in central
	found graphframes#graphframes;0.8.3-spark3.4-s_2.12 in spark-package

25/08/16 14:47:05 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
                                                                                

25/08/16 14:47:26 WARN JoinQuery: UseIndex is true, but no index exists. Will build index on the fly.
25/08/16 14:47:26 WARN TaskSetManager: Stage 8 contains a task of very large size (1664 KiB). The maximum recommended task size is 1000 KiB.


                                                                                

Number of nodes: 334976, Number of edges: 407751
Time building graph: 46.139728307724


In [4]:
from libadalina_analytics.graph_extraction.utils.search_address import get_node_of_address
import networkx as nx

# Address lookup, routing and cost evaluation are all measured together.
with Timing('Time computing shortest path: {}'):
    # Resolve the two street names to nodes of the graph.
    source = get_node_of_address(graph, 'luini')
    destination = get_node_of_address(graph, 'gorini')

    # Route using the 'population' edge attribute as the weight, then total
    # that same attribute along the route that was found.
    shortest_path = nx.shortest_path(
        graph,
        source=source,
        target=destination,
        weight='population',
    )
    cost = nx.path_weight(graph, path=shortest_path, weight='population')
    print(f'Shortest path from {source} to {destination} with population weight: {cost}')

Shortest path from 50364484-2778-4d72-8aad-a32ce4988763 to b1f1fbde-5856-40ad-95a5-4e6cb569fda6 with population weight: 5610.460230326131
Time computing shortest path: 0.3617713451385498
